// Copyright ManoMotion AB 2023

#include "OVBaseInference.hpp"

#include <cassert>
#include <iostream>

using namespace std;
using namespace manomotion;
using namespace cv;

OVBaseInference::OVBaseInference() {
    TAG = "OVBaseInference";
    is_read_from_file = false;
    _batch_size = 1;
    input_data = nullptr;
}

void OVBaseInference::normalizeImage(Mat image_frame, bool zero_center, float* tensor_buffer, bool pytorch_format) {
    const int height = image_frame.rows;
    const int width = image_frame.cols;

    // Select the normalization range: [-1, 1] (zero-centered) or [0, 1].
    float div, sub;
    if (zero_center) {
        div = 127.5f;
        sub = 1.0f;
    }
    else {
        div = 255.0f;
        sub = 0.0f;
    }

    if (!pytorch_format) {
        // NHWC layout: write pixels row by row, channels interleaved.
        for (int i = 0; i < height; ++i) {
            uchar* image_ptr = image_frame.data + i * width * channels;
            for (int j = 0; j < width; ++j) {
                for (int c = 0; c < channels; ++c) {
                    *tensor_buffer++ = (float)(*image_ptr++) / div - sub;
                }
            }
        }
    }
    else {
        // NCHW (PyTorch) layout: write one full channel plane at a time.
        for (int c = 0; c < channels; ++c) {
            for (int i = 0; i < height; ++i) {
                for (int j = 0; j < width; ++j) {
                    uchar* image_ptr = image_frame.data + i * width * channels + j * channels + c;
                    *tensor_buffer++ = (float)(*image_ptr) / div - sub;
                }
            }
        }
    }
}

ov::InferRequest OVBaseInference::loadModel() {
    // Read a network in IR, PaddlePaddle, or ONNX format.
    if (is_read_from_file) {
        try {
            model = core.read_model(model_name);
        }
        catch (const std::exception& e) {
            cout << "Error while reading model: " << e.what() << endl;
        }
    }
    else {
        // Read the model from the embedded files.
        const char* embedded_file_name = model_name.c_str();
        size_t file_size;
        const char* _model_data;
        if (is_multiple_embedded_files)
            _model_data = find_embedded_file_multi(embedded_file_name, &file_size);
        else
            _model_data = find_embedded_file(embedded_file_name, &file_size);

        // Make a string from the read data and parse it as a model (no external weights).
        std::string model_str(_model_data, _model_data + file_size);
        model = core.read_model(model_str, ov::Tensor());
    }

    model->reshape({ input_shape_new });

    try {
        /*compiled_model = core.compile_model(model, "AUTO",
            ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY),
            // ov::inference_num_threads(8),
            ov::hint::inference_precision("f16"));*/
        compiled_model = core.compile_model(model, "AUTO",
            ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY),
            ov::inference_num_threads(8));
        //compiled_model = core.compile_model(model, "HETERO", ov::device::priorities("GPU,CPU"), ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)); // crashes at infer
    }
    catch (const std::exception& e) {
        cout << "An exception occurred while compiling the model: " << e.what() << '\n';
    }

    auto input_shape = model->input(input_info_idx).get_shape();
    int size_full = 1;
    for (int64_t _size : input_shape) {
        dim.push_back(_size);
        size_full *= _size;
    }
    input_name = *begin(model->input(input_info_idx).get_names());
    output_name = *begin(model->output(output_info_idx).get_names());

    assert(dim[2] == dim[3]); // ensure we are processing square inputs
    input_dim = dim[2];
    input_data = new float[size_full];

    infer_request = compiled_model.create_infer_request();
    return infer_request;
}

float* OVBaseInference::executeModel(int mode) {
    // Get the input port for a model with one input.
    auto input_port = compiled_model.input();
    // Create a tensor that wraps the externally owned input buffer.
    ov::Tensor input_tensor(input_port.get_element_type(), input_port.get_shape(), input_data);
    // Set the input tensor for a model with one input.
    infer_request.set_input_tensor(input_tensor);

    switch (mode) {
    case 1: // synchronous
        infer_request.infer();
        break;
    case 0:
    default: // asynchronous
        infer_request.start_async(); // note (2 June 2023): this currently emits a warning
        infer_request.wait();
        break;
    }

    // Get the output tensor by tensor name.
    auto output = infer_request.get_tensor(output_name);
    float* output_data = output.data<float>();
    return output_data;
}

OVBaseInference::~OVBaseInference() {
    delete[] input_data;
}