Snippet $13 authored by Louis

OpenVINO Inference

ov_inf.cpp
// Copyright ManoMotion AB 2023
#include "OVBaseInference.hpp"
using namespace std;
using namespace manomotion;
using namespace cv;

OVBaseInference::OVBaseInference() {
	TAG = "OVBaseInference";
	is_read_from_file = false;
	_batch_size = 1;
	input_data = nullptr; // allocated in loadModel(), released in the destructor
}

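// Converts an 8-bit multi-channel cv::Mat into a float tensor buffer.
// zero_center selects the output range: [-1, 1] (x / 127.5 - 1) when true, [0, 1] (x / 255) otherwise.
// pytorch_format selects the memory layout: NCHW (planar) when true, NHWC (interleaved) otherwise.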
void OVBaseInference::normalizeImage(Mat image_frame, bool zero_center, float* tensor_buffer, bool pytorch_format) {
	const int height = image_frame.rows;
	const int width = image_frame.cols;
	float div, sub;
	if (zero_center) {
		// [-1,1]
		div = 127.5f;
		sub = 1.0f;
	}
	else {
		// [0,1]
		div = 255.0f;
		sub = 0.0f;
	}

	if (!pytorch_format) {
		// NHWC (interleaved): copy pixels row by row in HWC order.
		for (int i = 0; i < height; ++i) {
			const uchar* image_ptr = image_frame.ptr<uchar>(i); // respects the Mat's row stride
			for (int j = 0; j < width; ++j) {
				for (int c = 0; c < channels; ++c) {
					*tensor_buffer++ = (float)(*image_ptr++) / div - sub;
				}
			}
		}
	}
	else {
		// NCHW (planar, PyTorch-style): write one full channel plane at a time.
		for (int c = 0; c < channels; ++c) {
			for (int i = 0; i < height; ++i) {
				const uchar* row_ptr = image_frame.ptr<uchar>(i);
				for (int j = 0; j < width; ++j) {
					*tensor_buffer++ = (float)(row_ptr[j * channels + c]) / div - sub;
				}
			}
		}
	}
}


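// Reads the model (from disk or from an embedded blob), reshapes it to input_shape_new,
// compiles it for the AUTO device with a latency hint, caches the input dimensions and
// tensor names, allocates the host input buffer, and returns a ready infer request.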
ov::InferRequest OVBaseInference::loadModel()
{
	// Read a network in IR, PaddlePaddle, or ONNX format.
	if (is_read_from_file)
	{
		try
		{
			model = core.read_model(model_name);
		}
		catch (const std::exception& e)
		{
			cout << "Error while reading model: " << e.what() << endl;
		}
	}
	else
	{
		// Read from embedded files.
		const char* embedded_file_name = model_name.c_str();
		size_t file_size;

		const char* _model_data;

		if (is_multiple_embedded_files)
			_model_data = find_embedded_file_multi(embedded_file_name, &file_size);
		else
			_model_data = find_embedded_file(embedded_file_name, &file_size);

		// Build a string from the embedded data and read the model from memory
		// (empty ov::Tensor: no separate weights blob).
		std::string model_str(_model_data, _model_data + file_size);
		model = core.read_model(model_str, ov::Tensor());
	}

	model->reshape({ input_shape_new });

	try
	{
		// Alternative: also request f16 inference precision:
		// compiled_model = core.compile_model(model, "AUTO", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY),
		//     ov::inference_num_threads(8), ov::hint::inference_precision("f16"));

		compiled_model = core.compile_model(model, "AUTO", ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY), ov::inference_num_threads(8));
		//compiled_model = core.compile_model(model, "HETERO", ov::device::priorities("GPU,CPU"), ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY)); // crashes at infer
	}
	catch (const std::exception& e)
	{
		cout << "Error while compiling model: " << e.what() << endl;
	}

	// Cache the input dimensions and the total element count of the input tensor.
	auto input_shape = model->input(input_info_idx).get_shape();
	size_t size_full = 1;
	for (size_t _size : input_shape) {
		dim.push_back(_size);
		size_full *= _size;
	}

	input_name = *begin(model->input(input_info_idx).get_names());
	output_name = *begin(model->output(output_info_idx).get_names());

	assert(dim[2] == dim[3]); // ensure that we are processing a square input (H == W)
	input_dim = dim[2];

	input_data = new float[size_full]; // host buffer backing the input tensor; released in the destructor

	infer_request = compiled_model.create_infer_request();

	return infer_request;
}

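// Wraps the pre-filled input_data buffer in an ov::Tensor, runs inference
// (mode 1 = synchronous, otherwise asynchronous start + wait) and returns a pointer
// to the output tensor data owned by the infer request.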
float* OVBaseInference::executeModel(int mode)
{
	// Get input port for model with one input
	auto input_port = compiled_model.input();
	// Create tensor from external memory
	ov::Tensor input_tensor(input_port.get_element_type(), input_port.get_shape(), input_data);
	// Set input tensor for model with one input
	infer_request.set_input_tensor(input_tensor);

	switch (mode)
	{
	case 1: // synchronous inference
		infer_request.infer();
		break;

	case 0:
	default: // asynchronous inference
		infer_request.start_async(); // start_async() currently produces a warning (noted 02 June 2023)
		infer_request.wait();
		break;
	}
	
	// Get output tensor by tensor name
	auto output = infer_request.get_tensor(output_name);
	float* output_data = output.data<float>();

	return output_data;
}

OVBaseInference::~OVBaseInference() {
	delete[] input_data; // release the input buffer allocated in loadModel()
}
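
For context, below is a minimal usage sketch showing how a caller might drive this class. The subclass, the model path, and the way the configuration members (model_name, is_read_from_file, input_shape_new) are set are assumptions inferred from how they are used above, not part of this snippet.

// Hypothetical usage sketch -- everything not defined in ov_inf.cpp or OVBaseInference.hpp
// (the subclass, the model path, the member types) is an assumption for illustration only.
#include "OVBaseInference.hpp"
#include <opencv2/opencv.hpp>
#include <iostream>

// Assumed: a thin subclass that fills in the configuration members used by loadModel().
class DemoInference : public manomotion::OVBaseInference {
public:
	DemoInference(const std::string& path, size_t size) {
		model_name = path;                      // model file on disk (assumed accessible from a subclass)
		is_read_from_file = true;               // read from disk rather than an embedded blob
		input_shape_new = { 1, 3, size, size }; // NCHW, batch 1 (assumed member type)
	}

	float* run(const cv::Mat& frame) {
		cv::Mat resized;
		cv::resize(frame, resized, cv::Size(input_dim, input_dim)); // input_dim is set by loadModel()
		// Fill the host buffer in planar (PyTorch-style) layout, scaled to [-1, 1].
		normalizeImage(resized, /*zero_center=*/true, input_data, /*pytorch_format=*/true);
		return executeModel(/*mode=*/1); // synchronous inference
	}
};

int main() {
	DemoInference inference("hand_model.onnx", 224); // hypothetical model and input size
	inference.loadModel();
	float* output = inference.run(cv::imread("frame.png"));
	std::cout << "first output value: " << output[0] << std::endl;
	return 0;
}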