diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 02b568abdf8da..6700c96d84cb4 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -1290,34 +1290,6 @@ if (onnxruntime_USE_OPENVINO) add_definitions(-DUSE_OPENVINO=1) - if (EXISTS "$ENV{INTEL_OPENVINO_DIR}/deployment_tools/inference_engine/version.txt") - file(READ $ENV{INTEL_OPENVINO_DIR}/deployment_tools/inference_engine/version.txt VER) - endif() - - if (NOT DEFINED ENV{INTEL_OPENVINO_DIR}) - message(FATAL_ERROR "[Couldn't locate OpenVINO] OpenVINO may not have been initialized") - endif() - - # Check OpenVINO version for support - if ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.0") - set(OPENVINO_VERSION "2023.0") - add_definitions(-DOPENVINO_2023_0=1) - elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.1") - set(OPENVINO_VERSION "2023.1") - add_definitions(-DOPENVINO_2023_1=1) - elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.2") - set(OPENVINO_VERSION "2023.2") - add_definitions(-DOPENVINO_2023_2=1) - elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.3") - set(OPENVINO_VERSION "2023.3") - add_definitions(-DOPENVINO_2023_3=1) - elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "openvino") - set(OPENVINO_VERSION "2023.3") - add_definitions(-DOPENVINO_2023_3=1) - else() - message(FATAL_ERROR "Unsupported OpenVINO version: ${INTEL_OPENVINO_DIR}") - endif() - if (onnxruntime_USE_OPENVINO_GPU_FP32) add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1) endif() @@ -1334,6 +1306,10 @@ if (onnxruntime_USE_OPENVINO) add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1) endif() + if (onnxruntime_USE_OPENVINO_NPU) + add_definitions(-DOPENVINO_CONFIG_NPU=1) + endif() + if (onnxruntime_USE_OPENVINO_GPU_FP32_NP) add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1) add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1) @@ -1354,6 +1330,11 @@ if (onnxruntime_USE_OPENVINO) add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1) endif() + if (onnxruntime_USE_OPENVINO_NPU_NP) + add_definitions(-DOPENVINO_CONFIG_NPU=1) + add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1) + endif() + if (onnxruntime_USE_OPENVINO_HETERO) add_definitions(-DOPENVINO_CONFIG_HETERO=1) add_definitions(-DDEVICE_NAME="${onnxruntime_USE_OPENVINO_DEVICE}") diff --git a/cmake/onnxruntime_providers_openvino.cmake b/cmake/onnxruntime_providers_openvino.cmake index e26f0bfc0b751..5876b2b5c448b 100644 --- a/cmake/onnxruntime_providers_openvino.cmake +++ b/cmake/onnxruntime_providers_openvino.cmake @@ -16,23 +16,19 @@ endif() # Header paths - find_package(InferenceEngine REQUIRED) - find_package(ngraph REQUIRED) - - if (OPENVINO_2022_1 OR OPENVINO_2022_2) find_package(OpenVINO REQUIRED COMPONENTS Runtime ONNX) - list (OV_20_LIBS openvino::frontend::onnx openvino::runtime) + if(OpenVINO_VERSION VERSION_LESS 2023.0) + message(FATAL_ERROR "OpenVINO 2023.0 and newer are supported. 
Please, latest OpenVINO release") endif() if (WIN32) unset(CMAKE_MAP_IMPORTED_CONFIG_RELWITHDEBINFO) endif() + list(APPEND OPENVINO_LIB_LIST openvino::frontend::onnx openvino::runtime ${PYTHON_LIBRARIES}) if ((DEFINED ENV{OPENCL_LIBS}) AND (DEFINED ENV{OPENCL_INCS})) add_definitions(-DIO_BUFFER_ENABLED=1) - list(APPEND OPENVINO_LIB_LIST $ENV{OPENCL_LIBS} ${OV_20_LIBS} ${InferenceEngine_LIBRARIES} ${NGRAPH_LIBRARIES} ngraph::onnx_importer ${PYTHON_LIBRARIES}) - else() - list(APPEND OPENVINO_LIB_LIST ${OV_20_LIBS} ${InferenceEngine_LIBRARIES} ${NGRAPH_LIBRARIES} ngraph::onnx_importer ${PYTHON_LIBRARIES}) + list(APPEND OPENVINO_LIB_LIST $ENV{OPENCL_LIBS}) endif() source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_openvino_cc_srcs}) @@ -75,7 +71,14 @@ message(FATAL_ERROR "onnxruntime_providers_openvino unknown platform, need to specify shared library exports for it") endif() - install(TARGETS onnxruntime_providers_openvino - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) \ No newline at end of file + if (CMAKE_OPENVINO_LIBRARY_INSTALL_DIR) + install(TARGETS onnxruntime_providers_openvino + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_OPENVINO_LIBRARY_INSTALL_DIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) + else() + install(TARGETS onnxruntime_providers_openvino + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) + endif() diff --git a/dockerfiles/Dockerfile.openvino b/dockerfiles/Dockerfile.openvino index 78d04a51ba162..3e3c8cb34c551 100644 --- a/dockerfiles/Dockerfile.openvino +++ b/dockerfiles/Dockerfile.openvino @@ -17,7 +17,7 @@ ARG DEVICE=CPU_FP32 ARG ONNXRUNTIME_REPO=https://github.com/microsoft/onnxruntime.git ARG ONNXRUNTIME_BRANCH=main -ENV InferenceEngine_DIR=${INTEL_OPENVINO_DIR}/runtime/cmake +ENV OpenVINO_DIR=${INTEL_OPENVINO_DIR}/runtime/cmake USER root RUN apt update; apt install -y git protobuf-compiler libprotobuf-dev diff --git a/dockerfiles/Dockerfile.openvino-csharp b/dockerfiles/Dockerfile.openvino-csharp index 2529ef4b73209..cd77c11e60375 100644 --- a/dockerfiles/Dockerfile.openvino-csharp +++ b/dockerfiles/Dockerfile.openvino-csharp @@ -47,7 +47,7 @@ ARG DEVICE=CPU_FP32 ARG ONNXRUNTIME_REPO=https://github.com/microsoft/onnxruntime.git ARG ONNXRUNTIME_BRANCH=main -ENV InferenceEngine_DIR=${INTEL_OPENVINO_DIR}/runtime/cmake +ENV OpenVINO_DIR=${INTEL_OPENVINO_DIR}/runtime/cmake ENV LANG en_US.UTF-8 USER root diff --git a/dockerfiles/Dockerfile.openvino-rhel8 b/dockerfiles/Dockerfile.openvino-rhel8 index 5c504cfa553a1..a326c7211c00d 100644 --- a/dockerfiles/Dockerfile.openvino-rhel8 +++ b/dockerfiles/Dockerfile.openvino-rhel8 @@ -10,9 +10,8 @@ ARG ONNXRUNTIME_BRANCH=main ENV INTEL_OPENVINO_DIR=/opt/intel/openvino_2022.3.0 -ENV InferenceEngine_DIR=${INTEL_OPENVINO_DIR}/runtime/cmake +ENV OpenVINO_DIR=${INTEL_OPENVINO_DIR}/runtime/cmake ENV IE_PLUGINS_PATH=${INTEL_OPENVINO_DIR}/runtime/lib/intel64/ -ENV ngraph_DIR=${INTEL_OPENVINO_DIR}/runtime/cmake ENV LD_LIBRARY_PATH=${INTEL_OPENVINO_DIR}/runtime/3rdparty/tbb/lib/:${IE_PLUGINS_PATH}:${LD_LIBRARY_PATH} ENV OpenCV_DIR=${INTEL_OPENVINO_DIR}/extras/opencv/cmake ENV LD_LIBRARY_PATH=${INTEL_OPENVINO_DIR}/extras/opencv/lib:${LD_LIBRARY_PATH} diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 
330b464ffd1bb..6fcfe4b76171e 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -24,15 +24,6 @@ BackendManager::BackendManager(const GlobalContext& global_context, global_context_ = global_context; auto prec_str = GetGlobalContext().precision_str; - if (prec_str == "FP32") { - subgraph_context_.precision = "FP32"; - } else if (prec_str == "FP16") { - subgraph_context_.precision = "FP16"; - } else if (prec_str == "U8") { - subgraph_context_.precision = "U8"; - } else { - throw std::string("Invalid OpenVINO Precision type: " + prec_str); - } // Save the indexes of graph inputs among fused_node's inputDefs // (which also contains initializers). @@ -47,7 +38,7 @@ BackendManager::BackendManager(const GlobalContext& global_context, for (auto input : graph_inputs) { auto it = subgraph_context_.input_names.find(input->Name()); if (it == subgraph_context_.input_names.end()) { - throw std::string("Input not found in the input defs list"); + ORT_THROW("Input not found in the input defs list"); } int index = it->second; subgraph_context_.input_indexes.push_back(index); @@ -61,6 +52,7 @@ BackendManager::BackendManager(const GlobalContext& global_context, } subgraph_context_.subgraph_name = fused_node.Name(); model_proto_ = GetModelProtoFromFusedNode(fused_node, subgraph, logger); + std::string device_type = openvino_ep::BackendManager::GetGlobalContext().device_type; if (ModelHasSymbolicInputDims(subgraph)) { subgraph_context_.has_dynamic_input_shape = true; @@ -75,7 +67,7 @@ BackendManager::BackendManager(const GlobalContext& global_context, GetGlobalContext(), subgraph_context_); } catch (std::string const& msg) { - throw msg; + ORT_THROW(msg); } LOGS_DEFAULT(INFO) << "[OpenVINO-EP] " << "Backend created for graph " << subgraph_context_.subgraph_name; @@ -87,12 +79,29 @@ BackendManager::BackendManager(const GlobalContext& global_context, << subgraph_context_.subgraph_name; subgraph_context_.has_dynamic_input_shape = false; + + // OV NPU plugin is supported with fallback to OV CPU upon compilation failures. try { concrete_backend_ = BackendFactory::MakeBackend(*model_proto_, GetGlobalContext(), subgraph_context_); } catch (std::string const& msg) { - throw msg; + if (device_type.find("NPU") != std::string::npos) { + LOGS_DEFAULT(WARNING) << msg; + LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU." + << "Falling back to OV CPU for execution"; + GetGlobalContext().device_type = "CPU"; + GetGlobalContext().precision_str = "FP32"; + try { + concrete_backend_ = BackendFactory::MakeBackend(*model_proto_, + GetGlobalContext(), + subgraph_context_); + } catch (std::string const& msg) { + ORT_THROW(msg); + } + } else { + ORT_THROW(msg); + } } } } @@ -254,8 +263,13 @@ void BackendManager::Compute(OrtKernelContext* context) { LOGS_DEFAULT(INFO) << "Start Compute"; } #endif + // OV NPU doesn't support dynamic shaped model inference. + // if disable_dynamic_shapes is set to true then execution of dynamic model is done + // by rewriting the model to static shaped model at runtime based on input shape. + // disable_dynamic_shapes is always set to true for OV NPU plugin. 
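As a rough sketch of the shape-specialization idea described in the comment above — the EP's actual helpers are ReWriteInputShapeInfo and MakeMapKeyString; the helper name and include path below are assumptions made only for illustration:

#include <algorithm>
#include <cstdint>
#include <vector>
#include "core/graph/onnx_protobuf.h"  // assumed include for ONNX_NAMESPACE::ModelProto

// Copy a dynamic-shaped ModelProto and pin its inputs to the dims seen at this Compute() call.
static ONNX_NAMESPACE::ModelProto MakeStaticShapedCopy(
    const ONNX_NAMESPACE::ModelProto& model,
    const std::vector<std::vector<int64_t>>& concrete_shapes) {
  ONNX_NAMESPACE::ModelProto static_model = model;  // work on a copy, keep the original dynamic
  auto* graph = static_model.mutable_graph();
  const int n = std::min<int>(graph->input_size(), static_cast<int>(concrete_shapes.size()));
  for (int i = 0; i < n; ++i) {
    auto* shape = graph->mutable_input(i)->mutable_type()->mutable_tensor_type()->mutable_shape();
    for (int d = 0; d < shape->dim_size() && d < static_cast<int>(concrete_shapes[i].size()); ++d) {
      shape->mutable_dim(d)->set_dim_value(concrete_shapes[i][d]);  // replace symbolic dims with concrete values
    }
  }
  return static_model;
}

The rewritten static model can then be compiled and cached per shape key, which is what the Compute() path below does with backend_map_.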
bool use_dynamic_backend = true; - if (!GetGlobalContext().disable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape && + if (subgraph_context_.has_dynamic_input_shape && + !GetGlobalContext().disable_dynamic_shapes && (GetGlobalContext().device_type.find("CPU") != std::string::npos || GetGlobalContext().device_type.find("GPU") != std::string::npos)) { concrete_backend_->Infer(context); @@ -263,12 +277,11 @@ void BackendManager::Compute(OrtKernelContext* context) { } else if (use_dynamic_backend && subgraph_context_.has_dynamic_input_shape) { std::vector> tensor_shapes = GetInputTensorShapes(ctx); auto key = MakeMapKeyString(tensor_shapes, GetGlobalContext().device_type); - std::shared_ptr dynamic_backend; auto search = backend_map_.find(key); if (search == backend_map_.end()) { LOGS_DEFAULT(INFO) << "[OpenVINO-EP] " - << "Creating concrete backend for key: " << key; + << "Creating dynamic backend for key: " << key; LOGS_DEFAULT(INFO) << "[OpenVINO-EP] " << "Backend created for graph " << subgraph_context_.subgraph_name; auto modelproto_with_concrete_shapes = ReWriteInputShapeInfo(*model_proto_, tensor_shapes); @@ -277,7 +290,21 @@ void BackendManager::Compute(OrtKernelContext* context) { GetGlobalContext(), subgraph_context_); } catch (std::string const& msg) { - throw msg; + if (GetGlobalContext().device_type.find("NPU") != std::string::npos) { + LOGS_DEFAULT(WARNING) << msg; + LOGS_DEFAULT(WARNING) << "Model compilation failed at OV NPU." + << "Falling back to OV CPU for execution"; + GetGlobalContext().device_type = "CPU"; + GetGlobalContext().precision_str = "FP32"; + key = MakeMapKeyString(tensor_shapes, GetGlobalContext().device_type); + try { + dynamic_backend = BackendFactory::MakeBackend(*modelproto_with_concrete_shapes, + GetGlobalContext(), + subgraph_context_); + } catch (std::string const& msg) { + ORT_THROW(msg); + } + } } backend_map_.insert({key, dynamic_backend}); } else { diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc index 50c839017df2a..2db2eb9bec75e 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.cc +++ b/onnxruntime/core/providers/openvino/backend_utils.cc @@ -11,12 +11,7 @@ #include "core/providers/shared_library/provider_api.h" #include "backend_utils.h" -#if defined(OV_API_20) using Exception = ov::Exception; -#else -using Exception = InferenceEngine::details::InferenceEngineException; -using WaitMode = InferenceEngine::IInferRequest::WaitMode; -#endif namespace onnxruntime { namespace openvino_ep { @@ -47,7 +42,6 @@ struct static_cast_int64 { std::shared_ptr CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, - const SubGraphContext& subgraph_context, std::map>& const_outputs_map) { if (IsCILogEnabled()) { std::cout << "CreateNgraphFunc" << std::endl; @@ -55,28 +49,6 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext const std::string model = model_proto.SerializeAsString(); try { auto cnn_network = global_context.ie_core.ReadModel(model, global_context.onnx_model_path_name); - if ((subgraph_context.precision == "FP16") && - (global_context.device_type.find("NPU") == std::string::npos)) { - // FP16 transformations - ov::pass::ConvertFP32ToFP16 pass_obj; - pass_obj.run_on_model(cnn_network); - cnn_network->validate_nodes_and_infer_types(); - - auto proc = ov::preprocess::PrePostProcessor(cnn_network); - for (size_t i = 0; i < cnn_network->inputs().size(); i++) { - if 
(cnn_network->inputs()[i].get_element_type() == ov::element::f16) { - proc.input(i).tensor().set_element_type(ov::element::f32); - proc.input(i).preprocess().convert_element_type(ov::element::f16); - } - } - - for (size_t i = 0; i < cnn_network->outputs().size(); i++) { - if (cnn_network->outputs()[i].get_element_type() == ov::element::f16) { - proc.output(i).postprocess().convert_element_type(ov::element::f32); - } - } - cnn_network = proc.build(); - } // Check for Constant Folding if (!global_context.is_wholly_supported_graph) { @@ -103,7 +75,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext #endif return cnn_network; } catch (std::string const& msg) { - throw msg; + ORT_THROW(msg); } } @@ -127,7 +99,7 @@ GetOutputTensor(Ort::KernelContext& context, size_t batch_size, } auto it = output_names.find(output_name); if (it == output_names.end()) { - throw std::string(log_tag + "Output names mismatch between OpenVINO and ONNX"); + ORT_THROW(log_tag + "Output names mismatch between OpenVINO and ONNX"); } int index = it->second; return context.GetOutput(index, output_shape.get(), num_dims); @@ -145,7 +117,7 @@ GetOutputTensor(Ort::KernelContext& context, auto it = output_names.find(output_name); if (it == output_names.end()) { - throw std::string(log_tag + "Output names mismatch between OpenVINO and ONNX"); + ORT_THROW(log_tag + "Output names mismatch between OpenVINO and ONNX"); } int index = it->second; auto shape = node->get_shape(); @@ -204,7 +176,7 @@ void FillOutputsWithConstantData(std::shared_ptr node, Ort::UnownedVal break; } default: - throw std::string(log_tag + "Unsupported output data type"); + ORT_THROW(log_tag + "Unsupported output data type"); } } @@ -232,7 +204,7 @@ void FillInputBlob(OVTensorPtr inputBlob, size_t batch_slice_idx, auto tensor = context.GetInput(subgraph_context.input_names.at(input_name)); auto mem_info = tensor.GetTensorMemoryInfo(); if (mem_info.GetAllocatorName() == OpenVINO_GPU) { - throw std::string(log_tag + "IO Buffering is not enabled, Please enable Input on CPU"); + ORT_THROW(log_tag + "IO Buffering is not enabled, Please enable Input on CPU"); } // Copy input data into OpenVINO's input buffer const char* tensor_data = tensor.GetTensorData(); diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h index 82b0351e87da5..811db8d3c0f6d 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.h +++ b/onnxruntime/core/providers/openvino/backend_utils.h @@ -65,7 +65,6 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor, std::shared_ptr CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, - const SubGraphContext& subgraph_context, std::map>& const_outputs_map); void printPerformanceCounts(const std::vector& performanceMap, diff --git a/onnxruntime/core/providers/openvino/backends/backend_factory.cc b/onnxruntime/core/providers/openvino/backends/backend_factory.cc index c586dd8b38af9..d37c854f84cc3 100644 --- a/onnxruntime/core/providers/openvino/backends/backend_factory.cc +++ b/onnxruntime/core/providers/openvino/backends/backend_factory.cc @@ -24,11 +24,11 @@ BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto, try { concrete_backend_ = std::make_shared(model_proto, global_context, subgraph_context); } catch (std::string const& msg) { - throw msg; + ORT_THROW(msg); } return concrete_backend_; } else { - throw std::string("[OpenVINO-EP] Backend factory error: 
Unknown backend type: " + type); + ORT_THROW("[OpenVINO-EP] Backend factory error: Unknown backend type: " + type); } } } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index 0779940983aea..5a641c2a8c65d 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -79,20 +79,20 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto, subgraph_context_.subgraph_name); LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; } else { - ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_); + ie_cnn_network_ = CreateOVModel(model_proto, global_context_, const_outputs_map_); exe_network_ = global_context_.ie_core.LoadNetwork( ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name); LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; } #endif } else { - ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_); + ie_cnn_network_ = CreateOVModel(model_proto, global_context_, const_outputs_map_); exe_network_ = global_context_.ie_core.LoadNetwork( ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name); LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; } } catch (const char* msg) { - throw(msg); + ORT_THROW(msg); } inferRequestsQueue_ = std::unique_ptr(new InferRequestsQueue(exe_network_, 1)); @@ -125,21 +125,17 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) { if (global_context_.device_type.find("NPU") != std::string::npos) { std::pair device_property; device_property = std::make_pair("NPU_COMPILER_TYPE", "DRIVER"); + + const std::string env_npu_compiler_type = onnxruntime::GetEnvironmentVar("ORT_OPENVINO_NPU_COMPILER_TYPE"); + if (!env_npu_compiler_type.empty()) { + device_property = std::make_pair("NPU_COMPILER_TYPE", env_npu_compiler_type); + } device_config.emplace(ov::device::properties("NPU", device_property)); } } void BasicBackend::EnableCaching() { if (!global_context_.cache_dir.empty()) { - if (global_context_.is_wholly_supported_graph) { -#if defined(OPENVINO_2022_3) -#if defined(_WIN32) || defined(WIN32) || defined(__CYGWIN__) || defined(__MINGW32__) || defined(__BORLANDC__) - _putenv_s("OV_GPU_CACHE_MODEL", "1"); -#else - setenv("OV_GPU_CACHE_MODEL", "1", 1); -#endif -#endif - } LOGS_DEFAULT(INFO) << log_tag << "Enables Caching"; global_context_.ie_core.SetCache(global_context_.cache_dir); } @@ -162,7 +158,7 @@ void BasicBackend::EnableStreams() { (global_context_.device_type.find("HETERO") != std::string::npos) || (global_context_.device_type.find("AUTO") != std::string::npos)) { if (global_context_.num_streams != 1) { - throw(log_tag + "Cannot set NUM_STREAMS to " + std::to_string(global_context_.num_streams) + " for device " + global_context_.device_type); + ORT_THROW(log_tag + "Cannot set NUM_STREAMS to " + std::to_string(global_context_.num_streams) + " for device " + global_context_.device_type); } // Do nothing } else { @@ -198,9 +194,9 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque if (input_names.find(onnx_input_name) != input_names.end()) { input_name = onnx_input_name; } else { - throw(log_tag + - "Input names mismatch between OpenVINO and ONNX. 
" + onnx_input_name + - " doesn't exist in the list of OpenVINO input tensor names"); + ORT_THROW(log_tag + + "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name + + " doesn't exist in the list of OpenVINO input tensor names"); } size_t batch_slice_idx = 0; if (subgraph_context_.has_dynamic_input_shape && @@ -232,14 +228,14 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque try { infer_request->SetTensor(input_name, tensor_ptr); } catch (const char* msg) { - throw(msg); + ORT_THROW(msg); } } else { OVTensorPtr graph_input_blob; try { graph_input_blob = infer_request->GetTensor(input_name); } catch (const char* msg) { - throw(msg); + ORT_THROW(msg); } FillInputBlob(graph_input_blob, batch_slice_idx, input_name, context, subgraph_context_); } @@ -248,7 +244,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque // Start Async inference infer_request->StartAsync(); } catch (const char* msg) { - throw(msg); + ORT_THROW(msg); } } @@ -274,10 +270,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe if (input_names.find(onnx_input_name) != input_names.end()) { input_name = onnx_input_name; } else { - throw(log_tag + - "Input names mismatch between OpenVINO and ONNX. " + - onnx_input_name + - " doesn't exist in the list of OpenVINO input tensor names"); + ORT_THROW(log_tag + + "Input names mismatch between OpenVINO and ONNX. " + + onnx_input_name + + " doesn't exist in the list of OpenVINO input tensor names"); } input_idx++; // Kernel Context Input Buffer @@ -322,7 +318,7 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe } } if (!output_name_found) { - throw std::string( + ORT_THROW( log_tag + "Output names mismatch between OpenVINO and ONNX. [ONNX Output: ] " + onnx_output_name + " doesn't exist in the list of OpenVINO output tensor names"); @@ -344,7 +340,7 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe try { infer_request->SetTensor(output_name, tensor_ptr); } catch (const char* msg) { - throw(msg); + ORT_THROW(msg); } } } @@ -352,7 +348,7 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe // Start Async inference infer_request->StartAsync(); } catch (const char* msg) { - throw(msg); + ORT_THROW(msg); } } #endif @@ -382,17 +378,18 @@ void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRe } } if (!output_name_found) { - throw(log_tag + - "Output names mismatch between OpenVINO and ONNX. " - "[ONNX Output: ] " + - onnx_output_name + - " doesn't exist in the " - "list of OpenVINO output tensor names"); + ORT_THROW( + log_tag + + "Output names mismatch between OpenVINO and ONNX. 
" + "[ONNX Output: ] " + + onnx_output_name + + " doesn't exist in the " + "list of OpenVINO output tensor names"); } try { graph_output_blob = infer_request->GetTensor(output_name); } catch (const char* msg) { - throw(msg); + ORT_THROW(msg); } size_t batch_size = 1; auto output_tensor = @@ -413,14 +410,14 @@ void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRe auto output_tensor = GetOutputTensor(context, out_name, subgraph_context_.output_names, node); auto mem_info = output_tensor.GetTensorMemoryInfo(); if (mem_info.GetAllocatorName() == OpenVINO_GPU) { - throw(log_tag + "IO Buffering is not supported for constant subgraphs"); + ORT_THROW(log_tag + "IO Buffering is not supported for constant subgraphs"); } else { FillOutputsWithConstantData(node, output_tensor); } } } } catch (const char* msg) { - throw(msg); + ORT_THROW(msg); } } @@ -440,7 +437,7 @@ void BasicBackend::Infer(OrtKernelContext* ctx) { auto output_tensor = GetOutputTensor(context, out_name, subgraph_context_.output_names, node); FillOutputsWithConstantData(node, output_tensor); } catch (std::string const& msg) { - throw msg; + ORT_THROW(msg); } } // Get Output tensors @@ -461,26 +458,26 @@ void BasicBackend::Infer(OrtKernelContext* ctx) { try { StartRemoteAsyncInference(context, infer_request); } catch (std::string const& msg) { - throw msg; + ORT_THROW(msg); } } else { try { StartAsyncInference(context, infer_request); } catch (std::string const& msg) { - throw msg; + ORT_THROW(msg); } } #else try { StartAsyncInference(context, infer_request); - } catch (std::string const& msg) { - throw msg; + } catch (const std::runtime_error& e) { + ORT_THROW(log_tag + " Exception at StartAsyncInference: " + e.what()); } #endif try { CompleteAsyncInference(context, infer_request); - } catch (std::string const& msg) { - throw msg; + } catch (const std::runtime_error& e) { + ORT_THROW(log_tag + " Exception at CompleteAsyncInference: " + e.what()); } // Get Output tensors diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index 5f19c71683f24..ae13321010bab 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -31,6 +31,7 @@ struct GlobalContext { int onnx_opset_version; void* context = 0; bool use_api_2; + std::vector OpenVINO_Version = {}; // Ov Major and OV minor version from OV headers }; // Holds context specific to subgraph. 
@@ -44,7 +45,6 @@ struct SubGraphContext { std::vector input_indexes; std::unordered_map input_names; std::unordered_map output_names; - std::string precision; }; } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index e3948cc94b348..b64c07214e64d 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -6,6 +6,7 @@ #include "contexts.h" #include "backend_manager.h" #include "ov_versions/capability.h" +#include "openvino/core/version.hpp" #define MEMCPY_S(dest, src, destsz, srcsz) memcpy(dest, src, std::min(destsz, srcsz)) @@ -25,6 +26,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv global_context_->enable_opencl_throttling = info.enable_opencl_throttling_; global_context_->disable_dynamic_shapes = info.disable_dynamic_shapes_; global_context_->num_of_threads = info.num_of_threads_; + global_context_->OpenVINO_Version = {OPENVINO_VERSION_MAJOR, OPENVINO_VERSION_MINOR}; // to check if target device is available // using ie_core capability GetAvailableDevices to fetch list of devices plugged in @@ -50,8 +52,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv device_found = true; break; } - if ((info.device_type_.find("NPU") != std::string::npos) && - (info.precision_ == "FP16" || info.precision_ == "U8")) { + if (info.device_type_.find("NPU") != std::string::npos) { device_found = true; break; } @@ -113,27 +114,10 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer, global_context_->onnx_opset_version = graph_viewer.DomainToVersionMap().at(kOnnxDomain); -#if defined(OPENVINO_2023_0) openvino_ep::GetCapability obj(graph_viewer, global_context_->device_type, - global_context_->precision_str, "V_2023_0"); + global_context_->precision_str); result = obj.Execute(); -#elif defined(OPENVINO_2023_1) - openvino_ep::GetCapability obj(graph_viewer, - global_context_->device_type, - global_context_->precision_str, "V_2023_1"); - result = obj.Execute(); -#elif defined(OPENVINO_2023_2) - openvino_ep::GetCapability obj(graph_viewer, - global_context_->device_type, - global_context_->precision_str, "V_2023_2"); - result = obj.Execute(); -#elif defined(OPENVINO_2023_3) - openvino_ep::GetCapability obj(graph_viewer, - global_context_->device_type, - global_context_->precision_str, "V_2023_3"); - result = obj.Execute(); -#endif global_context_->is_wholly_supported_graph = obj.IsWhollySupportedGraph(); diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h index b0c92828d8a38..d115102b0a1a6 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h @@ -20,7 +20,7 @@ static void print_build_options() { << "you want to build" << std::endl; std::cout << "The different hardware devices that can be added with HETERO/MULTI/AUTO build " - << "are ['CPU','GPU']" + << "are ['CPU','GPU','NPU']" << std::endl; std::cout << "An example of how to specify the HETERO or MULTI or AUTO build type. 
" << "Ex: HETERO:GPU,CPU Ex: MULTI:GPU,CPU Ex: AUTO:GPU,CPU" @@ -48,7 +48,7 @@ static std::vector parseDevices(const std::string& device_string) { print_build_options(); ORT_THROW("Invalid device string: " + device_string); } - std::vector dev_options = {"CPU", "GPU"}; + std::vector dev_options = {"CPU", "GPU", "NPU"}; for (std::string dev : devices) { if (!std::count(dev_options.begin(), dev_options.end(), dev)) { print_build_options(); @@ -98,12 +98,9 @@ struct OpenVINOExecutionProviderInfo { #elif defined OPENVINO_CONFIG_GPU_FP16 device_type_ = "GPU"; precision_ = "FP16"; -#elif defined OPENVINO_CONFIG_NPU_FP16 +#elif defined OPENVINO_CONFIG_NPU device_type_ = "NPU"; - precision_ = "FP16"; -#elif defined OPENVINO_CONFIG_NPU_U8 - device_type_ = "NPU"; - precision_ = "U8"; + precision_ = ""; #elif defined OPENVINO_CONFIG_HETERO || defined OPENVINO_CONFIG_MULTI || defined OPENVINO_CONFIG_AUTO #ifdef DEVICE_NAME #define DEVICE DEVICE_NAME @@ -142,12 +139,9 @@ struct OpenVINOExecutionProviderInfo { } else if (dev_type == "GPU.1_FP16") { device_type_ = "GPU.1"; precision_ = "FP16"; - } else if (dev_type == "NPU_FP16") { - device_type_ = "NPU"; - precision_ = "FP16"; - } else if (dev_type == "NPU_U8") { + } else if (dev_type == "NPU") { device_type_ = "NPU"; - precision_ = "U8"; + precision_ = ""; } else if (dev_type.find("HETERO") == 0 || dev_type.find("MULTI") == 0) { std::vector devices = parseDevices(dev_type); precision_ = "FP16"; diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index 068456777bece..83a27aee649c4 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -78,7 +78,6 @@ struct OpenVINO_Provider : Provider { // with this value at runtime. bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU // device (Reduces CPU Utilization when using GPU) - bool disable_dynamic_shapes = false; // [disable_dynamic_shapes]: Execute model with default static shape for optimal performance. void* context = nullptr; if (provider_options_map.find("device_type") != provider_options_map.end()) { @@ -86,7 +85,7 @@ struct OpenVINO_Provider : Provider { std::set ov_supported_device_types = {"CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16", - "GPU.0_FP16", "GPU.1_FP16"}; + "GPU.0_FP16", "GPU.1_FP16", "NPU"}; if (!((ov_supported_device_types.find(device_type) != ov_supported_device_types.end()) || (device_type.find("HETERO:") == 0) || (device_type.find("MULTI:") == 0) || @@ -94,7 +93,7 @@ struct OpenVINO_Provider : Provider { ORT_THROW( "[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. " "Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', " - "'GPU.0_FP16', 'GPU.1_FP16' or from" + "'GPU.0_FP16', 'GPU.1_FP16', 'NPU' or from" " HETERO/MULTI/AUTO options available. \n"); } } @@ -147,12 +146,24 @@ struct OpenVINO_Provider : Provider { bool_flag = ""; } + // [disable_dynamic_shapes]: Rewrite dynamic shaped models to static shape at runtime and execute. + // Always true for NPU plugin. 
+ bool disable_dynamic_shapes = false; + if (device_type.find("NPU") != std::string::npos) { + disable_dynamic_shapes = true; + } if (provider_options_map.find("disable_dynamic_shapes") != provider_options_map.end()) { bool_flag = provider_options_map.at("disable_dynamic_shapes"); if (bool_flag == "true" || bool_flag == "True") disable_dynamic_shapes = true; - else if (bool_flag == "false" || bool_flag == "False") - disable_dynamic_shapes = false; + else if (bool_flag == "false" || bool_flag == "False") { + if (device_type.find("NPU") != std::string::npos) { + disable_dynamic_shapes = true; + LOGS_DEFAULT(INFO) << "[OpenVINO-EP] The value for the key 'disable_dynamic_shapes' will be set to TRUE for NPU backend.\n "; + } else { + disable_dynamic_shapes = false; + } + } } return std::make_shared(const_cast(device_type.c_str()), enable_npu_fast_compile, diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc index ea481791111fc..c594624d26169 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.cc +++ b/onnxruntime/core/providers/openvino/ov_interface.cc @@ -8,12 +8,7 @@ #include "core/providers/shared_library/provider_api.h" #include "backend_utils.h" -#if defined(OV_API_20) using Exception = ov::Exception; -#else -using Exception = InferenceEngine::details::InferenceEngineException; -using WaitMode = InferenceEngine::IInferRequest::WaitMode; -#endif namespace onnxruntime { namespace openvino_ep { @@ -36,9 +31,9 @@ std::shared_ptr OVCore::ReadModel(const std::string& model, const std } return FE->convert(inputModel); } catch (const Exception& e) { - throw std::string(log_tag + "[OpenVINO-EP] Exception while Reading network: " + std::string(e.what())); + ORT_THROW(log_tag + "[OpenVINO-EP] Exception while Reading network: " + std::string(e.what())); } catch (...) { - throw std::string(log_tag + "[OpenVINO-EP] Unknown exception while Reading network"); + ORT_THROW(log_tag + "[OpenVINO-EP] Unknown exception while Reading network"); } } @@ -81,9 +76,9 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr& ie_cnn_network, OVExeNetwork exe(obj); return exe; } catch (const Exception& e) { - throw std::string(log_tag + " Exception while Loading Network for graph: " + name + e.what()); + ORT_THROW(log_tag + " Exception while Loading Network for graph: " + name + e.what()); } catch (...) { - throw std::string(log_tag + " Exception while Loading Network for graph " + name); + ORT_THROW(log_tag + " Exception while Loading Network for graph " + name); } } @@ -113,9 +108,9 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr& model, OVRemoteCont auto obj = oe.compile_model(model, *context); return OVExeNetwork(obj); } catch (const Exception& e) { - throw std::string(log_tag + " Exception while Loading Network for graph: " + name + e.what()); + ORT_THROW(log_tag + " Exception while Loading Network for graph: " + name + e.what()); } catch (...) { - throw std::string(log_tag + " Exception while Loading Network for graph " + name); + ORT_THROW(log_tag + " Exception while Loading Network for graph " + name); } } #endif @@ -135,9 +130,9 @@ OVInferRequest OVExeNetwork::CreateInferRequest() { OVInferRequest inf_obj(infReq); return inf_obj; } catch (const Exception& e) { - throw std::string(log_tag + "Exception while creating InferRequest object: " + e.what()); + ORT_THROW(log_tag + "Exception while creating InferRequest object: " + e.what()); } catch (...) 
{ - throw std::string(log_tag + "Exception while creating InferRequest object."); + ORT_THROW(log_tag + "Exception while creating InferRequest object."); } } @@ -147,9 +142,9 @@ OVTensorPtr OVInferRequest::GetTensor(const std::string& input_name) { OVTensorPtr blob = std::make_shared(tobj); return blob; } catch (const Exception& e) { - throw std::string(log_tag + " Cannot access IE Blob for input: " + input_name + e.what()); + ORT_THROW(log_tag + " Cannot access IE Blob for input: " + input_name + e.what()); } catch (...) { - throw std::string(log_tag + " Cannot access IE Blob for input: " + input_name); + ORT_THROW(log_tag + " Cannot access IE Blob for input: " + input_name); } } @@ -157,9 +152,9 @@ void OVInferRequest::SetTensor(const std::string& name, OVTensorPtr& blob) { try { ovInfReq.set_tensor(name, *(blob.get())); } catch (const Exception& e) { - throw std::string(log_tag + " Cannot set Remote Blob for output: " + name + e.what()); + ORT_THROW(log_tag + " Cannot set Remote Blob for output: " + name + e.what()); } catch (...) { - throw std::string(log_tag + " Cannot set Remote Blob for output: " + name); + ORT_THROW(log_tag + " Cannot set Remote Blob for output: " + name); } } @@ -167,9 +162,9 @@ void OVInferRequest::StartAsync() { try { ovInfReq.start_async(); } catch (const Exception& e) { - throw std::string(log_tag + " Couldn't start Inference: " + e.what()); + ORT_THROW(log_tag + " Couldn't start Inference: " + e.what()); } catch (...) { - throw std::string(log_tag + " In Error Couldn't start Inference"); + ORT_THROW(log_tag + " In Error Couldn't start Inference"); } } @@ -177,9 +172,9 @@ void OVInferRequest::Infer() { try { ovInfReq.infer(); } catch (const Exception& e) { - throw std::string(log_tag + " Couldn't start Inference: " + e.what()); + ORT_THROW(log_tag + " Couldn't start Inference: " + e.what()); } catch (...) { - throw std::string(log_tag + " In Error Couldn't start Inference"); + ORT_THROW(log_tag + " In Error Couldn't start Inference"); } } @@ -187,9 +182,9 @@ void OVInferRequest::WaitRequest() { try { ovInfReq.wait(); } catch (const Exception& e) { - throw std::string(log_tag + " Wait Model Failed: " + e.what()); + ORT_THROW(log_tag + " Wait Model Failed: " + e.what()); } catch (...) 
{ - throw std::string(log_tag + " Wait Mode Failed"); + ORT_THROW(log_tag + " Wait Mode Failed"); } } diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h index cf4d867d4df55..f98c11d794091 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.h +++ b/onnxruntime/core/providers/openvino/ov_interface.h @@ -6,14 +6,11 @@ #include #include -#define OV_API_20 #include "openvino/openvino.hpp" #include "openvino/pass/convert_fp32_to_fp16.hpp" #include "openvino/frontend/manager.hpp" #ifdef IO_BUFFER_ENABLED -#include -#include #include #endif diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc index 11c8a1629b073..7f68a39b62e53 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc @@ -6,6 +6,7 @@ #include "../backend_manager.h" #include "capability.h" #include "utils.h" +#include "openvino/core/version.hpp" #if defined(_MSC_VER) #pragma warning(disable : 4244 4245 5208) @@ -25,20 +26,22 @@ namespace openvino_ep { // Constructor GetCapability::GetCapability(const GraphViewer& graph_viewer_param, const std::string device_type_param, - const std::string device_precision, - const std::string version_param) + const std::string device_precision) : graph_viewer_(graph_viewer_param), device_type_(device_type_param), device_precision_(device_precision) { - if (version_param == "V_2023_0") { - data_ops_ = new DataOps(graph_viewer_, V_2023_0, device_type_, device_precision_); - } else if (version_param == "V_2023_1") { - data_ops_ = new DataOps(graph_viewer_, V_2023_1, device_type_, device_precision_); - } else if (version_param == "V_2023_2") { - data_ops_ = new DataOps(graph_viewer_, V_2023_2, device_type_, device_precision_); - } else if (version_param == "V_2023_3") { - data_ops_ = new DataOps(graph_viewer_, V_2023_3, device_type_, device_precision_); - } else { - data_ops_ = new DataOps(graph_viewer_, V_2023_3, device_type_, device_precision_); + if (device_type_.find("NPU") != std::string::npos) { + device_type_ = "CPU_FP32"; } +#if OPENVINO_VERSION_MAJOR == 2023 && OPENVINO_VERSION_MINOR == 1 + data_ops_ = new DataOps(graph_viewer_, V_2023_1, device_type_, device_precision_); +#elif OPENVINO_VERSION_MAJOR == 2023 && OPENVINO_VERSION_MINOR == 2 + data_ops_ = new DataOps(graph_viewer_, V_2023_2, device_type_, device_precision_); +#elif OPENVINO_VERSION_MAJOR == 2023 && OPENVINO_VERSION_MINOR == 3 + data_ops_ = new DataOps(graph_viewer_, V_2023_3, device_type_, device_precision_); +#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 0 + data_ops_ = new DataOps(graph_viewer_, V_2024_0, device_type_, device_precision_); +#else + data_ops_ = new DataOps(graph_viewer_, V_2024_0, device_type_, device_precision_); +#endif } std::vector> GetCapability::Execute() { diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.h b/onnxruntime/core/providers/openvino/ov_versions/capability.h index 2040634cc45d9..c44cc8546f498 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/capability.h +++ b/onnxruntime/core/providers/openvino/ov_versions/capability.h @@ -21,8 +21,7 @@ class GetCapability { public: GetCapability(const GraphViewer& graph_viewer_param, const std::string device_type_param, - const std::string precision, - const std::string version_param); + const std::string precision); virtual std::vector> Execute(); bool 
IsWhollySupportedGraph() { return is_wholly_supported_graph_; diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index e829bf377b195..d0ac608e8c922 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -14,6 +14,7 @@ #include "data_ops.h" #include "capability.h" #include "utils.h" +#include "../ov_interface.h" #if defined(_MSC_VER) #pragma warning(disable : 4244 4245 5208) @@ -36,6 +37,7 @@ namespace openvino_ep { std::set ops_supported_only_in_model = { "Add", "Cast", + "Celu", "Concat", "ConstantOfShape", "DequantizeLinear", @@ -46,6 +48,7 @@ std::set ops_supported_only_in_model = { "EyeLike", "GatherElements", "GatherND", + "GridSample", "Identity", "LayerNormalization", "Loop", @@ -72,293 +75,171 @@ std::set ops_supported_only_in_model = { std::set ops_supported_as_function = { "LessOrEqual", "GreaterOrEqual", - "LayerNormalization"}; + "LayerNormalization", + "Celu"}; std::vector supported_op_mode = { {"Abs", V_2020_4, {"CPU", "GPU"}}, - {"Abs", V_2023_0, {"NPU"}}, {"Acos", V_2020_4, {"CPU"}}, {"Acos", V_2022_1, {"GPU"}}, - {"Acos", V_2023_1, {"NPU"}}, {"Acosh", V_2020_4, {"CPU"}}, {"Acosh", V_2022_1, {"GPU"}}, - {"Acosh", V_2023_1, {"NPU"}}, {"Add", V_2020_4, {"CPU", "GPU"}}, - {"Add", V_2023_0, {"NPU"}}, {"And", V_2020_4, {"CPU", "GPU"}}, - {"And", V_2023_1, {"NPU"}}, {"ArgMax", V_2020_4, {"CPU"}}, {"ArgMax", V_2021_1, {"GPU"}}, {"ArgMin", V_2020_4, {"CPU"}}, {"ArgMin", V_2022_1, {"GPU"}}, {"Asin", V_2020_4, {"CPU", "GPU"}}, - {"Asin", V_2023_1, {"NPU"}}, {"Asinh", V_2020_4, {"CPU", "GPU"}}, - {"Asinh", V_2023_1, {"NPU"}}, {"Atan", V_2020_4, {"CPU", "GPU"}}, - {"Atan", V_2023_1, {"NPU"}}, {"Atanh", V_2020_4, {"CPU"}}, {"Atanh", V_2022_1, {"GPU"}}, - {"Atanh", V_2023_1, {"NPU"}}, {"AveragePool", V_2020_4, {"CPU", "GPU"}}, - {"AveragePool", V_2023_0, {"NPU"}}, {"BatchNormalization", V_2020_4, {"CPU", "GPU"}}, - {"BatchNormalization", V_2023_0, {"NPU"}}, {"BitShift", V_2022_1, {"CPU"}}, - {"BitShift", V_2023_1, {"NPU"}}, {"Cast", V_2020_4, {"CPU", "GPU"}}, - {"Cast", V_2023_0, {"NPU"}}, - {"CastLike", V_2023_1, {"CPU", "GPU", "NPU"}}, + {"CastLike", V_2023_1, {"CPU", "GPU"}}, {"Ceil", V_2020_4, {"GPU"}}, {"Ceil", V_2021_4, {"CPU"}}, - {"Ceil", V_2023_1, {"NPU"}}, {"Celu", V_2022_1, {"CPU", "GPU"}}, {"Clip", V_2020_4, {"CPU", "GPU"}}, - {"Clip", V_2023_0, {"NPU"}}, {"Compress", V_2023_1, {"CPU", "GPU"}}, {"Concat", V_2020_4, {"CPU", "GPU"}}, - {"Concat", V_2023_0, {"NPU"}}, {"Constant", V_2020_4, {"CPU", "GPU"}}, - {"Constant", V_2023_0, {"NPU"}}, {"ConstantOfShape", V_2020_4, {"CPU", "GPU"}}, - {"ConstantOfShape", V_2023_0, {"NPU"}}, // Gets mapped to broadcast op in the plugin. 
{"Conv", V_2020_4, {"CPU", "GPU"}}, - {"Conv", V_2023_0, {"NPU"}}, {"ConvInteger", V_2022_1, {"CPU", "GPU"}}, - {"ConvInteger", V_2023_1, {"NPU"}}, {"ConvTranspose", V_2020_4, {"CPU", "GPU"}}, - {"ConvTranspose", V_2023_1, {"NPU"}}, {"Cos", V_2020_4, {"CPU"}}, {"Cos", V_2022_1, {"GPU"}}, - {"Cos", V_2023_0, {"NPU"}}, {"Cosh", V_2020_4, {"CPU"}}, {"Cosh", V_2022_1, {"GPU"}}, - {"Cosh", V_2023_1, {"NPU"}}, {"CumSum", V_2022_1, {"CPU", "GPU"}}, - {"CumSum", V_2023_0, {"NPU"}}, {"DepthToSpace", V_2020_4, {"CPU", "GPU"}}, - {"DepthToSpace", V_2023_0, {"NPU"}}, {"DequantizeLinear", V_2021_4, {"CPU", "GPU"}}, - {"DequantizeLinear", V_2023_0, {"NPU"}}, {"Div", V_2020_4, {"CPU", "GPU"}}, - {"Div", V_2023_0, {"NPU"}}, {"Dropout", V_2020_4, {"CPU", "GPU"}}, - {"Dropout", V_2023_0, {"NPU"}}, {"Elu", V_2020_4, {"CPU", "GPU"}}, - {"Elu", V_2023_0, {"NPU"}}, {"Einsum", V_2023_1, {"CPU", "GPU"}}, {"Equal", V_2020_4, {"CPU", "GPU"}}, - {"Equal", V_2023_0, {"NPU"}}, // Added for whisper decoder model. {"Erf", V_2020_4, {"CPU", "GPU"}}, - {"Erf", V_2023_0, {"NPU"}}, {"Exp", V_2020_4, {"CPU", "GPU"}}, - {"Exp", V_2023_0, {"NPU"}}, {"Expand", V_2022_1, {"CPU", "GPU"}}, - {"Expand", V_2023_0, {"NPU"}}, // Gets mapped to broadcast op and multiply op in the plugin. {"EyeLike", V_2022_1, {"CPU"}}, - {"EyeLike", V_2023_0, {"NPU"}}, // NoOP {"Flatten", V_2020_4, {"CPU", "GPU"}}, - {"Flatten", V_2023_0, {"NPU"}}, {"Floor", V_2020_4, {"CPU", "GPU"}}, - {"Floor", V_2023_1, {"NPU"}}, {"Gather", V_2020_4, {"CPU", "GPU"}}, - {"Gather", V_2023_0, {"NPU"}}, {"GatherElements", V_2022_2, {"CPU", "GPU"}}, - {"GatherElements", V_2023_1, {"NPU"}}, {"GatherND", V_2021_4, {"CPU", "GPU"}}, - {"GatherND", V_2023_1, {"NPU"}}, + {"Gelu", V_2023_1, {"CPU", "GPU"}}, {"Gemm", V_2020_4, {"CPU", "GPU"}}, - {"Gemm", V_2023_0, {"NPU"}}, {"GlobalAveragePool", V_2020_4, {"CPU", "GPU"}}, - {"GlobalAveragePool", V_2023_0, {"NPU"}}, {"GlobalLpPool", V_2020_4, {"CPU", "GPU"}}, - {"GlobalLpPool", V_2023_1, {"NPU"}}, {"GlobalMaxPool", V_2022_1, {"CPU", "GPU"}}, - {"GlobalMaxPool", V_2023_1, {"NPU"}}, {"Greater", V_2020_4, {"CPU", "GPU"}}, - {"Greater", V_2023_0, {"NPU"}}, {"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}}, - {"GreaterOrEqual", V_2023_0, {"NPU"}}, {"GridSample", V_2022_3, {"CPU"}}, {"GridSample", V_2023_0, {"GPU"}}, - {"GridSample", V_2023_1, {"NPU"}}, - {"HardMax", V_2023_1, {"CPU", "GPU", "NPU"}}, + {"HardMax", V_2023_1, {"CPU", "GPU"}}, {"Identity", V_2020_4, {"CPU", "GPU"}}, - {"Identity", V_2023_0, {"NPU"}}, // NoOP {"If", V_2022_3, {"CPU", "GPU"}}, - {"If", V_2023_1, {"NPU"}}, {"ImageScaler", V_2022_1, {"CPU", "GPU"}}, - {"ImageScaler", V_2023_0, {"NPU"}}, {"InstanceNormalization", V_2020_4, {"CPU", "GPU"}}, - {"InstanceNormalization", V_2023_0, {"NPU"}}, {"HardSigmoid", V_2020_4, {"CPU", "GPU"}}, - {"HardSigmoid", V_2023_1, {"NPU"}}, {"HardMax", V_2022_1, {"CPU", "GPU"}}, + {"LayerNormalization", V_2023_0, {"CPU", "GPU"}}, {"LeakyRelu", V_2020_4, {"CPU", "GPU"}}, - {"LeakyRelu", V_2023_0, {"NPU"}}, {"Less", V_2020_4, {"CPU", "GPU"}}, - {"Less", V_2023_0, {"NPU"}}, // Added for whisper decoder model. 
{"LessOrEqual", V_2022_1, {"CPU", "GPU"}}, - {"LessOrEqual", V_2023_0, {"NPU"}}, {"Log", V_2020_4, {"CPU", "GPU"}}, - {"Log", V_2023_0, {"NPU"}}, {"LogSoftMax", V_2022_1, {"CPU", "GPU"}}, {"Loop", V_2021_4, {"CPU", "GPU"}}, - {"LpNormalization", V_2023_1, {"CPU", "GPU", "NPU"}}, - {"LpPool", V_2023_1, {"CPU", "GPU", "NPU"}}, + {"LpNormalization", V_2023_1, {"CPU", "GPU"}}, {"LRN", V_2020_4, {"CPU", "GPU"}}, - {"LRN", V_2023_0, {"NPU"}}, {"LSTM", V_2020_4, {"CPU", "GPU"}}, - {"LSTM", V_2023_1, {"NPU"}}, {"MatMul", V_2020_4, {"CPU", "GPU"}}, - {"MatMul", V_2023_0, {"NPU"}}, {"MatMulInteger", V_2022_1, {"CPU"}}, - {"MatMulInteger", V_2023_1, {"NPU"}}, {"Max", V_2020_4, {"CPU", "GPU"}}, - {"Max", V_2023_0, {"NPU"}}, {"MaxPool", V_2020_4, {"CPU", "GPU"}}, - {"MaxPool", V_2023_0, {"NPU"}}, {"Mean", V_2020_4, {"CPU", "GPU"}}, - {"Mean", V_2023_0, {"NPU"}}, {"MeanVarianceNormalization", V_2022_1, {"CPU", "GPU"}}, - {"MeanVarianceNormalization", V_2023_1, {"NPU"}}, {"Min", V_2020_4, {"CPU", "GPU"}}, - {"Min", V_2023_0, {"NPU"}}, {"Mod", V_2022_1, {"CPU", "GPU"}}, {"Mul", V_2020_4, {"CPU", "GPU"}}, - {"Mul", V_2023_0, {"NPU"}}, {"Neg", V_2020_4, {"CPU", "GPU"}}, - {"Neg", V_2023_0, {"NPU"}}, {"NonMaxSuppression", V_2021_1, {"CPU", "GPU"}}, - {"NonMaxSuppression", V_2023_1, {"NPU"}}, {"NonZero", V_2021_1, {"CPU"}}, {"NonZero", V_2023_0, {"GPU"}}, {"Not", V_2021_1, {"CPU", "GPU"}}, {"Not", V_2020_4, {"CPU", "GPU"}}, - {"Not", V_2023_1, {"NPU"}}, {"OneHot", V_2020_4, {"CPU", "GPU"}}, - {"OneHot", V_2023_1, {"NPU"}}, {"Or", V_2022_1, {"CPU", "GPU"}}, - {"Or", V_2023_1, {"NPU"}}, {"Pad", V_2020_4, {"CPU", "GPU"}}, - {"Pad", V_2023_0, {"NPU"}}, {"Pow", V_2020_4, {"CPU", "GPU"}}, - {"Pow", V_2023_0, {"NPU"}}, {"PRelu", V_2020_4, {"CPU", "GPU"}}, - {"PRelu", V_2023_0, {"NPU"}}, {"QLinearMatMul", V_2022_3, {"CPU"}}, - // {"QLinearMatMul", V_2023_1, {"NPU"}}, {"QuantizeLinear", V_2021_4, {"CPU", "GPU"}}, - {"QuantizeLinear", V_2023_0, {"NPU"}}, {"RNN", V_2023_1, {"CPU", "GPU"}}, {"RandomNormalLike", V_2023_0, {"CPU", "GPU"}}, {"RandomNormalLike", V_2023_0, {"CPU", "GPU"}}, - {"RandomNormalLike", V_2023_1, {"NPU"}}, {"RandomNormal", V_2023_0, {"CPU", "GPU"}}, - {"RandomNormal", V_2023_1, {"NPU"}}, {"Range", V_2022_1, {"CPU", "GPU"}}, - {"Range", V_2023_0, {"NPU"}}, {"Reciprocal", V_2020_4, {"CPU", "GPU"}}, - {"Reciprocal", V_2023_0, {"NPU"}}, {"ReduceL1", V_2022_1, {"CPU", "GPU"}}, - {"ReduceL1", V_2023_1, {"NPU"}}, {"ReduceL2", V_2022_1, {"CPU", "GPU"}}, - {"ReduceL2", V_2023_1, {"NPU"}}, {"ReduceLogSum", V_2020_4, {"CPU"}}, {"ReduceLogSum", V_2022_1, {"CPU", "GPU"}}, - {"ReduceLogSum", V_2023_1, {"NPU"}}, {"ReduceLogSumExp", V_2022_1, {"CPU", "GPU"}}, - {"ReduceLogSumExp", V_2023_1, {"NPU"}}, {"ReduceMax", V_2020_4, {"CPU", "GPU"}}, - {"ReduceMax", V_2023_1, {"NPU"}}, {"ReduceMean", V_2020_4, {"CPU", "GPU"}}, - {"ReduceMean", V_2023_0, {"NPU"}}, {"ReduceMin", V_2020_4, {"CPU", "GPU"}}, - {"ReduceMin", V_2023_1, {"NPU"}}, {"ReduceProd", V_2020_4, {"CPU"}}, {"ReduceProd", V_2022_1, {"GPU"}}, - {"ReduceProd", V_2023_1, {"NPU"}}, {"ReduceSum", V_2020_4, {"CPU", "GPU"}}, - // {"ReduceSum", V_2023_1, {"NPU"}}, {"ReduceSumSquare", V_2020_4, {"CPU"}}, {"ReduceSumSquare", V_2022_1, {"CPU", "GPU"}}, - {"ReduceSumSquare", V_2023_1, {"NPU"}}, {"Relu", V_2020_4, {"CPU", "GPU"}}, - {"Relu", V_2023_0, {"NPU"}}, {"Resize", V_2020_4, {"CPU"}}, {"Resize", V_2022_1, {"GPU"}}, - {"Resize", V_2023_1, {"NPU"}}, {"Reshape", V_2020_4, {"CPU", "GPU"}}, - {"Reshape", V_2023_0, {"NPU"}}, {"ReverseSequence", V_2022_1, {"CPU", "GPU"}}, 
{"RoiAlign", V_2021_1, {"CPU", "GPU"}}, - {"RoiAlign", V_2023_1, {"NPU"}}, {"Round", V_2021_4, {"CPU", "GPU"}}, - {"Round", V_2023_1, {"NPU"}}, {"Scatter", V_2022_1, {"CPU", "GPU"}}, - {"Scatter", V_2023_1, {"NPU"}}, {"ScatterElements", V_2022_1, {"CPU", "GPU"}}, - {"ScatterElements", V_2023_1, {"NPU"}}, {"ScatterND", V_2022_1, {"CPU", "GPU"}}, - {"ScatterND", V_2023_1, {"NPU"}}, {"Selu", V_2020_4, {"CPU", "GPU"}}, - {"Selu", V_2023_1, {"NPU"}}, {"Shape", V_2020_4, {"CPU", "GPU"}}, - {"Shape", V_2023_0, {"NPU"}}, {"Shrink", V_2022_1, {"CPU", "GPU"}}, - {"Shrink", V_2023_0, {"NPU"}}, {"Sigmoid", V_2020_4, {"CPU", "GPU"}}, - {"Sigmoid", V_2023_0, {"NPU"}}, {"Sign", V_2020_4, {"CPU"}}, {"Sign", V_2022_1, {"GPU"}}, - {"Sign", V_2023_0, {"NPU"}}, {"Sin", V_2022_1, {"CPU", "GPU"}}, - {"Sin", V_2023_0, {"NPU"}}, {"Sinh", V_2020_4, {"CPU"}}, - {"Sinh", V_2023_1, {"NPU"}}, {"Size", V_2022_1, {"CPU", "GPU"}}, - {"Size", V_2023_1, {"NPU"}}, {"Slice", V_2020_4, {"CPU", "GPU"}}, - {"Slice", V_2023_0, {"NPU"}}, {"Softmax", V_2020_4, {"CPU", "GPU"}}, - {"Softmax", V_2023_0, {"NPU"}}, {"Softplus", V_2022_1, {"CPU", "GPU"}}, - {"Softplus", V_2023_0, {"NPU"}}, {"Softsign", V_2022_1, {"CPU", "GPU"}}, {"SpaceToDepth", V_2020_4, {"CPU", "GPU"}}, - {"SpaceToDepth", V_2023_0, {"NPU"}}, {"Split", V_2020_4, {"CPU", "GPU"}}, - {"Split", V_2023_0, {"NPU"}}, {"Sqrt", V_2020_4, {"CPU", "GPU"}}, - {"Sqrt", V_2023_0, {"NPU"}}, {"Squeeze", V_2020_4, {"CPU", "GPU"}}, - {"Squeeze", V_2023_0, {"NPU"}}, {"Softsign", V_2020_4, {"CPU"}}, {"Sub", V_2020_4, {"CPU", "GPU"}}, - {"Sub", V_2023_0, {"NPU"}}, {"Sum", V_2020_4, {"CPU", "GPU"}}, - {"Sum", V_2023_0, {"NPU"}}, {"Tan", V_2020_4, {"CPU", "GPU"}}, - {"Tan", V_2023_1, {"NPU"}}, {"Tanh", V_2020_4, {"CPU", "GPU"}}, - {"Tanh", V_2023_0, {"NPU"}}, {"ThresholdedRelu", V_2022_1, {"CPU", "GPU"}}, - {"ThresholdedRelu", V_2023_0, {"NPU"}}, {"Tile", V_2021_3, {"CPU", "GPU"}}, - {"Tile", V_2023_0, {"NPU"}}, {"Transpose", V_2020_4, {"CPU", "GPU"}}, - {"Transpose", V_2023_0, {"NPU"}}, {"Trilu", V_2023_0, {"CPU", "GPU"}}, - {"Trilu", V_2023_1, {"NPU"}}, {"TopK", V_2020_4, {"CPU", "GPU"}}, - {"TopK", V_2023_0, {"NPU"}}, {"Upsample", V_2020_4, {"CPU", "GPU"}}, {"Unsqueeze", V_2020_4, {"CPU", "GPU"}}, - {"Unsqueeze", V_2023_0, {"NPU"}}, {"Where", V_2022_1, {"CPU", "GPU"}}, - {"Where", V_2023_0, {"NPU"}}, // Added for whisper decoder model. 
{"Xor", V_2022_1, {"CPU", "GPU"}}, - {"Xor", V_2023_1, {"NPU"}}, }; void DataOps::populate_types_supported() { @@ -370,6 +251,8 @@ void DataOps::populate_types_supported() { std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)); supported_types_initializer_.insert( std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64)); + supported_types_initializer_.insert( + std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT16)); supported_types_initializer_.insert( std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)); supported_types_initializer_.insert( @@ -387,6 +270,8 @@ void DataOps::populate_types_supported() { std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8)); supported_types_npu_.insert( std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16)); + supported_types_npu_.insert( + std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT16)); supported_types_npu_.insert( std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)); supported_types_npu_.insert( @@ -402,6 +287,8 @@ void DataOps::populate_types_supported() { std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)); supported_types_cpu_.insert( std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16)); + supported_types_cpu_.insert( + std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT16)); supported_types_cpu_.insert( std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8)); supported_types_cpu_.insert( @@ -437,13 +324,12 @@ void DataOps::populate_op_mode_supported() { no_dimension_supported_.push_back({"DequantizeLinear", V_2021_4, {"All"}}); no_dimension_supported_.push_back({"Equal", V_2022_1, {"CPU"}}); no_dimension_supported_.push_back({"Equal", V_2023_0, {"GPU"}}); + no_dimension_supported_.push_back({"Expand", V_2023_3, {"CPU"}}); no_dimension_supported_.push_back({"Floor", V_2020_4, {"All"}}); no_dimension_supported_.push_back({"Gather", V_2020_4, {"All"}}); - no_dimension_supported_.push_back({"Greater", V_2023_0, {"NPU"}}); no_dimension_supported_.push_back({"Identity", V_2023_0, {"All"}}); no_dimension_supported_.push_back({"Less", V_2022_1, {"CPU"}}); no_dimension_supported_.push_back({"Loop", V_2021_4, {"All"}}); - no_dimension_supported_.push_back({"Max", V_2023_0, {"NPU"}}); no_dimension_supported_.push_back({"Min", V_2020_4, {"All"}}); no_dimension_supported_.push_back({"Mul", V_2020_4, {"All"}}); no_dimension_supported_.push_back({"Neg", V_2023_0, {"CPU", "GPU"}}); @@ -476,9 +362,8 @@ void DataOps::populate_op_mode_supported() { { UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3}, [this](const Node* node, const InitializedTensorSet&) { - // Abs is not supproted with INT8 or INT32 as input data type on GPU and NPU - if ((device_id_.find("GPU") != std::string::npos) || - (device_id_.find("NPU") != std::string::npos)) { + // Abs is not supproted with INT8 or INT32 as input data type on GPU + if ((device_id_.find("GPU") != std::string::npos)) { for (size_t i = 0; i < node->InputDefs().size(); i++) { if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8 || @@ -706,7 +591,7 @@ void 
DataOps::populate_op_mode_supported() { op_list_.insert({"PRelu", obj}); } { - UnsupportedOpMode obj = {{V_2023_0, V_2023_1, V_2023_2, V_2023_3}, + UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0}, [this](const Node* node, const InitializedTensorSet&) { const auto& input_arg = node->InputDefs()[1]; auto shape = input_arg->Shape(); @@ -821,7 +706,7 @@ void DataOps::populate_op_mode_supported() { op_list_.insert({"Squeeze", obj}); } { - UnsupportedOpMode obj = {{V_2023_0, V_2023_1, V_2023_2, V_2023_3}, + UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0}, [this](const Node* node, const InitializedTensorSet&) { // If the operator is unsqueeze // If axes is an input, then we cannot produce a static graph. @@ -836,7 +721,7 @@ void DataOps::populate_op_mode_supported() { op_list_.insert({"Unsqueeze", obj}); } { - UnsupportedOpMode obj = {{V_2023_0, V_2023_1, V_2023_2, V_2023_3}, + UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0}, [this](const Node* node, const InitializedTensorSet&) { // check for attributes auto& upsample_attr = node->GetAttributes(); @@ -961,7 +846,7 @@ bool DataOps::type_is_supported(const NodeArg* node_arg, bool is_initializer) { } else { auto dtype = type_proto->tensor_type().elem_type(); - if (device_id_.find("NPU") != std::string::npos || device_id_.find("HETERO") != std::string::npos || + if (device_id_.find("HETERO") != std::string::npos || device_id_.find("MULTI") != std::string::npos || device_id_.find("AUTO") != std::string::npos) { for (auto const& var : supported_types_npu_) { if ((var.first <= version_id_) && @@ -1063,8 +948,7 @@ bool DataOps::dimension_unsupported(const Node* node) { return true; } -bool DataOps::node_is_supported(const std::map>& op_map, - const NodeIndex node_idx) { +bool DataOps::node_is_supported(const NodeIndex node_idx) { const auto& node = graph_viewer_.GetNode(node_idx); const auto& optype = node->OpType(); @@ -1174,37 +1058,14 @@ bool DataOps::node_is_supported(const std::mapOpType()); - if (opset == op_map.end()) { -#ifndef NDEBUG - if (openvino_ep::backend_utils::IsDebugEnabled()) { - std::cout << "Failed in Unsupported onnx model domain" << std::endl; - } -#endif - return false; - } - if (opset->second.find(optype) == opset->second.end() && op_fun == ops_supported_as_function.end()) { -#ifndef NDEBUG - if (openvino_ep::backend_utils::IsDebugEnabled()) { - std::cout << "The operator is not available in OpenVINO ngraph operators list" - << "nor the operator is a special ONNX function" - << std::endl; - } -#endif - return false; - } return true; } std::vector DataOps::GetUnsupportedNodeIndices(std::unordered_set& ng_required_initializers) { - const auto ng_supported_ops = GetNgSupportedOps(GetOnnxOpSet(graph_viewer_)); - std::vector unsupported_nodes_idx; for (const auto& node_idx : graph_viewer_.GetNodesInTopologicalOrder()) { - if (node_is_supported(ng_supported_ops, node_idx)) { + if (node_is_supported(node_idx)) { // Collect inputs that are initializers graph_viewer_.GetNode(node_idx)->ForEachDef([&ng_required_initializers, this](const NodeArg& node_arg, bool is_input) { diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h index 87688601ad692..4a498170f4782 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h @@ -26,7 +26,8 @@ enum versionNum { V_2023_0, V_2023_1, V_2023_2, - V_2023_3 + V_2023_3, + V_2024_0 }; using 
VersionNum = enum versionNum; @@ -67,9 +68,7 @@ class DataOps { bool dimension_unsupported(const Node* node); bool unsupported_op_mode(const Node* node); bool type_is_supported(const NodeArg* node_arg, bool is_initializer); - bool node_is_supported(const std::map>& op_map, - const NodeIndex node_idx); + bool node_is_supported(const NodeIndex node_idx); public: DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, const std::string dev_id, const std::string device_precision) diff --git a/onnxruntime/core/providers/openvino/ov_versions/utils.cc b/onnxruntime/core/providers/openvino/ov_versions/utils.cc index ee0bfddb7dc83..d83c726fadc90 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/utils.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/utils.cc @@ -11,14 +11,6 @@ #pragma GCC diagnostic ignored "-Wunused-parameter" #endif -#include "openvino/core/deprecated.hpp" -#define IN_OV_COMPONENT -#define NGRAPH_LEGACY_HEADER_INCLUDED -#include - -#undef NGRAPH_LEGACY_HEADER_INCLUDED -#undef IN_OV_COMPONENT - #if defined(_MSC_VER) #pragma warning(default : 4244 4245) #elif __GNUC__ @@ -95,20 +87,6 @@ int GetOnnxOpSet(const GraphViewer& graph_viewer) { return dm_to_ver.at(kOnnxDomain); } -std::map> GetNgSupportedOps(const int onnx_opset) { - std::map> ng_supported_ops; - OPENVINO_SUPPRESS_DEPRECATED_START - ng_supported_ops.emplace(kOnnxDomain, ngraph::onnx_import::get_supported_operators(onnx_opset, kOnnxDomain)); - - const std::set ng_disabled_ops = {"LSTM"}; // Place-holder for ops not supported. - - for (const auto& disabled_op : ng_disabled_ops) { - ng_supported_ops.at(kOnnxDomain).erase(disabled_op); - } - OPENVINO_SUPPRESS_DEPRECATED_END - return ng_supported_ops; -} - /** * Returns a vector clusters(or node_idx). For each unsupported node, the graph is split into 3 parts. * supported_cluster + (UNsupported_node + rest_of_the_graph). 
diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.h b/onnxruntime/python/onnxruntime_pybind_state_common.h
index 6827f2c9dfd91..22314610dbee9 100644
--- a/onnxruntime/python/onnxruntime_pybind_state_common.h
+++ b/onnxruntime/python/onnxruntime_pybind_state_common.h
@@ -60,11 +60,8 @@ struct OrtStatus {
 #elif OPENVINO_CONFIG_GPU_FP16
 #define BACKEND_OPENVINO "-OPENVINO_GPU_FP16"

-#elif OPENVINO_CONFIG_NPU_FP16
-#define BACKEND_OPENVINO "-OPENVINO_NPU_FP16"
-
-#elif OPENVINO_CONFIG_NPU_U8
-#define BACKEND_OPENVINO "-OPENVINO_NPU_U8"
+#elif OPENVINO_CONFIG_NPU
+#define BACKEND_OPENVINO "-OPENVINO_NPU"

 #elif OPENVINO_CONFIG_MULTI
 #define BACKEND_OPENVINO "-OPENVINO_MULTI"
diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc
index 71d260a18ce7b..1164b939153cd 100644
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@@ -247,7 +247,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
       if (key == "device_type") {
         std::set<std::string> ov_supported_device_types = {"CPU_FP32", "CPU_FP16", "GPU_FP32",
                                                            "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16",
-                                                           "GPU.0_FP16", "GPU.1_FP16"};
+                                                           "GPU.0_FP16", "GPU.1_FP16", "NPU"};
         if (ov_supported_device_types.find(value) != ov_supported_device_types.end()) {
           ov_options[key] = value;
         } else if (value.find("HETERO:") == 0) {
@@ -260,7 +260,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
           ORT_THROW(
               "[ERROR] [OpenVINO] You have selected a wrong configuration value for the key 'device_type'. "
               "Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', "
-              "'GPU.0_FP16', 'GPU.1_FP16' or from"
+              "'GPU.0_FP16', 'GPU.1_FP16', 'NPU' or from"
               " HETERO/MULTI/AUTO options available. \n");
         }
       } else if (key == "device_id") {
diff --git a/onnxruntime/test/python/onnx_backend_test_series.py b/onnxruntime/test/python/onnx_backend_test_series.py
index c48b07422d452..e441230537410 100644
--- a/onnxruntime/test/python/onnx_backend_test_series.py
+++ b/onnxruntime/test/python/onnx_backend_test_series.py
@@ -140,8 +140,8 @@ def create_backend_test(test_name=None):
         if backend.supports_device("OPENVINO_CPU_FP16"):
             current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_CPU_FP16")

-        if backend.supports_device("OPENVINO_NPU_FP16"):
-            current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_NPU_FP16")
+        if backend.supports_device("OPENVINO_NPU"):
+            current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_NPU")

         if backend.supports_device("OPENVINO"):
             current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_opset18")
diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc
index ca089c42032b1..cb85caa3c9089 100644
--- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc
+++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc
@@ -495,7 +495,7 @@
     "test_scan9_sum_cpu", // Disabled due to output mismatch with tolerance.
"test_reduce_max_bool_inputs_cpu" ], - "current_failing_tests_OPENVINO_NPU_FP16": [ + "current_failing_tests_OPENVINO_NPU": [ "^test_prelu_broadcast", "test_loop11_cpu" ], diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 1056c4ed84510..99ea53c2222a1 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -75,13 +75,14 @@ def _str_to_bool(s): def _openvino_verify_device_type(device_read): - choices = ["CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"] + choices = ["CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16", "NPU"] choices1 = [ "CPU_FP32_NO_PARTITION", "CPU_FP16_NO_PARTITION", "GPU_FP32_NO_PARTITION", "GPU_FP16_NO_PARTITION", + "NPU_NO_PARTITION", ] status_hetero = True res = False @@ -96,7 +97,7 @@ def _openvino_verify_device_type(device_read): if len(comma_separated_devices) < 2: print("At least two devices required in Hetero/Multi/Auto Mode") status_hetero = False - dev_options = ["CPU", "GPU"] + dev_options = ["CPU", "GPU", "NPU"] for dev in comma_separated_devices: if dev not in dev_options: status_hetero = False @@ -107,7 +108,7 @@ def invalid_hetero_build(): print("specify the keyword HETERO or MULTI or AUTO followed by the devices ") print("in the order of priority you want to build\n") print("The different hardware devices that can be added in HETERO or MULTI or AUTO") - print("are ['CPU','GPU'] \n") + print("are ['CPU','GPU','NPU'] \n") print("An example of how to specify the hetero build type. Ex: HETERO:GPU,CPU \n") print("An example of how to specify the MULTI build type. Ex: MULTI:GPU,CPU \n") print("An example of how to specify the AUTO build type. Ex: AUTO:GPU,CPU \n") @@ -1222,6 +1223,7 @@ def generate_build_tree( "-Donnxruntime_USE_OPENVINO_GPU_FP16=" + ("ON" if args.use_openvino == "GPU_FP16" else "OFF"), "-Donnxruntime_USE_OPENVINO_CPU_FP32=" + ("ON" if args.use_openvino == "CPU_FP32" else "OFF"), "-Donnxruntime_USE_OPENVINO_CPU_FP16=" + ("ON" if args.use_openvino == "CPU_FP16" else "OFF"), + "-Donnxruntime_USE_OPENVINO_NPU=" + ("ON" if args.use_openvino == "NPU" else "OFF"), "-Donnxruntime_USE_OPENVINO_GPU_FP32_NP=" + ("ON" if args.use_openvino == "GPU_FP32_NO_PARTITION" else "OFF"), "-Donnxruntime_USE_OPENVINO_GPU_FP16_NP=" @@ -1230,6 +1232,8 @@ def generate_build_tree( + ("ON" if args.use_openvino == "CPU_FP32_NO_PARTITION" else "OFF"), "-Donnxruntime_USE_OPENVINO_CPU_FP16_NP=" + ("ON" if args.use_openvino == "CPU_FP16_NO_PARTITION" else "OFF"), + "-Donnxruntime_USE_OPENVINO_NPU_NP=" + + ("ON" if args.use_openvino == "NPU_NO_PARTITION" else "OFF"), "-Donnxruntime_USE_OPENVINO_HETERO=" + ("ON" if args.use_openvino.startswith("HETERO") else "OFF"), "-Donnxruntime_USE_OPENVINO_DEVICE=" + (args.use_openvino), "-Donnxruntime_USE_OPENVINO_MULTI=" + ("ON" if args.use_openvino.startswith("MULTI") else "OFF"), diff --git a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino index a0ba5ea232ca3..57c630027e75b 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino +++ b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_openvino @@ -14,8 +14,7 @@ RUN apt update && apt install -y libnuma1 ocl-icd-libopencl1 && \ ENV INTEL_OPENVINO_DIR /opt/intel/openvino_${OPENVINO_VERSION} ENV LD_LIBRARY_PATH $INTEL_OPENVINO_DIR/runtime/lib/intel64:$INTEL_OPENVINO_DIR/runtime/3rdparty/tbb/lib:/usr/local/openblas/lib:$LD_LIBRARY_PATH -ENV InferenceEngine_DIR $INTEL_OPENVINO_DIR/runtime/cmake -ENV ngraph_DIR $INTEL_OPENVINO_DIR/runtime/cmake +ENV OpenVINO_DIR 
$INTEL_OPENVINO_DIR/runtime/cmake ENV IE_PLUGINS_PATH $INTEL_OPENVINO_DIR/runtime/lib/intel64 ENV DEBIAN_FRONTEND=noninteractive diff --git a/tools/ci_build/github/linux/docker/Dockerfile_manylinux2014_openvino_multipython b/tools/ci_build/github/linux/docker/Dockerfile_manylinux2014_openvino_multipython index bc0b412773286..c20366515ea94 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile_manylinux2014_openvino_multipython +++ b/tools/ci_build/github/linux/docker/Dockerfile_manylinux2014_openvino_multipython @@ -48,8 +48,7 @@ RUN cd $WORKDIR && cd openvino && mkdir build && cd build && \ ENV INTEL_OPENVINO_DIR /home/onnxruntimedev/openvino_$OV_VERSION ENV LD_LIBRARY_PATH $INTEL_OPENVINO_DIR/runtime/lib/intel64:$INTEL_OPENVINO_DIR/runtime/3rdparty/tbb/lib:/usr/local/openblas/lib:$LD_LIBRARY_PATH ENV TBB_LIBS $INTEL_OPENVINO_DIR/runtime/3rdparty/tbb/lib -ENV InferenceEngine_DIR $INTEL_OPENVINO_DIR/runtime/cmake -ENV ngraph_DIR $INTEL_OPENVINO_DIR/runtime/cmake +ENV OpenVINO_DIR $INTEL_OPENVINO_DIR/runtime/cmake ENV IE_PLUGINS_PATH $INTEL_OPENVINO_DIR/runtime/lib/intel64 ENV OPENVINO_MANYLINUX 1 diff --git a/tools/nuget/generate_nuspec_for_native_nuget.py b/tools/nuget/generate_nuspec_for_native_nuget.py index 09fe99d36cc34..4462a2a8530c7 100644 --- a/tools/nuget/generate_nuspec_for_native_nuget.py +++ b/tools/nuget/generate_nuspec_for_native_nuget.py @@ -750,32 +750,6 @@ def generate_files(line_list, args): + '\\native" />' ) - if is_windows(): - dll_list_path = os.path.join(openvino_path, "runtime\\bin\\intel64\\Release\\") - tbb_list_path = os.path.join(openvino_path, "runtime\\3rdparty\\tbb\\bin\\") - - for dll_element in os.listdir(dll_list_path): - if dll_element.endswith("dll"): - files_list.append( - "' - ) - - for tbb_element in os.listdir(tbb_list_path): - if tbb_element.endswith("dll"): - files_list.append( - "' - ) - if args.execution_provider == "cuda" or is_cuda_gpu_win_sub_package and not is_ado_packaging_build: files_list.append( "