diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
index 99f28439db53a..6f8c027c1261e 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -394,6 +394,7 @@ static bool IsModelBF16(const onnxruntime::GraphViewer& graph_viewer) {
   return false;
 }
 
+#if ((OPENVINO_VERSION_MAJOR < 2025) || ((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR < 4)))
 static bool Is16BitTensor(const onnxruntime::NodeArg* node_arg) {
   const auto* type_proto = node_arg ? node_arg->TypeAsProto() : nullptr;
   return type_proto && type_proto->has_tensor_type() &&
@@ -431,6 +432,7 @@ static bool IsQDQGraphWithUint16OrInt16(const onnxruntime::GraphViewer& graph_vi
   }
   return false;
 }
+#endif
 
 static void DumpOpenVINOEPModel([[maybe_unused]] const std::filesystem::path& onnx_model_path_name,
                                 [[maybe_unused]] ONNX_NAMESPACE::ModelProto* model_proto,
@@ -490,10 +492,6 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
   }
 #endif
 
-  // Check if the graph is QDQ and has int16 or uint16 quantization
-  // If so, we will apply the QDQ scales fix transformation (for GPU device only)
-  bool is_qdq_graph_uint16_or_int16 = IsQDQGraphWithUint16OrInt16(subgraph);
-
   const auto& onnx_model_path_name = subgraph.ModelPath();
   // QDQ stripping enabled only for the NPU and experimentally on the GPU
   if ((session_context_.device_type.find("NPU") != std::string::npos) &&
@@ -506,8 +504,11 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
     DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
     ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
     return model_proto;
-  } else if ((session_context_.device_type.find("GPU") != std::string::npos) &&
-             is_qdq_graph_uint16_or_int16) {
+  }
+#if ((OPENVINO_VERSION_MAJOR < 2025) || ((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR < 4)))
+  // Enable OVEP-level QDQ stripping only for OV versions that don't have it
+  else if ((session_context_.device_type.find("GPU") != std::string::npos) &&
+           IsQDQGraphWithUint16OrInt16(subgraph)) {
     // Create a copy of the model
     std::unique_ptr<onnxruntime::Model> model;
     Status status = qdq_scales_fix::Transform(subgraph, logger, model);
@@ -517,7 +518,9 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
     DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
     ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
     return model_proto;
-  } else if (IsModelBF16(subgraph)) {
+  }
+#endif
+  else if (IsModelBF16(subgraph)) {
     LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP bfloat16->float16 optimization pass is enabled";
     std::unique_ptr<onnxruntime::Model> model;
     Status status = bfloat16_fix::Transform(subgraph, logger, model);