diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 684f94eed54c3..8887b183c4396 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -43,6 +43,9 @@ BackendManager::BackendManager(SessionContext& session_context, session_context_(session_context), shared_context_{shared_context} { subgraph_context_.is_ep_ctx_graph = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(subgraph); + // If the graph contains a OVIR wrapped node, we check if it has matching xml file name attribute + subgraph_context_.is_ep_ctx_ovir_encapsulated = ep_ctx_handle_.CheckEPCacheContextAttribute(subgraph, + session_context_.onnx_model_path_name.filename().replace_extension("xml").string()); subgraph_context_.model_precision = [&](const GraphViewer& graph_viewer) { // return empty if graph has no inputs or if types are not one of FP32/FP16 @@ -192,9 +195,10 @@ BackendManager::BackendManager(SessionContext& session_context, } } } - if (session_context_.so_context_enable && !subgraph_context_.is_ep_ctx_graph) { + if (session_context_.so_context_enable && + (subgraph_context_.is_ep_ctx_ovir_encapsulated || !subgraph_context_.is_ep_ctx_graph)) { auto status = onnxruntime::openvino_ep::BackendManager::ExportCompiledBlobAsEPCtxNode(subgraph); - if ((!status.IsOK())) { + if (!status.IsOK()) { ORT_THROW(status); } } diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc index 7598f7cfffba5..73fbe9a0fa76f 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.cc +++ b/onnxruntime/core/providers/openvino/backend_utils.cc @@ -400,6 +400,33 @@ void DestroyOVTensors(SharedContext::SharedWeights::Metadata::Map& metadata_map) metadata_map.clear(); } +bool IsModelStreamXML(std::istream& model_stream) { + std::streampos originalPos = model_stream.tellg(); + + // first, get the total size of model_stream in bytes + model_stream.seekg(0, std::ios::end); + auto end_pos = model_stream.tellg(); + // Restore the stream position + model_stream.seekg(originalPos); + auto total_size = end_pos - originalPos; + + // Choose 32 bytes to hold content of: + // ' header_check_len); + + // read 32 bytes into header + std::string header(header_check_len, '\0'); + model_stream.read(&header[0], header_check_len); + // Clear any read errors + model_stream.clear(); + // Restore the stream position + model_stream.seekg(originalPos); + + // return true if the header starts with '& performanceMap, void printPerformanceCounts(OVInferRequestPtr request, std::ostream& stream, std::string deviceName); +bool IsModelStreamXML(std::istream& model_stream); + } // namespace backend_utils } // namespace openvino_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index 1b7ba1a1b5a82..00a18bb0a45b6 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -72,12 +72,38 @@ BasicBackend::BasicBackend(std::unique_ptr& model_pr !session_context_.so_disable_cpu_ep_fallback && !subgraph_context_.is_ep_ctx_graph); if (subgraph_context_.is_ep_ctx_graph) { - // If the blob is held in an EPContext node, then skip FE+Compile - // and directly move on to creating a backend with the executable blob - exe_network_ = OVCore::Get()->ImportModel(*model_stream, - hw_target, - device_config, - subgraph_context_.subgraph_name); + if (subgraph_context_.is_ep_ctx_ovir_encapsulated) { + // model_file_path will use so_context_file_path if the onnx_model_path_name is not available, + // especially in case of CreateSessionFormArray() where user must explicitly + // specify absolute path for so_context_file_path. + auto model_file_path = [this]() { + if (!session_context_.onnx_model_path_name.empty() && + std::filesystem::exists(session_context_.onnx_model_path_name)) return session_context_.onnx_model_path_name; + + ORT_ENFORCE(!session_context_.so_context_file_path.empty() && + std::filesystem::path(session_context_.so_context_file_path).is_absolute() && + std::filesystem::exists(session_context_.so_context_file_path), log_tag + + "Context file path must be non-empty & absolute, when using CreateSessionFormArray() API explicitly." + " Please set a valid absolute path for ep.context_file_path in session options."); + // Return absolute context file path as input to ImportEPCtxOVIREncapsulation() function. + return session_context_.so_context_file_path; + + }; + // If the EPContext node with OVIR Encapsulation, then create + // an executable network from EP_CACHE_CONTEXT using read_model() & compile_model() + exe_network_ = OVCore::Get()->ImportEPCtxOVIREncapsulation(*model_stream, + hw_target, + device_config, + enable_causallm, + model_file_path()); + } else { + // If the blob is held in an EPContext node, then skip FE+Compile + // and directly move on to creating a backend with the executable blob + exe_network_ = OVCore::Get()->ImportModel(*model_stream, + hw_target, + device_config, + subgraph_context_.subgraph_name); + } model_stream.reset(); // Delete stream after it is no longer needed } else if (!session_context_.has_external_weights && !subgraph_context_.has_dynamic_input_shape && diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index 09d48a5e916e1..e2369cf728ea6 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -137,6 +137,7 @@ struct SubGraphContext { string_index_map_t output_names; std::string model_precision; bool is_ep_ctx_graph = false; + bool is_ep_ctx_ovir_encapsulated = false; }; } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc index 7bd4f8d96cc55..49a4cb0a7e95a 100644 --- a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc @@ -7,6 +7,7 @@ #include #include "core/providers/openvino/onnx_ctx_model_helper.h" +#include "core/providers/openvino/backend_utils.h" namespace onnxruntime { namespace openvino_ep { @@ -123,6 +124,16 @@ std::unique_ptr EPCtxHandler::GetModelBlobStream(const std::filesy ORT_ENFORCE(std::filesystem::exists(blob_filepath), "Blob file not found: ", blob_filepath.string()); result.reset((std::istream*)new std::ifstream(blob_filepath, std::ios_base::binary | std::ios_base::in)); } + + bool isXML = backend_utils::IsModelStreamXML(*result); + if (!isXML) { + // If the model stream is not an XML (i.e. precompiled blob), the OpenVINO SDK version that it was + // exported with must match the version that is currently running. + ORT_ENFORCE((attrs.count(EP_SDK_VER) == 1) && (attrs.at(EP_SDK_VER).s() == openvino_sdk_version_), + "EPCtx blob was exported / is compatible with OpenVINO SDK version " + attrs.at(EP_SDK_VER).s() + + ", but OpenVINO SDK version currently in use is " + openvino_sdk_version_); + } + LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node"; return result; } @@ -142,7 +153,6 @@ bool EPCtxHandler::CheckForOVEPCtxNode(const Node& node) const { if (node.OpType() == EPCONTEXT_OP) { auto& attrs = node.GetAttributes(); bool result = (attrs.count(SOURCE) == 1) && (attrs.at(SOURCE).s() == kOpenVINOExecutionProvider); - result &= (attrs.count(EP_SDK_VER) == 1) && (attrs.at(EP_SDK_VER).s() == openvino_sdk_version_); result &= attrs.count(EMBED_MODE) == 1; result &= attrs.count(EP_CACHE_CONTEXT) == 1; return result; @@ -155,5 +165,32 @@ InlinedVector EPCtxHandler::GetEPCtxNodes() const { return InlinedVector(epctx_nodes.begin(), epctx_nodes.end()); } +// Check if graph's only node is EPContext & EP_CACHE_CONTEXT attribute has target extension. +// @param graph_viewer: The graph to inspect. +// @param target_attr_extn: The string to search for in the EP_CACHE_CONTEXT attribute. +// @return true if the node exists, is of the correct type, and the attribute contains the extension; false otherwise. +bool EPCtxHandler::CheckEPCacheContextAttribute(const GraphViewer& graph_viewer, const std::string& target_attr_extn) const { + // Only check if the graph has exactly one node + if (graph_viewer.NumberOfNodes() != 1) { + return false; + } + // Get the first node in topological order + auto first_index = *graph_viewer.GetNodesInTopologicalOrder().begin(); + const Node* node = graph_viewer.GetNode(first_index); + if (!node) { + return false; + } + // Check OpType and required attributes + if (node->OpType() != EPCONTEXT_OP) { + return false; + } + const auto& attrs = node->GetAttributes(); + auto it = attrs.find(EP_CACHE_CONTEXT); + if (it != attrs.end()) { + return it->second().s().find(target_attr_extn) != std::string::npos; + } + return false; +} + } // namespace openvino_ep } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h index ff978bd6534d8..b9ddb40a7a233 100644 --- a/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h +++ b/onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h @@ -33,6 +33,7 @@ class EPCtxHandler { std::string&& model_blob_str) const; std::unique_ptr GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const; InlinedVector GetEPCtxNodes() const; + bool CheckEPCacheContextAttribute(const GraphViewer& graph_viewer, const std::string& target_attr_extn) const; private: const std::string openvino_sdk_version_; diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc index 38b5f9a52eb3e..306fa6113b347 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.cc +++ b/onnxruntime/core/providers/openvino/ov_interface.cc @@ -47,7 +47,6 @@ void printDebugInfo(const ov::CompiledModel& obj) { continue; OPENVINO_SUPPRESS_DEPRECATED_END std::cout << " " << item2.first << ": " << item2.second.as() << std::endl; - } } } else { std::cout << " " << cfg << ": " << prop.as() << std::endl; @@ -101,10 +100,10 @@ OVExeNetwork OVCore::StatefulCompileModel(std::shared_ptr& model, LogBasicModelInfo(model); } - LOGS_DEFAULT(INFO) << log_tag << "Converting from Stateless OV Model to Stateful OV Model" << std::endl; bool model_status = IsStateful(model); LOGS_DEFAULT(INFO) << log_tag << "Model IsStateful() Status:\t" << (model_status ? "True" : "False"); if (!model_status) { + LOGS_DEFAULT(INFO) << log_tag << "Converting from Stateless OV Model to Stateful OV Model" << std::endl; PatchStatefulDecoder(model); } @@ -198,15 +197,69 @@ OVExeNetwork OVCore::ImportModel(std::istream& model_stream, return OvExceptionBoundary([&]() { ov::CompiledModel obj; obj = core.import_model(model_stream, hw_target, device_config); + OVExeNetwork exe(obj, hw_target); #ifndef NDEBUG printDebugInfo(exe.Get()); #endif - OVExeNetwork exe(obj, hw_target); return exe; }, "Exception while Loading Network for graph {}", name); } +OVExeNetwork OVCore::ImportEPCtxOVIREncapsulation(std::istream& model_stream, + std::string& hw_target, + const ov::AnyMap& device_config, + bool enable_causallm, + std::filesystem::path model_file_path) { + return OvExceptionBoundary([&]() { + OVExeNetwork exe; + + bool isXML = backend_utils::IsModelStreamXML(model_stream); + + // Helper function to check if file exists and is readable + const auto check_file_access = [&model_file_path](const std::filesystem::path& path) { + try { + if (!std::filesystem::exists(path) || std::filesystem::is_empty(path)) { + ORT_THROW(log_tag + "Required file missing or empty: " + path.string()); + } + std::ifstream file(path); + if (!file) { + ORT_THROW(log_tag + "Required file not readable: " + path.string()); + } + } catch (const std::exception& e) { + ORT_THROW(log_tag + "Exception while checking file access for: " + path.string() + " - " + e.what()); + } + }; + + if (isXML) { + // If the model is XML, we need to load it with the XML content in read_model() + // where weights from bin file is directly consumed + auto xml_file_path = model_file_path.parent_path() / (model_file_path.stem().string() + ".xml"); + + check_file_access(xml_file_path); + + LOGS_DEFAULT(INFO) << log_tag << "Reading OVIR from XML file path: " << xml_file_path.string(); + + // Load the model explicitly with XML contents + std::shared_ptr model = core.read_model(xml_file_path.string()); + + if (enable_causallm) { + exe = OVCore::Get()->StatefulCompileModel(model, hw_target, device_config); + } else { + auto obj = core.compile_model(model, hw_target, device_config); + exe = OVExeNetwork(obj, hw_target); + } + } + +#ifndef NDEBUG + printDebugInfo(exe.Get()); +#endif + return exe; + }, + "Exception while Loading Network from OVIR model file: {}", model_file_path.string()); +} + + void OVCore::SetCache(const std::string& cache_dir_path) { core.set_property(ov::cache_dir(cache_dir_path)); } diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h index 581da59bb4cae..0e019342bc86e 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.h +++ b/onnxruntime/core/providers/openvino/ov_interface.h @@ -86,6 +86,12 @@ struct OVCore : WeakSingleton { std::string hw_target, const ov::AnyMap& device_config, std::string name); + OVExeNetwork ImportEPCtxOVIREncapsulation(std::istream& model_stream, + std::string& hw_target, + const ov::AnyMap& device_config, + bool enable_causallm, + std::filesystem::path model_file_path); + std::vector GetAvailableDevices() const; std::vector GetAvailableDevices(const std::string& device_type) const; void SetCache(const std::string& cache_dir_path);