78 changes: 59 additions & 19 deletions onnxruntime/core/providers/openvino/backend_utils.cc
@@ -20,11 +20,11 @@
 namespace onnxruntime {
 namespace openvino_ep {
 
-SharedContext::SharedWeights::WeightsFile::WeightsFile(std::filesystem::path filename) : file_(filename, std::ios::in | std::ios::binary) {
+SharedContext::SharedWeights::WeightsFile::WeightsFile(std::filesystem::path filename) : file_(filename, std::ios::in | std::ios::binary), file_path_(filename) {
   try {
     file_.exceptions(std::ifstream::failbit | std::ifstream::badbit);
-    weights_size_ = file_.seekg(0, std::ios::end).tellg();
-  } catch (std::ifstream::failure& e) {
+    weights_size_ = std::filesystem::file_size(filename);
+  } catch (std::exception& e) {
     ORT_THROW("Error: Failed to open weight file at ", filename.string(), " ", e.what());
   }
 }
@@ -35,6 +35,32 @@
   file_.read(reinterpret_cast<char*>(data), size);
 }
 
+void* SharedContext::SharedWeights::WeightsFile::TryGetOrCreateDeviceMapping(std::optional<ov::RemoteContext>& remote_context) {
+  std::string dev_name{};
+  if (remote_context) {
+    dev_name = remote_context->get_device_name();
+  }
+
+  auto [it, inserted] = imported_device_tensors_.emplace(dev_name, MappingContainer{});
+  if (inserted) {
+    if (dev_name == "NPU") {
+#if OPENVINO_VERSION_AT_LEAST(2025, 3)
+      // Try to import the memory-mapped file as a remote tensor.
+      ORT_ENFORCE(remote_context, "Error: Remote context is required for NPU device.");
+      auto npu_context = remote_context->as<ov::intel_npu::level_zero::ZeroContext>();
+      auto&& l0_tensor = npu_context.create_tensor(ov::element::Type_t::u8, {weights_size_}, ov::intel_npu::FileDescriptor(file_path_));
+      it->second = MappingContainer{.ptr_ = l0_tensor.get(), .tensor_ = l0_tensor};
+#endif
+    } else if (dev_name.empty()) {
+      // CPU/virtual device case: create a CPU tensor memory-mapped from the file.
+      auto&& mmaped_tensor = ov::read_tensor_data(file_path_);
+      it->second = MappingContainer{.ptr_ = mmaped_tensor.data(), .tensor_ = mmaped_tensor};
+    }
+  }
+
+  return it->second.ptr_;
+}
+
 std::ostream& operator<<(std::ostream& stream, const SharedContext::SharedWeights::Metadata::Map& metadata) {
   try {
     stream << metadata.size();
@@ -405,29 +431,43 @@
 void CreateOVTensors(const std::string& device_name,
                      SharedContext::SharedWeights::Metadata::Map& metadata_map,
                      SharedContext::SharedWeights::WeightsFile& weights) {
+  // Get remote context if available
+  std::optional<ov::RemoteContext> opt_remote_ctx;
+  try {
+    opt_remote_ctx = OVCore::Get()->core.get_default_context(device_name);
+  } catch (const std::exception&) {
+    // Remote context not available
+  }
+
   for (auto& [key, value] : metadata_map) {
     if (value.tensor) continue;
 
     // Get element data type
     auto onnx_element_type = (ONNX_NAMESPACE::TensorProto_DataType)value.element_type;
 
-    ov::element::Type ov_elementType = GetOpenVINOElementType(onnx_element_type);  // Map to OpenVINO data type
-
-    // Create OpenVINO Tensor
-    if (device_name == "NPU") {
-      // Use remote tensors
-      auto npu_context = OVCore::Get()->core.get_default_context("NPU").as<ov::intel_npu::level_zero::ZeroContext>();
-      auto&& remote_tensor = npu_context.create_l0_host_tensor(ov_elementType, value.dimensions, ov::intel_npu::TensorType::INPUT);
-
-      // Copy data to remote tensor
-      weights.load_weights(value.data_offset, remote_tensor.get(), value.size);
-      value.tensor = std::make_shared<ov::Tensor>(remote_tensor);
+    ov::element::Type ov_elementType = GetOpenVINOElementType(onnx_element_type);
+
+    // Try to get memory-mapped weights
+    ov::Tensor tensor;
+    uint8_t* mmaped_weights = static_cast<uint8_t*>(weights.TryGetOrCreateDeviceMapping(opt_remote_ctx));
+
+    if (mmaped_weights) {
+      // We have memory-mapped weights; create a Tensor view into them for this value.
+      ORT_ENFORCE(value.data_offset < weights.Size() &&
+                      value.size <= weights.Size() &&
+                      (value.data_offset <= weights.Size() - value.size),
+                  "File offset + size outside of external initializer file");
+      void* mmapped_offset = static_cast<void*>(mmaped_weights + value.data_offset);
+      tensor = ov::Tensor(ov_elementType, value.dimensions, mmapped_offset);
     } else {
-      // Use vanilla tensors
-      value.tensor = std::make_shared<ov::Tensor>(ov_elementType, value.dimensions);
-      weights.load_weights(value.data_offset, value.tensor->data(), value.size);
+      ORT_ENFORCE(opt_remote_ctx, "Unexpected: Don't have remote context and memory mapped weights is null!");
+      // Can't mmap the file to a device tensor; create a host tensor and copy the data
+      tensor = opt_remote_ctx->create_host_tensor(ov_elementType, value.dimensions);
+      ORT_ENFORCE(tensor.get_byte_size() == value.size, "Remote tensor size mismatch");
+      weights.load_weights(value.data_offset, tensor.data(), value.size);
     }
-    ORT_ENFORCE(value.tensor->get_byte_size() == value.size, "Unexpected tensor size mismatch");
+
+    ORT_ENFORCE(tensor.get_byte_size() == value.size, "Unexpected tensor size mismatch");
+    value.tensor = std::make_shared<ov::Tensor>(std::move(tensor));
   }
 }

⚠️ GitHub Actions / Optional Lint C++ ([cpplint] reported by reviewdog 🐶) at backend_utils.cc:470: Add #include <memory> for make_shared<> [build/include_what_you_use] [4]

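Note on the CreateOVTensors change above: the new path amounts to "map the external weights file once, then hand out zero-copy ov::Tensor views at per-initializer offsets". Below is a minimal standalone sketch of that pattern, using the same `ov::read_tensor_data` call the diff uses on the CPU path; `ViewIntoWeights`, the shape and size values, and `model.bin` are illustrative, not part of the PR.

```cpp
// Standalone sketch: zero-copy views over one memory-mapped weights file.
#include <openvino/openvino.hpp>
#include <cstdint>
#include <stdexcept>

// Mirrors the PR's bounds check. The subtraction form (offset <= total - size)
// cannot wrap around, unlike the naive offset + size <= total.
ov::Tensor ViewIntoWeights(ov::Tensor mapped,  // cheap handle copy; shares the mapping
                           ov::element::Type type, const ov::Shape& dims,
                           size_t offset, size_t size) {
  const size_t total = mapped.get_byte_size();
  if (!(offset < total && size <= total && offset <= total - size)) {
    throw std::runtime_error("File offset + size outside of external initializer file");
  }
  auto* base = static_cast<uint8_t*>(mapped.data());
  // The view aliases the mapping: keep `mapped` alive as long as any view is
  // in use (the PR parks it in MappingContainer::tensor_ for exactly this reason).
  return ov::Tensor(type, dims, static_cast<void*>(base + offset));
}

int main() {
  // Memory-maps the whole file as a u8 tensor (same call as the PR's CPU path).
  ov::Tensor mapped = ov::read_tensor_data("model.bin");  // path is illustrative
  ov::Tensor w = ViewIntoWeights(mapped, ov::element::f32, ov::Shape{128, 256},
                                 /*offset=*/0, /*size=*/128 * 256 * sizeof(float));
  (void)w;
  return 0;
}
```

The overflow-safe form of the check matters because `value.data_offset + value.size` is computed in `size_t` and could silently wrap for a corrupt offset, which is why the diff tests `data_offset <= Size() - size` instead.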
8 changes: 8 additions & 0 deletions onnxruntime/core/providers/openvino/contexts.h
@@ -55,10 +55,18 @@ class SharedContext : public WeakSingleton<SharedContext> {
     explicit WeightsFile(std::filesystem::path filename);
 
     void load_weights(size_t file_offset, void* data, size_t size);
+    void* TryGetOrCreateDeviceMapping(std::optional<ov::RemoteContext>& remote_context);
     size_t Size() const { return weights_size_; }
 
    private:
     std::ifstream file_;
+    std::filesystem::path file_path_;
     size_t weights_size_;
+    struct MappingContainer {
+      void* ptr_{nullptr};
+      ov::Tensor tensor_;
+    };
+    std::map<std::string, MappingContainer> imported_device_tensors_;
   };
 
   fs::path external_weight_filename;
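On the two new members: `ptr_` is only a borrowed view, while `tensor_` owns the underlying mmap or Level-Zero import, so the pair must live (and be destroyed) together. A compilable sketch of the get-or-create idiom this header enables follows; `make_mapping_for` and its `u8` stand-in tensor are hypothetical, the real per-device creation lives in backend_utils.cc.

```cpp
#include <map>
#include <string>
#include <openvino/openvino.hpp>

struct MappingContainer {
  void* ptr_{nullptr};  // borrowed view, valid only while tensor_ lives
  ov::Tensor tensor_;   // owns the mapping / device import
};

// Hypothetical stand-in for the device-specific creation in backend_utils.cc.
ov::Tensor make_mapping_for(const std::string& /*dev_name*/) {
  return ov::Tensor(ov::element::u8, ov::Shape{1024});
}

std::map<std::string, MappingContainer> imported_device_tensors_;

void* try_get_or_create(const std::string& dev_name) {
  // emplace() is a no-op if the key already exists; `inserted` tells us whether
  // this call must populate the entry, so each device mapping is created once.
  auto [it, inserted] = imported_device_tensors_.emplace(dev_name, MappingContainer{});
  if (inserted) {
    ov::Tensor t = make_mapping_for(dev_name);
    it->second = MappingContainer{t.data(), t};  // store pointer and owner together
  }
  return it->second.ptr_;
}

int main() {
  void* a = try_get_or_create("NPU");
  void* b = try_get_or_create("NPU");  // cache hit: same mapping, no re-import
  return a == b ? 0 : 1;
}
```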
11 changes: 11 additions & 0 deletions onnxruntime/core/providers/openvino/ov_interface.h
@@ -21,6 +21,17 @@
 
 #include <string>
 
+// Helper macro to test the OpenVINO version at compile time.
+// Usage: #if OPENVINO_VERSION_AT_LEAST(2025, 3)
+// Falls back to 0 if OPENVINO_VERSION_MAJOR/MINOR are not defined.
+#if defined(OPENVINO_VERSION_MAJOR) && defined(OPENVINO_VERSION_MINOR)
+#define OPENVINO_VERSION_AT_LEAST(major, minor)  \
+  ((OPENVINO_VERSION_MAJOR > (major)) ||         \
+   (OPENVINO_VERSION_MAJOR == (major) && OPENVINO_VERSION_MINOR >= (minor)))
+#else
+#define OPENVINO_VERSION_AT_LEAST(major, minor) 0
+#endif
+
 namespace onnxruntime {
 namespace openvino_ep {
 class OVCore;
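Since `OPENVINO_VERSION_AT_LEAST` is plain preprocessor arithmetic, its logic can be sanity-checked outside the build. In the sketch below the version macros are stubbed to simulate a 2025.2 toolchain; in a real build they come from OpenVINO's own headers, which is why the header's fall-back-to-0 branch exists.

```cpp
// Pretend we built against OpenVINO 2025.2 (stubbed for demonstration only;
// the macro body is copied from the PR's ov_interface.h).
#define OPENVINO_VERSION_MAJOR 2025
#define OPENVINO_VERSION_MINOR 2
#define OPENVINO_VERSION_AT_LEAST(major, minor)  \
  ((OPENVINO_VERSION_MAJOR > (major)) ||         \
   (OPENVINO_VERSION_MAJOR == (major) && OPENVINO_VERSION_MINOR >= (minor)))

#include <iostream>

int main() {
  std::cout << OPENVINO_VERSION_AT_LEAST(2025, 3) << "\n";  // 0: 2025.2 is older than 2025.3
  std::cout << OPENVINO_VERSION_AT_LEAST(2025, 2) << "\n";  // 1: equal versions pass
  std::cout << OPENVINO_VERSION_AT_LEAST(2024, 9) << "\n";  // 1: a newer major always wins
  return 0;
}
```

With the stubs above, the NPU file-import branch in TryGetOrCreateDeviceMapping would compile out, and CreateOVTensors would take the host-tensor copy fallback instead.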