From c4f7cdd6e9d2fb43a5aaf9c2bbfb8dc8d5f87bd9 Mon Sep 17 00:00:00 2001 From: Kotomi-Du Date: Tue, 2 Sep 2025 12:10:30 -0700 Subject: [PATCH] Support GroupQueryAttention (GQA) in the OpenVINO provider --- onnxruntime/core/providers/openvino/backends/basic_backend.h | 1 + onnxruntime/core/providers/openvino/ov_versions/data_ops.cc | 1 + 2 files changed, 2 insertions(+) diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h index 2cf3d3faa8b47..5013869b4ca99 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.h +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h @@ -57,6 +57,7 @@ struct OnnxToOvNetworkBindings { "beam_idx", "past_key_values", "present", + "total_seq_len", }; OnnxToOvNetworkBindings(OVExeNetwork& exec_network, SubGraphContext& subgraph_context, SessionContext& session_context) { diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index f848b89ed10c8..c472037769255 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -96,6 +96,7 @@ std::vector supported_op_mode = { {"Atanh", V_2020_4, {"CPU"}}, {"Atanh", V_2022_1, {"GPU"}}, {"Attention", V_2023_0, {"CPU", "GPU"}}, + {"GroupQueryAttention", V_2025_1, {"CPU", "GPU"}}, {"AveragePool", V_2020_4, {"CPU", "GPU"}}, {"BatchNormalization", V_2020_4, {"CPU", "GPU"}}, {"BiasGelu", V_2023_0, {"CPU", "GPU"}},