From eb7b3b76c9fed196b5228681c62740cac6453470 Mon Sep 17 00:00:00 2001 From: Kotomi-Du Date: Tue, 2 Sep 2025 12:10:30 -0700 Subject: [PATCH 1/2] support GQA --- onnxruntime/core/providers/openvino/backends/basic_backend.h | 5 +++-- onnxruntime/core/providers/openvino/ov_versions/data_ops.cc | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h index 2cf3d3faa8b47..6801678e0532b 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.h +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h @@ -57,6 +57,7 @@ struct OnnxToOvNetworkBindings { "beam_idx", "past_key_values", "present", + "total_seq_len", }; OnnxToOvNetworkBindings(OVExeNetwork& exec_network, SubGraphContext& subgraph_context, SessionContext& session_context) { @@ -72,7 +73,7 @@ struct OnnxToOvNetworkBindings { // stateful representation has introduced these new tensors, creating a name mismatch (matched_names=false). // So, if there is a name mismatch, or the name matches our special io list, we simply continue processing // here to prevent runtime exceptions. - if (session_context.enable_causallm) { + //if (session_context.enable_causallm) { if (!matched_names || std::any_of(special_io_names_.begin(), special_io_names_.end(), [&onnx_name](const std::string& name) { return onnx_name.find(name) != std::string::npos; })) { @@ -80,7 +81,7 @@ struct OnnxToOvNetworkBindings { has_dynamic_io_ = true; continue; } - } + //} ORT_ENFORCE(matched_names, log_tag, "Input names mismatch between OpenVINO and ONNX. ", onnx_name, diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index f848b89ed10c8..d2ee8317e0249 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -96,6 +96,7 @@ std::vector supported_op_mode = { {"Atanh", V_2020_4, {"CPU"}}, {"Atanh", V_2022_1, {"GPU"}}, {"Attention", V_2023_0, {"CPU", "GPU"}}, + {"GroupQueryAttention", V_2023_0, {"CPU", "GPU"}}, {"AveragePool", V_2020_4, {"CPU", "GPU"}}, {"BatchNormalization", V_2020_4, {"CPU", "GPU"}}, {"BiasGelu", V_2023_0, {"CPU", "GPU"}}, From a3244407a487af2a70890cd7a3ea98b5c83d2a91 Mon Sep 17 00:00:00 2001 From: Kotomi-Du Date: Tue, 23 Sep 2025 13:46:08 -0700 Subject: [PATCH 2/2] dump CPU result --- onnxruntime/contrib_ops/cpu/utils/debug_macros.h | 2 +- tools/ci_build/build.py | 3 +++ tools/ci_build/build_args.py | 6 ++++++ 3 files changed, 10 insertions(+), 1 deletion(-) diff --git a/onnxruntime/contrib_ops/cpu/utils/debug_macros.h b/onnxruntime/contrib_ops/cpu/utils/debug_macros.h index 47d0fc5e4008c..47a73cb9528fd 100644 --- a/onnxruntime/contrib_ops/cpu/utils/debug_macros.h +++ b/onnxruntime/contrib_ops/cpu/utils/debug_macros.h @@ -6,7 +6,7 @@ #ifdef DEBUG_GENERATION #define DUMP_TENSOR_LEVEL 2 #else -#define DUMP_TENSOR_LEVEL 0 // change it to 1 or 2 if want to enable dumping for code not in generation. +#define DUMP_TENSOR_LEVEL 1 // change it to 1 or 2 if want to enable dumping for code not in generation. #endif #define DUMP_CPU_TENSOR_LEVEL DUMP_TENSOR_LEVEL diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index edceae55ddda4..c00dfc8959f50 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -386,6 +386,9 @@ def generate_build_tree( "-Donnxruntime_ENABLE_PIX_FOR_WEBGPU_EP=" + ("ON" if args.enable_pix_capture else "OFF"), ] + if 1: #args.dump_node_input_output: + cmake_args.append("-Donnxruntime_DEBUG_NODE_INPUTS_OUTPUTS=ON") + if args.caller_framework: cmake_args.append("-Donnxruntime_CALLER_FRAMEWORK=" + args.caller_framework) if args.winml_root_namespace_override: diff --git a/tools/ci_build/build_args.py b/tools/ci_build/build_args.py index de538604aac75..0704aa47c9f7f 100644 --- a/tools/ci_build/build_args.py +++ b/tools/ci_build/build_args.py @@ -842,6 +842,12 @@ def convert_arg_line_to_args(self, arg_line: str) -> list[str]: # Use list[str] fromfile_prefix_chars="@", # Allow args from file (@filename) ) + # dump_node_input_output + parser.add_argument( + "--dump_node_input_output", + type=str, + help="Dump node input/output data to files in the specified directory.", + ) # Add arguments by category add_core_build_args(parser) add_cmake_build_config_args(parser)