From eb7b3b76c9fed196b5228681c62740cac6453470 Mon Sep 17 00:00:00 2001
From: Kotomi-Du <yaru.du@intel.com>
Date: Tue, 2 Sep 2025 12:10:30 -0700
Subject: [PATCH 1/2] Support GroupQueryAttention (GQA) in the OpenVINO provider

---
 onnxruntime/core/providers/openvino/backends/basic_backend.h | 5 +++--
 onnxruntime/core/providers/openvino/ov_versions/data_ops.cc  | 1 +
 2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h
index 2cf3d3faa8b47..6801678e0532b 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.h
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h
@@ -57,6 +57,7 @@ struct OnnxToOvNetworkBindings {
       "beam_idx",
       "past_key_values",
       "present",
+       "total_seq_len",
   };
 
   OnnxToOvNetworkBindings(OVExeNetwork& exec_network, SubGraphContext& subgraph_context, SessionContext& session_context) {
@@ -72,7 +73,7 @@ struct OnnxToOvNetworkBindings {
         // stateful representation has introduced these new tensors, creating a name mismatch (matched_names=false).
         // So, if there is a name mismatch, or the name matches our special io list, we simply continue processing
         // here to prevent runtime exceptions.
-        if (session_context.enable_causallm) {
+        //if (session_context.enable_causallm) {
           if (!matched_names ||
               std::any_of(special_io_names_.begin(), special_io_names_.end(),
                           [&onnx_name](const std::string& name) { return onnx_name.find(name) != std::string::npos; })) {
@@ -80,7 +81,7 @@ struct OnnxToOvNetworkBindings {
             has_dynamic_io_ = true;
             continue;
           }
-        }
+        //}
 
         ORT_ENFORCE(matched_names, log_tag,
                     "Input names mismatch between OpenVINO and ONNX. ", onnx_name,
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
index f848b89ed10c8..d2ee8317e0249 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -96,6 +96,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"Atanh", V_2020_4, {"CPU"}},
     {"Atanh", V_2022_1, {"GPU"}},
     {"Attention", V_2023_0, {"CPU", "GPU"}},
+    {"GroupQueryAttention", V_2023_0, {"CPU", "GPU"}},
     {"AveragePool", V_2020_4, {"CPU", "GPU"}},
     {"BatchNormalization", V_2020_4, {"CPU", "GPU"}},
     {"BiasGelu", V_2023_0, {"CPU", "GPU"}},

From a3244407a487af2a70890cd7a3ea98b5c83d2a91 Mon Sep 17 00:00:00 2001
From: Kotomi-Du <yaru.du@intel.com>
Date: Tue, 23 Sep 2025 13:46:08 -0700
Subject: [PATCH 2/2] Enable CPU tensor dumping and add node input/output dump build option

---
 onnxruntime/contrib_ops/cpu/utils/debug_macros.h | 2 +-
 tools/ci_build/build.py                          | 3 +++
 tools/ci_build/build_args.py                     | 6 ++++++
 3 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/onnxruntime/contrib_ops/cpu/utils/debug_macros.h b/onnxruntime/contrib_ops/cpu/utils/debug_macros.h
index 47d0fc5e4008c..47a73cb9528fd 100644
--- a/onnxruntime/contrib_ops/cpu/utils/debug_macros.h
+++ b/onnxruntime/contrib_ops/cpu/utils/debug_macros.h
@@ -6,7 +6,7 @@
 #ifdef DEBUG_GENERATION
 #define DUMP_TENSOR_LEVEL 2
 #else
-#define DUMP_TENSOR_LEVEL 0  // change it to 1 or 2 if want to enable dumping for code not in generation.
+#define DUMP_TENSOR_LEVEL 1  // change it to 0 to disable dumping for code not in generation (DEBUG_GENERATION uses 2).
 #endif
 
 #define DUMP_CPU_TENSOR_LEVEL DUMP_TENSOR_LEVEL
diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
index edceae55ddda4..c00dfc8959f50 100644
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@@ -386,6 +386,9 @@ def generate_build_tree(
             "-Donnxruntime_ENABLE_PIX_FOR_WEBGPU_EP=" + ("ON" if args.enable_pix_capture else "OFF"),
         ]
 
+        if args.dump_node_input_output:  # opt-in: only set when --dump_node_input_output is passed
+            cmake_args.append("-Donnxruntime_DEBUG_NODE_INPUTS_OUTPUTS=ON")
+
         if args.caller_framework:
             cmake_args.append("-Donnxruntime_CALLER_FRAMEWORK=" + args.caller_framework)
         if args.winml_root_namespace_override:
diff --git a/tools/ci_build/build_args.py b/tools/ci_build/build_args.py
index de538604aac75..0704aa47c9f7f 100644
--- a/tools/ci_build/build_args.py
+++ b/tools/ci_build/build_args.py
@@ -842,6 +842,12 @@ def convert_arg_line_to_args(self, arg_line: str) -> list[str]:  # Use list[str]
         fromfile_prefix_chars="@",  # Allow args from file (@filename)
     )
 
+    # dump_node_input_output
+    parser.add_argument(
+        "--dump_node_input_output",
+        type=str,
+        help="Dump node input/output data to files in the specified directory.",
+    )
     # Add arguments by category
     add_core_build_args(parser)
     add_cmake_build_config_args(parser)