@@ -2288,7 +2288,6 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj {
     CHECK_EQ(v_data->shape[2], v_head_dim_);
     CHECK_EQ(o_data->shape[2], v_head_dim_);
 
-
     // Part 2: Synchronize streams and update auxiliary data.
     ComputeStreamWaitForCopyStream();
     ICHECK(!dirty_aux_data_device_);
@@ -2303,20 +2302,20 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj {
     // Here, we use f_mla_prefill_ragged_normal_, which is designed to work for both decode
     // and normal prefill cases. Optionally, you could check a flag like `use_decode_kernel_[0]`
     // to adjust parameters; here we assume the kernel internally supports both cases.
-    f_mla_prefill_ragged_normal_(q_data,
-                                 cur_append_length_indptr_view_,
-                                 k_data,
-                                 v_data,
-                                 cur_append_length_indptr_view_,
-                                 q_rope_position_map_view_,
-                                 k_ragged_rope_pos_offset_view_,
-                                 o_data,  // output tensor
-                                 merged_attn_scores_view_,
-                                 /*causal=*/1,
-                                 static_cast<int>(RoPEMode::kNone),  // Rope changes have already been applied before the kernel
-                                 0,  // Rope param, not important
-                                 0,  // Rope param, not important
-                                 attn_score_scaling_factor);
+    f_mla_prefill_ragged_normal_(q_data,
+                                 cur_append_length_indptr_view_,
+                                 k_data,
+                                 v_data,
+                                 cur_append_length_indptr_view_,
+                                 q_rope_position_map_view_,
+                                 k_ragged_rope_pos_offset_view_,
+                                 o_data,  // output tensor
+                                 merged_attn_scores_view_,
+                                 /*causal=*/1,
+                                 static_cast<int>(RoPEMode::kNone),
+                                 0,  // Rope param, not important
+                                 0,  // Rope param, not important
+                                 attn_score_scaling_factor);
 
     // Part 5: If appending is to occur after attention, call the append kernel.
     if (!append_before_attn_) {
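Note: the comment in the hunk above mentions that the call site could instead branch on a flag such as `use_decode_kernel_[0]` rather than relying on one kernel for both cases. The sketch below is illustrative only and not part of this commit; it reuses the exact argument list from the diff and assumes a hypothetical decode-specialized callable `f_mla_decode_`.

// Illustrative sketch (not part of the commit): explicit dispatch on the decode flag
// mentioned in the comment above. `f_mla_decode_` is a hypothetical decode-tuned kernel;
// the argument list mirrors the call in the diff.
if (use_decode_kernel_[0]) {
  // Decode: each sequence appends a single token, so a decode-specialized kernel could run.
  f_mla_decode_(q_data, cur_append_length_indptr_view_, k_data, v_data,
                cur_append_length_indptr_view_, q_rope_position_map_view_,
                k_ragged_rope_pos_offset_view_, o_data, merged_attn_scores_view_,
                /*causal=*/1, static_cast<int>(RoPEMode::kNone), 0, 0,
                attn_score_scaling_factor);
} else {
  // Prefill: ragged batches with arbitrary append lengths go through the general kernel.
  f_mla_prefill_ragged_normal_(q_data, cur_append_length_indptr_view_, k_data, v_data,
                               cur_append_length_indptr_view_, q_rope_position_map_view_,
                               k_ragged_rope_pos_offset_view_, o_data, merged_attn_scores_view_,
                               /*causal=*/1, static_cast<int>(RoPEMode::kNone), 0, 0,
                               attn_score_scaling_factor);
}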