@@ -2288,7 +2288,6 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj {
22882288 CHECK_EQ (v_data->shape [2 ], v_head_dim_);
22892289 CHECK_EQ (o_data->shape [2 ], v_head_dim_);
22902290
2291-
22922291 // Part 2: Synchronize streams and update auxiliary data.
22932292 ComputeStreamWaitForCopyStream ();
22942293 ICHECK (!dirty_aux_data_device_);
@@ -2303,27 +2302,21 @@ class PagedAttentionKVCacheObj : public AttentionKVCacheObj {
23032302 // Here, we use f_mla_prefill_ragged_normal_, which is designed to work for both decode
23042303 // and normal prefill cases. Optionally, you could check a flag like `use_decode_kernel_[0]`
23052304 // to adjust parameters; here we assume the kernel internally supports both cases.
2306- f_mla_prefill_ragged_normal_ (q_data,
2307- cur_append_length_indptr_view_,
2308- k_data,
2309- v_data,
2310- cur_append_length_indptr_view_,
2311- q_rope_position_map_view_,
2312- k_ragged_rope_pos_offset_view_,
2313- o_data, // output tensor
2314- merged_attn_scores_view_,
2315- /* causal=*/ 1 ,
2316- static_cast <int >(RoPEMode::kNone ), // Rope changes have already been applied before the kernel
2317- 0 , // Rope param, not important
2318- 0 , // Rope param, not important
2319- attn_score_scaling_factor);
2305+ f_mla_prefill_ragged_normal_ (q_data, cur_append_length_indptr_view_, k_data, v_data,
2306+ cur_append_length_indptr_view_, q_rope_position_map_view_,
2307+ k_ragged_rope_pos_offset_view_,
2308+ o_data, // output tensor
2309+ merged_attn_scores_view_,
2310+ /* causal=*/ 1 , static_cast <int >(RoPEMode::kNone ),
2311+ 0 , // Rope param, not important
2312+ 0 , // Rope param, not important
2313+ attn_score_scaling_factor);
23202314
23212315 // Part 5: If appending is to occur after attention, call the append kernel.
23222316 if (!append_before_attn_) {
23232317 f_transpose_append_mla_ (pages_[local_layer_id], compressed_kv_data, k_pe_data,
23242318 append_position_map_view_);
23252319 }
2326-
23272320 }
23282321
23292322 void LinearAttention (int64_t layer_id, NDArray q_data, NDArray k_data, NDArray v_data,
0 commit comments