From b939b96bb8c329cadec53217c39f2f2806d49ef4 Mon Sep 17 00:00:00 2001 From: yechank <161688079+yechank-nvidia@users.noreply.github.com> Date: Mon, 2 Jun 2025 16:38:18 +0900 Subject: [PATCH] fix: [nvbugs/5298600] fix illegal memory access on mrope_position_deltas Signed-off-by: yechank <161688079+yechank-nvidia@users.noreply.github.com> --- .../decoderMaskedMultiheadAttentionTemplate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderMaskedMultiheadAttentionTemplate.h b/cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderMaskedMultiheadAttentionTemplate.h index d7f9c58b274..bc3b483c0d7 100644 --- a/cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderMaskedMultiheadAttentionTemplate.h +++ b/cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention/decoderMaskedMultiheadAttentionTemplate.h @@ -1687,7 +1687,7 @@ __global__ void __launch_bounds__(MAX_THEADS_PER_BLOCK, MIN_BLOCKS_PER_SM) maske int const smem_pitch = half_rotary_dim; // TODO: adjust for bank conflicts assert(half_rotary_dim % QK_VEC_SIZE == 0); - if (params.position_embedding_type == PositionEmbeddingType::kROPE_M) + if (params.position_embedding_type == PositionEmbeddingType::kROPE_M && params.mrope_position_deltas != nullptr) { current_pos_idx += params.mrope_position_deltas[batch_idx]; }