File tree Expand file tree Collapse file tree 1 file changed +5
-0
lines changed
cpp/tensorrt_llm/kernels/contextFusedMultiHeadAttention Expand file tree Collapse file tree 1 file changed +5
-0
lines changed Original file line number Diff line number Diff line change @@ -297,6 +297,11 @@ void FusedMHARunnerV2::setupLaunchParams(MHARunnerParams runnerParams)
297
297
= mFixedParams .isSPadded ? runnerParams.b * runnerParams.qSeqLen : runnerParams.totalQSeqLen ;
298
298
mLaunchParams .total_kv_seqlen
299
299
= mFixedParams .isSPadded ? runnerParams.b * runnerParams.kvSeqLen : runnerParams.totalKvSeqLen ;
300
+ // Workaround for nvbug 5412456: total_kv_seqlen falls back to total_q_seqlen when it is zero.
301
+ if (mLaunchParams .total_kv_seqlen == 0 )
302
+ {
303
+ mLaunchParams .total_kv_seqlen = mLaunchParams .total_q_seqlen ;
304
+ }
300
305
301
306
TLLM_CHECK_WITH_INFO (mFixedParams .headSize > 0 , " Head size should be greater than 0." );
302
307
// Pad head size to next power of 2.
You can’t perform that action at this time.
0 commit comments