1 parent 8a52015 commit 3f91c20
tensorrt_llm/_torch/attention_backend/trtllm.py
@@ -843,8 +843,10 @@ def prepare_flash_mla(self) -> None:
         block_ids_per_seq = self.kv_cache_manager.get_block_ids_per_seq(
             self.request_ids).pin_memory()
         num_blocks = block_ids_per_seq.shape[1]
+        self.kv_block_ids_per_seq.fill_(0)
         self.kv_block_ids_per_seq[:self.num_seqs, :num_blocks].copy_(
             block_ids_per_seq, non_blocking=True)
+        self.block_ids_per_seq.fill_(0)
         self.block_ids_per_seq[:self.num_generations, :num_blocks].copy_(
             block_ids_per_seq[self.num_contexts:], non_blocking=True)
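The two added fill_(0) calls reset the preallocated block-id buffers before each partial copy, so entries written by an earlier, larger batch cannot linger past the region overwritten for the current batch. A minimal standalone sketch of the failure mode, using hypothetical padded tensors rather than the actual TensorRT-LLM buffers:

import torch

# Hypothetical padded buffer, analogous to kv_block_ids_per_seq:
# rows = max sequences, cols = max KV blocks per sequence.
max_seqs, max_blocks = 4, 8
kv_block_ids = torch.zeros(max_seqs, max_blocks, dtype=torch.int32)

# Batch 1 occupies 2 sequences x 6 blocks.
batch1 = torch.arange(1, 13, dtype=torch.int32).reshape(2, 6)
kv_block_ids[:2, :6].copy_(batch1)

# Batch 2 is smaller (1 sequence x 3 blocks). Without clearing the buffer,
# row 0, columns 3..5 would still hold block ids from batch 1 (stale data).
batch2 = torch.tensor([[100, 101, 102]], dtype=torch.int32)
kv_block_ids.fill_(0)               # reset the whole buffer, as the commit does
kv_block_ids[:1, :3].copy_(batch2)

Only the slice covered by the current batch is copied into, so zeroing first is what guarantees the rest of the buffer holds no leftover block ids.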