We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent d5be2c3, commit d49c9e5 (Copy full SHA for d49c9e5)
tensorrt_llm/_torch/pyexecutor/model_engine.py
@@ -1216,7 +1216,8 @@ def _prepare_tp_inputs(
1216
if next_draft_tokens_device is None or request.is_dummy or request.py_batch_idx is None:
1217
# get token ids, including input token ids and draft token ids. For these dummy requests,
1218
# no need to copy the token ids.
1219
- if not request.is_dummy:
+ if not (request.is_attention_dp_dummy
1220
+ or request.is_cuda_graph_dummy):
1221
input_ids.append(request.get_last_tokens(0))
1222
input_ids.extend(request.py_draft_tokens)
1223
draft_tokens.extend(request.py_draft_tokens)
0 commit comments