
Commit d9d1734

Shunkang authored and committed
Fix error
Signed-off-by: Shunkang <[email protected]>
1 parent b2c1ce5 commit d9d1734

File tree

3 files changed (+11, -8 lines)


tensorrt_llm/_torch/auto_deploy/shim/ad_executor.py

Lines changed: 1 addition & 0 deletions
@@ -136,6 +136,7 @@ def __init__(
         self.pytorch_backend_config.attention_dp_enable_balance = False
         self.pytorch_backend_config.attention_dp_time_out_iters = 50
         self.pytorch_backend_config.attention_dp_batching_wait_iters = 10
+        self.pytorch_backend_config.batch_wait_timeout = 0
         self.iter_counter = 0

         # NOTE (lucaslie): not a declared base member in the base class; required by PyExecutor...

tensorrt_llm/_torch/pyexecutor/executor_request_queue.py

Lines changed: 7 additions & 7 deletions
@@ -96,18 +96,18 @@ def _get_from_request_queue(
         if len(items) >= self.max_batch_size:
             return items

-        start_time = time.time()
+        deadline = time.monotonic() + self.batch_wait_timeout
         while len(items) < self.max_batch_size:
-            if time.time() - start_time >= self.batch_wait_timeout:
+            remaining_timeout = deadline - time.monotonic()
+
+            if remaining_timeout <= 0:
                 break

             try:
-                remaining_timeout = max(
-                    0.001, self.batch_wait_timeout - (time.time() - start_time))
-                queue_item = self.request_queue.get(timeout=remaining_timeout)
-                items.append(queue_item)
+                item = self.request_queue.get(timeout=remaining_timeout)
+                items.append(item)
             except queue.Empty:
-                continue
+                break

         return items

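For context, the hunk above replaces the elapsed-time bookkeeping (`time.time()` plus a 0.001 s floor) with a single `time.monotonic()` deadline, and it stops waiting once `queue.Empty` fires instead of looping again. Below is a minimal, self-contained sketch of that pattern, separate from the actual PyExecutor code; the `request_queue`, `max_batch_size`, and `batch_wait_timeout` names mirror the diff, everything else is illustrative:

    # Standalone sketch (not the actual executor code): drain up to max_batch_size
    # requests, waiting at most batch_wait_timeout seconds for stragglers.
    import queue
    import time


    def get_batch(request_queue: queue.Queue, max_batch_size: int,
                  batch_wait_timeout: float) -> list:
        items = []
        if batch_wait_timeout <= 0:
            # No waiting: take only what is already queued.
            while len(items) < max_batch_size:
                try:
                    items.append(request_queue.get_nowait())
                except queue.Empty:
                    break
            return items

        # One monotonic deadline for the whole batch instead of recomputing
        # elapsed wall-clock time on every iteration.
        deadline = time.monotonic() + batch_wait_timeout
        while len(items) < max_batch_size:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            try:
                items.append(request_queue.get(timeout=remaining))
            except queue.Empty:
                break  # get() already waited out the remaining budget
        return items

`time.monotonic()` is immune to wall-clock adjustments, and breaking on `queue.Empty` is appropriate here because the blocking `get()` has already consumed the remaining budget, so continuing would only re-enter the loop and immediately hit the expired deadline.
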
tensorrt_llm/llmapi/llm_args.py

Lines changed: 3 additions & 1 deletion
@@ -2078,7 +2078,9 @@ class TorchLlmArgs(BaseLlmArgs):
 
     batch_wait_timeout: float = Field(
         default=0,
-        description="The timeout for batching requests. If 0, no wait.",
+        description=
+        "If greater than 0, returns immediately when fetched requests exceed max_batch_size; "
+        "otherwise, waits up to batch_wait_timeout to gather more. If 0, no waiting occurs.",
         status="prototype")
 
     torch_compile_config: Optional[TorchCompileConfig] = Field(

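Since `batch_wait_timeout` is exposed as a prototype `TorchLlmArgs` field, it should be settable wherever the other llmapi arguments are. A hypothetical usage sketch follows (assumption: the field is forwarded through the `LLM` constructor like other `TorchLlmArgs` fields; the model path and timeout value are placeholders, not taken from this commit):

    # Hypothetical usage sketch; batch_wait_timeout is a prototype field, so the
    # exact wiring may differ from what this commit ships.
    from tensorrt_llm import LLM

    llm = LLM(
        model="<path-or-hf-model-id>",  # placeholder
        batch_wait_timeout=0.05,        # wait up to 50 ms to fill a batch; 0 = no wait (default)
    )

With the default of 0 (also the value `ad_executor.py` pins for the auto_deploy shim above), the fetch loop returns whatever is already queued without waiting.
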