
Commit d9d1734

Shunkang authored and committed
Fix error
Signed-off-by: Shunkang <[email protected]>
1 parent b2c1ce5 commit d9d1734

File tree

3 files changed (+11, -8 lines)


tensorrt_llm/_torch/auto_deploy/shim/ad_executor.py

Lines changed: 1 addition & 0 deletions
@@ -136,6 +136,7 @@ def __init__(
         self.pytorch_backend_config.attention_dp_enable_balance = False
         self.pytorch_backend_config.attention_dp_time_out_iters = 50
         self.pytorch_backend_config.attention_dp_batching_wait_iters = 10
+        self.pytorch_backend_config.batch_wait_timeout = 0
         self.iter_counter = 0

         # NOTE (lucaslie): not a declared base member in the base class; required by PyExecutor...

tensorrt_llm/_torch/pyexecutor/executor_request_queue.py

Lines changed: 7 additions & 7 deletions
@@ -96,18 +96,18 @@ def _get_from_request_queue(
         if len(items) >= self.max_batch_size:
             return items

-        start_time = time.time()
+        deadline = time.monotonic() + self.batch_wait_timeout
         while len(items) < self.max_batch_size:
-            if time.time() - start_time >= self.batch_wait_timeout:
+            remaining_timeout = deadline - time.monotonic()
+
+            if remaining_timeout <= 0:
                 break

             try:
-                remaining_timeout = max(
-                    0.001, self.batch_wait_timeout - (time.time() - start_time))
-                queue_item = self.request_queue.get(timeout=remaining_timeout)
-                items.append(queue_item)
+                item = self.request_queue.get(timeout=remaining_timeout)
+                items.append(item)
             except queue.Empty:
-                continue
+                break

         return items

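For context, the hunk above replaces the elapsed-time bookkeeping (`time.time()` plus a 0.001 s floor) with a single `time.monotonic()` deadline, and it stops waiting once `queue.Empty` fires instead of looping again. Below is a minimal, self-contained sketch of that pattern, separate from the actual PyExecutor code; the `request_queue`, `max_batch_size`, and `batch_wait_timeout` names mirror the diff, everything else is illustrative:

    # Standalone sketch (not the actual executor code): drain up to max_batch_size
    # requests, waiting at most batch_wait_timeout seconds for stragglers.
    import queue
    import time


    def get_batch(request_queue: queue.Queue, max_batch_size: int,
                  batch_wait_timeout: float) -> list:
        items = []
        if batch_wait_timeout <= 0:
            # No waiting: take only what is already queued.
            while len(items) < max_batch_size:
                try:
                    items.append(request_queue.get_nowait())
                except queue.Empty:
                    break
            return items

        # One monotonic deadline for the whole batch instead of recomputing
        # elapsed wall-clock time on every iteration.
        deadline = time.monotonic() + batch_wait_timeout
        while len(items) < max_batch_size:
            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            try:
                items.append(request_queue.get(timeout=remaining))
            except queue.Empty:
                break  # get() already waited out the remaining budget
        return items

`time.monotonic()` is immune to wall-clock adjustments, and breaking on `queue.Empty` is appropriate here because the blocking `get()` has already consumed the remaining budget, so continuing would only re-enter the loop and immediately hit the expired deadline.
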
tensorrt_llm/llmapi/llm_args.py

Lines changed: 3 additions & 1 deletion
@@ -2078,7 +2078,9 @@ class TorchLlmArgs(BaseLlmArgs):
 
     batch_wait_timeout: float = Field(
         default=0,
-        description="The timeout for batching requests. If 0, no wait.",
+        description=
+        "If greater than 0, returns immediately when fetched requests exceed max_batch_size; "
+        "otherwise, waits up to batch_wait_timeout to gather more. If 0, no waiting occurs.",
         status="prototype")
 
     torch_compile_config: Optional[TorchCompileConfig] = Field(

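Since `batch_wait_timeout` is exposed as a prototype `TorchLlmArgs` field, it should be settable wherever the other llmapi arguments are. A hypothetical usage sketch follows (assumption: the field is forwarded through the `LLM` constructor like other `TorchLlmArgs` fields; the model path and timeout value are placeholders, not taken from this commit):

    # Hypothetical usage sketch; batch_wait_timeout is a prototype field, so the
    # exact wiring may differ from what this commit ships.
    from tensorrt_llm import LLM

    llm = LLM(
        model="<path-or-hf-model-id>",  # placeholder
        batch_wait_timeout=0.05,        # wait up to 50 ms to fill a batch; 0 = no wait (default)
    )

With the default of 0 (also the value `ad_executor.py` pins for the auto_deploy shim above), the fetch loop returns whatever is already queued without waiting.
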