Skip to content

Commit 8d6a1d1

Browse files
Shunkang
authored and committed
Change var name
Signed-off-by: Shunkang <[email protected]>
1 parent d9d1734 commit 8d6a1d1

File tree

7 files changed

+23
-23
lines changed

7 files changed

+23
-23
lines changed

tensorrt_llm/_torch/auto_deploy/shim/ad_executor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def __init__(
136136
self.pytorch_backend_config.attention_dp_enable_balance = False
137137
self.pytorch_backend_config.attention_dp_time_out_iters = 50
138138
self.pytorch_backend_config.attention_dp_batching_wait_iters = 10
139-
self.pytorch_backend_config.batch_wait_timeout = 0
139+
self.pytorch_backend_config.batch_wait_timeout_ms = 0
140140
self.iter_counter = 0
141141

142142
# NOTE (lucaslie): not a declared base member in the base class; required by PyExecutor...

tensorrt_llm/_torch/pyexecutor/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class PyTorchConfig:
5050
attention_dp_time_out_iters: int = 50
5151
attention_dp_batching_wait_iters: int = 10
5252

53-
batch_wait_timeout: float = 0
53+
batch_wait_timeout_ms: float = 0
5454

5555
attn_backend: str = 'TRTLLM'
5656
moe_backend: str = 'CUTLASS'

tensorrt_llm/_torch/pyexecutor/executor_request_queue.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class ExecutorRequestQueue:
4545
def __init__(self, dist: Distributed, enable_attention_dp: bool,
4646
max_batch_size: int, max_beam_width: int,
4747
max_num_active_requests: int, enable_iter_perf_stats: bool,
48-
batch_wait_timeout: float, is_disaggregated: bool):
48+
batch_wait_timeout_ms: float, is_disaggregated: bool):
4949
self.dist = dist
5050
self.request_queue: queue.Queue[RequestQueueItem] = queue.Queue()
5151
self.waiting_queue: deque[RequestQueueItem] = deque()
@@ -60,7 +60,7 @@ def __init__(self, dist: Distributed, enable_attention_dp: bool,
6060
self.enable_iter_perf_stats = enable_iter_perf_stats
6161
self.start_times = {}
6262
self.active = True
63-
self.batch_wait_timeout = batch_wait_timeout
63+
self.batch_wait_timeout_ms = batch_wait_timeout_ms
6464

6565
# State tracking
6666
self.num_fetch_requests = 0
@@ -90,13 +90,13 @@ def _get_from_request_queue(
9090
except queue.Empty:
9191
pass
9292

93-
if self.batch_wait_timeout == 0:
93+
if self.batch_wait_timeout_ms == 0:
9494
return items
9595

9696
if len(items) >= self.max_batch_size:
9797
return items
9898

99-
deadline = time.monotonic() + self.batch_wait_timeout
99+
deadline = time.monotonic() + self.batch_wait_timeout_ms / 1000.0
100100
while len(items) < self.max_batch_size:
101101
remaining_timeout = deadline - time.monotonic()
102102

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ def __init__(self,
187187
self.attention_dp_enable_balance = model_engine.pytorch_backend_config.attention_dp_enable_balance
188188
self.attention_dp_time_out_iters = model_engine.pytorch_backend_config.attention_dp_time_out_iters
189189
self.attention_dp_batching_wait_iters = model_engine.pytorch_backend_config.attention_dp_batching_wait_iters
190-
self.batch_wait_timeout = model_engine.pytorch_backend_config.batch_wait_timeout
190+
self.batch_wait_timeout_ms = model_engine.pytorch_backend_config.batch_wait_timeout_ms
191191
self.num_fetch_requests_cur_rank = 0
192192
self.num_fetch_requests = 0
193193
self.shutdown_event = threading.Event()
@@ -238,7 +238,7 @@ def __init__(self,
238238
max_beam_width=self.max_beam_width,
239239
max_num_active_requests=self.max_num_active_requests,
240240
enable_iter_perf_stats=self.enable_iter_perf_stats,
241-
batch_wait_timeout=self.batch_wait_timeout,
241+
batch_wait_timeout_ms=self.batch_wait_timeout_ms,
242242
is_disaggregated=kv_cache_transceiver is not None,
243243
)
244244
self.executor_request_queue.set_exclude_last_generation_logits(

tensorrt_llm/llmapi/llm_args.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2076,11 +2076,11 @@ class TorchLlmArgs(BaseLlmArgs):
20762076
description="Print iteration logs.",
20772077
status="beta")
20782078

2079-
batch_wait_timeout: float = Field(
2079+
batch_wait_timeout_ms: float = Field(
20802080
default=0,
20812081
description=
20822082
"If greater than 0, returns immediately when fetched requests exceed max_batch_size; "
2083-
"otherwise, waits up to batch_wait_timeout to gather more. If 0, no waiting occurs.",
2083+
"otherwise, waits up to batch_wait_timeout_ms to gather more. If 0, no waiting occurs.",
20842084
status="prototype")
20852085

20862086
torch_compile_config: Optional[TorchCompileConfig] = Field(
@@ -2330,10 +2330,10 @@ def validate_attention_dp_config(self) -> 'TorchLlmArgs':
23302330
return self
23312331

23322332
@model_validator(mode='after')
2333-
def validate_batch_wait_timeout(self) -> 'TorchLlmArgs':
2333+
def validate_batch_wait_timeout_ms(self) -> 'TorchLlmArgs':
23342334
"""Validate batch wait timeout."""
2335-
if self.batch_wait_timeout < 0:
2336-
raise ValueError("batch_wait_timeout must be greater than 0")
2335+
if self.batch_wait_timeout_ms < 0:
2336+
raise ValueError("batch_wait_timeout_ms must be greater than 0")
23372337
return self
23382338

23392339
# TODO: Remove this after the PyTorch backend is fully migrated to TorchLlmArgs from ExecutorConfig
@@ -2398,7 +2398,7 @@ def get_pytorch_backend_config(self) -> "PyTorchConfig":
23982398
attention_dp_batching_wait_iters=self.attention_dp_config.
23992399
batching_wait_iters if self.attention_dp_config is not None else
24002400
AttentionDpConfig.model_fields['batching_wait_iters'].default,
2401-
batch_wait_timeout=self.batch_wait_timeout)
2401+
batch_wait_timeout_ms=self.batch_wait_timeout_ms)
24022402

24032403

24042404
def update_llm_args_with_extra_dict(

tests/unittest/_torch/test_executor_request_queue.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def executor_queue(mock_dist):
4040
max_beam_width=1,
4141
max_num_active_requests=16,
4242
enable_iter_perf_stats=True,
43-
batch_wait_timeout=0.0,
43+
batch_wait_timeout_ms=0.0,
4444
is_disaggregated=False)
4545

4646

@@ -53,7 +53,7 @@ def integration_queue(mock_dist):
5353
max_beam_width=2,
5454
max_num_active_requests=8,
5555
enable_iter_perf_stats=True,
56-
batch_wait_timeout=0.0,
56+
batch_wait_timeout_ms=0.0,
5757
is_disaggregated=False)
5858

5959

@@ -228,8 +228,8 @@ def add_requests_after_delay(delay, num_requests):
228228
item = RequestQueueItem(i + 10, Mock())
229229
executor_queue.request_queue.put(item)
230230

231-
# Test 1: Without batch_wait_timeout (should only get initial requests)
232-
executor_queue.batch_wait_timeout = 0.0
231+
# Test 1: Without batch_wait_timeout_ms (should only get initial requests)
232+
executor_queue.batch_wait_timeout_ms = 0.0
233233

234234
initial_requests = 3
235235
for i in range(initial_requests):
@@ -250,8 +250,8 @@ def add_requests_after_delay(delay, num_requests):
250250

251251
thread.join()
252252

253-
# Test 2: With batch_wait_timeout (should wait and get all requests)
254-
executor_queue.batch_wait_timeout = 0.2
253+
# Test 2: With batch_wait_timeout_ms (should wait and get all requests)
254+
executor_queue.batch_wait_timeout_ms = 200.0
255255

256256
# Clear the queue and add initial requests again
257257
while not executor_queue.request_queue.empty():
@@ -268,7 +268,7 @@ def add_requests_after_delay(delay, num_requests):
268268
thread = threading.Thread(target=add_requests_after_delay, args=(0.05, 3))
269269
thread.start()
270270

271-
# Get requests with batch_wait_timeout - should wait and get all
271+
# Get requests with batch_wait_timeout_ms - should wait and get all
272272
start_time = time.time()
273273
items = executor_queue._get_from_request_queue(None)
274274
elapsed = time.time() - start_time
@@ -442,7 +442,7 @@ def attention_dp_queue(mock_dist_attention_dp):
442442
max_beam_width=2,
443443
max_num_active_requests=8,
444444
enable_iter_perf_stats=True,
445-
batch_wait_timeout=0.0,
445+
batch_wait_timeout_ms=0.0,
446446
is_disaggregated=False)
447447
# Initialize all_ranks_num_active_requests
448448
return queue

tests/unittest/api_stability/references/llm.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ methods:
123123
annotation: bool
124124
default: False
125125
status: prototype
126-
batch_wait_timeout:
126+
batch_wait_timeout_ms:
127127
annotation: float
128128
default: 0
129129
status: prototype

0 commit comments

Comments (0)