Skip to content

Commit 8d6a1d1

Browse files
Shunkang
authored and committed
Change var name
Signed-off-by: Shunkang <[email protected]>
1 parent d9d1734 commit 8d6a1d1

File tree

7 files changed

+23
-23
lines changed

7 files changed

+23
-23
lines changed

tensorrt_llm/_torch/auto_deploy/shim/ad_executor.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ def __init__(
136136
self.pytorch_backend_config.attention_dp_enable_balance = False
137137
self.pytorch_backend_config.attention_dp_time_out_iters = 50
138138
self.pytorch_backend_config.attention_dp_batching_wait_iters = 10
139-
self.pytorch_backend_config.batch_wait_timeout = 0
139+
self.pytorch_backend_config.batch_wait_timeout_ms = 0
140140
self.iter_counter = 0
141141

142142
# NOTE (lucaslie): not a declared base member in the base class; required by PyExecutor...

tensorrt_llm/_torch/pyexecutor/config.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class PyTorchConfig:
5050
attention_dp_time_out_iters: int = 50
5151
attention_dp_batching_wait_iters: int = 10
5252

53-
batch_wait_timeout: float = 0
53+
batch_wait_timeout_ms: float = 0
5454

5555
attn_backend: str = 'TRTLLM'
5656
moe_backend: str = 'CUTLASS'

tensorrt_llm/_torch/pyexecutor/executor_request_queue.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ class ExecutorRequestQueue:
4545
def __init__(self, dist: Distributed, enable_attention_dp: bool,
4646
max_batch_size: int, max_beam_width: int,
4747
max_num_active_requests: int, enable_iter_perf_stats: bool,
48-
batch_wait_timeout: float, is_disaggregated: bool):
48+
batch_wait_timeout_ms: float, is_disaggregated: bool):
4949
self.dist = dist
5050
self.request_queue: queue.Queue[RequestQueueItem] = queue.Queue()
5151
self.waiting_queue: deque[RequestQueueItem] = deque()
@@ -60,7 +60,7 @@ def __init__(self, dist: Distributed, enable_attention_dp: bool,
6060
self.enable_iter_perf_stats = enable_iter_perf_stats
6161
self.start_times = {}
6262
self.active = True
63-
self.batch_wait_timeout = batch_wait_timeout
63+
self.batch_wait_timeout_ms = batch_wait_timeout_ms
6464

6565
# State tracking
6666
self.num_fetch_requests = 0
@@ -90,13 +90,13 @@ def _get_from_request_queue(
9090
except queue.Empty:
9191
pass
9292

93-
if self.batch_wait_timeout == 0:
93+
if self.batch_wait_timeout_ms == 0:
9494
return items
9595

9696
if len(items) >= self.max_batch_size:
9797
return items
9898

99-
deadline = time.monotonic() + self.batch_wait_timeout
99+
deadline = time.monotonic() + self.batch_wait_timeout_ms / 1000.0
100100
while len(items) < self.max_batch_size:
101101
remaining_timeout = deadline - time.monotonic()
102102

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ def __init__(self,
187187
self.attention_dp_enable_balance = model_engine.pytorch_backend_config.attention_dp_enable_balance
188188
self.attention_dp_time_out_iters = model_engine.pytorch_backend_config.attention_dp_time_out_iters
189189
self.attention_dp_batching_wait_iters = model_engine.pytorch_backend_config.attention_dp_batching_wait_iters
190-
self.batch_wait_timeout = model_engine.pytorch_backend_config.batch_wait_timeout
190+
self.batch_wait_timeout_ms = model_engine.pytorch_backend_config.batch_wait_timeout_ms
191191
self.num_fetch_requests_cur_rank = 0
192192
self.num_fetch_requests = 0
193193
self.shutdown_event = threading.Event()
@@ -238,7 +238,7 @@ def __init__(self,
238238
max_beam_width=self.max_beam_width,
239239
max_num_active_requests=self.max_num_active_requests,
240240
enable_iter_perf_stats=self.enable_iter_perf_stats,
241-
batch_wait_timeout=self.batch_wait_timeout,
241+
batch_wait_timeout_ms=self.batch_wait_timeout_ms,
242242
is_disaggregated=kv_cache_transceiver is not None,
243243
)
244244
self.executor_request_queue.set_exclude_last_generation_logits(

tensorrt_llm/llmapi/llm_args.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2076,11 +2076,11 @@ class TorchLlmArgs(BaseLlmArgs):
20762076
description="Print iteration logs.",
20772077
status="beta")
20782078

2079-
batch_wait_timeout: float = Field(
2079+
batch_wait_timeout_ms: float = Field(
20802080
default=0,
20812081
description=
20822082
"If greater than 0, returns immediately when fetched requests exceed max_batch_size; "
2083-
"otherwise, waits up to batch_wait_timeout to gather more. If 0, no waiting occurs.",
2083+
"otherwise, waits up to batch_wait_timeout_ms to gather more. If 0, no waiting occurs.",
20842084
status="prototype")
20852085

20862086
torch_compile_config: Optional[TorchCompileConfig] = Field(
@@ -2330,10 +2330,10 @@ def validate_attention_dp_config(self) -> 'TorchLlmArgs':
23302330
return self
23312331

23322332
@model_validator(mode='after')
2333-
def validate_batch_wait_timeout(self) -> 'TorchLlmArgs':
2333+
def validate_batch_wait_timeout_ms(self) -> 'TorchLlmArgs':
23342334
"""Validate batch wait timeout."""
2335-
if self.batch_wait_timeout < 0:
2336-
raise ValueError("batch_wait_timeout must be greater than 0")
2335+
if self.batch_wait_timeout_ms < 0:
2336+
raise ValueError("batch_wait_timeout_ms must be greater than 0")
23372337
return self
23382338

23392339
# TODO: Remove this after the PyTorch backend is fully migrated to TorchLlmArgs from ExecutorConfig
@@ -2398,7 +2398,7 @@ def get_pytorch_backend_config(self) -> "PyTorchConfig":
23982398
attention_dp_batching_wait_iters=self.attention_dp_config.
23992399
batching_wait_iters if self.attention_dp_config is not None else
24002400
AttentionDpConfig.model_fields['batching_wait_iters'].default,
2401-
batch_wait_timeout=self.batch_wait_timeout)
2401+
batch_wait_timeout_ms=self.batch_wait_timeout_ms)
24022402

24032403

24042404
def update_llm_args_with_extra_dict(

tests/unittest/_torch/test_executor_request_queue.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ def executor_queue(mock_dist):
4040
max_beam_width=1,
4141
max_num_active_requests=16,
4242
enable_iter_perf_stats=True,
43-
batch_wait_timeout=0.0,
43+
batch_wait_timeout_ms=0.0,
4444
is_disaggregated=False)
4545

4646

@@ -53,7 +53,7 @@ def integration_queue(mock_dist):
5353
max_beam_width=2,
5454
max_num_active_requests=8,
5555
enable_iter_perf_stats=True,
56-
batch_wait_timeout=0.0,
56+
batch_wait_timeout_ms=0.0,
5757
is_disaggregated=False)
5858

5959

@@ -228,8 +228,8 @@ def add_requests_after_delay(delay, num_requests):
228228
item = RequestQueueItem(i + 10, Mock())
229229
executor_queue.request_queue.put(item)
230230

231-
# Test 1: Without batch_wait_timeout (should only get initial requests)
232-
executor_queue.batch_wait_timeout = 0.0
231+
# Test 1: Without batch_wait_timeout_ms (should only get initial requests)
232+
executor_queue.batch_wait_timeout_ms = 0.0
233233

234234
initial_requests = 3
235235
for i in range(initial_requests):
@@ -250,8 +250,8 @@ def add_requests_after_delay(delay, num_requests):
250250

251251
thread.join()
252252

253-
# Test 2: With batch_wait_timeout (should wait and get all requests)
254-
executor_queue.batch_wait_timeout = 0.2
253+
# Test 2: With batch_wait_timeout_ms (should wait and get all requests)
254+
executor_queue.batch_wait_timeout_ms = 200.0
255255

256256
# Clear the queue and add initial requests again
257257
while not executor_queue.request_queue.empty():
@@ -268,7 +268,7 @@ def add_requests_after_delay(delay, num_requests):
268268
thread = threading.Thread(target=add_requests_after_delay, args=(0.05, 3))
269269
thread.start()
270270

271-
# Get requests with batch_wait_timeout - should wait and get all
271+
# Get requests with batch_wait_timeout_ms - should wait and get all
272272
start_time = time.time()
273273
items = executor_queue._get_from_request_queue(None)
274274
elapsed = time.time() - start_time
@@ -442,7 +442,7 @@ def attention_dp_queue(mock_dist_attention_dp):
442442
max_beam_width=2,
443443
max_num_active_requests=8,
444444
enable_iter_perf_stats=True,
445-
batch_wait_timeout=0.0,
445+
batch_wait_timeout_ms=0.0,
446446
is_disaggregated=False)
447447
# Initialize all_ranks_num_active_requests
448448
return queue

tests/unittest/api_stability/references/llm.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ methods:
123123
annotation: bool
124124
default: False
125125
status: prototype
126-
batch_wait_timeout:
126+
batch_wait_timeout_ms:
127127
annotation: float
128128
default: 0
129129
status: prototype

0 commit comments

Comments (0)