tensorrt_llm/_torch/pyexecutor: 1 file changed, 9 insertions(+), 0 deletions(-)

@@ -236,6 +236,8 @@ def __init__(self,
         self.ctx_in_transmission_requests = []
         self.previous_batch: Optional[BatchState] = None
         self.num_scheduled_requests: int = 0
+        self.benchmark_req_queues_size = int(
+            os.environ.get("TLLM_BENCHMARK_REQ_QUEUES_SIZE", 0))

         # list of requests in each PP micro batch
         self.num_micro_batches = self.dist.pp_size
@@ -996,6 +998,13 @@ def _prepare_draft_requests(self):

     def _executor_loop_overlap(self):
         torch.cuda.set_device(self.device_id)
+        if self.dist.rank == 0 and not self.is_warmup and self.benchmark_req_queues_size > 0 and self.kv_cache_transceiver:
+            while self.request_queue.qsize() < self.benchmark_req_queues_size:
+                logger.info(
+                    f"sleep 5 seconds, num_request_queue: {self.request_queue.qsize()}"
+                )
+                time.sleep(5)
+
         with self._profiler() as profile_step:
             iter_start_time = time.time()
             iter_stats = None
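For context, the added lines make rank 0 block at the top of `_executor_loop_overlap` until the request queue holds at least `TLLM_BENCHMARK_REQ_QUEUES_SIZE` requests, and only when a KV cache transceiver is configured and the executor is not in warmup, so a benchmark can start the overlap loop with a full queue instead of trickling requests in. Below is a minimal standalone sketch of the same gating pattern using only the standard library; the producer thread, polling interval, and queue contents are illustrative assumptions, not TensorRT-LLM code.

```python
import os
import queue
import threading
import time

# Illustrative request queue; in the diff this is self.request_queue on the executor.
request_queue: "queue.Queue[int]" = queue.Queue()

# Same environment variable as the diff; 0 (the default) disables the gate.
benchmark_req_queues_size = int(os.environ.get("TLLM_BENCHMARK_REQ_QUEUES_SIZE", "0"))


def enqueue_requests() -> None:
    """Stand-in for clients submitting requests while the executor starts up."""
    for request_id in range(32):
        request_queue.put(request_id)
        time.sleep(0.05)


threading.Thread(target=enqueue_requests, daemon=True).start()

if benchmark_req_queues_size > 0:
    # Gate: poll the queue size and sleep until the benchmark threshold is reached,
    # mirroring the while / qsize() / sleep structure added in the diff.
    while request_queue.qsize() < benchmark_req_queues_size:
        print(f"waiting, num_request_queue: {request_queue.qsize()}")
        time.sleep(1)

print(f"starting main loop with {request_queue.qsize()} queued requests")
```

Running this as `TLLM_BENCHMARK_REQ_QUEUES_SIZE=16 python gate_sketch.py` (the filename is hypothetical) prints the waiting message until 16 requests are queued, then proceeds; leaving the variable unset or at 0 skips the gate entirely, matching the default behavior in the diff.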