Skip to content

Commit 1e4fa13

Browse files
authored
Add sleep function for disagg gen-only benchmarking (#5398)
Signed-off-by: Xianjie <[email protected]>
1 parent feaf789 commit 1e4fa13

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -236,6 +236,8 @@ def __init__(self,
236 236
self.ctx_in_transmission_requests = []
237 237
self.previous_batch: Optional[BatchState] = None
238 238
self.num_scheduled_requests: int = 0
239+
self.benchmark_req_queues_size = int(
240+
os.environ.get("TLLM_BENCHMARK_REQ_QUEUES_SIZE", 0))
239 241

240 242
# list of requests in each PP micro batch
241 243
self.num_micro_batches = self.dist.pp_size
@@ -996,6 +998,13 @@ def _prepare_draft_requests(self):
996 998

997 999
def _executor_loop_overlap(self):
998 1000
torch.cuda.set_device(self.device_id)
1001+
if self.dist.rank == 0 and not self.is_warmup and self.benchmark_req_queues_size > 0 and self.kv_cache_transceiver:
1002+
while self.request_queue.qsize() < self.benchmark_req_queues_size:
1003+
logger.info(
1004+
f"sleep 5 seconds, num_request_queue: {self.request_queue.qsize()}"
1005+
)
1006+
time.sleep(5)
1007+
999 1008
with self._profiler() as profile_step:
1000 1009
iter_start_time = time.time()
1001 1010
iter_stats = None

0 commit comments

Comments
 (0)