Skip to content

Commit 89889fb

Browse files
authored
[https://nvbugs/5369366] [fix] Report failing requests (#7060)
Signed-off-by: Rashid Kaleem <[email protected]>
1 parent 08a0e06 commit 89889fb

File tree

2 files changed

+30
-4
lines changed

2 files changed

+30
-4
lines changed

tensorrt_llm/serve/scripts/backend_request_func.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ class RequestFuncOutput:
4646
prompt_len: int = 0
4747
error: str = ""
4848
avg_decoded_tokens_per_iter: float = 0.0 # Average tokens decoded per iteration
49+
exception_type: str = None # unset
4950

5051

5152
async def async_request_trt_llm(
@@ -132,10 +133,11 @@ async def async_request_trt_llm(
132133
else:
133134
output.error = response.reason or ""
134135
output.success = False
135-
except Exception:
136+
except Exception as e:
136137
output.success = False
137138
exc_info = sys.exc_info()
138139
output.error = "".join(traceback.format_exception(*exc_info))
140+
output.exception_type = e.__class__.__name__
139141
finally:
140142
if session is None:
141143
await request_session.close()
@@ -259,12 +261,14 @@ async def async_request_openai_completions(
259261
output.avg_decoded_tokens_per_iter = choice[
260262
"avg_decoded_tokens_per_iter"]
261263
else:
264+
print(f"HTTP Error {response.status}: {response}")
262265
output.error = response.reason or ""
263266
output.success = False
264-
except Exception:
267+
except Exception as e:
265268
output.success = False
266269
exc_info = sys.exc_info()
267270
output.error = "".join(traceback.format_exception(*exc_info))
271+
output.exception_type = e.__class__.__name__
268272
finally:
269273
if session is None:
270274
await request_session.close()
@@ -392,12 +396,14 @@ async def async_request_openai_chat_completions(
392396
"avg_decoded_tokens_per_iter"]
393397

394398
else:
399+
# TODO: Need to store the status code to debug and report
395400
output.error = response.reason or ""
396401
output.success = False
397-
except Exception:
402+
except Exception as e:
398403
output.success = False
399404
exc_info = sys.exc_info()
400405
output.error = "".join(traceback.format_exception(*exc_info))
406+
output.exception_type = e.__class__.__name__
401407
finally:
402408
if session is None:
403409
await request_session.close()

tensorrt_llm/serve/scripts/benchmark_serving.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,12 @@ def calculate_metrics(
144144
e2els: list[float] = []
145145
tput_user: list[float] = []
146146
latest_avg_decoded_tokens_per_iter: float = 0.0
147+
error_counts: dict[str, int] = {}
147148
for i in range(len(outputs)):
149+
if outputs[i].exception_type:
150+
exception_type = outputs[i].exception_type
151+
error_counts[exception_type] = error_counts.get(exception_type,
152+
0) + 1
148153
if outputs[i].success:
149154
output_len = outputs[i].output_tokens
150155
if not output_len:
@@ -179,6 +184,11 @@ def calculate_metrics(
179184
else:
180185
actual_output_lens.append(0)
181186

187+
total_error_count = sum(error_counts.values())
188+
for exception_type, count in error_counts.items():
189+
print(f"Error type: {exception_type}, Count: {count} requests")
190+
print(f"Total failed requests: {total_error_count}")
191+
182192
if goodput_config_dict:
183193
valid_metrics = []
184194
slo_values = []
@@ -336,7 +346,8 @@ async def benchmark(
336346
print(f"Burstiness factor: {burstiness} ({distribution})")
337347
print(f"Maximum request concurrency: {max_concurrency}")
338348

339-
pbar = None if disable_tqdm else tqdm(total=len(input_requests))
349+
pbar = None if disable_tqdm else tqdm(total=len(input_requests),
350+
desc="Benchmarking")
340351

341352
# This can be used once the minimum Python version is 3.10 or higher,
342353
# and it will simplify the code in limited_request_func.
@@ -433,7 +444,10 @@ async def limited_request_func(request_func_input, streaming, pbar,
433444
)
434445

435446
print("{s:{c}^{n}}".format(s=' Serving Benchmark Result ', n=50, c='='))
447+
print("{:<40} {:<10}".format("Total requests:", len(outputs)))
436448
print("{:<40} {:<10}".format("Successful requests:", metrics.completed))
449+
print("{:<40} {:<10}".format("Failed requests:",
450+
len(outputs) - metrics.completed))
437451
print("{:<40} {:<10.2f}".format("Benchmark duration (s):",
438452
benchmark_duration))
439453
print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input))
@@ -455,6 +469,12 @@ async def limited_request_func(request_func_input, streaming, pbar,
455469
if metrics.avg_decoded_tokens_per_iter > 0.0:
456470
print("{:<40} {:<10.2f}".format("Avg Decoded Tokens per Iter:",
457471
metrics.avg_decoded_tokens_per_iter))
472+
if len(outputs) - metrics.completed > 0:
473+
print(
474+
f"=======================!FAILED REQUESTS!=======================")
475+
print(f"Total failed requests: {len(outputs) - metrics.completed}")
476+
print(
477+
f"=====================!CHECK LOG FOR ERRORS!====================")
458478

459479
result = {
460480
"duration": benchmark_duration,

0 commit comments

Comments (0)