@@ -144,7 +144,12 @@ def calculate_metrics(
144144 e2els : list [float ] = []
145145 tput_user : list [float ] = []
146146 latest_avg_decoded_tokens_per_iter : float = 0.0
147+ error_counts : dict [str , int ] = {}
147148 for i in range (len (outputs )):
149+ if outputs [i ].exception_type :
150+ exception_type = outputs [i ].exception_type
151+ error_counts [exception_type ] = error_counts .get (exception_type ,
152+ 0 ) + 1
148153 if outputs [i ].success :
149154 output_len = outputs [i ].output_tokens
150155 if not output_len :
@@ -179,6 +184,11 @@ def calculate_metrics(
179184 else :
180185 actual_output_lens .append (0 )
181186
187+ total_error_count = sum (error_counts .values ())
188+ for exception_type , count in error_counts .items ():
189+ print (f"Error type: { exception_type } , Count: { count } requests" )
190+ print (f"Total failed requests: { total_error_count } " )
191+
182192 if goodput_config_dict :
183193 valid_metrics = []
184194 slo_values = []
@@ -336,7 +346,8 @@ async def benchmark(
336346 print (f"Burstiness factor: { burstiness } ({ distribution } )" )
337347 print (f"Maximum request concurrency: { max_concurrency } " )
338348
339- pbar = None if disable_tqdm else tqdm (total = len (input_requests ))
349+ pbar = None if disable_tqdm else tqdm (total = len (input_requests ),
350+ desc = "Benchmarking" )
340351
341352 # This can be used once the minimum Python version is 3.10 or higher,
342353 # and it will simplify the code in limited_request_func.
@@ -433,7 +444,10 @@ async def limited_request_func(request_func_input, streaming, pbar,
433444 )
434445
435446 print ("{s:{c}^{n}}" .format (s = ' Serving Benchmark Result ' , n = 50 , c = '=' ))
447+ print ("{:<40} {:<10}" .format ("Total requests:" , len (outputs )))
436448 print ("{:<40} {:<10}" .format ("Successful requests:" , metrics .completed ))
449+ print ("{:<40} {:<10}" .format ("Failed requests:" ,
450+ len (outputs ) - metrics .completed ))
437451 print ("{:<40} {:<10.2f}" .format ("Benchmark duration (s):" ,
438452 benchmark_duration ))
439453 print ("{:<40} {:<10}" .format ("Total input tokens:" , metrics .total_input ))
@@ -455,6 +469,12 @@ async def limited_request_func(request_func_input, streaming, pbar,
455469 if metrics .avg_decoded_tokens_per_iter > 0.0 :
456470 print ("{:<40} {:<10.2f}" .format ("Avg Decoded Tokens per Iter:" ,
457471 metrics .avg_decoded_tokens_per_iter ))
472+ if len (outputs ) - metrics .completed > 0 :
473+ print (
474+ f"=======================!FAILED REQUESTS!=======================" )
475+ print (f"Total failed requests: { len (outputs ) - metrics .completed } " )
476+ print (
477+ f"=====================!CHECK LOG FOR ERRORS!====================" )
458478
459479 result = {
460480 "duration" : benchmark_duration ,
0 commit comments