1111import torch
1212import torch .nn .functional as F
1313
14- from tensorrt_llm .llmapi .otel_tracing import (
15- SpanAttributes , SpanKind , extract_trace_context , global_otlp_tracer ,
16- insufficient_request_metrics_warning )
14+ from tensorrt_llm .llmapi import tracing
1715
1816from .._utils import nvtx_range_debug
1917from ..bindings import executor as tllm
@@ -400,23 +398,23 @@ def do_tracing(
400398 output : CompletionOutput ,
401399 req_perf_metrics_dict : Optional [dict [str , float ]] = None ,
402400 ):
403- if not global_otlp_tracer ():
401+ if not tracing . global_otlp_tracer ():
404402 return
405403
406404 metrics_dict = self .metrics_dict
407405 if not metrics_dict or not req_perf_metrics_dict :
408406 # Insufficient request metrics available; trace generation aborted.
409- insufficient_request_metrics_warning ()
407+ tracing . insufficient_request_metrics_warning ()
410408 return
411409
412- trace_context = extract_trace_context (self .trace_headers )
410+ trace_context = tracing . extract_trace_context (self .trace_headers )
413411 sampling_params = self .sampling_params
414412
415413 # TODO: Add request arrival time
416414 arrival_time = time .time () - metrics_dict .get (MetricNames .E2E , - 1 )
417- with global_otlp_tracer ().start_as_current_span (
415+ with tracing . global_otlp_tracer ().start_as_current_span (
418416 "llm_request" ,
419- kind = SpanKind .SERVER ,
417+ kind = tracing . SpanKind .SERVER ,
420418 context = trace_context ,
421419 start_time = int (arrival_time * 1e9 ),
422420 ) as span :
@@ -428,38 +426,41 @@ def safe_set_attr(span, attr, value):
428426 e2e_time = metrics_dict .get (MetricNames .E2E , - 1 )
429427 safe_set_attr (
430428 span ,
431- SpanAttributes .GEN_AI_REQUEST_TEMPERATURE ,
429+ tracing . SpanAttributes .GEN_AI_REQUEST_TEMPERATURE ,
432430 sampling_params .temperature ,
433431 )
434- safe_set_attr (span , SpanAttributes .GEN_AI_REQUEST_TOP_P ,
432+ safe_set_attr (span , tracing . SpanAttributes .GEN_AI_REQUEST_TOP_P ,
435433 sampling_params .top_p )
436434 safe_set_attr (
437435 span ,
438- SpanAttributes .GEN_AI_REQUEST_MAX_TOKENS ,
436+ tracing . SpanAttributes .GEN_AI_REQUEST_MAX_TOKENS ,
439437 sampling_params .max_tokens ,
440438 )
441- safe_set_attr (span , SpanAttributes .GEN_AI_REQUEST_N ,
439+ safe_set_attr (span , tracing . SpanAttributes .GEN_AI_REQUEST_N ,
442440 sampling_params .n )
443441 # TODO: Add prompt info in result base
444442 safe_set_attr (
445443 span ,
446- SpanAttributes .GEN_AI_USAGE_PROMPT_TOKENS ,
444+ tracing . SpanAttributes .GEN_AI_USAGE_PROMPT_TOKENS ,
447445 getattr (self .postproc_params .postproc_args , "num_prompt_tokens" ,
448446 None ) if self .postproc_params
449447 and self .postproc_params .postproc_args else None ,
450448 )
451- safe_set_attr (span , SpanAttributes .GEN_AI_USAGE_COMPLETION_TOKENS ,
449+ safe_set_attr (span ,
450+ tracing .SpanAttributes .GEN_AI_USAGE_COMPLETION_TOKENS ,
452451 output .length )
453452 safe_set_attr (
454453 span ,
455- SpanAttributes .GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN ,
454+ tracing . SpanAttributes .GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN ,
456455 metrics_dict .get (MetricNames .TTFT , - 1 ),
457456 )
458- safe_set_attr (span , SpanAttributes .GEN_AI_LATENCY_E2E , e2e_time )
459- safe_set_attr (span , SpanAttributes .GEN_AI_REQUEST_ID , self .id )
457+ safe_set_attr (span , tracing .SpanAttributes .GEN_AI_LATENCY_E2E ,
458+ e2e_time )
459+ safe_set_attr (span , tracing .SpanAttributes .GEN_AI_REQUEST_ID ,
460+ self .id )
460461 safe_set_attr (
461462 span ,
462- SpanAttributes .GEN_AI_LATENCY_TIME_IN_QUEUE ,
463+ tracing . SpanAttributes .GEN_AI_LATENCY_TIME_IN_QUEUE ,
463464 metrics_dict .get (MetricNames .REQUEST_QUEUE_TIME , - 1 ),
464465 )
465466
0 commit comments