
Commit 9f51b6d

Author: Zhang Haotong

modular otel_trace

Signed-off-by: Zhang Haotong <[email protected]>

Parent: 3eccbe7

4 files changed: 42 additions, 29 deletions

tensorrt_llm/executor/result.py

Lines changed: 19 additions & 18 deletions
@@ -11,9 +11,7 @@
 import torch
 import torch.nn.functional as F
 
-from tensorrt_llm.llmapi.otel_tracing import (
-    SpanAttributes, SpanKind, extract_trace_context, global_otlp_tracer,
-    insufficient_request_metrics_warning)
+from tensorrt_llm.llmapi import tracing
 
 from .._utils import nvtx_range_debug
 from ..bindings import executor as tllm
@@ -400,23 +398,23 @@ def do_tracing(
         output: CompletionOutput,
         req_perf_metrics_dict: Optional[dict[str, float]] = None,
     ):
-        if not global_otlp_tracer():
+        if not tracing.global_otlp_tracer():
             return
 
         metrics_dict = self.metrics_dict
         if not metrics_dict or not req_perf_metrics_dict:
             # Insufficient request metrics available; trace generation aborted.
-            insufficient_request_metrics_warning()
+            tracing.insufficient_request_metrics_warning()
             return
 
-        trace_context = extract_trace_context(self.trace_headers)
+        trace_context = tracing.extract_trace_context(self.trace_headers)
         sampling_params = self.sampling_params
 
         # TODO: Add request arrival time
         arrival_time = time.time() - metrics_dict.get(MetricNames.E2E, -1)
-        with global_otlp_tracer().start_as_current_span(
+        with tracing.global_otlp_tracer().start_as_current_span(
                 "llm_request",
-                kind=SpanKind.SERVER,
+                kind=tracing.SpanKind.SERVER,
                 context=trace_context,
                 start_time=int(arrival_time * 1e9),
         ) as span:
@@ -428,38 +426,41 @@ def safe_set_attr(span, attr, value):
             e2e_time = metrics_dict.get(MetricNames.E2E, -1)
             safe_set_attr(
                 span,
-                SpanAttributes.GEN_AI_REQUEST_TEMPERATURE,
+                tracing.SpanAttributes.GEN_AI_REQUEST_TEMPERATURE,
                 sampling_params.temperature,
             )
-            safe_set_attr(span, SpanAttributes.GEN_AI_REQUEST_TOP_P,
+            safe_set_attr(span, tracing.SpanAttributes.GEN_AI_REQUEST_TOP_P,
                           sampling_params.top_p)
             safe_set_attr(
                 span,
-                SpanAttributes.GEN_AI_REQUEST_MAX_TOKENS,
+                tracing.SpanAttributes.GEN_AI_REQUEST_MAX_TOKENS,
                 sampling_params.max_tokens,
             )
-            safe_set_attr(span, SpanAttributes.GEN_AI_REQUEST_N,
+            safe_set_attr(span, tracing.SpanAttributes.GEN_AI_REQUEST_N,
                           sampling_params.n)
             # TODO: Add prompt info in result base
             safe_set_attr(
                 span,
-                SpanAttributes.GEN_AI_USAGE_PROMPT_TOKENS,
+                tracing.SpanAttributes.GEN_AI_USAGE_PROMPT_TOKENS,
                 getattr(self.postproc_params.postproc_args, "num_prompt_tokens",
                         None) if self.postproc_params
                 and self.postproc_params.postproc_args else None,
             )
-            safe_set_attr(span, SpanAttributes.GEN_AI_USAGE_COMPLETION_TOKENS,
+            safe_set_attr(span,
+                          tracing.SpanAttributes.GEN_AI_USAGE_COMPLETION_TOKENS,
                           output.length)
             safe_set_attr(
                 span,
-                SpanAttributes.GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN,
+                tracing.SpanAttributes.GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN,
                 metrics_dict.get(MetricNames.TTFT, -1),
             )
-            safe_set_attr(span, SpanAttributes.GEN_AI_LATENCY_E2E, e2e_time)
-            safe_set_attr(span, SpanAttributes.GEN_AI_REQUEST_ID, self.id)
+            safe_set_attr(span, tracing.SpanAttributes.GEN_AI_LATENCY_E2E,
+                          e2e_time)
+            safe_set_attr(span, tracing.SpanAttributes.GEN_AI_REQUEST_ID,
+                          self.id)
             safe_set_attr(
                 span,
-                SpanAttributes.GEN_AI_LATENCY_TIME_IN_QUEUE,
+                tracing.SpanAttributes.GEN_AI_LATENCY_TIME_IN_QUEUE,
                 metrics_dict.get(MetricNames.REQUEST_QUEUE_TIME, -1),
             )
 
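The diff above only redirects call sites to the tracing namespace; the span construction itself is plain OpenTelemetry. As a point of reference, here is a minimal self-contained sketch of the same back-dated-span technique; the tracer name, metric value, and attribute key are illustrative and not part of this commit:

import time

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider

# Stand-in provider; TensorRT-LLM wires its own via tracing.init_tracer().
trace.set_tracer_provider(TracerProvider())
tracer = trace.get_tracer("demo")

e2e_seconds = 0.42  # stand-in for metrics_dict.get(MetricNames.E2E)
# Back-date the span so it covers the whole request lifetime; OTel takes
# start_time in nanoseconds since the epoch, hence the * 1e9.
arrival_time = time.time() - e2e_seconds

with tracer.start_as_current_span(
        "llm_request",
        kind=trace.SpanKind.SERVER,
        start_time=int(arrival_time * 1e9),
) as span:
    # Attribute key assumed for illustration; the real code goes through
    # tracing.SpanAttributes and skips None values via safe_set_attr.
    span.set_attribute("gen_ai.request.temperature", 0.8)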

tensorrt_llm/llmapi/llm.py

Lines changed: 2 additions & 2 deletions
@@ -16,7 +16,7 @@
 from tensorrt_llm.inputs.data import TextPrompt
 from tensorrt_llm.inputs.multimodal import MultimodalParams
 from tensorrt_llm.inputs.registry import DefaultInputProcessor
-from tensorrt_llm.llmapi.otel_tracing import init_tracer
+from tensorrt_llm.llmapi import tracing
 
 from .._utils import nvtx_range_debug
 from ..bindings import executor as tllm
@@ -213,7 +213,7 @@ def __init__(self,
 
         try:
             if self.args.otlp_traces_endpoint:
-                init_tracer("trt.llm", self.args.otlp_traces_endpoint)
+                tracing.init_tracer("trt.llm", self.args.otlp_traces_endpoint)
                 logger.info(
                     f"Initialized OTLP tracer successfully, endpoint: {self.args.otlp_traces_endpoint}"
                 )
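For completeness, a hedged usage sketch of the code path touched here. It assumes otlp_traces_endpoint is accepted as an LLM constructor argument (the hunk reads it from self.args) and that an OTLP collector is listening on the given port; the model name and endpoint are illustrative:

from tensorrt_llm import LLM

# When the endpoint is set, the constructor calls
# tracing.init_tracer("trt.llm", ...) as in the second hunk above.
llm = LLM(
    model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",   # illustrative model
    otlp_traces_endpoint="http://localhost:4317",  # assumed kwarg; gRPC OTLP collector
)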

tensorrt_llm/llmapi/otel_tracing.py renamed to tensorrt_llm/llmapi/tracing.py

Lines changed: 16 additions & 1 deletion
@@ -1,12 +1,27 @@
 # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
 
+__all__ = [
+    'SpanAttributes',
+    'contains_trace_headers',
+    'extract_trace_context',
+    'extract_trace_headers',
+    'get_span_exporter',
+    'global_otlp_tracer',
+    'init_tracer',
+    'insufficient_request_metrics_warning',
+    'is_otel_available',
+    'is_tracing_enabled',
+    'log_tracing_disabled_warning',
+]
+
 import os
 from collections.abc import Mapping
 from typing import Optional
 
 from tensorrt_llm._utils import run_once
 from tensorrt_llm.logger import logger
 
+# Adapted from https://github.com/vllm-project/vllm/blob/v0.10.0rc1/vllm/tracing.py#L11
 TRACE_HEADERS = ["traceparent", "tracestate"]
 
 _global_tracer_ = None
@@ -19,7 +34,7 @@
         OTEL_EXPORTER_OTLP_TRACES_PROTOCOL
     from opentelemetry.sdk.trace import TracerProvider
     from opentelemetry.sdk.trace.export import BatchSpanProcessor
-    from opentelemetry.trace import SpanKind, Tracer, set_tracer_provider
+    from opentelemetry.trace import Tracer, set_tracer_provider
    from opentelemetry.trace.propagation.tracecontext import \
        TraceContextTextMapPropagator

tensorrt_llm/serve/openai_server.py

Lines changed: 5 additions & 8 deletions
@@ -26,12 +26,9 @@
 from tensorrt_llm.inputs import prompt_inputs
 from tensorrt_llm.inputs.utils import ConversationMessage, apply_chat_template
 from tensorrt_llm.llmapi import DisaggregatedParams as LlmDisaggregatedParams
+from tensorrt_llm.llmapi import tracing
 from tensorrt_llm.llmapi.disagg_utils import MetadataServerConfig, ServerRole
 from tensorrt_llm.llmapi.llm import RequestOutput
-from tensorrt_llm.llmapi.otel_tracing import (contains_trace_headers,
-                                              extract_trace_headers,
-                                              is_tracing_enabled,
-                                              log_tracing_disabled_warning)
 from tensorrt_llm.logger import logger
 from tensorrt_llm.metrics.collector import MetricsCollector
 from tensorrt_llm.serve.chat_utils import (check_multiple_response,
@@ -529,8 +526,8 @@ async def _get_trace_headers(
         self,
         headers: Headers,
     ) -> Optional[Mapping[str, str]]:
-        if is_tracing_enabled():
-            return extract_trace_headers(headers)
-        if contains_trace_headers(headers):
-            log_tracing_disabled_warning()
+        if tracing.is_tracing_enabled():
+            return tracing.extract_trace_headers(headers)
+        if tracing.contains_trace_headers(headers):
+            tracing.log_tracing_disabled_warning()
         return None
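_get_trace_headers only forwards the W3C trace headers listed in TRACE_HEADERS, so a client that injects traceparent ties the server-side llm_request span into its own trace. A hedged client-side sketch; the server URL and request payload are illustrative:

import requests
from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.trace.propagation.tracecontext import \
    TraceContextTextMapPropagator

trace.set_tracer_provider(TracerProvider())
tracer = trace.get_tracer("client")

with tracer.start_as_current_span("client_call"):
    headers: dict[str, str] = {}
    # Writes the "traceparent" (and, if present, "tracestate") header.
    TraceContextTextMapPropagator().inject(headers)
    requests.post("http://localhost:8000/v1/completions",
                  json={"model": "demo", "prompt": "Hello"},
                  headers=headers)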
