Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion model-engine/model_engine_server/common/datadog_utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
from typing import Optional

from ddtrace import tracer


def add_trace_request_id(request_id: Optional[str]):
    """Adds a custom tag to a given dd trace corresponding to the request id
    so that we can filter in Datadog easier

    Args:
        request_id: Request identifier to attach to the active span. A falsy
            value (``None`` or ``""``) makes this a no-op, since there is
            nothing useful to tag.
    """
    # Request id may legitimately be absent (e.g. not set by middleware);
    # bail out early rather than tagging spans with an empty value.
    if not request_id:
        return

    # Only tag when a span is actually active; tracer.current_span() returns
    # None outside of an active trace.
    current_span = tracer.current_span()
    if current_span:
        current_span.set_tag("launch.request_id", request_id)
4 changes: 2 additions & 2 deletions model-engine/model_engine_server/common/dtos/llms.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ class CompletionSyncV1Response(BaseModel):
Response object for a synchronous prompt completion task.
"""

request_id: str
request_id: Optional[str]
output: Optional[CompletionOutput] = None


Expand Down Expand Up @@ -273,7 +273,7 @@ class CompletionStreamV1Response(BaseModel):
Response object for a stream prompt completion task.
"""

request_id: str
request_id: Optional[str]
output: Optional[CompletionStreamOutput] = None
error: Optional[StreamError] = None
"""Error of the response (if any)."""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import os
from dataclasses import asdict
from typing import Any, AsyncIterable, Dict, List, Optional, Union
from uuid import uuid4

from model_engine_server.common.config import hmi_config
from model_engine_server.common.dtos.llms import (
Expand All @@ -35,7 +34,12 @@
from model_engine_server.common.dtos.tasks import SyncEndpointPredictV1Request, TaskStatus
from model_engine_server.common.resource_limits import validate_resource_requests
from model_engine_server.core.auth.authentication_repository import User
from model_engine_server.core.loggers import logger_name, make_logger
from model_engine_server.core.loggers import (
LoggerTagKey,
LoggerTagManager,
logger_name,
make_logger,
)
from model_engine_server.domain.entities import (
LLMInferenceFramework,
LLMMetadata,
Expand Down Expand Up @@ -1448,7 +1452,7 @@ async def execute(
ObjectNotAuthorizedException: If the owner does not own the model endpoint.
"""

request_id = str(uuid4())
request_id = LoggerTagManager.get(LoggerTagKey.REQUEST_ID)
add_trace_request_id(request_id)

model_endpoints = await self.llm_model_endpoint_service.list_llm_model_endpoints(
Expand Down Expand Up @@ -1736,7 +1740,7 @@ async def execute(
ObjectNotAuthorizedException: If the owner does not own the model endpoint.
"""

request_id = str(uuid4())
request_id = LoggerTagManager.get(LoggerTagKey.REQUEST_ID)
add_trace_request_id(request_id)

model_endpoints = await self.llm_model_endpoint_service.list_llm_model_endpoints(
Expand Down
3 changes: 0 additions & 3 deletions model-engine/tests/unit/domain/test_llm_use_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -948,7 +948,6 @@ async def test_completion_stream_use_case_success(
output_texts = ["I", " am", " a", " new", "bie", ".", "I am a newbie."]
i = 0
async for message in response_1:
assert message.dict()["request_id"]
assert message.dict()["output"]["text"] == output_texts[i]
if i == 6:
assert message.dict()["output"]["num_prompt_tokens"] == 7
Expand Down Expand Up @@ -1016,7 +1015,6 @@ async def test_completion_stream_text_generation_inference_use_case_success(
output_texts = ["I", " am", " a", " new", "bie", ".", "I am a newbie."]
i = 0
async for message in response_1:
assert message.dict()["request_id"]
assert message.dict()["output"]["text"] == output_texts[i]
if i == 5:
assert message.dict()["output"]["num_prompt_tokens"] == 7
Expand Down Expand Up @@ -1079,7 +1077,6 @@ async def test_completion_stream_trt_llm_use_case_success(
output_texts = ["Machine", "learning", "is", "a", "branch"]
i = 0
async for message in response_1:
assert message.dict()["request_id"]
assert message.dict()["output"]["text"] == output_texts[i]
assert message.dict()["output"]["num_prompt_tokens"] == 7
assert message.dict()["output"]["num_completion_tokens"] == i + 1
Expand Down