Merged
Commits
21 commits
f2263b7
feat: Add --enable-log-outputs flag for logging model generations
mizadri Jul 9, 2025
4848f49
fix: Resolve type checking issues in output logging
mizadri Jul 10, 2025
8c0aa78
Fix line length violations (E501) in logger and serving_chat
mizadri Jul 10, 2025
4a10460
Fix line length violation in streaming delta comment
mizadri Jul 10, 2025
7d35afb
Run isort and ruff locally to fix pre commit hooks issue
mizadri Jul 11, 2025
fb13841
Shortened lines to meet rule of line length < 80 characters
mizadri Jul 11, 2025
42085ad
tests: fix incorrect assertion indices in log_outputs tests
mizadri Jul 11, 2025
6185d66
logger: Include full text in streaming response logs
mizadri Jul 11, 2025
e864415
openai/serving_chat: log all tool-call arguments in streaming deltas
mizadri Jul 11, 2025
75fee5b
Merge upstream/main into feature/enable-log-outputs
mizadri Jul 11, 2025
d478a40
Merge remote-tracking branch 'upstream/main' into feature/enable-log-…
mizadri Jul 12, 2025
1ed2689
Removed comments that broke the line length constraint:
mizadri Jul 12, 2025
11543a0
Fixed comment length violation
mizadri Jul 12, 2025
b41c263
Reverted unnecessary formatting changes
mizadri Jul 13, 2025
aaa2579
Run pre-commit hooks to fix a PR issue with some format changes
mizadri Jul 14, 2025
7dc36ae
Merge remote-tracking branch 'upstream/main' into feature/enable-log-…
mizadri Jul 14, 2025
84097a2
Merge upstream/main into feature/enable-log-outputs
mizadri Jul 17, 2025
ef84a25
Merge remote-tracking branch 'upstream/main' into feature/enable-log-…
mizadri Jul 30, 2025
b8790b1
Removed deprecated parameter from tests
mizadri Jul 30, 2025
02839e9
Merge remote-tracking branch 'upstream/main' into feature/enable-log-…
mizadri Aug 1, 2025
e8be061
Removed super.init call since RequestLogger doesn't inherit from any …
mizadri Aug 1, 2025
252 changes: 248 additions & 4 deletions tests/test_logger.py
@@ -10,11 +10,12 @@
from json.decoder import JSONDecodeError
from tempfile import NamedTemporaryFile
from typing import Any
from unittest.mock import patch
from unittest.mock import MagicMock, patch
from uuid import uuid4

import pytest

from vllm.entrypoints.logger import RequestLogger
from vllm.logger import (_DATE_FORMAT, _FORMAT, _configure_vllm_root_logger,
enable_trace_function_call, init_logger)
from vllm.logging_utils import NewLineFormatter
@@ -228,9 +229,10 @@ def test_prepare_object_to_dump():
list_obj = [1, 2, 3]
assert prepare_object_to_dump(list_obj) == '[1, 2, 3]'

dict_obj = {'a': 1, 'b': 'b'}
dict_obj = {"a": 1, "b": "b"}
assert prepare_object_to_dump(dict_obj) in [
"{a: 1, b: 'b'}", "{b: 'b', a: 1}"
"{a: 1, b: 'b'}",
"{b: 'b', a: 1}",
]

set_obj = {1, 2, 3}
@@ -252,4 +254,246 @@ class CustomClass:
b: str

assert (prepare_object_to_dump(CustomClass(
1, 'b')) == "CustomClass(a=1, b='b')")
1, "b")) == "CustomClass(a=1, b='b')")


def test_request_logger_log_outputs():
"""Test the new log_outputs functionality."""
# Create a mock logger to capture log calls
mock_logger = MagicMock()

with patch("vllm.entrypoints.logger.logger", mock_logger):
request_logger = RequestLogger(max_log_len=None)

# Test basic output logging
request_logger.log_outputs(
request_id="test-123",
outputs="Hello, world!",
output_token_ids=[1, 2, 3, 4],
finish_reason="stop",
is_streaming=False,
delta=False,
)

mock_logger.info.assert_called_once()
call_args = mock_logger.info.call_args.args
assert "Generated response %s%s" in call_args[0]
assert call_args[1] == "test-123"
assert call_args[3] == "Hello, world!"
assert call_args[4] == [1, 2, 3, 4]
assert call_args[5] == "stop"


def test_request_logger_log_outputs_streaming_delta():
"""Test log_outputs with streaming delta mode."""
mock_logger = MagicMock()

with patch("vllm.entrypoints.logger.logger", mock_logger):
request_logger = RequestLogger(max_log_len=None)

# Test streaming delta logging
request_logger.log_outputs(
request_id="test-456",
outputs="Hello",
output_token_ids=[1],
finish_reason=None,
is_streaming=True,
delta=True,
)

mock_logger.info.assert_called_once()
call_args = mock_logger.info.call_args.args
assert "Generated response %s%s" in call_args[0]
assert call_args[1] == "test-456"
assert call_args[2] == " (streaming delta)"
assert call_args[3] == "Hello"
assert call_args[4] == [1]
assert call_args[5] is None


def test_request_logger_log_outputs_streaming_complete():
"""Test log_outputs with streaming complete mode."""
mock_logger = MagicMock()

with patch("vllm.entrypoints.logger.logger", mock_logger):
request_logger = RequestLogger(max_log_len=None)

# Test streaming complete logging
request_logger.log_outputs(
request_id="test-789",
outputs="Complete response",
output_token_ids=[1, 2, 3],
finish_reason="length",
is_streaming=True,
delta=False,
)

mock_logger.info.assert_called_once()
call_args = mock_logger.info.call_args.args
assert "Generated response %s%s" in call_args[0]
assert call_args[1] == "test-789"
assert call_args[2] == " (streaming complete)"
assert call_args[3] == "Complete response"
assert call_args[4] == [1, 2, 3]
assert call_args[5] == "length"


def test_request_logger_log_outputs_with_truncation():
"""Test log_outputs respects max_log_len setting."""
mock_logger = MagicMock()

with patch("vllm.entrypoints.logger.logger", mock_logger):
# Set max_log_len to 10
request_logger = RequestLogger(max_log_len=10)

# Test output truncation
long_output = "This is a very long output that should be truncated"
long_token_ids = list(range(20)) # 20 tokens

request_logger.log_outputs(
request_id="test-truncate",
outputs=long_output,
output_token_ids=long_token_ids,
finish_reason="stop",
is_streaming=False,
delta=False,
)

mock_logger.info.assert_called_once()
call_args = mock_logger.info.call_args

# Check that output was truncated to first 10 characters
logged_output = call_args[0][3]
assert logged_output == "This is a "
assert len(logged_output) == 10

# Check that token IDs were truncated to first 10 tokens
logged_token_ids = call_args[0][4]
assert logged_token_ids == list(range(10))
assert len(logged_token_ids) == 10


def test_request_logger_log_outputs_none_values():
"""Test log_outputs handles None values correctly."""
mock_logger = MagicMock()

with patch("vllm.entrypoints.logger.logger", mock_logger):
request_logger = RequestLogger(max_log_len=None)

# Test with None output_token_ids
request_logger.log_outputs(
request_id="test-none",
outputs="Test output",
output_token_ids=None,
finish_reason="stop",
is_streaming=False,
delta=False,
)

mock_logger.info.assert_called_once()
call_args = mock_logger.info.call_args.args
assert "Generated response %s%s" in call_args[0]
assert call_args[1] == "test-none"
assert call_args[3] == "Test output"
assert call_args[4] is None
assert call_args[5] == "stop"


def test_request_logger_log_outputs_empty_output():
"""Test log_outputs handles empty output correctly."""
mock_logger = MagicMock()

with patch("vllm.entrypoints.logger.logger", mock_logger):
request_logger = RequestLogger(max_log_len=5)

# Test with empty output
request_logger.log_outputs(
request_id="test-empty",
outputs="",
output_token_ids=[],
finish_reason="stop",
is_streaming=False,
delta=False,
)

mock_logger.info.assert_called_once()
call_args = mock_logger.info.call_args.args
assert "Generated response %s%s" in call_args[0]
assert call_args[1] == "test-empty"
assert call_args[3] == ""
assert call_args[4] == []
assert call_args[5] == "stop"


def test_request_logger_log_outputs_integration():
"""Test that log_outputs can be called alongside log_inputs."""
mock_logger = MagicMock()

with patch("vllm.entrypoints.logger.logger", mock_logger):
request_logger = RequestLogger(max_log_len=None)

# Test that both methods can be called without interference
request_logger.log_inputs(
request_id="test-integration",
prompt="Test prompt",
prompt_token_ids=[1, 2, 3],
prompt_embeds=None,
params=None,
lora_request=None,
)

request_logger.log_outputs(
request_id="test-integration",
outputs="Test output",
output_token_ids=[4, 5, 6],
finish_reason="stop",
is_streaming=False,
delta=False,
)

# Should have been called twice - once for inputs, once for outputs
assert mock_logger.info.call_count == 2

# Check that the calls were made with correct patterns
input_call = mock_logger.info.call_args_list[0][0]
output_call = mock_logger.info.call_args_list[1][0]

assert "Received request %s" in input_call[0]
assert input_call[1] == "test-integration"

assert "Generated response %s%s" in output_call[0]
assert output_call[1] == "test-integration"


def test_streaming_complete_logs_full_text_content():
"""Test that streaming complete logging includes
full accumulated text, not just token count."""
mock_logger = MagicMock()

with patch("vllm.entrypoints.logger.logger", mock_logger):
request_logger = RequestLogger(max_log_len=None)

# Test with actual content instead of token count format
full_response = "This is a complete response from streaming"
request_logger.log_outputs(
request_id="test-streaming-full-text",
outputs=full_response,
output_token_ids=None,
finish_reason="streaming_complete",
is_streaming=True,
delta=False,
)

mock_logger.info.assert_called_once()
call_args = mock_logger.info.call_args.args

# Verify the logged output is the full text, not a token count format
logged_output = call_args[3]
assert logged_output == full_response
assert "tokens>" not in logged_output
assert "streaming_complete" not in logged_output

# Verify other parameters
assert call_args[1] == "test-streaming-full-text"
assert call_args[2] == " (streaming complete)"
assert call_args[5] == "streaming_complete"
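
For reference, the assertions above target the positional arguments passed to logger.info. With the %-style format string added in vllm/entrypoints/logger.py below, the test-789 case would render roughly as follows (a sketch, assuming standard % formatting):

Generated response test-789 (streaming complete): output: 'Complete response', output_token_ids: [1, 2, 3], finish_reason: length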
36 changes: 34 additions & 2 deletions vllm/entrypoints/logger.py
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

from collections.abc import Sequence
from typing import Optional, Union

import torch
@@ -16,8 +17,6 @@
class RequestLogger:

def __init__(self, *, max_log_len: Optional[int]) -> None:
super().__init__()

self.max_log_len = max_log_len

def log_inputs(
@@ -45,3 +44,36 @@ def log_inputs(
"lora_request: %s.", request_id, prompt, params, prompt_token_ids,
prompt_embeds.shape if prompt_embeds is not None else None,
lora_request)

def log_outputs(
self,
request_id: str,
outputs: str,
output_token_ids: Optional[Sequence[int]],
finish_reason: Optional[str] = None,
is_streaming: bool = False,
delta: bool = False,
) -> None:
max_log_len = self.max_log_len
if max_log_len is not None:
if outputs is not None:
outputs = outputs[:max_log_len]

if output_token_ids is not None:
# Convert to list and apply truncation
output_token_ids = list(output_token_ids)[:max_log_len]

stream_info = ""
if is_streaming:
stream_info = (" (streaming delta)"
if delta else " (streaming complete)")

logger.info(
"Generated response %s%s: output: %r, "
"output_token_ids: %s, finish_reason: %s",
request_id,
stream_info,
outputs,
output_token_ids,
finish_reason,
)
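
Below is a minimal usage sketch of the new log_outputs method, not part of the diff; the request ID and token IDs are illustrative, and the signature matches the implementation above:

from vllm.entrypoints.logger import RequestLogger

# Truncate logged output text and token IDs to at most 64 items each.
request_logger = RequestLogger(max_log_len=64)

# Non-streaming: log the final generation once.
request_logger.log_outputs(
    request_id="chatcmpl-abc123",  # hypothetical request ID
    outputs="Hello, world!",
    output_token_ids=[9906, 11, 1917, 0],  # illustrative token IDs
    finish_reason="stop",
    is_streaming=False,
    delta=False,
)

# Streaming: log each delta as it is produced.
request_logger.log_outputs(
    request_id="chatcmpl-abc123",
    outputs="Hello",
    output_token_ids=[9906],
    finish_reason=None,
    is_streaming=True,
    delta=True,
)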
9 changes: 6 additions & 3 deletions vllm/entrypoints/openai/cli_args.py
@@ -44,10 +44,10 @@ def __call__(

lora_list: list[LoRAModulePath] = []
for item in values:
if item in [None, '']: # Skip if item is None or empty string
if item in [None, ""]: # Skip if item is None or empty string
continue
if '=' in item and ',' not in item: # Old format: name=path
name, path = item.split('=')
if "=" in item and "," not in item: # Old format: name=path
name, path = item.split("=")
lora_list.append(LoRAModulePath(name, path))
else: # Assume JSON format
try:
@@ -163,6 +163,9 @@ class FrontendArgs:
enable_tokenizer_info_endpoint: bool = False
"""Enable the /get_tokenizer_info endpoint. May expose chat
templates and other tokenizer configuration."""
enable_log_outputs: bool = False
"""If set to True, enable logging of model outputs (generations)
in addition to the input logging that is enabled by default."""

@staticmethod
def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
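
With enable_log_outputs added to FrontendArgs, output logging can be switched on when starting the OpenAI-compatible server. A sketch of the invocation, assuming the usual kebab-case mapping of frontend args to CLI flags (the model name is illustrative):

vllm serve meta-llama/Llama-3.1-8B-Instruct --enable-log-outputs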