1010from json .decoder import JSONDecodeError
1111from tempfile import NamedTemporaryFile
1212from typing import Any
13- from unittest .mock import patch
13+ from unittest .mock import MagicMock , patch
1414from uuid import uuid4
1515
1616import pytest
1717
18+ from vllm .entrypoints .logger import RequestLogger
1819from vllm .logger import (_DATE_FORMAT , _FORMAT , _configure_vllm_root_logger ,
1920 enable_trace_function_call , init_logger )
2021from vllm .logging_utils import NewLineFormatter
@@ -228,9 +229,10 @@ def test_prepare_object_to_dump():
228229 list_obj = [1 , 2 , 3 ]
229230 assert prepare_object_to_dump (list_obj ) == '[1, 2, 3]'
230231
231- dict_obj = {'a' : 1 , 'b' : 'b' }
232+ dict_obj = {"a" : 1 , "b" : "b" }
232233 assert prepare_object_to_dump (dict_obj ) in [
233- "{a: 1, b: 'b'}" , "{b: 'b', a: 1}"
234+ "{a: 1, b: 'b'}" ,
235+ "{b: 'b', a: 1}" ,
234236 ]
235237
236238 set_obj = {1 , 2 , 3 }
@@ -252,4 +254,246 @@ class CustomClass:
252254 b : str
253255
254256 assert (prepare_object_to_dump (CustomClass (
255- 1 , 'b' )) == "CustomClass(a=1, b='b')" )
257+ 1 , "b" )) == "CustomClass(a=1, b='b')" )
258+
259+
def test_request_logger_log_outputs():
    """Verify that RequestLogger.log_outputs emits one info record
    carrying the request id, output text, token ids, and finish reason."""
    fake_logger = MagicMock()

    with patch("vllm.entrypoints.logger.logger", fake_logger):
        req_logger = RequestLogger(max_log_len=None)

        # Log a plain, non-streaming completion.
        req_logger.log_outputs(
            request_id="test-123",
            outputs="Hello, world!",
            output_token_ids=[1, 2, 3, 4],
            finish_reason="stop",
            is_streaming=False,
            delta=False,
        )

        fake_logger.info.assert_called_once()
        logged = fake_logger.info.call_args.args
        assert "Generated response %s%s" in logged[0]
        assert logged[1] == "test-123"
        assert logged[3] == "Hello, world!"
        assert logged[4] == [1, 2, 3, 4]
        assert logged[5] == "stop"
285+
286+
def test_request_logger_log_outputs_streaming_delta():
    """Verify the streaming-delta path: the record is tagged with the
    ' (streaming delta)' marker and carries the partial chunk."""
    fake_logger = MagicMock()

    with patch("vllm.entrypoints.logger.logger", fake_logger):
        req_logger = RequestLogger(max_log_len=None)

        # A single in-flight chunk of a streaming response.
        req_logger.log_outputs(
            request_id="test-456",
            outputs="Hello",
            output_token_ids=[1],
            finish_reason=None,
            is_streaming=True,
            delta=True,
        )

        fake_logger.info.assert_called_once()
        logged = fake_logger.info.call_args.args
        assert "Generated response %s%s" in logged[0]
        assert logged[1] == "test-456"
        assert logged[2] == " (streaming delta)"
        assert logged[3] == "Hello"
        assert logged[4] == [1]
        assert logged[5] is None
312+
313+
def test_request_logger_log_outputs_streaming_complete():
    """Verify the end-of-stream path: the record is tagged with the
    ' (streaming complete)' marker and carries the full response."""
    fake_logger = MagicMock()

    with patch("vllm.entrypoints.logger.logger", fake_logger):
        req_logger = RequestLogger(max_log_len=None)

        # Final (non-delta) record of a streamed response.
        req_logger.log_outputs(
            request_id="test-789",
            outputs="Complete response",
            output_token_ids=[1, 2, 3],
            finish_reason="length",
            is_streaming=True,
            delta=False,
        )

        fake_logger.info.assert_called_once()
        logged = fake_logger.info.call_args.args
        assert "Generated response %s%s" in logged[0]
        assert logged[1] == "test-789"
        assert logged[2] == " (streaming complete)"
        assert logged[3] == "Complete response"
        assert logged[4] == [1, 2, 3]
        assert logged[5] == "length"
339+
340+
def test_request_logger_log_outputs_with_truncation():
    """Verify that max_log_len truncates both the logged text and the
    logged token-id list to the configured length."""
    fake_logger = MagicMock()

    with patch("vllm.entrypoints.logger.logger", fake_logger):
        # Cap logged output at 10 characters / tokens.
        req_logger = RequestLogger(max_log_len=10)

        long_output = "This is a very long output that should be truncated"
        long_token_ids = list(range(20))  # 20 tokens

        req_logger.log_outputs(
            request_id="test-truncate",
            outputs=long_output,
            output_token_ids=long_token_ids,
            finish_reason="stop",
            is_streaming=False,
            delta=False,
        )

        fake_logger.info.assert_called_once()
        logged = fake_logger.info.call_args.args

        # Text is cut to the first 10 characters.
        logged_text = logged[3]
        assert logged_text == "This is a "
        assert len(logged_text) == 10

        # Token ids are cut to the first 10 entries.
        logged_ids = logged[4]
        assert logged_ids == list(range(10))
        assert len(logged_ids) == 10
374+
375+
def test_request_logger_log_outputs_none_values():
    """Verify that a None output_token_ids value is passed through
    to the log record unchanged."""
    fake_logger = MagicMock()

    with patch("vllm.entrypoints.logger.logger", fake_logger):
        req_logger = RequestLogger(max_log_len=None)

        # Token ids may legitimately be absent.
        req_logger.log_outputs(
            request_id="test-none",
            outputs="Test output",
            output_token_ids=None,
            finish_reason="stop",
            is_streaming=False,
            delta=False,
        )

        fake_logger.info.assert_called_once()
        logged = fake_logger.info.call_args.args
        assert "Generated response %s%s" in logged[0]
        assert logged[1] == "test-none"
        assert logged[3] == "Test output"
        assert logged[4] is None
        assert logged[5] == "stop"
400+
401+
def test_request_logger_log_outputs_empty_output():
    """Verify that an empty output string and empty token list are
    logged as-is, even with a small max_log_len configured."""
    fake_logger = MagicMock()

    with patch("vllm.entrypoints.logger.logger", fake_logger):
        req_logger = RequestLogger(max_log_len=5)

        # Degenerate case: nothing was generated.
        req_logger.log_outputs(
            request_id="test-empty",
            outputs="",
            output_token_ids=[],
            finish_reason="stop",
            is_streaming=False,
            delta=False,
        )

        fake_logger.info.assert_called_once()
        logged = fake_logger.info.call_args.args
        assert "Generated response %s%s" in logged[0]
        assert logged[1] == "test-empty"
        assert logged[3] == ""
        assert logged[4] == []
        assert logged[5] == "stop"
426+
427+
def test_request_logger_log_outputs_integration():
    """Verify that log_inputs and log_outputs coexist on one logger:
    two distinct records with their respective message patterns."""
    fake_logger = MagicMock()

    with patch("vllm.entrypoints.logger.logger", fake_logger):
        req_logger = RequestLogger(max_log_len=None)

        # First the request side...
        req_logger.log_inputs(
            request_id="test-integration",
            prompt="Test prompt",
            prompt_token_ids=[1, 2, 3],
            prompt_embeds=None,
            params=None,
            lora_request=None,
        )

        # ...then the response side, for the same request id.
        req_logger.log_outputs(
            request_id="test-integration",
            outputs="Test output",
            output_token_ids=[4, 5, 6],
            finish_reason="stop",
            is_streaming=False,
            delta=False,
        )

        # One record per call, in order.
        assert fake_logger.info.call_count == 2

        first_args = fake_logger.info.call_args_list[0][0]
        second_args = fake_logger.info.call_args_list[1][0]

        assert "Received request %s" in first_args[0]
        assert first_args[1] == "test-integration"

        assert "Generated response %s%s" in second_args[0]
        assert second_args[1] == "test-integration"
466+
467+
def test_streaming_complete_logs_full_text_content():
    """Verify that a streaming-complete record logs the accumulated
    response text itself, not a '<N tokens>'-style placeholder."""
    fake_logger = MagicMock()

    with patch("vllm.entrypoints.logger.logger", fake_logger):
        req_logger = RequestLogger(max_log_len=None)

        full_response = "This is a complete response from streaming"
        req_logger.log_outputs(
            request_id="test-streaming-full-text",
            outputs=full_response,
            output_token_ids=None,
            finish_reason="streaming_complete",
            is_streaming=True,
            delta=False,
        )

        fake_logger.info.assert_called_once()
        logged = fake_logger.info.call_args.args

        # The logged text is the verbatim response, with no token-count
        # placeholder and no finish-reason leakage into the text field.
        logged_text = logged[3]
        assert logged_text == full_response
        assert "tokens>" not in logged_text
        assert "streaming_complete" not in logged_text

        # Remaining positional fields.
        assert logged[1] == "test-streaming-full-text"
        assert logged[2] == " (streaming complete)"
        assert logged[5] == "streaming_complete"
0 commit comments