Skip to content

feat: Add --enable-log-outputs flag for logging model generations #20707

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
300 changes: 278 additions & 22 deletions tests/test_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@
from json.decoder import JSONDecodeError
from tempfile import NamedTemporaryFile
from typing import Any
from unittest.mock import patch
from unittest.mock import MagicMock, patch
from uuid import uuid4

import pytest

from vllm.entrypoints.logger import RequestLogger
from vllm.logger import (_DATE_FORMAT, _FORMAT, _configure_vllm_root_logger,
enable_trace_function_call, init_logger)
from vllm.logging_utils import NewLineFormatter
Expand Down Expand Up @@ -137,16 +138,19 @@


@patch("vllm.logger.VLLM_CONFIGURE_LOGGING", 1)
@pytest.mark.parametrize("unexpected_config", (
"Invalid string",
[{
"version": 1,
"loggers": []
}],
0,
))
@pytest.mark.parametrize(
"unexpected_config",
(
"Invalid string",
[{
"version": 1,
"loggers": []
}],
0,
),
)
def test_an_error_is_raised_when_custom_logging_config_is_unexpected_json(
unexpected_config: Any):
unexpected_config: Any, ):
"""This test calls _configure_vllm_root_logger again to test custom logging
config behavior, however it fails before any change in behavior or
configuration occurs."""
Expand All @@ -173,14 +177,16 @@
"propagate": False,
}
},
"version": 1
"version": 1,
}
with NamedTemporaryFile(encoding="utf-8", mode="w") as logging_config_file:
logging_config_file.write(json.dumps(valid_logging_config))
logging_config_file.flush()
with patch("vllm.logger.VLLM_LOGGING_CONFIG_PATH",
logging_config_file.name), patch(
"vllm.logger.dictConfig") as dict_config_mock:
with (
patch("vllm.logger.VLLM_LOGGING_CONFIG_PATH",
logging_config_file.name),
patch("vllm.logger.dictConfig") as dict_config_mock,
):
_configure_vllm_root_logger()
dict_config_mock.assert_called_with(valid_logging_config)

Expand All @@ -196,7 +202,7 @@
"handlers": [],
}
},
"version": 1
"version": 1,
}
with NamedTemporaryFile(encoding="utf-8", mode="w") as logging_config_file:
logging_config_file.write(json.dumps(valid_logging_config))
Expand All @@ -222,21 +228,22 @@


def test_prepare_object_to_dump():
str_obj = 'str'
str_obj = "str"
assert prepare_object_to_dump(str_obj) == "'str'"

list_obj = [1, 2, 3]
assert prepare_object_to_dump(list_obj) == '[1, 2, 3]'
assert prepare_object_to_dump(list_obj) == "[1, 2, 3]"

dict_obj = {'a': 1, 'b': 'b'}
dict_obj = {"a": 1, "b": "b"}
assert prepare_object_to_dump(dict_obj) in [
"{a: 1, b: 'b'}", "{b: 'b', a: 1}"
"{a: 1, b: 'b'}",
"{b: 'b', a: 1}",
]

set_obj = {1, 2, 3}
assert prepare_object_to_dump(set_obj) == '[1, 2, 3]'
assert prepare_object_to_dump(set_obj) == "[1, 2, 3]"

tuple_obj = ('a', 'b', 'c')
tuple_obj = ("a", "b", "c")
assert prepare_object_to_dump(tuple_obj) == "['a', 'b', 'c']"

class CustomEnum(enum.Enum):
Expand All @@ -252,4 +259,253 @@
b: str

assert (prepare_object_to_dump(CustomClass(
1, 'b')) == "CustomClass(a=1, b='b')")
1, "b")) == "CustomClass(a=1, b='b')")


def test_request_logger_log_outputs():
"""Test the new log_outputs functionality."""
# Create a mock logger to capture log calls
mock_logger = MagicMock()

with patch("vllm.entrypoints.logger.logger", mock_logger):
request_logger = RequestLogger(max_log_len=None)

# Test basic output logging
request_logger.log_outputs(
request_id="test-123",
outputs="Hello, world!",
output_token_ids=[1, 2, 3, 4],
finish_reason="stop",
is_streaming=False,
delta=False,
)

mock_logger.info.assert_called_once()
call_args = mock_logger.info.call_args.args
# logger.info(format_string, request_id, stream_info, outputs, output_token_ids, finish_reason)

Check failure on line 285 in tests/test_logger.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

tests/test_logger.py:285:81: E501 Line too long (103 > 80)
Copy link
Member

@DarkLight1337 DarkLight1337 Jul 12, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove the commented-out code

assert "Generated response %s%s" in call_args[0]
assert call_args[1] == "test-123"
assert call_args[3] == "Hello, world!"
assert call_args[4] == [1, 2, 3, 4]
assert call_args[5] == "stop"


def test_request_logger_log_outputs_streaming_delta():
"""Test log_outputs with streaming delta mode."""
mock_logger = MagicMock()

with patch("vllm.entrypoints.logger.logger", mock_logger):
request_logger = RequestLogger(max_log_len=None)

# Test streaming delta logging
request_logger.log_outputs(
request_id="test-456",
outputs="Hello",
output_token_ids=[1],
finish_reason=None,
is_streaming=True,
delta=True,
)

mock_logger.info.assert_called_once()
call_args = mock_logger.info.call_args.args
# logger.info(format_string, request_id, stream_info, outputs, output_token_ids, finish_reason)

Check failure on line 312 in tests/test_logger.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

tests/test_logger.py:312:81: E501 Line too long (103 > 80)
assert "Generated response %s%s" in call_args[0]
assert call_args[1] == "test-456"
assert call_args[2] == " (streaming delta)"
assert call_args[3] == "Hello"
assert call_args[4] == [1]
assert call_args[5] is None


def test_request_logger_log_outputs_streaming_complete():
"""Test log_outputs with streaming complete mode."""
mock_logger = MagicMock()

with patch("vllm.entrypoints.logger.logger", mock_logger):
request_logger = RequestLogger(max_log_len=None)

# Test streaming complete logging
request_logger.log_outputs(
request_id="test-789",
outputs="Complete response",
output_token_ids=[1, 2, 3],
finish_reason="length",
is_streaming=True,
delta=False,
)

mock_logger.info.assert_called_once()
call_args = mock_logger.info.call_args.args
# logger.info(format_string, request_id, stream_info, outputs, output_token_ids, finish_reason)

Check failure on line 340 in tests/test_logger.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

tests/test_logger.py:340:81: E501 Line too long (103 > 80)
assert "Generated response %s%s" in call_args[0]
assert call_args[1] == "test-789"
assert call_args[2] == " (streaming complete)"
assert call_args[3] == "Complete response"
assert call_args[4] == [1, 2, 3]
assert call_args[5] == "length"


def test_request_logger_log_outputs_with_truncation():
"""Test log_outputs respects max_log_len setting."""
mock_logger = MagicMock()

with patch("vllm.entrypoints.logger.logger", mock_logger):
# Set max_log_len to 10
request_logger = RequestLogger(max_log_len=10)

# Test output truncation
long_output = "This is a very long output that should be truncated"
long_token_ids = list(range(20)) # 20 tokens

request_logger.log_outputs(
request_id="test-truncate",
outputs=long_output,
output_token_ids=long_token_ids,
finish_reason="stop",
is_streaming=False,
delta=False,
)

mock_logger.info.assert_called_once()
call_args = mock_logger.info.call_args

# Check that output was truncated to first 10 characters
logged_output = call_args[0][3]
assert logged_output == "This is a "
assert len(logged_output) == 10

# Check that token IDs were truncated to first 10 tokens
logged_token_ids = call_args[0][4]
assert logged_token_ids == list(range(10))
assert len(logged_token_ids) == 10


def test_request_logger_log_outputs_none_values():
"""Test log_outputs handles None values correctly."""
mock_logger = MagicMock()

with patch("vllm.entrypoints.logger.logger", mock_logger):
request_logger = RequestLogger(max_log_len=None)

# Test with None output_token_ids
request_logger.log_outputs(
request_id="test-none",
outputs="Test output",
output_token_ids=None,
finish_reason="stop",
is_streaming=False,
delta=False,
)

mock_logger.info.assert_called_once()
call_args = mock_logger.info.call_args.args
# logger.info(format_string, request_id, stream_info, outputs, output_token_ids, finish_reason)

Check failure on line 403 in tests/test_logger.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

tests/test_logger.py:403:81: E501 Line too long (103 > 80)
assert "Generated response %s%s" in call_args[0]
assert call_args[1] == "test-none"
assert call_args[3] == "Test output"
assert call_args[4] is None
assert call_args[5] == "stop"


def test_request_logger_log_outputs_empty_output():
"""Test log_outputs handles empty output correctly."""
mock_logger = MagicMock()

with patch("vllm.entrypoints.logger.logger", mock_logger):
request_logger = RequestLogger(max_log_len=5)

# Test with empty output
request_logger.log_outputs(
request_id="test-empty",
outputs="",
output_token_ids=[],
finish_reason="stop",
is_streaming=False,
delta=False,
)

mock_logger.info.assert_called_once()
call_args = mock_logger.info.call_args.args
# logger.info(format_string, request_id, stream_info, outputs, output_token_ids, finish_reason)

Check failure on line 430 in tests/test_logger.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

tests/test_logger.py:430:81: E501 Line too long (103 > 80)
assert "Generated response %s%s" in call_args[0]
assert call_args[1] == "test-empty"
assert call_args[3] == ""
assert call_args[4] == []
assert call_args[5] == "stop"


def test_request_logger_log_outputs_integration():
"""Test that log_outputs can be called alongside log_inputs."""
mock_logger = MagicMock()

with patch("vllm.entrypoints.logger.logger", mock_logger):
request_logger = RequestLogger(max_log_len=None)

# Test that both methods can be called without interference
request_logger.log_inputs(
request_id="test-integration",
prompt="Test prompt",
prompt_token_ids=[1, 2, 3],
prompt_embeds=None,
params=None,
lora_request=None,
prompt_adapter_request=None,
)

request_logger.log_outputs(
request_id="test-integration",
outputs="Test output",
output_token_ids=[4, 5, 6],
finish_reason="stop",
is_streaming=False,
delta=False,
)

# Should have been called twice - once for inputs, once for outputs
assert mock_logger.info.call_count == 2

# Check that the calls were made with correct patterns
input_call = mock_logger.info.call_args_list[0][0]
output_call = mock_logger.info.call_args_list[1][0]

# Check input call: logger.info(format_string, request_id, prompt, params, ...)

Check failure on line 472 in tests/test_logger.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

tests/test_logger.py:472:81: E501 Line too long (87 > 80)
assert "Received request %s" in input_call[0]
assert input_call[1] == "test-integration"

# Check output call: logger.info(format_string, request_id, stream_info, outputs, ...)

Check failure on line 476 in tests/test_logger.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

tests/test_logger.py:476:81: E501 Line too long (94 > 80)
assert "Generated response %s%s" in output_call[0]
assert output_call[1] == "test-integration"


def test_streaming_complete_logs_full_text_content():
"""Test that streaming complete logging includes full accumulated text, not just token count."""

Check failure on line 482 in tests/test_logger.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

tests/test_logger.py:482:81: E501 Line too long (100 > 80)
mock_logger = MagicMock()

with patch("vllm.entrypoints.logger.logger", mock_logger):
request_logger = RequestLogger(max_log_len=None)

# Test with actual content instead of token count format
full_response = "This is a complete response from streaming"
request_logger.log_outputs(
request_id="test-streaming-full-text",
outputs=full_response,
output_token_ids=None,
finish_reason="streaming_complete",
is_streaming=True,
delta=False,
)

mock_logger.info.assert_called_once()
call_args = mock_logger.info.call_args.args

# Verify the logged output is the full text, not a token count format
logged_output = call_args[3]
assert logged_output == full_response
assert "tokens>" not in logged_output # Ensure it's not the old token count format

Check failure on line 505 in tests/test_logger.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

tests/test_logger.py:505:81: E501 Line too long (91 > 80)
assert "streaming_complete" not in logged_output # Ensure it's not the fallback format

Check failure on line 506 in tests/test_logger.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

tests/test_logger.py:506:81: E501 Line too long (95 > 80)

# Verify other parameters
assert call_args[1] == "test-streaming-full-text"
assert call_args[2] == " (streaming complete)"
assert call_args[5] == "streaming_complete"
Loading