From 8e7e33b497a951b0c0e5cfc20ada7755f208df3e Mon Sep 17 00:00:00 2001 From: Seiji Eicher Date: Mon, 14 Jul 2025 16:43:33 -0700 Subject: [PATCH 1/8] Enable v1 metrics tests Signed-off-by: Seiji Eicher --- .buildkite/test-pipeline.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 4440187c36e..0c9a94c2993 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -256,6 +256,7 @@ steps: - pytest -v -s v1/structured_output - pytest -v -s v1/spec_decode - pytest -v -s v1/kv_connector/unit + - pytest -v -s v1/metrics - pytest -v -s v1/test_serial_utils.py - pytest -v -s v1/test_utils.py - pytest -v -s v1/test_oracle.py From 6c3d912c499b359319dbf779fcacd2f1bf858b99 Mon Sep 17 00:00:00 2001 From: Seiji Eicher Date: Mon, 14 Jul 2025 17:27:45 -0700 Subject: [PATCH 2/8] Disable test_engine_log_metrics_ray Signed-off-by: Seiji Eicher --- tests/v1/metrics/test_ray_metrics.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/v1/metrics/test_ray_metrics.py b/tests/v1/metrics/test_ray_metrics.py index 0898ae65e7c..ae349549582 100644 --- a/tests/v1/metrics/test_ray_metrics.py +++ b/tests/v1/metrics/test_ray_metrics.py @@ -1,8 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import os + import pytest import ray +from vllm.config import ModelDType from vllm.sampling_params import SamplingParams from vllm.v1.engine.async_llm import AsyncEngineArgs, AsyncLLM from vllm.v1.metrics.ray_wrappers import RayPrometheusStatLogger @@ -27,16 +30,22 @@ def use_v1_only(monkeypatch): def test_engine_log_metrics_ray( example_prompts, model: str, - dtype: str, + dtype: ModelDType, max_tokens: int, ) -> None: """ Simple smoke test, verifying this can be used without exceptions. Need to start a Ray cluster in order to verify outputs.""" + pytest.skip( + "Fix needed for https://github.com/vllm-project/vllm/issues/20954") @ray.remote(num_gpus=1) class EngineTestActor: async def run(self): + # Set environment variable inside the Ray actor since + # environment variables from pytest fixtures don't propagate to Ray actors + os.environ['VLLM_USE_V1'] = '1' + engine_args = AsyncEngineArgs( model=model, dtype=dtype, From 48e15070c72107db512b64de14c868fd60d67d75 Mon Sep 17 00:00:00 2001 From: Seiji Eicher Date: Mon, 14 Jul 2025 16:37:42 -0700 Subject: [PATCH 3/8] Add add_logger API to AsyncLLM Signed-off-by: Seiji Eicher --- tests/v1/metrics/test_ray_metrics.py | 4 ++-- vllm/v1/engine/async_llm.py | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/tests/v1/metrics/test_ray_metrics.py b/tests/v1/metrics/test_ray_metrics.py index ae349549582..6770ece7f91 100644 --- a/tests/v1/metrics/test_ray_metrics.py +++ b/tests/v1/metrics/test_ray_metrics.py @@ -52,8 +52,8 @@ async def run(self): disable_log_stats=False, ) - engine = AsyncLLM.from_engine_args( - engine_args, stat_loggers=[RayPrometheusStatLogger]) + engine = AsyncLLM.from_engine_args(engine_args) + await engine.add_logger(RayPrometheusStatLogger) for i, prompt in enumerate(example_prompts): results = engine.generate( diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py index 3754570dfaa..97f84bc24cc 100644 --- a/vllm/v1/engine/async_llm.py +++ b/vllm/v1/engine/async_llm.py @@ -608,6 +608,25 @@ async def collective_rpc(self, return await self.engine_core.collective_rpc_async( method, timeout, args, kwargs) + async def add_logger(self, logger_factory: StatLoggerFactory) -> None: + if not self.log_stats: + raise RuntimeError( + "Stat logging is disabled. Set `disable_log_stats=False` " + "argument to enable.") + + engine_num = self.vllm_config.parallel_config.data_parallel_size + if len(self.stat_loggers) == 0: + self.stat_loggers = [[] for _ in range(engine_num)] + + logger_type = type(logger_factory) + for logger in self.stat_loggers[0]: + if type(logger) is logger_type: + raise KeyError( + f"Logger with type {logger_type} already exists.") + + for i, logger_list in enumerate(self.stat_loggers): + logger_list.append(logger_factory(self.vllm_config, i)) + @property def is_running(self) -> bool: # Is None before the loop is started. From 28dcfdd8495cc16e37d5394667d3fff7ed1683ec Mon Sep 17 00:00:00 2001 From: Seiji Eicher Date: Mon, 14 Jul 2025 17:43:16 -0700 Subject: [PATCH 4/8] Add test Signed-off-by: Seiji Eicher --- tests/v1/metrics/test_engine_logger_apis.py | 49 +++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 tests/v1/metrics/test_engine_logger_apis.py diff --git a/tests/v1/metrics/test_engine_logger_apis.py b/tests/v1/metrics/test_engine_logger_apis.py new file mode 100644 index 00000000000..e8f9aeaab9b --- /dev/null +++ b/tests/v1/metrics/test_engine_logger_apis.py @@ -0,0 +1,49 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import pytest + +from vllm.v1.engine.async_llm import AsyncEngineArgs, AsyncLLM +from vllm.v1.metrics.loggers import PrometheusStatLogger + + +@pytest.mark.asyncio +async def test_async_llm_add_logger_no_exception(): + # Minimal model config for test + model_name = "distilbert/distilgpt2" + dtype = "half" + engine_args = AsyncEngineArgs( + model=model_name, + dtype=dtype, + disable_log_stats=False, + ) + + # Force empty list to avoid default loggers + engine = AsyncLLM.from_engine_args(engine_args, stat_loggers=[]) + + # Add PrometheusStatLogger and verify no exception is raised + await engine.add_logger(PrometheusStatLogger) + + # Verify that logger is present in the first DP rank + assert len(engine.stat_loggers[0]) == 1 + assert isinstance(engine.stat_loggers[0][0], PrometheusStatLogger) + + +@pytest.mark.asyncio +async def test_async_llm_add_logger_duplicate_raises(): + model_name = "distilbert/distilgpt2" + dtype = "half" + engine_args = AsyncEngineArgs( + model=model_name, + dtype=dtype, + disable_log_stats=False, + ) + + # Force empty list to avoid default loggers + engine = AsyncLLM.from_engine_args(engine_args, stat_loggers=[]) + + # Add PrometheusStatLogger once + await engine.add_logger(PrometheusStatLogger) + + # Adding the same logger again should raise KeyError + with pytest.raises(KeyError): + await engine.add_logger(PrometheusStatLogger) From aaf93e6ea69dadd364be50760c602389d65b58ec Mon Sep 17 00:00:00 2001 From: Seiji Eicher Date: Mon, 14 Jul 2025 17:48:10 -0700 Subject: [PATCH 5/8] WIP Signed-off-by: Seiji Eicher --- tests/v1/metrics/test_engine_logger_apis.py | 21 --------------------- vllm/v1/engine/async_llm.py | 6 ------ 2 files changed, 27 deletions(-) diff --git a/tests/v1/metrics/test_engine_logger_apis.py b/tests/v1/metrics/test_engine_logger_apis.py index e8f9aeaab9b..74ccb61c52d 100644 --- a/tests/v1/metrics/test_engine_logger_apis.py +++ b/tests/v1/metrics/test_engine_logger_apis.py @@ -26,24 +26,3 @@ async def test_async_llm_add_logger_no_exception(): # Verify that logger is present in the first DP rank assert len(engine.stat_loggers[0]) == 1 assert isinstance(engine.stat_loggers[0][0], PrometheusStatLogger) - - -@pytest.mark.asyncio -async def test_async_llm_add_logger_duplicate_raises(): - model_name = "distilbert/distilgpt2" - dtype = "half" - engine_args = AsyncEngineArgs( - model=model_name, - dtype=dtype, - disable_log_stats=False, - ) - - # Force empty list to avoid default loggers - engine = AsyncLLM.from_engine_args(engine_args, stat_loggers=[]) - - # Add PrometheusStatLogger once - await engine.add_logger(PrometheusStatLogger) - - # Adding the same logger again should raise KeyError - with pytest.raises(KeyError): - await engine.add_logger(PrometheusStatLogger) diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py index 97f84bc24cc..30e01a43903 100644 --- a/vllm/v1/engine/async_llm.py +++ b/vllm/v1/engine/async_llm.py @@ -618,12 +618,6 @@ async def add_logger(self, logger_factory: StatLoggerFactory) -> None: if len(self.stat_loggers) == 0: self.stat_loggers = [[] for _ in range(engine_num)] - logger_type = type(logger_factory) - for logger in self.stat_loggers[0]: - if type(logger) is logger_type: - raise KeyError( - f"Logger with type {logger_type} already exists.") - for i, logger_list in enumerate(self.stat_loggers): logger_list.append(logger_factory(self.vllm_config, i)) From 17279e91a4648e7ab32891312b206b26117820a2 Mon Sep 17 00:00:00 2001 From: Seiji Eicher Date: Tue, 15 Jul 2025 16:37:47 -0700 Subject: [PATCH 6/8] Ingore multiprocess_mode Signed-off-by: Seiji Eicher --- tests/v1/metrics/test_ray_metrics.py | 6 ++---- vllm/v1/metrics/ray_wrappers.py | 8 +++++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/tests/v1/metrics/test_ray_metrics.py b/tests/v1/metrics/test_ray_metrics.py index 6770ece7f91..bd92ca7b6db 100644 --- a/tests/v1/metrics/test_ray_metrics.py +++ b/tests/v1/metrics/test_ray_metrics.py @@ -35,15 +35,13 @@ def test_engine_log_metrics_ray( ) -> None: """ Simple smoke test, verifying this can be used without exceptions. Need to start a Ray cluster in order to verify outputs.""" - pytest.skip( - "Fix needed for https://github.com/vllm-project/vllm/issues/20954") @ray.remote(num_gpus=1) class EngineTestActor: async def run(self): - # Set environment variable inside the Ray actor since - # environment variables from pytest fixtures don't propagate to Ray actors + # Set environment variable inside the Ray actor since environment + # variables from pytest fixtures don't propagate to Ray actors os.environ['VLLM_USE_V1'] = '1' engine_args = AsyncEngineArgs( diff --git a/vllm/v1/metrics/ray_wrappers.py b/vllm/v1/metrics/ray_wrappers.py index cce692d6c09..8384310062d 100644 --- a/vllm/v1/metrics/ray_wrappers.py +++ b/vllm/v1/metrics/ray_wrappers.py @@ -51,7 +51,13 @@ class RayGaugeWrapper(RayPrometheusMetric): def __init__(self, name: str, documentation: Optional[str] = "", - labelnames: Optional[list[str]] = None): + labelnames: Optional[list[str]] = None, + multiprocess_mode: Optional[str] = ""): + + # All Ray metrics are keyed by WorkerId, so multiprocess modes like + # "mostrecent", "all", "sum" do not apply. This logic can be manually + # implemented at the observability layer (Prometheus/Grafana). + del multiprocess_mode labelnames_tuple = tuple(labelnames) if labelnames else None self.metric = ray_metrics.Gauge(name=name, description=documentation, From f7ba9f2045a89c743f79a4f3b5ea3fdd8930269f Mon Sep 17 00:00:00 2001 From: Seiji Eicher Date: Tue, 15 Jul 2025 16:42:20 -0700 Subject: [PATCH 7/8] Split out add_logger changes Signed-off-by: Seiji Eicher --- tests/v1/metrics/test_engine_logger_apis.py | 28 --------------------- tests/v1/metrics/test_ray_metrics.py | 4 +-- vllm/v1/engine/async_llm.py | 13 ---------- 3 files changed, 2 insertions(+), 43 deletions(-) delete mode 100644 tests/v1/metrics/test_engine_logger_apis.py diff --git a/tests/v1/metrics/test_engine_logger_apis.py b/tests/v1/metrics/test_engine_logger_apis.py deleted file mode 100644 index 74ccb61c52d..00000000000 --- a/tests/v1/metrics/test_engine_logger_apis.py +++ /dev/null @@ -1,28 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import pytest - -from vllm.v1.engine.async_llm import AsyncEngineArgs, AsyncLLM -from vllm.v1.metrics.loggers import PrometheusStatLogger - - -@pytest.mark.asyncio -async def test_async_llm_add_logger_no_exception(): - # Minimal model config for test - model_name = "distilbert/distilgpt2" - dtype = "half" - engine_args = AsyncEngineArgs( - model=model_name, - dtype=dtype, - disable_log_stats=False, - ) - - # Force empty list to avoid default loggers - engine = AsyncLLM.from_engine_args(engine_args, stat_loggers=[]) - - # Add PrometheusStatLogger and verify no exception is raised - await engine.add_logger(PrometheusStatLogger) - - # Verify that logger is present in the first DP rank - assert len(engine.stat_loggers[0]) == 1 - assert isinstance(engine.stat_loggers[0][0], PrometheusStatLogger) diff --git a/tests/v1/metrics/test_ray_metrics.py b/tests/v1/metrics/test_ray_metrics.py index bd92ca7b6db..dd77b05f842 100644 --- a/tests/v1/metrics/test_ray_metrics.py +++ b/tests/v1/metrics/test_ray_metrics.py @@ -50,8 +50,8 @@ async def run(self): disable_log_stats=False, ) - engine = AsyncLLM.from_engine_args(engine_args) - await engine.add_logger(RayPrometheusStatLogger) + engine = AsyncLLM.from_engine_args( + engine_args, stat_loggers=[RayPrometheusStatLogger]) for i, prompt in enumerate(example_prompts): results = engine.generate( diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py index 30e01a43903..3754570dfaa 100644 --- a/vllm/v1/engine/async_llm.py +++ b/vllm/v1/engine/async_llm.py @@ -608,19 +608,6 @@ async def collective_rpc(self, return await self.engine_core.collective_rpc_async( method, timeout, args, kwargs) - async def add_logger(self, logger_factory: StatLoggerFactory) -> None: - if not self.log_stats: - raise RuntimeError( - "Stat logging is disabled. Set `disable_log_stats=False` " - "argument to enable.") - - engine_num = self.vllm_config.parallel_config.data_parallel_size - if len(self.stat_loggers) == 0: - self.stat_loggers = [[] for _ in range(engine_num)] - - for i, logger_list in enumerate(self.stat_loggers): - logger_list.append(logger_factory(self.vllm_config, i)) - @property def is_running(self) -> bool: # Is None before the loop is started. From a879828a72e220a07742c06d1c5992dba793e06e Mon Sep 17 00:00:00 2001 From: Seiji Eicher Date: Tue, 15 Jul 2025 16:44:40 -0700 Subject: [PATCH 8/8] Speed up with enforce_eager Signed-off-by: Seiji Eicher --- tests/v1/metrics/test_ray_metrics.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/tests/v1/metrics/test_ray_metrics.py b/tests/v1/metrics/test_ray_metrics.py index dd77b05f842..92f6c6f0e89 100644 --- a/tests/v1/metrics/test_ray_metrics.py +++ b/tests/v1/metrics/test_ray_metrics.py @@ -44,11 +44,10 @@ async def run(self): # variables from pytest fixtures don't propagate to Ray actors os.environ['VLLM_USE_V1'] = '1' - engine_args = AsyncEngineArgs( - model=model, - dtype=dtype, - disable_log_stats=False, - ) + engine_args = AsyncEngineArgs(model=model, + dtype=dtype, + disable_log_stats=False, + enforce_eager=True) engine = AsyncLLM.from_engine_args( engine_args, stat_loggers=[RayPrometheusStatLogger])