diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 4440187c36e..0c9a94c2993 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -256,6 +256,7 @@ steps: - pytest -v -s v1/structured_output - pytest -v -s v1/spec_decode - pytest -v -s v1/kv_connector/unit + - pytest -v -s v1/metrics - pytest -v -s v1/test_serial_utils.py - pytest -v -s v1/test_utils.py - pytest -v -s v1/test_oracle.py diff --git a/tests/v1/metrics/test_ray_metrics.py b/tests/v1/metrics/test_ray_metrics.py index 0898ae65e7c..92f6c6f0e89 100644 --- a/tests/v1/metrics/test_ray_metrics.py +++ b/tests/v1/metrics/test_ray_metrics.py @@ -1,8 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import os + import pytest import ray +from vllm.config import ModelDType from vllm.sampling_params import SamplingParams from vllm.v1.engine.async_llm import AsyncEngineArgs, AsyncLLM from vllm.v1.metrics.ray_wrappers import RayPrometheusStatLogger @@ -27,7 +30,7 @@ def use_v1_only(monkeypatch): def test_engine_log_metrics_ray( example_prompts, model: str, - dtype: str, + dtype: ModelDType, max_tokens: int, ) -> None: """ Simple smoke test, verifying this can be used without exceptions. @@ -37,11 +40,14 @@ def test_engine_log_metrics_ray( class EngineTestActor: async def run(self): - engine_args = AsyncEngineArgs( - model=model, - dtype=dtype, - disable_log_stats=False, - ) + # Set environment variable inside the Ray actor since environment + # variables from pytest fixtures don't propagate to Ray actors + os.environ['VLLM_USE_V1'] = '1' + + engine_args = AsyncEngineArgs(model=model, + dtype=dtype, + disable_log_stats=False, + enforce_eager=True) engine = AsyncLLM.from_engine_args( engine_args, stat_loggers=[RayPrometheusStatLogger]) diff --git a/vllm/v1/metrics/ray_wrappers.py b/vllm/v1/metrics/ray_wrappers.py index cce692d6c09..8384310062d 100644 --- a/vllm/v1/metrics/ray_wrappers.py +++ b/vllm/v1/metrics/ray_wrappers.py @@ -51,7 +51,13 @@ class RayGaugeWrapper(RayPrometheusMetric): def __init__(self, name: str, documentation: Optional[str] = "", - labelnames: Optional[list[str]] = None): + labelnames: Optional[list[str]] = None, + multiprocess_mode: Optional[str] = ""): + + # All Ray metrics are keyed by WorkerId, so multiprocess modes like + # "mostrecent", "all", "sum" do not apply. This logic can be manually + # implemented at the observability layer (Prometheus/Grafana). + del multiprocess_mode labelnames_tuple = tuple(labelnames) if labelnames else None self.metric = ray_metrics.Gauge(name=name, description=documentation,