Skip to content

Commit f639237

Browse files
author
Your Name
committed
tracing
1 parent 338c2d4 commit f639237

File tree

4 files changed

+93
-4
lines changed

4 files changed

+93
-4
lines changed

vllm/beam/beam.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from vllm.beam.penalty import PenaltyComputer
66
import torch
77
from vllm.beam.ranking import RankingComputer
8+
from vllm.beam.tracing import trace_async_method
89
from vllm.entrypoints.openai.protocol import CompletionResponse, ErrorResponse, CompletionResponseChoice
910
from vllm.logger import init_logger
1011

@@ -16,6 +17,7 @@ def __init__(self, classi_idx):
1617
self.penalty_computer = PenaltyComputer(classi_idx)
1718
self.ranking_computer = RankingComputer(classi_idx)
1819

20+
@trace_async_method(span_name='pick_best_beam')
1921
async def pick_best_beam(self, responses: list[
2022
Union[AsyncGenerator[str, None], CompletionResponseChoice, ErrorResponse]]) -> Union[
2123
AsyncGenerator[str, None], CompletionResponseChoice, ErrorResponse]:

vllm/beam/filtering.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import torch
77
from starlette.datastructures import MutableHeaders
88

9+
from vllm.beam.tracing import trace_async_method
910
from vllm.entrypoints.openai.protocol import CompletionRequest, CompletionResponse, \
1011
ErrorResponse, CompletionResponseChoice
1112
from vllm.logger import init_logger
@@ -32,6 +33,7 @@ def __init__(self, classi_idx, classifier_names):
3233
self.classi_idx = classi_idx
3334
self.classifier_names = classifier_names
3435

36+
@trace_async_method(span_name='get_n_valid_beams')
3537
async def get_n_valid_beams(self, create_completion: Callable,
3638
request: CompletionRequest,
3739
chunk_num: int,

vllm/beam/tracing.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
from functools import wraps
2+
from typing import Optional, Union, AsyncGenerator
3+
import time
4+
5+
from vllm.entrypoints.openai.protocol import CompletionRequest, ErrorResponse
6+
from vllm.tracing import extract_trace_context, SpanAttributes, init_tracer
7+
from vllm.v1.request import Request
8+
from opentelemetry import trace
9+
10+
import os

# OTLP collector endpoint. Overridable via environment so deployments can
# point at a real collector without a code change; the default preserves the
# original hard-coded value.
_OTLP_ENDPOINT = os.environ.get("VLLM_OTLP_ENDPOINT", "http://localhost:4317")

# Module-level tracer shared by the decorators below.
tracer = init_tracer(
    "vllm.entrypoints.openai.serving_completion",
    _OTLP_ENDPOINT)
14+
def trace_streaming_completion(tracer_attr='tracer'):
    """
    Decorator specifically for tracing streaming completion functions.

    Wraps an async method with signature
    ``(self, request: CompletionRequest, raw_request=None)`` whose result is
    either an ``ErrorResponse`` or an async generator. A parent span
    ("chunkwise_beam_completion") is opened before the call and kept alive
    until the returned generator is exhausted, fails, or is closed by the
    consumer — only then is it ended and detached.

    NOTE(review): ``tracer_attr`` is currently unused; the module-level
    ``tracer`` is used instead — confirm whether per-instance tracers were
    intended.
    """

    def decorator(func):
        @wraps(func)
        async def wrapper(self, request: CompletionRequest, raw_request: Request | None = None):
            # Continue a trace propagated via the incoming HTTP headers, if any.
            ctx = extract_trace_context(dict(raw_request.headers)) if raw_request else None
            parent_span = tracer.start_span("chunkwise_beam_completion", context=ctx)

            # Make the span current for the duration of the call. use_span()
            # is a @contextmanager, so it must be entered explicitly: the
            # previous code called __exit__ on an unstarted generator, which
            # never attached the span and raised "generator didn't stop".
            scope = trace.use_span(parent_span, end_on_exit=False)
            scope.__enter__()

            def _close(exc: Exception | None = None):
                # End the span and detach the scope exactly once.
                parent_span.end()
                if exc is None:
                    scope.__exit__(None, None, None)
                else:
                    scope.__exit__(type(exc), exc, exc.__traceback__)

            try:
                parent_span.set_attribute(SpanAttributes.GEN_AI_REQUEST_MAX_TOKENS, request.max_tokens)
                parent_span.set_attribute(SpanAttributes.GEN_AI_REQUEST_N, request.n)
                if hasattr(request, "request_id"):
                    parent_span.set_attribute(SpanAttributes.GEN_AI_REQUEST_ID, request.request_id)

                gen = await func(self, request, raw_request)
                if isinstance(gen, ErrorResponse):
                    # No streaming will happen; close immediately.
                    _close()
                    return gen

                async def traced_generator():
                    # Close the parent span even when the generator raises or
                    # the consumer abandons it early (GeneratorExit) — the
                    # original only closed on clean exhaustion, leaking spans.
                    caught: Exception | None = None
                    try:
                        with trace.use_span(parent_span, end_on_exit=False):
                            with tracer.start_as_current_span("chunk_generation"):
                                async for item in gen:
                                    yield item
                    except Exception as e:
                        caught = e
                        parent_span.record_exception(e)
                        raise
                    finally:
                        _close(caught)

                return traced_generator()

            except Exception as e:
                parent_span.record_exception(e)
                _close(e)
                raise

        return wrapper

    return decorator
62+
63+
64+
def trace_async_method(span_name: Optional[str] = None, tracer_attr='tracer'):
    """
    Simple decorator for tracing regular async methods.

    Opens a span named ``span_name`` (or the wrapped function's name) around
    each call, records the elapsed time as the ``execution_time_ms``
    attribute, and records any raised exception on the span before
    re-raising it.

    NOTE(review): ``tracer_attr`` is currently unused; the module-level
    ``tracer`` is used instead — confirm whether per-instance tracers were
    intended.
    """

    def decorator(func):
        @wraps(func)
        async def wrapper(self, *args, **kwargs):
            name = span_name or func.__name__

            with tracer.start_as_current_span(name) as span:
                # perf_counter is monotonic, so the measurement is immune to
                # wall-clock adjustments (time.time() is not).
                start = time.perf_counter()
                try:
                    return await func(self, *args, **kwargs)
                except Exception as e:
                    span.record_exception(e)
                    raise
                finally:
                    # Recorded in finally so failed calls get a duration too
                    # (the original skipped the attribute on exceptions).
                    span.set_attribute(
                        "execution_time_ms",
                        (time.perf_counter() - start) * 1000)

        return wrapper

    return decorator

vllm/entrypoints/openai/serving_completion.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from vllm.beam.filtering import _CHUNK_SIZE, BeamValidator
1616
from vllm.beam.metrics import report_metrics
1717
from vllm.beam.penalty import MEOW_CLASSI_IDX, PenaltyComputer
18+
from vllm.beam.tracing import trace_streaming_completion, trace_async_method
1819
from vllm.config import ModelConfig
1920
from vllm.engine.protocol import EngineClient
2021
from vllm.entrypoints.logger import RequestLogger
@@ -71,10 +72,8 @@ def __init__(
7172

7273
self.beam_scorer = BeamScorer(classi_idx=MEOW_CLASSI_IDX)
7374
self.beam_validator = BeamValidator(classi_idx=MEOW_CLASSI_IDX, classifier_names=MEOW_CLASSI_IDX.keys())
74-
self.tracer = init_tracer(
75-
"vllm.entrypoints.openai.serving_completion",
76-
"http://localhost:4317")
7775

76+
@trace_streaming_completion()
7877
async def create_completion_with_chunkwise_beam(
7978
self,
8079
request: CompletionRequest,
@@ -83,7 +82,7 @@ async def create_completion_with_chunkwise_beam(
8382
"""
8483
Chunkwise beam search hack
8584
"""
86-
85+
@trace_async_method(span_name='_process_prefix')
8786
async def _process_prefix(request: CompletionRequest):
8887
og_max_tokens = request.max_tokens
8988
og_n = request.n

0 commit comments

Comments
 (0)