Commit b8908f9

Author: Your Name (committed)
Commit message: delete
1 parent: b545218

File tree

vllm/beam/filtering.py
vllm/entrypoints/openai/serving_completion.py

2 files changed: +9 −3 lines


vllm/beam/filtering.py

Lines changed: 8 additions & 2 deletions
@@ -4,6 +4,8 @@
 from urllib.request import Request
 from vllm.beam.debug import BeamDebugInfo
 import torch
+from starlette.datastructures import MutableHeaders
+
 from vllm.entrypoints.openai.protocol import CompletionRequest, CompletionResponse, \
     ErrorResponse
 from vllm.logger import init_logger
@@ -40,6 +42,7 @@ def __init__(self, classi_idx, classifier_names):

     async def get_n_valid_beams(self, create_completion: Callable,
                                 request: CompletionRequest,
+                                chunk_num: int,
                                 raw_request: Optional[Request] = None) -> list[
         Union[AsyncGenerator[str, None], CompletionResponse, ErrorResponse]]:
         request.stream = False
@@ -55,9 +58,12 @@ async def get_n_valid_beams(self, create_completion: Callable,
         tasks = []
         # TODO(@tanuj): deep copy request and raw_request?
         for _ in range(n):
-            request = request
             if original_request_id is not None:
-                raw_request.headers.update({"X-Request-Id": f"original_request_id-beam_{n}"})
+                mh = MutableHeaders(scope=raw_request.scope)
+                del mh["x-request-id"]
+                if hasattr(raw_request, "_headers"):
+                    delattr(raw_request, "_headers")
+
             tasks.append(create_completion(
                 request,
                 raw_request=raw_request,
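
Context on the hunk above: the removed `raw_request.headers.update(...)` call is not supported on Starlette's immutable `Headers` mapping, so the commit instead edits the ASGI scope in place. Two steps are involved: `MutableHeaders(scope=...)` deletes the raw header from `scope["headers"]`, and dropping the cached `_headers` attribute forces the `Request` to rebuild its headers view on the next access. Below is a minimal sketch of that behaviour, not part of the commit; the scope contents and header values are made up for illustration.

# Hedged sketch, not from the commit: a hand-written ASGI scope showing why
# both the MutableHeaders edit and the cache invalidation are needed.
from starlette.datastructures import MutableHeaders
from starlette.requests import Request

scope = {
    "type": "http",
    "method": "POST",
    "path": "/v1/completions",
    "query_string": b"",
    "headers": [(b"x-request-id", b"abc123"), (b"content-type", b"application/json")],
}
raw_request = Request(scope)

# First access builds and caches the immutable Headers view in `_headers`.
assert raw_request.headers.get("x-request-id") == "abc123"

# MutableHeaders(scope=...) mutates scope["headers"] in place.
mh = MutableHeaders(scope=raw_request.scope)
del mh["x-request-id"]

# Without dropping the cached attribute, raw_request.headers would still
# return the stale value built before the deletion.
if hasattr(raw_request, "_headers"):
    delattr(raw_request, "_headers")

assert raw_request.headers.get("x-request-id") is None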

vllm/entrypoints/openai/serving_completion.py

Lines changed: 1 addition & 1 deletion
@@ -115,7 +115,7 @@ async def _chunk_generator():
             # TODO(@tanuj): calc created tokens
             while num_chunks < max_chunks and not should_stop:
                 num_chunks += 1
-                beams = await self.beam_validator.get_n_valid_beams(create_completion=self.create_completion, request=request, raw_request=raw_request)
+                beams = await self.beam_validator.get_n_valid_beams(create_completion=self.create_completion, request=request, raw_request=raw_request, chunk_num=num_chunks)
                 if isinstance(beams, ErrorResponse):
                     yield f"data: {beams.model_dump_json()}\n\n"
                     break
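
This side of the change only threads the chunk counter through as `chunk_num=num_chunks`. Because the new `chunk_num: int` parameter in filtering.py has no default, the two files must change together; a call site left on the old signature would raise a TypeError. The sketch below shows the wiring in isolation using a stubbed validator, not the real vLLM classes.

# Hedged sketch with a stub class, not the actual vLLM implementation.
import asyncio
from typing import Callable, Optional


class BeamValidatorStub:
    # Mirrors the updated signature: chunk_num is required (no default).
    async def get_n_valid_beams(self, create_completion: Callable,
                                request: dict,
                                chunk_num: int,
                                raw_request: Optional[dict] = None) -> list:
        return [f"beam for chunk {chunk_num}"]


async def main() -> None:
    validator = BeamValidatorStub()
    num_chunks = 0
    while num_chunks < 2:
        num_chunks += 1
        # The call-site change: pass the current chunk counter through.
        beams = await validator.get_n_valid_beams(
            create_completion=lambda *a, **kw: None,
            request={},
            raw_request=None,
            chunk_num=num_chunks,
        )
        print(beams)  # ['beam for chunk 1'], then ['beam for chunk 2']


asyncio.run(main())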
