Skip to content

Commit d1d834d

Browse files
author
Your Name
committed
parallel lock
1 parent 9ebd040 commit d1d834d

File tree

1 file changed

+7
-0
lines changed

1 file changed

+7
-0
lines changed

vllm/beam/filtering.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from vllm.entrypoints.openai.protocol import CompletionRequest, CompletionResponse, \
88
ErrorResponse
99
from vllm.logger import init_logger
10+
from vllm.utils import random_uuid
1011

1112
logger = init_logger(__name__)
1213

@@ -47,10 +48,16 @@ async def get_n_valid_beams(self, create_completion: Callable,
4748
# TODO(@tanuj): accept max tokens as a parameter
4849
request.max_tokens = _CHUNK_SIZE
4950
request.echo = True
51+
original_request_id = None
52+
if raw_request is not None:
53+
original_request_id = raw_request.headers.get("X-Request-Id", None)
54+
5055
tasks = []
5156
# TODO(@tanuj): deep copy request and raw_request?
5257
for _ in range(n):
5358
request = request
59+
if original_request_id is not None:
60+
raw_request.headers.update({"X-Request-Id": f"original_request_id-beam_{n}"})
5461
tasks.append(create_completion(
5562
request,
5663
raw_request=raw_request,

0 commit comments

Comments
 (0)