Skip to content

Commit dc2ea69

Browse files
sangbumlikeagod authored and huydhn committed
[Bug][Frontend] Fix structure of transcription's decoder_prompt (vllm-project#18809)
Signed-off-by: sangbumlikeagod <oironese@naver.com>
1 parent ea104b9 commit dc2ea69

File tree

2 files changed

+31
-13
lines changed

2 files changed

+31
-13
lines changed

tests/entrypoints/openai/test_transcription_validation.py

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ async def test_basic_audio(mary_had_lamb):
3737
model_name = "openai/whisper-large-v3-turbo"
3838
server_args = ["--enforce-eager"]
3939
# Based on https://github.com/openai/openai-cookbook/blob/main/examples/Whisper_prompting_guide.ipynb.
40-
prompt = "THE FIRST WORDS I SPOKE"
4140
with RemoteOpenAIServer(model_name, server_args) as remote_server:
4241
client = remote_server.get_async_client()
4342
transcription = await client.audio.transcriptions.create(
@@ -48,16 +47,6 @@ async def test_basic_audio(mary_had_lamb):
4847
temperature=0.0)
4948
out = json.loads(transcription)['text']
5049
assert "Mary had a little lamb," in out
51-
# This should "force" whisper to continue prompt in all caps
52-
transcription_wprompt = await client.audio.transcriptions.create(
53-
model=model_name,
54-
file=mary_had_lamb,
55-
language="en",
56-
response_format="text",
57-
prompt=prompt,
58-
temperature=0.0)
59-
out_capital = json.loads(transcription_wprompt)['text']
60-
assert prompt not in out_capital
6150

6251

6352
@pytest.mark.asyncio
@@ -238,3 +227,31 @@ async def test_sampling_params(mary_had_lamb):
238227
extra_body=dict(seed=42))
239228

240229
assert greedy_transcription.text != transcription.text
230+
231+
232+
@pytest.mark.asyncio
233+
async def test_audio_prompt(mary_had_lamb):
234+
model_name = "openai/whisper-large-v3-turbo"
235+
server_args = ["--enforce-eager"]
236+
prompt = "This is a speech, recorded in a phonograph."
237+
with RemoteOpenAIServer(model_name, server_args) as remote_server:
238+
#Prompts should not omit the part of original prompt while transcribing.
239+
prefix = "The first words I spoke in the original phonograph"
240+
client = remote_server.get_async_client()
241+
transcription = await client.audio.transcriptions.create(
242+
model=model_name,
243+
file=mary_had_lamb,
244+
language="en",
245+
response_format="text",
246+
temperature=0.0)
247+
out = json.loads(transcription)['text']
248+
assert prefix in out
249+
transcription_wprompt = await client.audio.transcriptions.create(
250+
model=model_name,
251+
file=mary_had_lamb,
252+
language="en",
253+
response_format="text",
254+
prompt=prompt,
255+
temperature=0.0)
256+
out_prompt = json.loads(transcription_wprompt)['text']
257+
assert prefix in out_prompt

vllm/model_executor/models/whisper.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -780,8 +780,9 @@ def validate_language(cls, language: str) -> bool:
780780
@classmethod
781781
def get_decoder_prompt(cls, language: str, task_type: str,
782782
prompt: str) -> str:
783-
return (f"<|startoftranscript|><|{language}|><|{task_type}|>"
784-
f"<|notimestamps|>{prompt}")
783+
return ((f"<|prev|>{prompt}" if prompt else "") +
784+
f"<|startoftranscript|><|{language}|>" +
785+
f"<|{task_type}|><|notimestamps|>")
785786

786787
@classmethod
787788
def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]:

0 commit comments

Comments (0)