
Commit dcad324

russellbnjhill authored and committed
[V1] Add VLLM_ALLOW_INSECURE_SERIALIZATION env var (vllm-project#17490)
Signed-off-by: Russell Bryant <rbryant@redhat.com>
Signed-off-by: Nick Hill <nhill@redhat.com>
Co-authored-by: Nick Hill <nhill@redhat.com>
Signed-off-by: Yuqi Zhang <yuqizhang@google.com>
1 parent aa73850 commit dcad324
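The environment variable named in the commit title is the only symbol this excerpt takes from the commit itself; how vLLM consumes it is not shown here. A minimal sketch, assuming the flag is read as a boolean-like string ("0"/"1") from the process environment when the engine starts, so it must be set before the LLM object is constructed (the model name below is illustrative only):

import os

# Assumption: the flag is read from the environment at engine construction
# time, so it is exported before creating the LLM instance.
os.environ["VLLM_ALLOW_INSECURE_SERIALIZATION"] = "1"

from vllm import LLM

llm = LLM(model="facebook/opt-125m")  # illustrative model choice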

File tree

4 files changed: +170 -115 lines changed

tests/v1/entrypoints/llm/test_struct_output_generate.py

Lines changed: 50 additions & 34 deletions
@@ -105,8 +105,9 @@ def test_structured_output(
         max_tokens=1000,
         guided_decoding=GuidedDecodingParams(json=sample_json_schema))
     outputs = llm.generate(prompts=[
-        f"Give an example JSON for an employee profile "
-        f"that fits this schema: {sample_json_schema}"
+        (f"Give an example JSON for an employee profile that fits this "
+         f"schema. Make the response as short as possible. Schema: "
+         f"{sample_json_schema}")
     ] * 2,
                            sampling_params=sampling_params,
                            use_tqdm=True)
@@ -136,7 +137,8 @@ def test_structured_output(

     outputs = llm.generate(
         prompts=("Generate a JSON object with curly braces for a person with "
-                 "name and age fields for John Smith who is 31 years old."),
+                 "name and age fields for John Smith who is 31 years old. "
+                 "Make the response as short as possible."),
         sampling_params=sampling_params,
         use_tqdm=True)

@@ -165,19 +167,20 @@ def test_structured_output(
         with pytest.raises(ValueError,
                            match="The provided JSON schema contains features "
                            "not supported by xgrammar."):
-            llm.generate(prompts=[
-                f"Give an example JSON for an employee profile "
-                f"that fits this schema: {unsupported_json_schema}"
-            ] * 2,
-                         sampling_params=sampling_params,
-                         use_tqdm=True)
+            llm.generate(
+                prompts=[(f"Give an example JSON for an employee profile that "
+                          f"fits this schema: {unsupported_json_schema}. "
+                          f"Make the response as short as possible.")] * 2,
+                sampling_params=sampling_params,
+                use_tqdm=True)
     else:
-        outputs = llm.generate(
-            prompts=("Give an example JSON object for a grade "
-                     "that fits this schema: "
-                     f"{unsupported_json_schema}"),
-            sampling_params=sampling_params,
-            use_tqdm=True)
+        outputs = llm.generate(prompts=(
+            "Give an example JSON object for a grade "
+            "that fits this schema: "
+            f"{unsupported_json_schema}. Make the response as short as "
+            "possible."),
+                               sampling_params=sampling_params,
+                               use_tqdm=True)
     assert outputs is not None
     for output in outputs:
         assert output is not None
@@ -199,8 +202,10 @@ def test_structured_output(
         max_tokens=1000,
         guided_decoding=GuidedDecodingParams(grammar=sample_sql_ebnf))
     outputs = llm.generate(
-        prompts=("Generate a sql statement that selects col_1 from "
-                 "table_1 where it is equal to 1"),
+        prompts=(
+            "Generate a sql statement that selects col_1 from "
+            "table_1 where it is equal to 1. Make the response as short as "
+            "possible."),
         sampling_params=sampling_params,
         use_tqdm=True,
     )
@@ -231,8 +236,10 @@ def test_structured_output(
         max_tokens=1000,
         guided_decoding=GuidedDecodingParams(grammar=sample_sql_lark))
     outputs = llm.generate(
-        prompts=("Generate a sql statement that selects col_1 from "
-                 "table_1 where it is equal to 1"),
+        prompts=(
+            "Generate a sql statement that selects col_1 from "
+            "table_1 where it is equal to 1. Make the response as short as "
+            "possible."),
         sampling_params=sampling_params,
         use_tqdm=True,
     )
@@ -269,8 +276,10 @@ def test_structured_output(
         guided_decoding=GuidedDecodingParams(grammar="not a grammar"))
     with pytest.raises(ValueError, match="Failed to convert the grammar "):
         llm.generate(
-            prompts=("Generate a sql statement that selects col_1 from "
-                     "table_1 where it is equal to 1"),
+            prompts=(
+                "Generate a sql statement that selects col_1 from "
+                "table_1 where it is equal to 1. Make the response as short "
+                "as possible."),
             sampling_params=sampling_params,
             use_tqdm=True,
         )
@@ -284,7 +293,8 @@ def test_structured_output(
         guided_decoding=GuidedDecodingParams(regex=sample_regex))
     outputs = llm.generate(
         prompts=[
-            f"Give an example IPv4 address with this regex: {sample_regex}"
+            (f"Give an example IPv4 address with this regex: {sample_regex}. "
+             f"Make the response as short as possible.")
         ] * 2,
         sampling_params=sampling_params,
         use_tqdm=True,
@@ -309,7 +319,8 @@ def test_structured_output(
         top_p=0.95,
         guided_decoding=GuidedDecodingParams(choice=sample_guided_choice))
     outputs = llm.generate(
-        prompts="The best language for type-safe systems programming is ",
+        prompts=("The best language for type-safe systems programming is "
+                 "(Make the response as short as possible.) "),
         sampling_params=sampling_params,
         use_tqdm=True)
     assert outputs is not None
@@ -331,11 +342,12 @@ def test_structured_output(
         temperature=1.0,
         max_tokens=1000,
         guided_decoding=GuidedDecodingParams(json=json_schema))
-    outputs = llm.generate(
-        prompts="Generate a JSON with the brand, model and car_type of"
-        "the most iconic car from the 90's",
-        sampling_params=sampling_params,
-        use_tqdm=True)
+    outputs = llm.generate(prompts=(
+        "Generate a JSON with the brand, model and car_type of the most "
+        "iconic car from the 90's. Make the response as short as "
+        "possible."),
+                           sampling_params=sampling_params,
+                           use_tqdm=True)

     assert outputs is not None

@@ -373,7 +385,8 @@ def test_structured_output(
         guided_decoding=GuidedDecodingParams(json=json_schema))

     outputs = llm.generate(
-        prompts="Generate a description of a frog using 50 characters.",
+        prompts=("Generate a description of a frog using 50 characters. "
+                 "Make the response as short as possible."),
         sampling_params=sampling_params,
         use_tqdm=True)

@@ -452,7 +465,8 @@ def test_structured_output(

 You are a helpful assistant.

-Given the previous instructions, what is the weather in New York City?
+Given the previous instructions, what is the weather in New York City? \
+Make the response as short as possible.
 """

     # Change this once other backends support structural_tag
@@ -509,9 +523,10 @@ def test_structured_output_auto_mode(
         max_tokens=1000,
         guided_decoding=GuidedDecodingParams(json=unsupported_json_schema))

-    prompts = ("Give an example JSON object for a grade "
-               "that fits this schema: "
-               f"{unsupported_json_schema}")
+    prompts = (
+        "Give an example JSON object for a grade "
+        "that fits this schema: "
+        f"{unsupported_json_schema}. Make the response as short as possible.")
     # This would fail with the default of "xgrammar", but in "auto"
     # we will handle fallback automatically.
     outputs = llm.generate(prompts=prompts,
@@ -566,7 +581,8 @@ def test_guidance_no_additional_properties(monkeypatch: pytest.MonkeyPatch):
     prompt = (
         "<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a "
         "helpful assistant.<|im_end|>\n<|im_start|>user\nPlease generate a "
-        "large JSON object with key-value pairs a1=b1, a2=b2, ..., a20=b20"
+        "large JSON object with key-value pairs a1=b1, a2=b2, ..., a20=b20. "
+        "Make the response as short as possible."
         "<|im_end|>\n<|im_start|>assistant\n")

     def generate_with_backend(backend):
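Every hunk above makes the same edit: the structured-output prompts gain a trailing "Make the response as short as possible." so the guided generations finish sooner. A minimal standalone sketch of that pattern using the same vLLM calls the test file uses; the model name and JSON schema below are illustrative assumptions, not taken from the diff:

from vllm import LLM, SamplingParams
from vllm.sampling_params import GuidedDecodingParams

llm = LLM(model="Qwen/Qwen2.5-1.5B-Instruct")  # illustrative model choice

# Illustrative schema; the tests use fixtures such as sample_json_schema.
schema = {
    "type": "object",
    "properties": {"name": {"type": "string"}, "age": {"type": "integer"}},
    "required": ["name", "age"],
}
sampling_params = SamplingParams(
    temperature=1.0,
    max_tokens=1000,
    guided_decoding=GuidedDecodingParams(json=schema),
)
# The trailing brevity instruction mirrors the change made throughout the diff.
outputs = llm.generate(
    prompts=("Generate a JSON object with name and age fields for John Smith "
             "who is 31 years old. Make the response as short as possible."),
    sampling_params=sampling_params,
    use_tqdm=True,
)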
