
Commit afaf8fc

Author: Your Name
Commit message: Merge branch 'main' into tanuj/beam
2 parents: ef261d5 + 2d162c3

1,895 files changed: 89,032 additions, 36,504 deletions


.buildkite/check-wheel-size.py

Lines changed: 13 additions & 8 deletions
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import os
 import sys
@@ -8,12 +9,12 @@
 # Note that we have 400 MiB quota, please use it wisely.
 # See https://github.com/pypi/support/issues/3792 .
 # Please also sync the value with the one in Dockerfile.
-VLLM_MAX_SIZE_MB = int(os.environ.get('VLLM_MAX_SIZE_MB', 400))
+VLLM_MAX_SIZE_MB = int(os.environ.get("VLLM_MAX_SIZE_MB", 400))
 
 
 def print_top_10_largest_files(zip_file):
     """Print the top 10 largest files in the given zip file."""
-    with zipfile.ZipFile(zip_file, 'r') as z:
+    with zipfile.ZipFile(zip_file, "r") as z:
         file_sizes = [(f, z.getinfo(f).file_size) for f in z.namelist()]
         file_sizes.sort(key=lambda x: x[1], reverse=True)
         for f, size in file_sizes[:10]:
@@ -28,14 +29,18 @@ def check_wheel_size(directory):
                 wheel_path = os.path.join(root, file_name)
                 wheel_size_mb = os.path.getsize(wheel_path) / (1024 * 1024)
                 if wheel_size_mb > VLLM_MAX_SIZE_MB:
-                    print(f"Not allowed: Wheel {wheel_path} is larger "
-                          f"({wheel_size_mb:.2f} MB) than the limit "
-                          f"({VLLM_MAX_SIZE_MB} MB).")
+                    print(
+                        f"Not allowed: Wheel {wheel_path} is larger "
+                        f"({wheel_size_mb:.2f} MB) than the limit "
+                        f"({VLLM_MAX_SIZE_MB} MB)."
+                    )
                     print_top_10_largest_files(wheel_path)
                     return 1
                 else:
-                    print(f"Wheel {wheel_path} is within the allowed size "
-                          f"({wheel_size_mb:.2f} MB).")
+                    print(
+                        f"Wheel {wheel_path} is within the allowed size "
+                        f"({wheel_size_mb:.2f} MB)."
+                    )
     return 0
 
 
@@ -45,4 +50,4 @@ def check_wheel_size(directory):
         sys.exit(1)
 
     directory = sys.argv[1]
-    sys.exit(check_wheel_size(directory))
+    sys.exit(check_wheel_size(directory))
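A note on the only tunable above: the rewritten line reads VLLM_MAX_SIZE_MB from the environment with a 400 MiB default. A standalone sketch of that fallback (not part of the commit; stdlib only):

import os

# Unset: falls back to the 400 MiB default.
print(int(os.environ.get("VLLM_MAX_SIZE_MB", 400)))  # 400

# CI can raise the cap without editing the script.
os.environ["VLLM_MAX_SIZE_MB"] = "450"
print(int(os.environ.get("VLLM_MAX_SIZE_MB", 400)))  # 450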

.buildkite/generate_index.py

Lines changed: 3 additions & 2 deletions
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import argparse
 import os
@@ -22,5 +23,5 @@
     print(f"Generated index.html for {args.wheel}")
     # cloudfront requires escaping the '+' character
     f.write(
-        template.format(wheel=filename,
-                        wheel_html_escaped=filename.replace("+", "%2B")))
+        template.format(wheel=filename, wheel_html_escaped=filename.replace("+", "%2B"))
+    )
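The reflowed f.write call is behavior-preserving; the file's own comment notes that CloudFront requires escaping the '+' that appears in wheel filenames. A quick standalone illustration (the wheel name here is made up):

# Hypothetical wheel filename; only the replace() call is from the script.
filename = "vllm-0.9.1+cu118-cp310-cp310-linux_x86_64.whl"
print(filename.replace("+", "%2B"))
# vllm-0.9.1%2Bcu118-cp310-cp310-linux_x86_64.whl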
(new file under .buildkite/lm-eval-harness/configs/; name hidden in this view)

Lines changed: 11 additions & 0 deletions

@@ -0,0 +1,11 @@
+# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m RedHatAI/Llama-3.2-1B-Instruct-FP8 -b "auto" -l 1319 -f 5 -t 1
+model_name: "RedHatAI/Llama-3.2-1B-Instruct-FP8"
+tasks:
+- name: "gsm8k"
+  metrics:
+  - name: "exact_match,strict-match"
+    value: 0.335
+  - name: "exact_match,flexible-extract"
+    value: 0.323
+limit: 1319
+num_fewshot: 5
.buildkite/lm-eval-harness/configs/Qwen2.5-1.5B-Instruct.yaml

Lines changed: 11 additions & 0 deletions

@@ -0,0 +1,11 @@
+# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m Qwen/Qwen2.5-1.5B-Instruct -b auto -l 1319 -f 5 -t 1
+model_name: "Qwen/Qwen2.5-1.5B-Instruct"
+tasks:
+- name: "gsm8k"
+  metrics:
+  - name: "exact_match,strict-match"
+    value: 0.54
+  - name: "exact_match,flexible-extract"
+    value: 0.59
+limit: 1319
+num_fewshot: 5
.buildkite/lm-eval-harness/configs/Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml

Lines changed: 11 additions & 0 deletions

@@ -0,0 +1,11 @@
+# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic -b auto -l 1319 -f 5 -t 1
+model_name: "RedHatAI/Qwen2.5-VL-3B-Instruct-FP8-Dynamic"
+tasks:
+- name: "gsm8k"
+  metrics:
+  - name: "exact_match,strict-match"
+    value: 0.47
+  - name: "exact_match,flexible-extract"
+    value: 0.64
+limit: 1319
+num_fewshot: 5
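All three new configs share one schema: a model name, a task list with ground-truth metric values, a sample limit, and a few-shot count. A minimal sketch of how the test further below consumes one (assumes PyYAML and a local copy of the file):

import yaml

# Path is illustrative; the real files live in .buildkite/lm-eval-harness/configs/.
with open("Qwen2.5-1.5B-Instruct.yaml", encoding="utf-8") as f:
    eval_config = yaml.safe_load(f)

for task in eval_config["tasks"]:
    for metric in task["metrics"]:
        print(task["name"], metric["name"], metric["value"])
# gsm8k exact_match,strict-match 0.54
# gsm8k exact_match,flexible-extract 0.59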

.buildkite/lm-eval-harness/configs/models-large.txt

Lines changed: 1 addition & 0 deletions
@@ -3,3 +3,4 @@ Meta-Llama-3-70B-Instruct.yaml
 Mixtral-8x7B-Instruct-v0.1.yaml
 Qwen2-57B-A14-Instruct.yaml
 DeepSeek-V2-Lite-Chat.yaml
+Meta-Llama-3-8B-QQQ.yaml
.buildkite/lm-eval-harness/configs/models-small.txt

Lines changed: 2 additions & 6 deletions

@@ -1,10 +1,6 @@
-Meta-Llama-3-8B-Instruct.yaml
-Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml
+Qwen2.5-1.5B-Instruct.yaml
 Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml
 Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml
 Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml
-Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml
+Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml
 Qwen1.5-MoE-W4A16-compressed-tensors.yaml
-Qwen2-1.5B-Instruct-INT8-compressed-tensors.yaml
-Qwen2-1.5B-Instruct-FP8W8.yaml
-Meta-Llama-3-8B-QQQ.yaml
.buildkite/lm-eval-harness/conftest.py

Lines changed: 44 additions & 0 deletions

@@ -0,0 +1,44 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+from pathlib import Path
+
+import pytest
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--config-list-file",
+        action="store",
+        help="Path to the file listing model config YAMLs (one per line)",
+    )
+    parser.addoption(
+        "--tp-size",
+        action="store",
+        default="1",
+        help="Tensor parallel size to use for evaluation",
+    )
+
+
+@pytest.fixture(scope="session")
+def config_list_file(pytestconfig, config_dir):
+    rel_path = pytestconfig.getoption("--config-list-file")
+    return config_dir / rel_path
+
+
+@pytest.fixture(scope="session")
+def tp_size(pytestconfig):
+    return pytestconfig.getoption("--tp-size")
+
+
+def pytest_generate_tests(metafunc):
+    if "config_filename" in metafunc.fixturenames:
+        rel_path = metafunc.config.getoption("--config-list-file")
+        config_list_file = Path(rel_path).resolve()
+        config_dir = config_list_file.parent
+        with open(config_list_file, encoding="utf-8") as f:
+            configs = [
+                config_dir / line.strip()
+                for line in f
+                if line.strip() and not line.startswith("#")
+            ]
+        metafunc.parametrize("config_filename", configs)
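pytest_generate_tests turns every non-blank, non-comment line of the --config-list-file into one parametrized test case. A standalone sketch of that parsing, using an inline listing instead of a real file (paths are illustrative):

from pathlib import Path

lines = [
    "# comments and blank lines are skipped",
    "Qwen2.5-1.5B-Instruct.yaml",
    "",
    "Qwen2.5-VL-3B-Instruct-FP8-dynamic.yaml",
]
config_dir = Path(".buildkite/lm-eval-harness/configs")
configs = [
    config_dir / line.strip()
    for line in lines
    if line.strip() and not line.startswith("#")
]
print(len(configs))  # 2 -> two runs of test_lm_eval_correctness_param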

.buildkite/lm-eval-harness/run-tests.sh

Lines changed: 0 additions & 59 deletions
This file was deleted.
.buildkite/lm-eval-harness/test_lm_eval_correctness.py

Lines changed: 24 additions & 38 deletions

@@ -1,69 +1,55 @@
 # SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 """
 LM eval harness on model to compare vs HF baseline computed offline.
 Configs are found in configs/$MODEL.yaml
 
-* export LM_EVAL_TEST_DATA_FILE=configs/Meta-Llama-3-70B-Instruct.yaml
-* export LM_EVAL_TP_SIZE=4
-* pytest -s test_lm_eval_correctness.py
+pytest -s -v test_lm_eval_correctness.py \
+    --config-list-file=configs/models-small.txt \
+    --tp-size=1
 """
 
-import os
-from pathlib import Path
-
 import lm_eval
-import numpy
-import pytest
+import numpy as np
 import yaml
 
 RTOL = 0.08
-TEST_DATA_FILE = os.environ.get(
-    "LM_EVAL_TEST_DATA_FILE",
-    ".buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct.yaml")
-
-TP_SIZE = os.environ.get("LM_EVAL_TP_SIZE", 1)
-
 
-def launch_lm_eval(eval_config):
-    trust_remote_code = eval_config.get('trust_remote_code', False)
-
-    model_args = f"pretrained={eval_config['model_name']}," \
-                 f"tensor_parallel_size={TP_SIZE}," \
-                 f"add_bos_token=true," \
-                 f"trust_remote_code={trust_remote_code}"
 
+def launch_lm_eval(eval_config, tp_size):
+    trust_remote_code = eval_config.get("trust_remote_code", False)
+    model_args = (
+        f"pretrained={eval_config['model_name']},"
+        f"tensor_parallel_size={tp_size},"
+        f"enforce_eager=true,"
+        f"add_bos_token=true,"
+        f"trust_remote_code={trust_remote_code}"
+    )
     results = lm_eval.simple_evaluate(
         model="vllm",
         model_args=model_args,
         tasks=[task["name"] for task in eval_config["tasks"]],
         num_fewshot=eval_config["num_fewshot"],
         limit=eval_config["limit"],
-        batch_size="auto")
-
+        batch_size="auto",
+    )
     return results
 
 
-def test_lm_eval_correctness():
-    eval_config = yaml.safe_load(
-        Path(TEST_DATA_FILE).read_text(encoding="utf-8"))
-
-    if eval_config[
-            "model_name"] == "nm-testing/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform":  #noqa: E501
-        pytest.skip("FBGEMM is currently failing on main.")
+def test_lm_eval_correctness_param(config_filename, tp_size):
+    eval_config = yaml.safe_load(config_filename.read_text(encoding="utf-8"))
 
-    # Launch eval requests.
-    results = launch_lm_eval(eval_config)
+    results = launch_lm_eval(eval_config, tp_size)
 
-    # Confirm scores match ground truth.
     success = True
     for task in eval_config["tasks"]:
         for metric in task["metrics"]:
             ground_truth = metric["value"]
             measured_value = results["results"][task["name"]][metric["name"]]
-            print(f'{task["name"]} | {metric["name"]}: '
-                  f'ground_truth={ground_truth} | measured={measured_value}')
-            success = success and numpy.isclose(
-                ground_truth, measured_value, rtol=RTOL)
+            print(
+                f"{task['name']} | {metric['name']}: "
+                f"ground_truth={ground_truth} | measured={measured_value}"
+            )
+            success = success and np.isclose(ground_truth, measured_value, rtol=RTOL)
 
-    # Assert at the end, print all scores even on failure for debugging.
     assert success
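The pass criterion above is np.isclose(ground_truth, measured_value, rtol=RTOL), i.e. the gap must stay within 8% of the measured value (numpy's default atol of 1e-8 is negligible at these scales). Two worked cases:

import numpy as np

RTOL = 0.08
# |0.54 - 0.50| = 0.04 <= 0.08 * 0.50 = 0.04 -> within tolerance
print(np.isclose(0.54, 0.50, rtol=RTOL))  # True
# |0.54 - 0.49| = 0.05 >  0.08 * 0.49 ~= 0.039 -> fails
print(np.isclose(0.54, 0.49, rtol=RTOL))  # False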
