Skip to content

Commit 8a588d1

Browse files
sangstar and Eta0
authored and committed
[Frontend] [Core] Integrate Tensorizer into S3 loading machinery, allow passing arbitrary arguments during save/load (#19619)
Signed-off-by: Sanger Steel <sangersteel@gmail.com> Co-authored-by: Eta <esyra@coreweave.com>
1 parent d78f9c8 commit 8a588d1

File tree

18 files changed

+807
-189
lines changed

18 files changed

+807
-189
lines changed

examples/others/tensorize_vllm_model.py

Lines changed: 83 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import argparse
55
import dataclasses
66
import json
7+
import logging
78
import os
89
import uuid
910

@@ -15,9 +16,13 @@
1516
TensorizerConfig,
1617
tensorize_lora_adapter,
1718
tensorize_vllm_model,
19+
tensorizer_kwargs_arg,
1820
)
1921
from vllm.utils import FlexibleArgumentParser
2022

23+
logger = logging.getLogger()
24+
25+
2126
# yapf conflicts with isort for this docstring
2227
# yapf: disable
2328
"""
@@ -119,7 +124,7 @@
119124
"""
120125

121126

122-
def parse_args():
127+
def get_parser():
123128
parser = FlexibleArgumentParser(
124129
description="An example script that can be used to serialize and "
125130
"deserialize vLLM models. These models "
@@ -135,13 +140,13 @@ def parse_args():
135140
required=False,
136141
help="Path to a LoRA adapter to "
137142
"serialize along with model tensors. This can then be deserialized "
138-
"along with the model by passing a tensorizer_config kwarg to "
139-
"LoRARequest with type TensorizerConfig. See the docstring for this "
140-
"for a usage example."
141-
143+
"along with the model by instantiating a TensorizerConfig object, "
144+
"creating a dict from it with TensorizerConfig.to_serializable(), "
145+
"and passing it to LoRARequest's initializer with the kwarg "
146+
"tensorizer_config_dict."
142147
)
143148

144-
subparsers = parser.add_subparsers(dest='command')
149+
subparsers = parser.add_subparsers(dest='command', required=True)
145150

146151
serialize_parser = subparsers.add_parser(
147152
'serialize', help="Serialize a model to `--serialized-directory`")
@@ -171,6 +176,14 @@ def parse_args():
171176
"where `suffix` is given by `--suffix` or a random UUID if not "
172177
"provided.")
173178

179+
serialize_parser.add_argument(
180+
"--serialization-kwargs",
181+
type=tensorizer_kwargs_arg,
182+
required=False,
183+
help=("A JSON string containing additional keyword arguments to "
184+
"pass to Tensorizer's TensorSerializer during "
185+
"serialization."))
186+
174187
serialize_parser.add_argument(
175188
"--keyfile",
176189
type=str,
@@ -186,21 +199,45 @@ def parse_args():
186199
deserialize_parser.add_argument(
187200
"--path-to-tensors",
188201
type=str,
189-
required=True,
202+
required=False,
190203
help="The local path or S3 URI to the model tensors to deserialize. ")
191204

205+
deserialize_parser.add_argument(
206+
"--serialized-directory",
207+
type=str,
208+
required=False,
209+
help="Directory with model artifacts for loading. Assumes a "
210+
"model.tensors file exists therein. Can supersede "
211+
"--path-to-tensors.")
212+
192213
deserialize_parser.add_argument(
193214
"--keyfile",
194215
type=str,
195216
required=False,
196217
help=("Path to a binary key to use to decrypt the model weights,"
197218
" if the model was serialized with encryption"))
198219

199-
TensorizerArgs.add_cli_args(deserialize_parser)
220+
deserialize_parser.add_argument(
221+
"--deserialization-kwargs",
222+
type=tensorizer_kwargs_arg,
223+
required=False,
224+
help=("A JSON string containing additional keyword arguments to "
225+
"pass to Tensorizer's `TensorDeserializer` during "
226+
"deserialization."))
200227

201-
return parser.parse_args()
228+
TensorizerArgs.add_cli_args(deserialize_parser)
202229

230+
return parser
203231

232+
def merge_extra_config_with_tensorizer_config(extra_cfg: dict,
233+
cfg: TensorizerConfig):
234+
for k, v in extra_cfg.items():
235+
if hasattr(cfg, k):
236+
setattr(cfg, k, v)
237+
logger.info(
238+
"Updating TensorizerConfig with %s from "
239+
"--model-loader-extra-config provided", k
240+
)
204241

205242
def deserialize(args, tensorizer_config):
206243
if args.lora_path:
@@ -230,7 +267,8 @@ def deserialize(args, tensorizer_config):
230267
lora_request=LoRARequest("sql-lora",
231268
1,
232269
args.lora_path,
233-
tensorizer_config = tensorizer_config)
270+
tensorizer_config_dict = tensorizer_config
271+
.to_serializable())
234272
)
235273
)
236274
else:
@@ -243,7 +281,8 @@ def deserialize(args, tensorizer_config):
243281

244282

245283
def main():
246-
args = parse_args()
284+
parser = get_parser()
285+
args = parser.parse_args()
247286

248287
s3_access_key_id = (getattr(args, 's3_access_key_id', None)
249288
or os.environ.get("S3_ACCESS_KEY_ID", None))
@@ -265,13 +304,24 @@ def main():
265304
else:
266305
keyfile = None
267306

307+
extra_config = {}
268308
if args.model_loader_extra_config:
269-
config = json.loads(args.model_loader_extra_config)
270-
tensorizer_args = \
271-
TensorizerConfig(**config)._construct_tensorizer_args()
272-
tensorizer_args.tensorizer_uri = args.path_to_tensors
273-
else:
274-
tensorizer_args = None
309+
extra_config = json.loads(args.model_loader_extra_config)
310+
311+
312+
tensorizer_dir = (args.serialized_directory or
313+
extra_config.get("tensorizer_dir"))
314+
tensorizer_uri = (getattr(args, "path_to_tensors", None)
315+
or extra_config.get("tensorizer_uri"))
316+
317+
if tensorizer_dir and tensorizer_uri:
318+
parser.error("--serialized-directory and --path-to-tensors "
319+
"cannot both be provided")
320+
321+
if not tensorizer_dir and not tensorizer_uri:
322+
parser.error("Either --serialized-directory or --path-to-tensors "
323+
"must be provided")
324+
275325

276326
if args.command == "serialize":
277327
eng_args_dict = {f.name: getattr(args, f.name) for f in
@@ -281,7 +331,7 @@ def main():
281331
argparse.Namespace(**eng_args_dict)
282332
)
283333

284-
input_dir = args.serialized_directory.rstrip('/')
334+
input_dir = tensorizer_dir.rstrip('/')
285335
suffix = args.suffix if args.suffix else uuid.uuid4().hex
286336
base_path = f"{input_dir}/vllm/{model_ref}/{suffix}"
287337
if engine_args.tensor_parallel_size > 1:
@@ -292,21 +342,29 @@ def main():
292342
tensorizer_config = TensorizerConfig(
293343
tensorizer_uri=model_path,
294344
encryption_keyfile=keyfile,
295-
**credentials)
345+
serialization_kwargs=args.serialization_kwargs or {},
346+
**credentials
347+
)
296348

297349
if args.lora_path:
298350
tensorizer_config.lora_dir = tensorizer_config.tensorizer_dir
299351
tensorize_lora_adapter(args.lora_path, tensorizer_config)
300352

353+
merge_extra_config_with_tensorizer_config(extra_config,
354+
tensorizer_config)
301355
tensorize_vllm_model(engine_args, tensorizer_config)
302356

303357
elif args.command == "deserialize":
304-
if not tensorizer_args:
305-
tensorizer_config = TensorizerConfig(
306-
tensorizer_uri=args.path_to_tensors,
307-
encryption_keyfile = keyfile,
308-
**credentials
309-
)
358+
tensorizer_config = TensorizerConfig(
359+
tensorizer_uri=args.path_to_tensors,
360+
tensorizer_dir=args.serialized_directory,
361+
encryption_keyfile=keyfile,
362+
deserialization_kwargs=args.deserialization_kwargs or {},
363+
**credentials
364+
)
365+
366+
merge_extra_config_with_tensorizer_config(extra_config,
367+
tensorizer_config)
310368
deserialize(args, tensorizer_config)
311369
else:
312370
raise ValueError("Either serialize or deserialize must be specified.")

requirements/nightly_torch_test.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# testing
22
pytest
3-
tensorizer>=2.9.0
3+
tensorizer==2.10.1
44
pytest-forked
55
pytest-asyncio
66
pytest-rerunfailures

requirements/rocm.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ datasets
1111
ray>=2.10.0,<2.45.0
1212
peft
1313
pytest-asyncio
14-
tensorizer>=2.9.0
14+
tensorizer==2.10.1
1515
packaging>=24.2
1616
setuptools>=77.0.3,<80.0.0
1717
setuptools-scm>=8

requirements/test.in

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# testing
22
pytest
3-
tensorizer>=2.9.0
3+
tensorizer==2.10.1
44
pytest-forked
55
pytest-asyncio
66
pytest-rerunfailures

requirements/test.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -741,7 +741,7 @@ tenacity==9.0.0
741741
# via
742742
# lm-eval
743743
# plotly
744-
tensorizer==2.9.0
744+
tensorizer==2.10.1
745745
# via -r requirements/test.in
746746
threadpoolctl==3.5.0
747747
# via scikit-learn

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -689,7 +689,7 @@ def _read_requirements(filename: str) -> list[str]:
689689
install_requires=get_requirements(),
690690
extras_require={
691691
"bench": ["pandas", "datasets"],
692-
"tensorizer": ["tensorizer>=2.9.0"],
692+
"tensorizer": ["tensorizer==2.10.1"],
693693
"fastsafetensors": ["fastsafetensors >= 0.1.10"],
694694
"runai": ["runai-model-streamer", "runai-model-streamer-s3", "boto3"],
695695
"audio": ["librosa", "soundfile"], # Required for audio processing

tests/entrypoints/openai/test_tensorizer_entrypoint.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33
import gc
4-
import json
4+
import os
55
import tempfile
66

77
import openai
@@ -58,18 +58,20 @@ def tensorize_model_and_lora(tmp_dir, model_uri):
5858

5959
@pytest.fixture(scope="module")
6060
def server(model_uri, tensorize_model_and_lora):
61-
model_loader_extra_config = {
62-
"tensorizer_uri": model_uri,
63-
}
61+
# In this case, model_uri is a directory with a model.tensors
62+
# file and all necessary model artifacts, particularly a
63+
# HF `config.json` file. In this case, Tensorizer can infer the
64+
# `TensorizerConfig` so --model-loader-extra-config can be completely
65+
# omitted.
6466

6567
## Start OpenAI API server
6668
args = [
67-
"--load-format", "tensorizer", "--device", "cuda",
68-
"--model-loader-extra-config",
69-
json.dumps(model_loader_extra_config), "--enable-lora"
69+
"--load-format", "tensorizer", "--served-model-name", MODEL_NAME,
70+
"--enable-lora"
7071
]
7172

72-
with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
73+
model_dir = os.path.dirname(model_uri)
74+
with RemoteOpenAIServer(model_dir, args) as remote_server:
7375
yield remote_server
7476

7577

tests/lora/test_llama_tp.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,8 @@ def test_tp2_serialize_and_deserialize_lora(tmp_path, sql_lora_files,
169169
f"{VLLM_PATH}/examples/others/tensorize_vllm_model.py", "--model",
170170
MODEL_PATH, "--lora-path", lora_path, "--tensor-parallel-size",
171171
str(tp_size), "serialize", "--serialized-directory",
172-
str(tmp_path), "--suffix", suffix
172+
str(tmp_path), "--suffix", suffix, "--serialization-kwargs",
173+
'{"limit_cpu_concurrency": 4}'
173174
],
174175
check=True,
175176
capture_output=True,
@@ -195,7 +196,7 @@ def test_tp2_serialize_and_deserialize_lora(tmp_path, sql_lora_files,
195196
tensor_parallel_size=2,
196197
max_loras=2)
197198

198-
tensorizer_config_dict = tensorizer_config.to_dict()
199+
tensorizer_config_dict = tensorizer_config.to_serializable()
199200

200201
print("lora adapter created")
201202
assert do_sample(loaded_vllm_model,

0 commit comments

Comments (0)