Skip to content

Commit 7717e24

Browse files
committed
[HWORKS-1885] Add vllm-openai deployment and support for config files
1 parent 84abef2 commit 7717e24

File tree

9 files changed

+49
-19
lines changed

9 files changed

+49
-19
lines changed

python/hsml/core/serving_api.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,4 @@ def _get_hopsworks_inference_path(self, project_id: int, deployment_instance):
419419
]
420420

421421
def _get_istio_inference_path(self, deployment_instance):
422-
if deployment_instance.model_server == "VLLM":
423-
return ["openai", "v1", "completions"]
424-
425422
return ["v1", "models", deployment_instance.name + ":predict"]

python/hsml/deployment.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,15 @@ def script_file(self):
415415
def script_file(self, script_file: str):
416416
self._predictor.script_file = script_file
417417

418+
@property
419+
def config_file(self):
420+
"""Config file passed to the predictor."""
421+
return self._predictor.config_file
422+
423+
@config_file.setter
424+
def config_file(self, config_file: str):
425+
self._predictor.config_file = config_file
426+
418427
@property
419428
def resources(self):
420429
"""Resource configuration for the predictor."""

python/hsml/engine/serving_engine.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -563,11 +563,13 @@ def predict(
563563
inputs: Union[Dict, List[Dict]],
564564
):
565565
# validate user-provided payload
566-
if deployment_instance.model_server != "VLLM":
567-
self._validate_inference_payload(
568-
deployment_instance.api_protocol, data, inputs
566+
if deployment_instance.model_server == PREDICTOR.MODEL_SERVER_VLLM:
567+
raise ModelServingException(
568+
"Inference requests to LLM deployments are not supported by the `predict` method. Please, use any OpenAI API-compatible client instead."
569569
)
570570

571+
self._validate_inference_payload(deployment_instance.api_protocol, data, inputs)
572+
571573
# build inference payload based on API protocol
572574
payload = self._build_inference_payload(
573575
deployment_instance.api_protocol, data, inputs

python/hsml/model.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ def deploy(
171171
artifact_version: Optional[str] = ARTIFACT_VERSION.CREATE,
172172
serving_tool: Optional[str] = None,
173173
script_file: Optional[str] = None,
174+
config_file: Optional[str] = None,
174175
resources: Optional[Union[PredictorResources, dict]] = None,
175176
inference_logger: Optional[Union[InferenceLogger, dict]] = None,
176177
inference_batcher: Optional[Union[InferenceBatcher, dict]] = None,
@@ -202,6 +203,7 @@ def deploy(
202203
or `MODEL-ONLY` to reuse the shared artifact containing only the model files.
203204
serving_tool: Serving tool used to deploy the model server.
204205
script_file: Path to a custom predictor script implementing the Predict class.
206+
config_file: Server configuration file to be passed to the model deployment.
205207
resources: Resources to be allocated for the predictor.
206208
inference_logger: Inference logger configuration.
207209
inference_batcher: Inference batcher configuration.
@@ -223,6 +225,7 @@ def deploy(
223225
artifact_version=artifact_version,
224226
serving_tool=serving_tool,
225227
script_file=script_file,
228+
config_file=config_file,
226229
resources=resources,
227230
inference_logger=inference_logger,
228231
inference_batcher=inference_batcher,

python/hsml/model_serving.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@ def create_predictor(
160160
artifact_version: Optional[str] = ARTIFACT_VERSION.CREATE,
161161
serving_tool: Optional[str] = None,
162162
script_file: Optional[str] = None,
163+
config_file: Optional[str] = None,
163164
resources: Optional[Union[PredictorResources, dict]] = None,
164165
inference_logger: Optional[Union[InferenceLogger, dict, str]] = None,
165166
inference_batcher: Optional[Union[InferenceBatcher, dict]] = None,
@@ -197,6 +198,7 @@ def create_predictor(
197198
or `MODEL-ONLY` to reuse the shared artifact containing only the model files.
198199
serving_tool: Serving tool used to deploy the model server.
199200
script_file: Path to a custom predictor script implementing the Predict class.
201+
config_file: Server configuration file to be passed to the model deployment.
200202
resources: Resources to be allocated for the predictor.
201203
inference_logger: Inference logger configuration.
202204
inference_batcher: Inference batcher configuration.
@@ -216,6 +218,7 @@ def create_predictor(
216218
artifact_version=artifact_version,
217219
serving_tool=serving_tool,
218220
script_file=script_file,
221+
config_file=config_file,
219222
resources=resources,
220223
inference_logger=inference_logger,
221224
inference_batcher=inference_batcher,

python/hsml/predictor.py

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def __init__(
4848
model_server: str,
4949
serving_tool: Optional[str] = None,
5050
script_file: Optional[str] = None,
51+
config_file: Optional[str] = None,
5152
resources: Optional[Union[PredictorResources, dict, Default]] = None, # base
5253
inference_logger: Optional[
5354
Union[InferenceLogger, dict, Default]
@@ -87,6 +88,7 @@ def __init__(
8788
self._artifact_version = artifact_version
8889
self._serving_tool = serving_tool
8990
self._model_server = model_server
91+
self._config_file = config_file
9092
self._id = id
9193
self._description = description
9294
self._created_at = created_at
@@ -167,12 +169,9 @@ def _validate_serving_tool(cls, serving_tool):
167169

168170
@classmethod
169171
def _validate_script_file(cls, model_framework, script_file):
170-
if script_file is None and (
171-
model_framework == MODEL.FRAMEWORK_PYTHON
172-
or model_framework == MODEL.FRAMEWORK_LLM
173-
):
172+
if script_file is None and (model_framework == MODEL.FRAMEWORK_PYTHON):
174173
raise ValueError(
175-
"Predictor scripts are required in deployments for custom Python models and LLMs."
174+
"Predictor scripts are required in deployments for custom Python models."
176175
)
177176

178177
@classmethod
@@ -273,6 +272,9 @@ def extract_fields_from_json(cls, json_decamelized):
273272
kwargs["script_file"] = util.extract_field_from_json(
274273
json_decamelized, "predictor"
275274
)
275+
kwargs["config_file"] = util.extract_field_from_json(
276+
json_decamelized, "config_file"
277+
)
276278
kwargs["resources"] = PredictorResources.from_json(json_decamelized)
277279
kwargs["inference_logger"] = InferenceLogger.from_json(json_decamelized)
278280
kwargs["inference_batcher"] = InferenceBatcher.from_json(json_decamelized)
@@ -311,6 +313,7 @@ def to_dict(self):
311313
"modelServer": self._model_server,
312314
"servingTool": self._serving_tool,
313315
"predictor": self._script_file,
316+
"configFile": self._config_file,
314317
"apiProtocol": self._api_protocol,
315318
"projectNamespace": self._project_namespace,
316319
}
@@ -442,6 +445,16 @@ def script_file(self, script_file: str):
442445
self._script_file = script_file
443446
self._artifact_version = ARTIFACT_VERSION.CREATE
444447

448+
@property
449+
def config_file(self):
450+
"""Server config file to be passed to the model deployment."""
451+
return self._config_file
452+
453+
@config_file.setter
454+
def config_file(self, config_file: str):
455+
self._config_file = config_file
456+
self._artifact_version = ARTIFACT_VERSION.CREATE
457+
445458
@property
446459
def inference_logger(self):
447460
"""Configuration of the inference logger attached to this predictor."""

python/tests/fixtures/predictor_fixtures.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
"artifact_version": 2,
2020
"predictor": "predictor_file",
2121
"transformer": "transformer_file",
22+
"config_file": "config_file",
2223
"requested_instances": 1,
2324
"requested_transformer_instances": 1,
2425
"predictor_resources": {
@@ -74,6 +75,7 @@
7475
"api_protocol": "REST",
7576
"artifact_version": 2,
7677
"predictor": "predictor_file",
78+
"config_file": "config_file",
7779
"transformer": "transformer_file",
7880
"requested_instances": 1,
7981
"requested_transformer_instances": 1,
@@ -117,6 +119,7 @@
117119
"api_protocol": "REST",
118120
"artifact_version": 3,
119121
"predictor": "predictor_file",
122+
"config_file": "config_file",
120123
"transformer": "transformer_file",
121124
"requested_instances": 1,
122125
"requested_transformer_instances": 1,

python/tests/test_model.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ def test_deploy(self, mocker, backend_fixtures):
211211
artifact_version=p_json["artifact_version"],
212212
serving_tool=p_json["serving_tool"],
213213
script_file=p_json["predictor"],
214+
config_file=p_json["config_file"],
214215
resources=resources,
215216
inference_logger=inference_logger,
216217
inference_batcher=inference_batcher,
@@ -227,6 +228,7 @@ def test_deploy(self, mocker, backend_fixtures):
227228
artifact_version=p_json["artifact_version"],
228229
serving_tool=p_json["serving_tool"],
229230
script_file=p_json["predictor"],
231+
config_file=p_json["config_file"],
230232
resources=resources,
231233
inference_logger=inference_logger,
232234
inference_batcher=inference_batcher,

python/tests/test_predictor.py

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,7 @@ def test_from_response_json_singleton(self, mocker, backend_fixtures):
8080
assert p.artifact_version == p_json["artifact_version"]
8181
assert p.environment == p_json["environment_dto"]["name"]
8282
assert p.script_file == p_json["predictor"]
83+
assert p.config_file == p_json["config_file"]
8384
assert isinstance(p.resources, resources.PredictorResources)
8485
assert isinstance(p.transformer, transformer.Transformer)
8586
assert p.transformer.script_file == p_json["transformer"]
@@ -123,6 +124,7 @@ def test_from_response_json_list(self, mocker, backend_fixtures):
123124
assert p.environment == p_json["environment_dto"]["name"]
124125
assert p.artifact_version == p_json["artifact_version"]
125126
assert p.script_file == p_json["predictor"]
127+
assert p.config_file == p_json["config_file"]
126128
assert isinstance(p.resources, resources.PredictorResources)
127129
assert isinstance(p.transformer, transformer.Transformer)
128130
assert p.transformer.script_file == p_json["transformer"]
@@ -161,6 +163,7 @@ def test_from_response_json_single(self, mocker, backend_fixtures):
161163
assert p.environment == p_json["environment_dto"]["name"]
162164
assert p.artifact_version == p_json["artifact_version"]
163165
assert p.script_file == p_json["predictor"]
166+
assert p.config_file == p_json["config_file"]
164167
assert isinstance(p.resources, resources.PredictorResources)
165168
assert isinstance(p.transformer, transformer.Transformer)
166169
assert p.transformer.script_file == p_json["transformer"]
@@ -213,6 +216,7 @@ def test_constructor(self, mocker, backend_fixtures):
213216
environment=p_json["environment_dto"]["name"],
214217
artifact_version=p_json["artifact_version"],
215218
script_file=p_json["predictor"],
219+
config_file=p_json["config_file"],
216220
resources=p_json["predictor_resources"],
217221
transformer={
218222
"script_file": p_json["transformer"],
@@ -241,6 +245,7 @@ def test_constructor(self, mocker, backend_fixtures):
241245
assert p.environment == p_json["environment_dto"]["name"]
242246
assert p.artifact_version == p_json["artifact_version"]
243247
assert p.script_file == p_json["predictor"]
248+
assert p.config_file == p_json["config_file"]
244249
assert isinstance(p.resources, resources.PredictorResources)
245250
assert isinstance(p.transformer, transformer.Transformer)
246251
assert p.transformer.script_file == p_json["transformer"]
@@ -340,14 +345,6 @@ def test_validate_script_file_py_none(self):
340345
# Assert
341346
assert "Predictor scripts are required" in str(e_info.value)
342347

343-
def test_validate_script_file_llm_none(self):
344-
# Act
345-
with pytest.raises(ValueError) as e_info:
346-
_ = predictor.Predictor._validate_script_file(MODEL.FRAMEWORK_LLM, None)
347-
348-
# Assert
349-
assert "Predictor scripts are required" in str(e_info.value)
350-
351348
def test_validate_script_file_tf_script_file(self):
352349
# Act
353350
predictor.Predictor._validate_script_file(
@@ -659,6 +656,7 @@ def extract_fields_from_json(self, mocker, backend_fixtures):
659656
assert kwargs["model_server"] == p_json["model_server"]
660657
assert kwargs["serving_tool"] == p_json["serving_tool"]
661658
assert kwargs["script_file"] == p_json["predictor"]
659+
assert kwargs["config_file"] == p_json["config_file"]
662660
assert isinstance(kwargs["resources"], resources.PredictorResources)
663661
assert isinstance(kwargs["inference_logger"], inference_logger.InferenceLogger)
664662
assert kwargs["inference_logger"].mode == p_json["inference_logging"]

0 commit comments

Comments (0)