Skip to content

Commit ad6b764

Browse files
authored
handle update endpoint errors (#414)
* handle invalid values errors
* update test cases
* fix when gpus = 0
* fix when gpu_type is None
* throw explicit 500 for EndpointInfraStateNotFound
1 parent c4b8bd1 commit ad6b764

File tree

3 files changed

+328
-14
lines changed

3 files changed

+328
-14
lines changed

model-engine/model_engine_server/api/model_endpoints_v1.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
from model_engine_server.core.loggers import logger_name, make_logger
2727
from model_engine_server.domain.exceptions import (
2828
EndpointDeleteFailedException,
29+
EndpointInfraStateNotFound,
2930
EndpointLabelsException,
3031
EndpointResourceInvalidRequestException,
3132
ExistingEndpointOperationInProgressException,
@@ -67,14 +68,11 @@ async def create_model_endpoint(
6768
status_code=400,
6869
detail="The specified model endpoint already exists.",
6970
) from exc
70-
except EndpointLabelsException as exc:
71-
raise HTTPException(
72-
status_code=400,
73-
detail=str(exc),
74-
) from exc
75-
except ObjectHasInvalidValueException as exc:
76-
raise HTTPException(status_code=400, detail=str(exc))
77-
except EndpointResourceInvalidRequestException as exc:
71+
except (
72+
EndpointLabelsException,
73+
ObjectHasInvalidValueException,
74+
EndpointResourceInvalidRequestException,
75+
) as exc:
7876
raise HTTPException(
7977
status_code=400,
8078
detail=str(exc),
@@ -148,7 +146,11 @@ async def update_model_endpoint(
148146
return await use_case.execute(
149147
user=auth, model_endpoint_id=model_endpoint_id, request=request
150148
)
151-
except EndpointLabelsException as exc:
149+
except (
150+
EndpointLabelsException,
151+
ObjectHasInvalidValueException,
152+
EndpointResourceInvalidRequestException,
153+
) as exc:
152154
raise HTTPException(
153155
status_code=400,
154156
detail=str(exc),
@@ -163,6 +165,11 @@ async def update_model_endpoint(
163165
status_code=409,
164166
detail="Existing operation on endpoint in progress, try again later.",
165167
) from exc
168+
except EndpointInfraStateNotFound as exc:
169+
raise HTTPException(
170+
status_code=500,
171+
detail="Endpoint infra state not found, try again later.",
172+
) from exc
166173

167174

168175
@model_endpoint_router_v1.delete(

model-engine/model_engine_server/domain/use_cases/model_endpoint_use_cases.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -362,13 +362,22 @@ async def execute(
362362
# infra_state to make sure that after the update, all resources are valid and in sync.
363363
# E.g. If user only want to update gpus and leave gpu_type as None, we use the existing gpu_type
364364
# from infra_state to avoid passing in None to validate_resource_requests.
365+
raw_request = request.dict(exclude_unset=True)
365366
validate_resource_requests(
366367
bundle=bundle,
367-
cpus=request.cpus or infra_state.resource_state.cpus,
368-
memory=request.memory or infra_state.resource_state.memory,
369-
storage=request.storage or infra_state.resource_state.storage,
370-
gpus=request.gpus or infra_state.resource_state.gpus,
371-
gpu_type=request.gpu_type or infra_state.resource_state.gpu_type,
368+
cpus=(request.cpus if "cpus" in raw_request else infra_state.resource_state.cpus),
369+
memory=(
370+
request.memory if "memory" in raw_request else infra_state.resource_state.memory
371+
),
372+
storage=(
373+
request.storage if "storage" in raw_request else infra_state.resource_state.storage
374+
),
375+
gpus=(request.gpus if "gpus" in raw_request else infra_state.resource_state.gpus),
376+
gpu_type=(
377+
request.gpu_type
378+
if "gpu_type" in raw_request
379+
else infra_state.resource_state.gpu_type
380+
),
372381
)
373382

374383
validate_deployment_resources(

model-engine/tests/unit/domain/test_model_endpoint_use_cases.py

Lines changed: 298 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
ObjectNotFoundException,
2828
)
2929
from model_engine_server.domain.use_cases.model_endpoint_use_cases import (
30+
CONVERTED_FROM_ARTIFACT_LIKE_KEY,
3031
CreateModelEndpointV1UseCase,
3132
DeleteModelEndpointByIdV1UseCase,
3233
GetModelEndpointByIdV1UseCase,
@@ -855,6 +856,303 @@ async def test_update_model_endpoint_team_success(
855856
assert isinstance(response, UpdateModelEndpointV1Response)
856857

857858

859+
@pytest.mark.asyncio
async def test_update_model_endpoint_use_case_raises_invalid_value_exception(
    fake_model_bundle_repository,
    fake_model_endpoint_service,
    model_bundle_2: ModelBundle,
    model_endpoint_1: ModelEndpoint,
    update_model_endpoint_request: UpdateModelEndpointV1Request,
):
    """Check that an update whose metadata sets the artifact-like key is rejected.

    The request metadata is set to ``{CONVERTED_FROM_ARTIFACT_LIKE_KEY: False}`` and
    ``UpdateModelEndpointByIdV1UseCase.execute`` is expected to raise
    ``ObjectHasInvalidValueException``.
    """
    # Register the bundle and endpoint with the fakes, then link the service to the repo.
    fake_model_bundle_repository.add_model_bundle(model_bundle_2)
    fake_model_endpoint_service.add_model_endpoint(model_endpoint_1)
    fake_model_endpoint_service.model_bundle_repository = fake_model_bundle_repository

    # The caller is the endpoint's creator, acting as a privileged user.
    owner_id = model_endpoint_1.record.created_by
    caller = User(user_id=owner_id, team_id=owner_id, is_privileged_user=True)

    update_use_case = UpdateModelEndpointByIdV1UseCase(
        model_bundle_repository=fake_model_bundle_repository,
        model_endpoint_service=fake_model_endpoint_service,
    )

    # Work on a copy so the shared fixture request is left untouched.
    bad_request = update_model_endpoint_request.copy()
    bad_request.metadata = {CONVERTED_FROM_ARTIFACT_LIKE_KEY: False}

    with pytest.raises(ObjectHasInvalidValueException):
        await update_use_case.execute(
            user=caller,
            model_endpoint_id=model_endpoint_1.record.id,
            request=bad_request,
        )
885+
886+
887+
@pytest.mark.asyncio
async def test_update_model_endpoint_use_case_raises_resource_request_exception(
    fake_model_bundle_repository,
    fake_model_endpoint_service,
    model_bundle_1: ModelBundle,
    model_bundle_2: ModelBundle,
    model_bundle_4: ModelBundle,
    model_bundle_6: ModelBundle,
    model_bundle_triton_enhanced_runnable_image_0_cpu_None_memory_storage: ModelBundle,
    model_endpoint_1: ModelEndpoint,
    model_endpoint_2: ModelEndpoint,
    update_model_endpoint_request: UpdateModelEndpointV1Request,
):
    """Check that invalid resource settings in an update request are rejected.

    Every case copies the base update request, overrides one or two fields, and
    expects ``UpdateModelEndpointByIdV1UseCase.execute`` to raise
    ``EndpointResourceInvalidRequestException``.
    """
    for bundle in (
        model_bundle_1,
        model_bundle_2,
        model_bundle_4,
        model_bundle_6,
        model_bundle_triton_enhanced_runnable_image_0_cpu_None_memory_storage,
    ):
        fake_model_bundle_repository.add_model_bundle(bundle)
    for endpoint in (model_endpoint_1, model_endpoint_2):
        fake_model_endpoint_service.add_model_endpoint(endpoint)
    fake_model_endpoint_service.model_bundle_repository = fake_model_bundle_repository

    use_case = UpdateModelEndpointByIdV1UseCase(
        model_bundle_repository=fake_model_bundle_repository,
        model_endpoint_service=fake_model_endpoint_service,
    )
    user_id = model_endpoint_1.record.created_by
    user = User(user_id=user_id, team_id=user_id, is_privileged_user=True)

    async def expect_rejected(target_endpoint, **overrides):
        """Apply ``overrides`` to a copy of the base request and expect a rejection."""
        request = update_model_endpoint_request.copy()
        # Keyword order is preserved, so fields are assigned in the order written.
        for field_name, value in overrides.items():
            setattr(request, field_name, value)
        with pytest.raises(EndpointResourceInvalidRequestException):
            await use_case.execute(
                user=user,
                model_endpoint_id=target_endpoint.record.id,
                request=request,
            )

    await expect_rejected(model_endpoint_1, cpus=-1)
    await expect_rejected(model_endpoint_1, cpus=float("inf"))
    await expect_rejected(model_endpoint_1, memory="invalid_memory_amount")
    await expect_rejected(model_endpoint_1, memory=float("inf"))
    await expect_rejected(model_endpoint_1, storage="invalid_storage_amount")
    await expect_rejected(model_endpoint_1, storage=float("inf"))

    # specific to sync endpoint
    await expect_rejected(model_endpoint_2, min_workers=0)

    await expect_rejected(model_endpoint_1, max_workers=2**63)
    await expect_rejected(model_endpoint_1, gpus=0)
    await expect_rejected(model_endpoint_1, gpu_type=None)
    await expect_rejected(model_endpoint_1, gpu_type="invalid_gpu_type")

    instance_limits = REQUESTS_BY_GPU_TYPE[model_endpoint_1.infra_state.resource_state.gpu_type]

    for bundle in (model_bundle_1, model_bundle_4):
        # Test that request.cpus + FORWARDER_CPU_USAGE > instance_limits["cpus"] should fail
        await expect_rejected(
            model_endpoint_1,
            model_bundle_id=bundle.id,
            cpus=instance_limits["cpus"],
        )
        # Test that request.memory + FORWARDER_MEMORY_USAGE > instance_limits["memory"] should fail
        await expect_rejected(
            model_endpoint_1,
            model_bundle_id=bundle.id,
            memory=instance_limits["memory"],
        )
        # Test that request.storage + FORWARDER_STORAGE_USAGE > STORAGE_LIMIT should fail
        await expect_rejected(
            model_endpoint_1,
            model_bundle_id=bundle.id,
            storage=STORAGE_LIMIT,
        )

    # Test TritonEnhancedRunnableImageFlavor specific validation logic
    triton_bundle_id = model_bundle_6.id

    # TritonEnhancedRunnableImageFlavor requires gpu >= 1
    await expect_rejected(model_endpoint_1, model_bundle_id=triton_bundle_id, gpus=0.9)

    # TritonEnhancedRunnableImageFlavor requires gpu_type be specified
    await expect_rejected(model_endpoint_1, model_bundle_id=triton_bundle_id, gpu_type=None)

    # Test that request.cpus + FORWARDER_CPU_USAGE + triton_num_cpu > instance_limits["cpu"] should fail
    await expect_rejected(
        model_endpoint_1,
        model_bundle_id=triton_bundle_id,
        cpus=instance_limits["cpus"] - FORWARDER_CPU_USAGE,
    )

    # Test that request.memory + FORWARDER_MEMORY_USAGE + triton_memory > instance_limits["memory"] should fail
    await expect_rejected(
        model_endpoint_1,
        model_bundle_id=triton_bundle_id,
        memory=(
            parse_mem_request(instance_limits["memory"])
            - parse_mem_request(FORWARDER_MEMORY_USAGE)
        ),
    )

    # Test that request.storage + FORWARDER_STORAGE_USAGE + triton_storage > STORAGE_LIMIT should fail
    await expect_rejected(
        model_endpoint_1,
        model_bundle_id=triton_bundle_id,
        storage=parse_mem_request(STORAGE_LIMIT) - parse_mem_request(FORWARDER_STORAGE_USAGE),
    )

    # Test triton_num_cpu >= 1
    await expect_rejected(
        model_endpoint_1,
        model_bundle_id=model_bundle_triton_enhanced_runnable_image_0_cpu_None_memory_storage.id,
    )
1154+
1155+
8581156
@pytest.mark.asyncio
8591157
async def test_update_model_endpoint_raises_not_found(
8601158
fake_model_bundle_repository,

0 commit comments

Comments
 (0)