|
27 | 27 | ObjectNotFoundException,
|
28 | 28 | )
|
29 | 29 | from model_engine_server.domain.use_cases.model_endpoint_use_cases import (
|
| 30 | + CONVERTED_FROM_ARTIFACT_LIKE_KEY, |
30 | 31 | CreateModelEndpointV1UseCase,
|
31 | 32 | DeleteModelEndpointByIdV1UseCase,
|
32 | 33 | GetModelEndpointByIdV1UseCase,
|
@@ -855,6 +856,303 @@ async def test_update_model_endpoint_team_success(
|
855 | 856 | assert isinstance(response, UpdateModelEndpointV1Response)
|
856 | 857 |
|
857 | 858 |
|
@pytest.mark.asyncio
async def test_update_model_endpoint_use_case_raises_invalid_value_exception(
    fake_model_bundle_repository,
    fake_model_endpoint_service,
    model_bundle_2: ModelBundle,
    model_endpoint_1: ModelEndpoint,
    update_model_endpoint_request: UpdateModelEndpointV1Request,
):
    """An update whose metadata carries CONVERTED_FROM_ARTIFACT_LIKE_KEY must fail.

    The use case is expected to raise ObjectHasInvalidValueException when the
    request metadata contains that key.
    """
    # The endpoint's creator doubles as both the user id and the team id.
    owner_id = model_endpoint_1.record.created_by
    owner = User(user_id=owner_id, team_id=owner_id, is_privileged_user=True)

    # Seed the fakes and wire the service to the same bundle repository.
    fake_model_bundle_repository.add_model_bundle(model_bundle_2)
    fake_model_endpoint_service.add_model_endpoint(model_endpoint_1)
    fake_model_endpoint_service.model_bundle_repository = fake_model_bundle_repository

    update_use_case = UpdateModelEndpointByIdV1UseCase(
        model_bundle_repository=fake_model_bundle_repository,
        model_endpoint_service=fake_model_endpoint_service,
    )

    # Work on a copy so the shared fixture object is not mutated.
    bad_request = update_model_endpoint_request.copy()
    bad_request.metadata = {CONVERTED_FROM_ARTIFACT_LIKE_KEY: False}

    with pytest.raises(ObjectHasInvalidValueException):
        await update_use_case.execute(
            user=owner,
            model_endpoint_id=model_endpoint_1.record.id,
            request=bad_request,
        )
| 885 | + |
| 886 | + |
@pytest.mark.asyncio
async def test_update_model_endpoint_use_case_raises_resource_request_exception(
    fake_model_bundle_repository,
    fake_model_endpoint_service,
    model_bundle_1: ModelBundle,
    model_bundle_2: ModelBundle,
    model_bundle_4: ModelBundle,
    model_bundle_6: ModelBundle,
    model_bundle_triton_enhanced_runnable_image_0_cpu_None_memory_storage: ModelBundle,
    model_endpoint_1: ModelEndpoint,
    model_endpoint_2: ModelEndpoint,
    update_model_endpoint_request: UpdateModelEndpointV1Request,
):
    """Each invalid resource setting must raise EndpointResourceInvalidRequestException.

    Every scenario starts from a fresh copy of ``update_model_endpoint_request``,
    overrides one or two fields, and runs the update use case against one of
    the registered endpoints.
    """
    for bundle in (
        model_bundle_1,
        model_bundle_2,
        model_bundle_4,
        model_bundle_6,
        model_bundle_triton_enhanced_runnable_image_0_cpu_None_memory_storage,
    ):
        fake_model_bundle_repository.add_model_bundle(bundle)
    for endpoint in (model_endpoint_1, model_endpoint_2):
        fake_model_endpoint_service.add_model_endpoint(endpoint)
    fake_model_endpoint_service.model_bundle_repository = fake_model_bundle_repository

    update_use_case = UpdateModelEndpointByIdV1UseCase(
        model_bundle_repository=fake_model_bundle_repository,
        model_endpoint_service=fake_model_endpoint_service,
    )
    owner_id = model_endpoint_1.record.created_by
    owner = User(user_id=owner_id, team_id=owner_id, is_privileged_user=True)

    async def expect_rejected(target: ModelEndpoint, **overrides) -> None:
        """Apply ``overrides`` (in keyword order) to a request copy and expect rejection."""
        candidate = update_model_endpoint_request.copy()
        for field_name, field_value in overrides.items():
            setattr(candidate, field_name, field_value)
        with pytest.raises(EndpointResourceInvalidRequestException):
            await update_use_case.execute(
                user=owner,
                model_endpoint_id=target.record.id,
                request=candidate,
            )

    # Malformed or out-of-range basic resource values.
    await expect_rejected(model_endpoint_1, cpus=-1)
    await expect_rejected(model_endpoint_1, cpus=float("inf"))
    await expect_rejected(model_endpoint_1, memory="invalid_memory_amount")
    await expect_rejected(model_endpoint_1, memory=float("inf"))
    await expect_rejected(model_endpoint_1, storage="invalid_storage_amount")
    await expect_rejected(model_endpoint_1, storage=float("inf"))

    # specific to sync endpoint
    await expect_rejected(model_endpoint_2, min_workers=0)

    await expect_rejected(model_endpoint_1, max_workers=2**63)
    await expect_rejected(model_endpoint_1, gpus=0)
    await expect_rejected(model_endpoint_1, gpu_type=None)
    await expect_rejected(model_endpoint_1, gpu_type="invalid_gpu_type")

    instance_limits = REQUESTS_BY_GPU_TYPE[model_endpoint_1.infra_state.resource_state.gpu_type]

    # The same three limit checks are run for model_bundle_1, then model_bundle_4.
    for bundle in (model_bundle_1, model_bundle_4):
        # request.cpus + FORWARDER_CPU_USAGE > instance_limits["cpus"] should fail
        await expect_rejected(
            model_endpoint_1,
            model_bundle_id=bundle.id,
            cpus=instance_limits["cpus"],
        )
        # request.memory + FORWARDER_MEMORY_USAGE > instance_limits["memory"] should fail
        await expect_rejected(
            model_endpoint_1,
            model_bundle_id=bundle.id,
            memory=instance_limits["memory"],
        )
        # request.storage + FORWARDER_STORAGE_USAGE > STORAGE_LIMIT should fail
        await expect_rejected(
            model_endpoint_1,
            model_bundle_id=bundle.id,
            storage=STORAGE_LIMIT,
        )

    # TritonEnhancedRunnableImageFlavor specific validation logic
    # TritonEnhancedRunnableImageFlavor requires gpu >= 1
    await expect_rejected(model_endpoint_1, model_bundle_id=model_bundle_6.id, gpus=0.9)
    # TritonEnhancedRunnableImageFlavor requires gpu_type be specified
    await expect_rejected(model_endpoint_1, model_bundle_id=model_bundle_6.id, gpu_type=None)
    # request.cpus + FORWARDER_CPU_USAGE + triton_num_cpu > instance_limits["cpu"] should fail
    await expect_rejected(
        model_endpoint_1,
        model_bundle_id=model_bundle_6.id,
        cpus=instance_limits["cpus"] - FORWARDER_CPU_USAGE,
    )
    # request.memory + FORWARDER_MEMORY_USAGE + triton_memory > instance_limits["memory"] should fail
    await expect_rejected(
        model_endpoint_1,
        model_bundle_id=model_bundle_6.id,
        memory=parse_mem_request(instance_limits["memory"])
        - parse_mem_request(FORWARDER_MEMORY_USAGE),
    )
    # request.storage + FORWARDER_STORAGE_USAGE + triton_storage > STORAGE_LIMIT should fail
    await expect_rejected(
        model_endpoint_1,
        model_bundle_id=model_bundle_6.id,
        storage=parse_mem_request(STORAGE_LIMIT) - parse_mem_request(FORWARDER_STORAGE_USAGE),
    )

    # Test triton_num_cpu >= 1
    await expect_rejected(
        model_endpoint_1,
        model_bundle_id=model_bundle_triton_enhanced_runnable_image_0_cpu_None_memory_storage.id,
    )
| 1154 | + |
| 1155 | + |
858 | 1156 | @pytest.mark.asyncio
|
859 | 1157 | async def test_update_model_endpoint_raises_not_found(
|
860 | 1158 | fake_model_bundle_repository,
|
|
0 commit comments