|
40 | 40 | from ads.model.deployment.model_deployment_runtime import (
|
41 | 41 | ModelDeploymentCondaRuntime,
|
42 | 42 | ModelDeploymentContainerRuntime,
|
| 43 | + ModelDeploymentMode, |
43 | 44 | ModelDeploymentRuntime,
|
44 | 45 | ModelDeploymentRuntimeType,
|
45 | 46 | OCIModelDeploymentRuntimeType,
|
@@ -80,11 +81,6 @@ class ModelDeploymentLogType:
|
80 | 81 | ACCESS = "access"
|
81 | 82 |
|
82 | 83 |
|
83 |
| -class ModelDeploymentMode: |
84 |
| - HTTPS = "HTTPS_ONLY" |
85 |
| - STREAM = "STREAM_ONLY" |
86 |
| - |
87 |
| - |
88 | 84 | class LogNotConfiguredError(Exception): # pragma: no cover
|
89 | 85 | pass
|
90 | 86 |
|
@@ -911,48 +907,59 @@ def predict(
|
911 | 907 | "`data` and `json_input` are both provided. You can only use one of them."
|
912 | 908 | )
|
913 | 909 |
|
914 |
| - if auto_serialize_data: |
915 |
| - data = data or json_input |
916 |
| - serialized_data = serializer.serialize(data=data) |
917 |
| - return send_request( |
918 |
| - data=serialized_data, |
919 |
| - endpoint=endpoint, |
920 |
| - is_json_payload=_is_json_serializable(serialized_data), |
921 |
| - header=header, |
922 |
| - ) |
| 910 | + try: |
| 911 | + if auto_serialize_data: |
| 912 | + data = data or json_input |
| 913 | + serialized_data = serializer.serialize(data=data) |
| 914 | + return send_request( |
| 915 | + data=serialized_data, |
| 916 | + endpoint=endpoint, |
| 917 | + is_json_payload=_is_json_serializable(serialized_data), |
| 918 | + header=header, |
| 919 | + ) |
923 | 920 |
|
924 |
| - if json_input is not None: |
925 |
| - if not _is_json_serializable(json_input): |
926 |
| - raise ValueError( |
927 |
| - "`json_input` must be json serializable. " |
928 |
| - "Set `auto_serialize_data` to True, or serialize the provided input data first," |
929 |
| - "or using `data` to pass binary data." |
| 921 | + if json_input is not None: |
| 922 | + if not _is_json_serializable(json_input): |
| 923 | + raise ValueError( |
| 924 | + "`json_input` must be json serializable. " |
| 925 | + "Set `auto_serialize_data` to True, or serialize the provided input data first," |
| 926 | + "or using `data` to pass binary data." |
| 927 | + ) |
| 928 | + utils.get_logger().warning( |
| 929 | + "The `json_input` argument of `predict()` will be deprecated soon. " |
| 930 | + "Please use `data` argument. " |
930 | 931 | )
|
931 |
| - utils.get_logger().warning( |
932 |
| - "The `json_input` argument of `predict()` will be deprecated soon. " |
933 |
| - "Please use `data` argument. " |
934 |
| - ) |
935 |
| - data = json_input |
| 932 | + data = json_input |
936 | 933 |
|
937 |
| - is_json_payload = _is_json_serializable(data) |
938 |
| - if not isinstance(data, bytes) and not is_json_payload: |
939 |
| - raise TypeError( |
940 |
| - "`data` is not bytes or json serializable. Set `auto_serialize_data` to `True` to serialize the input data." |
941 |
| - ) |
942 |
| - if model_name and model_version: |
943 |
| - header["model-name"] = model_name |
944 |
| - header["model-version"] = model_version |
945 |
| - elif bool(model_version) ^ bool(model_name): |
946 |
| - raise ValueError( |
947 |
| - "`model_name` and `model_version` have to be provided together." |
| 934 | + is_json_payload = _is_json_serializable(data) |
| 935 | + if not isinstance(data, bytes) and not is_json_payload: |
| 936 | + raise TypeError( |
| 937 | + "`data` is not bytes or json serializable. Set `auto_serialize_data` to `True` to serialize the input data." |
| 938 | + ) |
| 939 | + if model_name and model_version: |
| 940 | + header["model-name"] = model_name |
| 941 | + header["model-version"] = model_version |
| 942 | + elif bool(model_version) ^ bool(model_name): |
| 943 | + raise ValueError( |
| 944 | + "`model_name` and `model_version` have to be provided together." |
| 945 | + ) |
| 946 | + prediction = send_request( |
| 947 | + data=data, |
| 948 | + endpoint=endpoint, |
| 949 | + is_json_payload=is_json_payload, |
| 950 | + header=header, |
948 | 951 | )
|
949 |
| - prediction = send_request( |
950 |
| - data=data, |
951 |
| - endpoint=endpoint, |
952 |
| - is_json_payload=is_json_payload, |
953 |
| - header=header, |
954 |
| - ) |
955 |
| - return prediction |
| 952 | + return prediction |
| 953 | + except oci.exceptions.ServiceError as ex: |
| 954 | + # When the request bandwidth exceeds the allocated value, the OCI backend raises a TooManyRequests error (429). |
| 955 | + if ex.status == 429: |
| 956 | + bandwidth_mbps = self.infrastructure.bandwidth_mbps or MODEL_DEPLOYMENT_BANDWIDTH_MBPS |
| 957 | + utils.get_logger().warning( |
| 958 | + f"Load balancer bandwidth exceeds the allocated {bandwidth_mbps} Mbps." |
| 959 | + "To estimate the actual bandwidth, use formula: (payload size in KB) * (estimated requests per second) * 8 / 1024." |
| 960 | + "To resolve the issue, try sizing down the payload, slowing down the request rate or increasing the allocated bandwidth." |
| 961 | + ) |
| 962 | + raise |
956 | 963 |
|
957 | 964 | def activate(
|
958 | 965 | self,
|
|
0 commit comments