|
8 | 8 | import collections
|
9 | 9 | import copy
|
10 | 10 | import datetime
|
11 |
| -import sys |
12 | 11 | import oci
|
13 | 12 | import warnings
|
14 | 13 | import time
|
|
72 | 71 | MODEL_DEPLOYMENT_INSTANCE_COUNT = 1
|
73 | 72 | MODEL_DEPLOYMENT_BANDWIDTH_MBPS = 10
|
74 | 73 |
|
75 |
| -TIME_FRAME = 60 |
76 |
| -MAXIMUM_PAYLOAD_SIZE = 10 * 1024 * 1024 # bytes |
77 |
| - |
78 | 74 | MODEL_DEPLOYMENT_RUNTIMES = {
|
79 | 75 | ModelDeploymentRuntimeType.CONDA: ModelDeploymentCondaRuntime,
|
80 | 76 | ModelDeploymentRuntimeType.CONTAINER: ModelDeploymentContainerRuntime,
|
@@ -253,10 +249,6 @@ class ModelDeployment(Builder):
|
253 | 249 | CONST_TIME_CREATED: "time_created",
|
254 | 250 | }
|
255 | 251 |
|
256 |
| - count_start_time = 0 |
257 |
| - request_counter = 0 |
258 |
| - estimate_request_per_second = 100 |
259 |
| - |
260 | 252 | initialize_spec_attributes = [
|
261 | 253 | "display_name",
|
262 | 254 | "description",
|
@@ -915,51 +907,60 @@ def predict(
|
915 | 907 | raise AttributeError(
|
916 | 908 | "`data` and `json_input` are both provided. You can only use one of them."
|
917 | 909 | )
|
918 |
| - |
919 |
| - if auto_serialize_data: |
920 |
| - data = data or json_input |
921 |
| - serialized_data = serializer.serialize(data=data) |
922 |
| - self._validate_bandwidth(serialized_data) |
923 |
| - return send_request( |
924 |
| - data=serialized_data, |
925 |
| - endpoint=endpoint, |
926 |
| - is_json_payload=_is_json_serializable(serialized_data), |
927 |
| - header=header, |
928 |
| - ) |
929 | 910 |
|
930 |
| - if json_input is not None: |
931 |
| - if not _is_json_serializable(json_input): |
932 |
| - raise ValueError( |
933 |
| - "`json_input` must be json serializable. " |
934 |
| - "Set `auto_serialize_data` to True, or serialize the provided input data first," |
935 |
| - "or using `data` to pass binary data." |
| 911 | + try: |
| 912 | + if auto_serialize_data: |
| 913 | + data = data or json_input |
| 914 | + serialized_data = serializer.serialize(data=data) |
| 915 | + return send_request( |
| 916 | + data=serialized_data, |
| 917 | + endpoint=endpoint, |
| 918 | + is_json_payload=_is_json_serializable(serialized_data), |
| 919 | + header=header, |
936 | 920 | )
|
937 |
| - utils.get_logger().warning( |
938 |
| - "The `json_input` argument of `predict()` will be deprecated soon. " |
939 |
| - "Please use `data` argument. " |
940 |
| - ) |
941 |
| - data = json_input |
942 | 921 |
|
943 |
| - is_json_payload = _is_json_serializable(data) |
944 |
| - if not isinstance(data, bytes) and not is_json_payload: |
945 |
| - raise TypeError( |
946 |
| - "`data` is not bytes or json serializable. Set `auto_serialize_data` to `True` to serialize the input data." |
947 |
| - ) |
948 |
| - if model_name and model_version: |
949 |
| - header["model-name"] = model_name |
950 |
| - header["model-version"] = model_version |
951 |
| - elif bool(model_version) ^ bool(model_name): |
952 |
| - raise ValueError( |
953 |
| - "`model_name` and `model_version` have to be provided together." |
| 922 | + if json_input is not None: |
| 923 | + if not _is_json_serializable(json_input): |
| 924 | + raise ValueError( |
| 925 | + "`json_input` must be json serializable. " |
| 926 | + "Set `auto_serialize_data` to True, or serialize the provided input data first," |
| 927 | + "or using `data` to pass binary data." |
| 928 | + ) |
| 929 | + utils.get_logger().warning( |
| 930 | + "The `json_input` argument of `predict()` will be deprecated soon. " |
| 931 | + "Please use `data` argument. " |
| 932 | + ) |
| 933 | + data = json_input |
| 934 | + |
| 935 | + is_json_payload = _is_json_serializable(data) |
| 936 | + if not isinstance(data, bytes) and not is_json_payload: |
| 937 | + raise TypeError( |
| 938 | + "`data` is not bytes or json serializable. Set `auto_serialize_data` to `True` to serialize the input data." |
| 939 | + ) |
| 940 | + if model_name and model_version: |
| 941 | + header["model-name"] = model_name |
| 942 | + header["model-version"] = model_version |
| 943 | + elif bool(model_version) ^ bool(model_name): |
| 944 | + raise ValueError( |
| 945 | + "`model_name` and `model_version` have to be provided together." |
| 946 | + ) |
| 947 | + prediction = send_request( |
| 948 | + data=data, |
| 949 | + endpoint=endpoint, |
| 950 | + is_json_payload=is_json_payload, |
| 951 | + header=header, |
954 | 952 | )
|
955 |
| - self._validate_bandwidth(data) |
956 |
| - prediction = send_request( |
957 |
| - data=data, |
958 |
| - endpoint=endpoint, |
959 |
| - is_json_payload=is_json_payload, |
960 |
| - header=header, |
961 |
| - ) |
962 |
| - return prediction |
| 953 | + return prediction |
| 954 | + except oci.exceptions.ServiceError as ex: |
| 955 | + # When bandwidth exceeds the allocated value, TooManyRequests error (429) will be raised by oci backend. |
| 956 | + if ex.status == 429: |
| 957 | + bandwidth_mbps = self.infrastructure.bandwidth_mbps or MODEL_DEPLOYMENT_BANDWIDTH_MBPS |
| 958 | + utils.get_logger().warning( |
| 959 | + f"Load balancer bandwidth exceeds the allocated {bandwidth_mbps} Mbps." |
| 960 | + "To estimate the actual bandwidth, use formula: (payload size in KB) * (estimated requests per second) * 8 / 1024." |
| 961 | + "To resolve the issue, try sizing down the payload, slowing down the request rate or increasing the allocated bandwidth." |
| 962 | + ) |
| 963 | + raise |
963 | 964 |
|
964 | 965 | def activate(
|
965 | 966 | self,
|
@@ -1800,45 +1801,6 @@ def _extract_spec_kwargs(self, **kwargs) -> Dict:
|
1800 | 1801 | if attribute in kwargs:
|
1801 | 1802 | spec_kwargs[attribute] = kwargs[attribute]
|
1802 | 1803 | return spec_kwargs
|
1803 |
| - |
1804 |
| - def _validate_bandwidth(self, data: Any): |
1805 |
| - """Validates payload size and load balancer bandwidth. |
1806 |
| -
|
1807 |
| - Parameters |
1808 |
| - ---------- |
1809 |
| - data: Any |
1810 |
| - Data or JSON payload for the prediction. |
1811 |
| - """ |
1812 |
| - payload_size = sys.getsizeof(data) |
1813 |
| - if payload_size > MAXIMUM_PAYLOAD_SIZE: |
1814 |
| - raise ValueError( |
1815 |
| - f"Payload size exceeds the maximum allowed {MAXIMUM_PAYLOAD_SIZE} bytes. Size down the payload." |
1816 |
| - ) |
1817 |
| - |
1818 |
| - time_now = int(time.time()) |
1819 |
| - if self.count_start_time == 0: |
1820 |
| - self.count_start_time = time_now |
1821 |
| - if time_now - self.count_start_time < TIME_FRAME: |
1822 |
| - self.request_counter += 1 |
1823 |
| - else: |
1824 |
| - self.estimate_request_per_second = (int)(self.request_counter / TIME_FRAME) |
1825 |
| - self.request_counter = 0 |
1826 |
| - self.count_start_time = 0 |
1827 |
| - |
1828 |
| - if not self.infrastructure or not self.runtime: |
1829 |
| - raise ValueError("Missing parameter infrastructure or runtime. Try reruning it after parameters are fully configured.") |
1830 |
| - |
1831 |
| - # load balancer bandwidth is only needed for HTTPS mode. |
1832 |
| - if self.runtime.deployment_mode == ModelDeploymentMode.HTTPS: |
1833 |
| - bandwidth_mbps = self.infrastructure.bandwidth_mbps or MODEL_DEPLOYMENT_BANDWIDTH_MBPS |
1834 |
| - # formula: (payload size in KB) * (estimated requests per second) * 8 / 1024 |
1835 |
| - # 20% extra for estimation errors and sporadic peak traffic |
1836 |
| - payload_size_in_kb = payload_size / 1024 |
1837 |
| - if (payload_size_in_kb * self.estimate_request_per_second * 8 * 1.2) / 1024 > bandwidth_mbps: |
1838 |
| - raise ValueError( |
1839 |
| - f"Load balancer bandwidth exceeds the allocated {bandwidth_mbps} Mbps." |
1840 |
| - "Try sizing down the payload, slowing down the request rate or increasing bandwidth." |
1841 |
| - ) |
1842 | 1804 |
|
1843 | 1805 | def build(self) -> "ModelDeployment":
|
1844 | 1806 | """Load default values from the environment for the job infrastructure."""
|
|
0 commit comments