|
8 | 8 | import collections
|
9 | 9 | import copy
|
10 | 10 | import datetime
|
| 11 | +import sys |
11 | 12 | import oci
|
12 | 13 | import warnings
|
13 | 14 | import time
|
|
70 | 71 | MODEL_DEPLOYMENT_INSTANCE_COUNT = 1
|
71 | 72 | MODEL_DEPLOYMENT_BANDWIDTH_MBPS = 10
|
72 | 73 |
|
| 74 | +TIME_FRAME = 60 |
| 75 | +MAXIMUM_PAYLOAD_SIZE = 10 * 1024 * 1024 # bytes |
| 76 | + |
73 | 77 | MODEL_DEPLOYMENT_RUNTIMES = {
|
74 | 78 | ModelDeploymentRuntimeType.CONDA: ModelDeploymentCondaRuntime,
|
75 | 79 | ModelDeploymentRuntimeType.CONTAINER: ModelDeploymentContainerRuntime,
|
@@ -253,6 +257,10 @@ class ModelDeployment(Builder):
|
253 | 257 | CONST_TIME_CREATED: "time_created",
|
254 | 258 | }
|
255 | 259 |
|
| 260 | + count_start_time = 0 |
| 261 | + request_counter = 0 |
| 262 | + estimate_request_per_second = 100 |
| 263 | + |
256 | 264 | initialize_spec_attributes = [
|
257 | 265 | "display_name",
|
258 | 266 | "description",
|
@@ -911,6 +919,8 @@ def predict(
|
911 | 919 | raise AttributeError(
|
912 | 920 | "`data` and `json_input` are both provided. You can only use one of them."
|
913 | 921 | )
|
| 922 | + |
| 923 | + self._validate_bandwidth(data or json_input) |
914 | 924 |
|
915 | 925 | if auto_serialize_data:
|
916 | 926 | data = data or json_input
|
@@ -1794,6 +1804,45 @@ def _extract_spec_kwargs(self, **kwargs) -> Dict:
|
1794 | 1804 | if attribute in kwargs:
|
1795 | 1805 | spec_kwargs[attribute] = kwargs[attribute]
|
1796 | 1806 | return spec_kwargs
|
| 1807 | + |
def _validate_bandwidth(self, data: Any):
    """Validates payload size and load balancer bandwidth.

    The payload size is measured as the number of bytes the payload
    occupies once serialized (raw length for bytes, UTF-8 length for
    strings, JSON-encoded length for JSON-compatible objects).
    ``sys.getsizeof`` is only used as a fallback for objects that ``json``
    cannot encode, because it reports the shallow size of a container and
    ignores the elements it holds.

    The method also maintains a per-``TIME_FRAME`` request counter that is
    used to estimate the number of requests per second for the bandwidth
    check.

    Parameters
    ----------
    data: Any
        Data or JSON payload for the prediction.

    Raises
    ------
    ValueError
        If the payload is larger than ``MAXIMUM_PAYLOAD_SIZE``, if the
        infrastructure or runtime is not configured, or if (in HTTPS mode)
        the estimated traffic exceeds the load balancer bandwidth.
    """
    # Local import keeps this method self-contained; `json` is only needed here.
    import json

    if isinstance(data, (bytes, bytearray)):
        payload_size = len(data)
    elif isinstance(data, str):
        payload_size = len(data.encode("utf-8"))
    else:
        try:
            payload_size = len(json.dumps(data).encode("utf-8"))
        except (TypeError, ValueError):
            # Not JSON-serializable (or circular): fall back to the
            # object's own size report.
            payload_size = sys.getsizeof(data)

    if payload_size > MAXIMUM_PAYLOAD_SIZE:
        raise ValueError(
            f"Payload size exceeds the maximum allowed {MAXIMUM_PAYLOAD_SIZE} bytes. Size down the payload."
        )

    time_now = int(time.time())
    # A start time of 0 means no counting window is currently open.
    if self.count_start_time == 0:
        self.count_start_time = time_now
    if time_now - self.count_start_time < TIME_FRAME:
        self.request_counter += 1
    else:
        # The window has elapsed: refresh the estimate from the finished
        # window, then let the current request open the next window so it
        # is not dropped from the statistics.
        self.estimate_request_per_second = self.request_counter // TIME_FRAME
        self.request_counter = 1
        self.count_start_time = time_now

    if not self.infrastructure or not self.runtime:
        raise ValueError(
            "Missing parameter infrastructure or runtime. "
            "Try rerunning it after parameters are fully configured."
        )

    # load balancer bandwidth is only needed for HTTPS mode.
    if self.runtime.deployment_mode == ModelDeploymentMode.HTTPS:
        bandwidth_mbps = (
            self.infrastructure.bandwidth_mbps or MODEL_DEPLOYMENT_BANDWIDTH_MBPS
        )
        # formula: (payload size in KB) * (estimated requests per second) * 8 / 1024
        # 20% extra for estimation errors and sporadic peak traffic
        payload_size_in_kb = payload_size / 1024
        required_mbps = (
            payload_size_in_kb * self.estimate_request_per_second * 8 * 1.2
        ) / 1024
        if required_mbps > bandwidth_mbps:
            raise ValueError(
                f"Load balancer bandwidth exceeds the allocated {bandwidth_mbps} Mbps. "
                "Try sizing down the payload, slowing down the request rate or increasing bandwidth."
            )
1797 | 1846 |
|
1798 | 1847 | def build(self) -> "ModelDeployment":
|
1799 | 1848 | """Load default values from the environment for the job infrastructure."""
|
|
0 commit comments