Commit c20b4d8

ODSC-50530: Pack recommended infra config into operator yaml (#467)

2 parents 22d8de0 + fb96b39

File tree

6 files changed: +180 −15 lines

ads/opctl/operator/cmd.py

Lines changed: 4 additions & 3 deletions

```diff
@@ -208,15 +208,16 @@ def init(

         with fsspec.open(
             os.path.join(
-                output, f"{operator_info.type}_{'_'.join(key).replace('.','_')}.yaml"
+                output,
+                f"{operator_info.type}_{'_'.join(key).replace('.','_')}_backend.yaml",
             ),
             mode="w",
         ) as f:
             f.write(yaml.dump(tmp_config))

-    logger.info("#" * 100)
+    logger.info("#" * 50)
     logger.info(f"The auto-generated configs have been placed in: {output}")
-    logger.info("#" * 100)
+    logger.info("#" * 50)


 @runtime_dependency(module="docker", install_from=OptionalDependency.OPCTL)
```
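The `cmd.py` change renames the generated backend configs from a plain `.yaml` suffix to a `_backend.yaml` suffix. A minimal sketch of the new naming logic, using a hypothetical operator type and backend key (the real values come from `operator_info` and the backend registry at runtime):

```python
# Sketch of the renamed output path; the operator type and backend key
# below are illustrative stand-ins for values supplied by ADS.
import os

operator_type = "forecast"      # operator_info.type
key = ("job", "python")         # a backend key tuple
output = os.path.expanduser("~/forecast")

filename = f"{operator_type}_{'_'.join(key).replace('.', '_')}_backend.yaml"
print(os.path.join(output, filename))
# e.g. /home/<user>/forecast/forecast_job_python_backend.yaml
```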

ads/opctl/operator/common/backend_factory.py

Lines changed: 10 additions & 1 deletion

```diff
@@ -419,7 +419,16 @@ def _init_backend_config(

         # get config info from ini files
         p = ConfigProcessor(
-            {**runtime_kwargs, **{"execution": {"backend": resource_type}}}
+            {
+                **runtime_kwargs,
+                **{"execution": {"backend": resource_type}},
+                **{
+                    "infrastructure": {
+                        **operator_info.jobs_default_params.to_dict(),
+                        **operator_info.dataflow_default_params.to_dict(),
+                    }
+                },
+            }
         ).step(
             ConfigMerger,
             ads_config=ads_config or DEFAULT_ADS_CONFIG_FOLDER,
```
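The effect of this change is that the operator's packaged defaults are merged under an `infrastructure` key before `ConfigMerger` runs. A rough sketch of the resulting payload, with the default values introduced in `operator_loader.py` below (field names are taken from the diff; the surrounding plumbing is simplified):

```python
# Simplified view of the dict now handed to ConfigProcessor; in ADS the
# two inner default sets come from jobs_default_params.to_dict() and
# dataflow_default_params.to_dict().
runtime_kwargs = {}        # whatever the caller passed in
resource_type = "job"

config = {
    **runtime_kwargs,
    "execution": {"backend": resource_type},
    "infrastructure": {
        # Jobs defaults
        "shape_name": "VM.Standard.E4.Flex",
        "ocpus": 32,
        "memory_in_gbs": 512,
        "block_storage_size_in_GBs": 512,
        # Data Flow defaults
        "spark_version": "3.2.1",
        "driver_shape": "VM.Standard.E4.Flex",
        "driver_shape_ocpus": 32,
        "driver_shape_memory_in_gbs": 512,
        "num_executors": 1,
        "executor_shape": "VM.Standard.E4.Flex",
        "executor_shape_ocpus": 32,
        "executor_shape_memory_in_gbs": 512,
    },
}
```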

ads/opctl/operator/common/operator_loader.py

Lines changed: 84 additions & 2 deletions

```diff
@@ -13,7 +13,7 @@
 import sys
 import tempfile
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Any, Dict, List
 from urllib.parse import urlparse

@@ -33,6 +33,74 @@
 LOCAL_SCHEME = "local"
 MAIN_BRANCH = "main"

+DEFAULT_SHAPE = "VM.Standard.E4.Flex"
+DEFAULT_OCPUS = 32
+DEFAULT_MEMORY_IN_GBS = 512
+DEFAULT_BLOCK_STORAGE_SIZE_IN_GBS = 512
+DEFAULT_SPARK_VERSION = "3.2.1"
+DEFAULT_NUM_OF_EXECUTORS = 1
+
+
+@dataclass(repr=True)
+class JobsDefaultParams(DataClassSerializable):
+    """Class representing the default params for the Data Science Job.
+
+    Attributes
+    ----------
+    shape_name (str)
+        The name of the shape.
+    ocpus (int)
+        The OCPU count.
+    memory_in_gbs (int)
+        The size of the memory in GBs.
+    block_storage_size_in_GBs (int)
+        The size of the block storage drive.
+    """
+
+    shape_name: str = DEFAULT_SHAPE
+    ocpus: int = DEFAULT_OCPUS
+    memory_in_gbs: int = DEFAULT_MEMORY_IN_GBS
+    block_storage_size_in_GBs: int = DEFAULT_BLOCK_STORAGE_SIZE_IN_GBS
+
+    @classmethod
+    def from_dict(cls, *args, **kwargs: Dict) -> "JobsDefaultParams":
+        return super().from_dict(*args, **{**kwargs, **{"side_effect": None}})
+
+
+@dataclass(repr=True)
+class DataFlowDefaultParams(DataClassSerializable):
+    """Class representing the default params for the Data Flow Application.
+
+    Attributes
+    ----------
+    driver_shape (str)
+        The name of the driver shape.
+    driver_shape_ocpus (int)
+        The OCPU count for the driver shape.
+    driver_shape_memory_in_gbs (int)
+        The size of the memory in GBs for the driver shape.
+    executor_shape (str)
+        The name of the executor shape.
+    executor_shape_ocpus (int)
+        The OCPU count for the executor shape.
+    executor_shape_memory_in_gbs (int)
+        The size of the memory in GBs for the executor shape.
+    num_executors (int)
+        The number of executors.
+    spark_version (str)
+        The version of Spark.
+    """
+
+    spark_version: str = DEFAULT_SPARK_VERSION
+    driver_shape: str = DEFAULT_SHAPE
+    driver_shape_ocpus: int = DEFAULT_OCPUS
+    driver_shape_memory_in_gbs: int = DEFAULT_MEMORY_IN_GBS
+
+    num_executors: int = DEFAULT_NUM_OF_EXECUTORS
+    executor_shape: str = DEFAULT_SHAPE
+    executor_shape_ocpus: int = DEFAULT_OCPUS
+    executor_shape_memory_in_gbs: int = DEFAULT_MEMORY_IN_GBS
+

 @dataclass(repr=True)
 class OperatorInfo(DataClassSerializable):
@@ -57,11 +125,17 @@ class OperatorInfo(DataClassSerializable):
     conda_type (str)
         The type of conda pack (e.g., PACK_TYPE.CUSTOM).
     path (str)
-        The location of the operator.
+        The physical location of the operator.
     keywords (List[str])
         Keywords associated with the operator.
     backends (List[str])
         List of supported backends.
+    jobs_default_params (JobsDefaultParams)
+        The default params for the Jobs service.
+        Used when the operator runs on the Jobs service.
+    dataflow_default_params (DataFlowDefaultParams)
+        The default params for the Data Flow service.
+        Used when the operator runs on the Data Flow service.

     Properties
     ----------
@@ -79,6 +153,10 @@ class OperatorInfo(DataClassSerializable):
     path: str = ""
     keywords: List[str] = None
     backends: List[str] = None
+    jobs_default_params: JobsDefaultParams = field(default_factory=JobsDefaultParams)
+    dataflow_default_params: DataFlowDefaultParams = field(
+        default_factory=DataFlowDefaultParams
+    )

     @property
     def conda_prefix(self) -> str:
@@ -107,6 +185,10 @@ def __post_init__(self):
         self.version = self.version or "v1"
         self.conda_type = self.conda_type or PACK_TYPE.CUSTOM
         self.conda = self.conda or f"{self.type}_{self.version}"
+        self.jobs_default_params = self.jobs_default_params or JobsDefaultParams()
+        self.dataflow_default_params = (
+            self.dataflow_default_params or DataFlowDefaultParams()
+        )

     @classmethod
     def from_yaml(
```
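The two new dataclasses can be exercised on their own. A self-contained sketch below approximates `DataClassSerializable.to_dict()` with `dataclasses.asdict` (an assumption made only for illustration) to show how the defaults flatten into the infrastructure section:

```python
from dataclasses import asdict, dataclass

# Stand-ins for the new classes, without the DataClassSerializable base;
# asdict() approximates to_dict() here for illustration only.
@dataclass
class JobsDefaultParams:
    shape_name: str = "VM.Standard.E4.Flex"
    ocpus: int = 32
    memory_in_gbs: int = 512
    block_storage_size_in_GBs: int = 512


@dataclass
class DataFlowDefaultParams:
    spark_version: str = "3.2.1"
    driver_shape: str = "VM.Standard.E4.Flex"
    driver_shape_ocpus: int = 32
    driver_shape_memory_in_gbs: int = 512
    num_executors: int = 1
    executor_shape: str = "VM.Standard.E4.Flex"
    executor_shape_ocpus: int = 32
    executor_shape_memory_in_gbs: int = 512


# Merge both default sets, mirroring the "infrastructure" dict built in
# backend_factory.py above.
infrastructure = {**asdict(JobsDefaultParams()), **asdict(DataFlowDefaultParams())}
assert infrastructure["ocpus"] == 32
assert infrastructure["spark_version"] == "3.2.1"
```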

ads/opctl/operator/common/operator_schema.yaml

Lines changed: 72 additions & 0 deletions

```diff
@@ -56,3 +56,75 @@ conda:
   type: string
   meta:
     description: "The operator's conda environment name. Will be auto-generated if not provided."
+jobs_default_params:
+  required: false
+  type: dict
+  meta:
+    description: "The default parameters for the Data Science Job."
+  schema:
+    shape_name:
+      required: false
+      type: string
+      meta:
+        description: "The name of the shape."
+    ocpus:
+      required: false
+      type: integer
+      meta:
+        description: "The OCPU count."
+    memory_in_gbs:
+      required: false
+      type: integer
+      meta:
+        description: "The size of the memory in GBs."
+    block_storage_size_in_GBs:
+      required: false
+      type: integer
+      meta:
+        description: "The size of the block storage drive."
+dataflow_default_params:
+  required: false
+  type: dict
+  meta:
+    description: "The default parameters for the Data Flow service."
+  schema:
+    spark_version:
+      required: false
+      type: string
+      meta:
+        description: "The version of Spark."
+    driver_shape:
+      required: false
+      type: string
+      meta:
+        description: "The name of the driver shape."
+    driver_shape_ocpus:
+      required: false
+      type: integer
+      meta:
+        description: "The OCPU count for the driver shape."
+    driver_shape_memory_in_gbs:
+      required: false
+      type: integer
+      meta:
+        description: "The size of the memory in GBs for the driver shape."
+    executor_shape:
+      required: false
+      type: string
+      meta:
+        description: "The name of the executor shape."
+    executor_shape_ocpus:
+      required: false
+      type: integer
+      meta:
+        description: "The OCPU count for the executor shape."
+    executor_shape_memory_in_gbs:
+      required: false
+      type: integer
+      meta:
+        description: "The size of the memory in GBs for the executor shape."
+    num_executors:
+      required: false
+      type: integer
+      meta:
+        description: "The number of executors."
```

ads/opctl/operator/lowcode/forecast/MLoperator

Lines changed: 5 additions & 0 deletions

```diff
@@ -4,6 +4,11 @@ name: Forecasting Operator
 conda_type: published
 conda: forecast_v1
 gpu: no
+jobs_default_params:
+  shape_name: VM.Standard.E4.Flex
+  ocpus: 32
+  memory_in_gbs: 512
+  block_storage_size_in_GBs: 512
 keywords:
   - Prophet
   - AutoML
```
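The forecast operator pins only Jobs defaults. An operator targeting Data Flow could declare the analogous block; the following is a hypothetical spec fragment (not part of this commit), with field names following the `dataflow_default_params` schema added above:

```yaml
# Hypothetical MLoperator fragment for an operator that runs on Data Flow.
dataflow_default_params:
  spark_version: "3.2.1"
  driver_shape: VM.Standard.E4.Flex
  driver_shape_ocpus: 32
  driver_shape_memory_in_gbs: 512
  executor_shape: VM.Standard.E4.Flex
  executor_shape_ocpus: 32
  executor_shape_memory_in_gbs: 512
  num_executors: 1
```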

ads/opctl/operator/lowcode/forecast/README.md

Lines changed: 5 additions & 9 deletions

```diff
@@ -6,11 +6,7 @@ Below are the steps to configure and run the Forecasting Operator on different r

 ## 1. Prerequisites

-Follow the [CLI Configuration](https://accelerated-data-science.readthedocs.io/en/latest/user_guide/cli/opctl/configure.html) steps from the ADS documentation. This step is mandatory as it sets up default values for different options while running the Forecasting Operator on OCI Data Science jobs or OCI Data Flow applications. If you have previously done this and used a flexible shape, make sure to adjust `ml_job_config.ini` with shape config details and `docker_registry` information.
-
-- ocpus = 1
-- memory_in_gbs = 16
-- docker_registry = `<iad.ocir.io/namespace/>`
+Follow the [CLI Configuration](https://accelerated-data-science.readthedocs.io/en/latest/user_guide/cli/opctl/configure.html) steps from the ADS documentation. This step is mandatory as it sets up default values for different options while running the Forecasting Operator on OCI Data Science Jobs.

 ## 2. Generating configs

@@ -23,10 +19,10 @@ ads operator init -t forecast --overwrite --output ~/forecast/

 The most important files expected to be generated are:

 - `forecast.yaml`: Contains forecast-related configuration.
-- `backend_operator_local_python_config.yaml`: This includes a local backend configuration for running forecasting in a local environment. The environment should be set up manually before running the operator.
-- `backend_operator_local_container_config.yaml`: This includes a local backend configuration for running forecasting within a local container. The container should be built before running the operator. Please refer to the instructions below for details on how to accomplish this.
-- `backend_job_container_config.yaml`: Contains Data Science job-related config to run forecasting in a Data Science job within a container (BYOC) runtime. The container should be built and published before running the operator. Please refer to the instructions below for details on how to accomplish this.
-- `backend_job_python_config.yaml`: Contains Data Science job-related config to run forecasting in a Data Science job within a conda runtime. The conda should be built and published before running the operator.
+- `forecast_operator_local_python_backend.yaml`: This includes a local backend configuration for running forecasting in a local environment. The environment should be set up manually before running the operator.
+- `forecast_operator_local_container_backend.yaml`: This includes a local backend configuration for running forecasting within a local container. The container should be built before running the operator. Please refer to the instructions below for details on how to accomplish this.
+- `forecast_job_container_backend.yaml`: Contains Data Science job-related config to run forecasting in a Data Science job within a container (BYOC) runtime. The container should be built and published before running the operator. Please refer to the instructions below for details on how to accomplish this.
+- `forecast_job_python_backend.yaml`: Contains Data Science job-related config to run forecasting in a Data Science job within a conda runtime. The conda should be built and published before running the operator.

 All generated configurations should be ready to use without the need for any additional adjustments. However, they are provided as starter kit configurations that can be customized as needed.
```
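With the renamed configs in place, a typical flow might look like the following; the `init` command is taken from the README above, while the `run` invocation is a sketch based on the ADS opctl CLI and its exact flags should be confirmed against the docs:

```sh
# Generate starter configs, then run the operator against the Jobs backend.
ads operator init -t forecast --overwrite --output ~/forecast/
ads operator run -f ~/forecast/forecast.yaml -b ~/forecast/forecast_job_python_backend.yaml
```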
