
Commit 3e24cd2

ODSC-36246: adding more example
1 parent 32f80a1 commit 3e24cd2

2 files changed (+265, -19 lines)


docs/source/user_guide/model_registration/quick_start.rst

Lines changed: 0 additions & 1 deletion
@@ -296,7 +296,6 @@ Other Frameworks

     from ads.model.generic_model import GenericModel
     from catboost import CatBoostRegressor

-
     ads.set_auth(auth="resource_principal")

     # Initialize data

docs/source/user_guide/model_serialization/genericmodel.rst

Lines changed: 265 additions & 18 deletions

@@ -139,26 +139,57 @@ By default, the ``GenericModel`` serializes to a pickle file. The following exam

 .. code-block:: python3

     import tempfile
-    from ads.catalog.model import ModelCatalog
+
+    import ads
     from ads.model.generic_model import GenericModel
+    from catboost import CatBoostRegressor

-    class Toy:
-        def predict(self, x):
-            return x ** 2
-    model = Toy()
-
-    generic_model = GenericModel(estimator=model, artifact_dir=tempfile.mkdtemp())
-    generic_model.summary_status()
-    generic_model.prepare(
-        inference_conda_env="dataexpl_p37_cpu_v3",
-        model_file_name="toy_model.pkl",
-        force_overwrite=True
-    )
-    generic_model.verify(2)
-    model_id = generic_model.save()
-    generic_model.deploy()
-    generic_model.predict(2)
-    generic_model.delete_deployment(wait_for_completion=True)
+
+    ads.set_auth(auth="resource_principal")
+
+    # Initialize data
+    X_train = [[1, 4, 5, 6],
+               [4, 5, 6, 7],
+               [30, 40, 50, 60]]
+
+    X_test = [[2, 4, 6, 8],
+              [1, 4, 50, 60]]
+
+    y_train = [10, 20, 30]
+
+    # Initialize CatBoostRegressor
+    catboost_estimator = CatBoostRegressor(iterations=2,
+                                           learning_rate=1,
+                                           depth=2)
+    # Train a CatBoostRegressor model
+    catboost_estimator.fit(X_train, y_train)
+
+    # Get predictions
+    preds = catboost_estimator.predict(X_test)
+
+    # Instantiate ads.model.generic_model.GenericModel using the trained CatBoost model
+    catboost_model = GenericModel(estimator=catboost_estimator,
+                                  artifact_dir=tempfile.mkdtemp(),
+                                  model_save_serializer="cloudpickle",
+                                  model_input_serializer="json")
+
+    # Autogenerate score.py, pickled model, runtime.yaml, input_schema.json and output_schema.json
+    catboost_model.prepare(
+        inference_conda_env="oci://bucket@namespace/path/to/your/conda/pack",
+        inference_python_version="your_python_version",
+        X_sample=X_train,
+        y_sample=y_train,
+    )
+
+    # Verify generated artifacts
+    catboost_model.verify(X_test, auto_serialize_data=True)
+
+    # Register the CatBoostRegressor model
+    model_id = catboost_model.save(display_name="CatBoost Model")
+    catboost_model.deploy()
+    catboost_model.predict(X_test)
+    catboost_model.delete_deployment(wait_for_completion=True)
     ModelCatalog(compartment_id=os.environ['NB_SESSION_COMPARTMENT_OCID']).delete_model(model_id)

 You can also use the shortcut ``.prepare_save_deploy()`` instead of calling ``.prepare()``, ``.save()`` and ``.deploy()`` separately.
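
For reference, here is a minimal sketch of the shortcut applied to the CatBoost example above. It assumes ``.prepare_save_deploy()`` accepts the same preparation arguments as ``.prepare()``; check the signature in your ADS version.

.. code-block:: python3

    # Sketch only: one call in place of .prepare(), .save(), and .deploy()
    catboost_model = GenericModel(estimator=catboost_estimator,
                                  artifact_dir=tempfile.mkdtemp())
    catboost_model.prepare_save_deploy(
        inference_conda_env="oci://bucket@namespace/path/to/your/conda/pack",
        inference_python_version="your_python_version",
        X_sample=X_train,
        y_sample=y_train,
    )
    catboost_model.predict(X_test)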

@@ -182,3 +213,219 @@ You can also use the shortcut ``.prepare_save_deploy()`` instead of calling ``.p

    model.predict(2)
    model.delete_deployment(wait_for_completion=True)
    ModelCatalog(compartment_id=os.environ['NB_SESSION_COMPARTMENT_OCID']).delete_model(model.model_id)


Example -- Save Your Own Model
==============================

By default, the ``serialize`` argument of the ``GenericModel`` class is ``True``, and the model is serialized with cloudpickle. However, you can set ``serialize=False`` to disable this and serialize the model on your own; you then only need to copy the serialized model into the ``artifact_dir``. This example shows step by step how to do that.
The example is illustrated using an AutoMLx model.

.. code-block:: python3

    import automl
    import ads
    from automl import init
    from sklearn.datasets import fetch_openml
    from sklearn.model_selection import train_test_split
    from ads.model import GenericModel

    dataset = fetch_openml(name='adult', as_frame=True)
    df, y = dataset.data, dataset.target

    # Several of the columns are incorrectly labeled as category type in the original dataset
    numeric_columns = ['age', 'capitalgain', 'capitalloss', 'hoursperweek']
    for col in df.columns:
        if col in numeric_columns:
            df[col] = df[col].astype(int)

    X_train, X_test, y_train, y_test = train_test_split(df,
                                                        y.map({'>50K': 1, '<=50K': 0}).astype(int),
                                                        train_size=0.7,
                                                        random_state=0)

    X_train.shape, X_test.shape

    # Create an AutoMLx model
    init(engine='local')

    est = automl.Pipeline(task='classification')
    est.fit(X_train, y_train)

    # Authentication
    ads.set_auth(auth="resource_principal")

    # Serialize your model. You can choose your own way to serialize it.
    import cloudpickle
    with open("./model.pkl", "wb") as f:
        cloudpickle.dump(est, f)

    model = GenericModel(est, artifact_dir="model_artifact_folder", serialize=False)
    model.prepare(
        inference_conda_env="automlx_p38_cpu_v1",
        force_overwrite=True,
        model_file_name="model.pkl",
        X_sample=X_test,
    )
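
The next step is to copy the serialized model into the artifact directory. One way to do that copy (a minimal sketch using the paths from the code above):

.. code-block:: python3

    # Copy the hand-serialized pickle into the artifact directory
    import shutil
    shutil.copy("./model.pkl", "model_artifact_folder/model.pkl")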

Now copy the model.pkl file into the ``model_artifact_folder`` folder. Then open the score.py in that folder and implement the ``load_model`` function. You can also edit the ``pre_inference`` and ``post_inference`` functions. Below is an example implementation of score.py; replace your score.py with the code below.

.. code-block:: python3
    :emphasize-lines: 28, 29, 30, 31, 122

    # score.py 1.0 generated by ADS 2.8.2 on 20230301_065458
    import os
    import sys
    import json
    from functools import lru_cache

    model_name = 'model.pkl'


    """
    Inference script. This script is used for prediction by the scoring server when the schema is known.
    """

    @lru_cache(maxsize=10)
    def load_model(model_file_name=model_name):
        """
        Loads model from the serialized format

        Returns
        -------
        model: a model instance on which predict API can be invoked
        """
        model_dir = os.path.dirname(os.path.realpath(__file__))
        if model_dir not in sys.path:
            sys.path.insert(0, model_dir)
        contents = os.listdir(model_dir)
        if model_file_name in contents:
            import cloudpickle
            with open(os.path.join(model_dir, model_file_name), "rb") as f:
                model = cloudpickle.load(f)
            return model
        else:
            raise Exception(f'{model_file_name} is not found in model directory {model_dir}')

    @lru_cache(maxsize=1)
    def fetch_data_type_from_schema(input_schema_path=os.path.join(os.path.dirname(os.path.realpath(__file__)), "input_schema.json")):
        """
        Returns data type information fetched from input_schema.json.

        Parameters
        ----------
        input_schema_path: path of input schema.

        Returns
        -------
        data_type: data type fetched from input_schema.json.

        """
        data_type = {}
        if os.path.exists(input_schema_path):
            schema = json.load(open(input_schema_path))
            for col in schema['schema']:
                data_type[col['name']] = col['dtype']
        else:
            print("input_schema has to be passed in in order to recover the same data type. pass `X_sample` in `ads.model.framework.sklearn_model.SklearnModel.prepare` function to generate the input_schema. Otherwise, the data type might be changed after serialization/deserialization.")
        return data_type

    def deserialize(data, input_schema_path):
        """
        Deserialize json-serialized data to data in its original type when sent to predict.

        Parameters
        ----------
        data: serialized input data.
        input_schema_path: path of input schema.

        Returns
        -------
        data: deserialized input data.

        """

        import pandas as pd
        import numpy as np
        import base64
        from io import BytesIO
        if isinstance(data, bytes):
            return data

        data_type = data.get('data_type', '') if isinstance(data, dict) else ''
        json_data = data.get('data', data) if isinstance(data, dict) else data

        if "numpy.ndarray" in data_type:
            load_bytes = BytesIO(base64.b64decode(json_data.encode('utf-8')))
            return np.load(load_bytes, allow_pickle=True)
        if "pandas.core.series.Series" in data_type:
            return pd.Series(json_data)
        if "pandas.core.frame.DataFrame" in data_type or isinstance(json_data, str):
            return pd.read_json(json_data, dtype=fetch_data_type_from_schema(input_schema_path))
        if isinstance(json_data, dict):
            return pd.DataFrame.from_dict(json_data)
        return json_data

    def pre_inference(data, input_schema_path):
        """
        Preprocess data

        Parameters
        ----------
        data: Data format as expected by the predict API of the core estimator.
        input_schema_path: path of input schema.

        Returns
        -------
        data: Data format after any processing.

        """
        return deserialize(data, input_schema_path)

    def post_inference(yhat):
        """
        Post-process the model results

        Parameters
        ----------
        yhat: Data format after calling model.predict.

        Returns
        -------
        yhat: Data format after any processing.

        """
        return yhat.tolist()

    def predict(data, model=load_model(), input_schema_path=os.path.join(os.path.dirname(os.path.realpath(__file__)), "input_schema.json")):
        """
        Returns prediction given the model and data to predict

        Parameters
        ----------
        model: Model instance returned by load_model API.
        data: Data format as expected by the predict API of the core estimator. E.g., in the case of scikit-learn models it could be a numpy array, a list of lists, or a Pandas DataFrame.
        input_schema_path: path of input schema.

        Returns
        -------
        predictions: Output from scoring server
            Format: {'prediction': output from model.predict method}

        """
        features = pre_inference(data, input_schema_path)
        yhat = post_inference(
            model.predict(features)
        )
        return {'prediction': yhat}

Save the score.py file and call verify to check that it works locally.

.. code-block:: python3

    model.verify(X_test.iloc[:2], auto_serialize_data=True)
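
``verify()`` runs the ``predict()`` defined in score.py locally. If you want to exercise the edited file even more directly, a hypothetical smoke test (assuming the artifact directory is importable from your session) could look like this:

.. code-block:: python3

    # Hypothetical direct test of the edited score.py, bypassing ADS
    import sys
    sys.path.insert(0, "model_artifact_folder")

    from score import predict
    print(predict(X_test.iloc[:2].to_json()))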

After verify runs successfully, you can save the model to the model catalog, deploy it, and call predict to invoke the endpoint.

.. code-block:: python3

    model_id = model.save(display_name='Demo AutoMLModel model')
    deploy = model.deploy(display_name='Demo AutoMLModel deployment')
    model.predict(X_test.iloc[:2].to_json())
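
When you are finished with the demo resources, the same cleanup calls used in the earlier examples apply here as well (a sketch, assuming a notebook session where ``NB_SESSION_COMPARTMENT_OCID`` is set):

.. code-block:: python3

    import os
    from ads.catalog.model import ModelCatalog

    # Tear down the deployment, then remove the model from the catalog
    model.delete_deployment(wait_for_completion=True)
    ModelCatalog(compartment_id=os.environ['NB_SESSION_COMPARTMENT_OCID']).delete_model(model_id)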
