Skip to content

Commit 52ece37

Browse files
committed
recommender v1 added
1 parent 79ab368 commit 52ece37

File tree

13 files changed

+723
-0
lines changed

13 files changed

+723
-0
lines changed

ads/opctl/operator/lowcode/recommender/README.md

Whitespace-only changes.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*--
3+
4+
# Copyright (c) 2023 Oracle and/or its affiliates.
5+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*--
3+
4+
# Copyright (c) 2024 Oracle and/or its affiliates.
5+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6+
7+
import json
8+
import os
9+
import sys
10+
from typing import Dict, List
11+
12+
import yaml
13+
14+
from ads.opctl import logger
15+
from ads.opctl.operator.common.const import ENV_OPERATOR_ARGS
16+
from ads.opctl.operator.common.utils import _parse_input_args
17+
18+
from .model.recommender_dataset import RecommenderDatasets
19+
from .operator_config import RecommenderOperatorConfig
20+
from .model.factory import RecommenderOperatorModelFactory
21+
22+
def operate(operator_config: RecommenderOperatorConfig) -> None:
    """Run the recommender operator end to end.

    Loads the datasets described by ``operator_config``, obtains a model from
    ``RecommenderOperatorModelFactory`` and calls ``generate_report()`` on it.

    Parameters
    ----------
    operator_config: RecommenderOperatorConfig
        The parsed operator configuration.
    """
    loaded_data = RecommenderDatasets(operator_config)
    model = RecommenderOperatorModelFactory.get_model(operator_config, loaded_data)
    model.generate_report()
29+
30+
31+
def verify(spec: Dict, **kwargs: Dict) -> bool:
    """Verify the recommender operator config and print it.

    Parameters
    ----------
    spec: Dict
        The operator specification. An already constructed
        ``RecommenderOperatorConfig`` is accepted as well, because ``main``
        hands over the parsed config object rather than a dictionary.
    kwargs: Dict
        Accepted for interface compatibility; not used.

    Returns
    -------
    bool
        ``True`` once the config has been built and printed. A spec that
        cannot be turned into a config surfaces as whatever exception
        ``RecommenderOperatorConfig.from_dict`` raises.
    """
    if isinstance(spec, RecommenderOperatorConfig):
        # Already parsed: building it again from a dict is unnecessary.
        operator = spec
    else:
        operator = RecommenderOperatorConfig.from_dict(spec)

    msg_header = (
        f"{'*' * 50} The operator config has been successfully verified {'*' * 50}"
    )
    print(msg_header)
    print(operator.to_yaml())
    print("*" * len(msg_header))
    return True
40+
41+
42+
def main(raw_args: List[str]):
    """Entry point of the recommender operator.

    Parses the command line, resolves the operator spec from ``--file``,
    ``--spec`` or the ``ENV_OPERATOR_ARGS`` environment variable, builds the
    operator config and then either verifies or runs the operator.

    Parameters
    ----------
    raw_args: List[str]
        Command-line arguments without the program name.
    """
    args, _ = _parse_input_args(raw_args)

    # An inline spec given on the command line wins over the environment variable.
    operator_spec_str = args.spec or os.environ.get(ENV_OPERATOR_ARGS)

    if not args.file and not operator_spec_str:
        logger.info(
            "Please specify -f[--file] or -s[--spec] or "
            f"pass operator's arguments via {ENV_OPERATOR_ARGS} environment variable."
        )
        return

    logger.info("-" * 100)
    logger.info(
        f"{'Running' if not args.verify else 'Verifying'} the recommender detection operator."
    )

    yaml_string = ""
    if operator_spec_str:
        try:
            # The spec is tried as JSON first and normalised to YAML.
            yaml_string = yaml.safe_dump(json.loads(operator_spec_str))
        except json.JSONDecodeError:
            # Not JSON: treat it as YAML. A YAML error raised here propagates,
            # because sibling ``except`` clauses do not guard each other.
            yaml_string = yaml.safe_dump(yaml.safe_load(operator_spec_str))
        except Exception:
            # Any other failure: hand the raw text to the config loader.
            # ``Exception`` (not a bare ``except``) keeps KeyboardInterrupt and
            # SystemExit from being swallowed.
            yaml_string = operator_spec_str

    operator_config = RecommenderOperatorConfig.from_yaml(
        uri=args.file,
        yaml_string=yaml_string,
    )

    logger.info(operator_config.to_yaml())

    # run operator
    if args.verify:
        verify(operator_config)
    else:
        operate(operator_config)
79+
80+
81+
# Script entry: forward the command-line arguments (program name dropped) to main().
if __name__ == "__main__":
    main(sys.argv[1:])
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*--
3+
4+
# Copyright (c) 2023 Oracle and/or its affiliates.
5+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6+
7+
from typing import Dict
8+
9+
from ads.opctl.operator.common.operator_yaml_generator import YamlGenerator
10+
from ads.opctl.operator.common.utils import _load_yaml_from_uri
11+
12+
13+
def init(**kwargs: Dict) -> str:
    """
    Generates operator config by the schema.

    Properties
    ----------
    kwargs: (Dict, optional).
        Additional key value arguments.

        - type: str
            The type of the operator.

    Returns
    -------
    str
        The YAML specification generated based on the schema.
    """
    # Local import: this module does not import ``os`` at file level.
    import os

    # Resolve schema.yaml next to this module. Building the path from the
    # directory avoids ``str.replace`` rewriting a "cmd.py" substring that
    # happens to appear in a parent directory name.
    schema_path = os.path.join(os.path.dirname(__file__), "schema.yaml")

    # NOTE(review): the "detectors" default looks carried over from another
    # operator - confirm the recommender schema actually defines this key.
    default_detector = [{"name": "<type>.<entity>", "action": "mask"}]

    return YamlGenerator(
        schema=_load_yaml_from_uri(schema_path)
    ).generate_example_dict(
        values={"type": kwargs.get("type"), "detectors": default_detector}
    )
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*--
3+
4+
# Copyright (c) 2023 Oracle and/or its affiliates.
5+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6+
7+
from ads.common.extended_enum import ExtendedEnumMeta
8+
9+
# Module-level defaults for the recommender operator.
# NOTE(review): neither value is referenced by the code visible in this commit;
# the names suggest a row-display limit and a report file name - confirm at use sites.
DEFAULT_SHOW_ROWS = 25
DEFAULT_REPORT_FILENAME = "report.html"
11+
12+
class OutputColumns(str, metaclass=ExtendedEnumMeta):
    """Column names used in the recommender operator output."""

    USER_COL = "user"
    ITEM_COL = "item"
    SCORE = "score"
17+
18+
class SupportedMetrics(str, metaclass=ExtendedEnumMeta):
    """Metrics supported by the recommender operator."""

    RMSE = "RMSE"
21+
22+
class SupportedModels(str, metaclass=ExtendedEnumMeta):
    """Model identifiers supported by the recommender operator."""

    SVD = "svd"
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
name: pii
2+
channels:
3+
- conda-forge
4+
dependencies:
5+
- python=3.9
6+
- pip
7+
- pip:
8+
- report-creator
9+
- oracle_ads[opctl]
10+
- plotly
11+
- scikit-surprise
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*--
3+
4+
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
5+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6+
7+
import os
8+
import time
9+
from abc import ABC, abstractmethod
10+
11+
import pandas as pd
12+
13+
from ads.common.object_storage_details import ObjectStorageDetails
14+
from ads.opctl import logger
15+
from ads.opctl.operator.lowcode.common.utils import default_signer
16+
from ads.opctl.operator.lowcode.common.utils import (
17+
write_data,
18+
)
19+
from .recommender_dataset import RecommenderDatasets
20+
from ..operator_config import RecommenderOperatorConfig
21+
22+
23+
class RecommenderOperatorBaseModel(ABC):
    """Abstract base for recommender operator models.

    Subclasses provide ``_build_model`` and ``_generate_report``. This class
    times the model build and writes the resulting frame out as a CSV file.
    """

    def __init__(self, config: RecommenderOperatorConfig, datasets: RecommenderDatasets):
        """Keep the operator spec and the loaded datasets on the instance."""
        self.spec = config.spec
        self.datasets = datasets

    def generate_report(self):
        """Build the model, log the elapsed time, and persist the result."""
        started_at = time.time()
        result_df = self._build_model()
        elapsed_time = time.time() - started_at
        logger.info("Building the models completed in %s seconds", elapsed_time)
        # Persist the frame returned by the model build.
        self._save_report(result_df=result_df)

    def _save_report(self, result_df):
        """Write ``result_df`` as CSV into the configured output directory.

        The file name comes from ``spec.recommendations_filename``. Signer
        options are requested only when the output location is an OCI path.
        """
        output_dir = self.spec.output_directory.url

        storage_options = (
            default_signer()
            if ObjectStorageDetails.is_oci_path(output_dir)
            else dict()
        )

        target = os.path.join(output_dir, self.spec.recommendations_filename)
        write_data(
            data=result_df,
            filename=target,
            format="csv",
            storage_options=storage_options,
        )

        logger.info(
            "The outputs have been successfully "
            f"generated and placed into the directory: {output_dir}."
        )

    @abstractmethod
    def _generate_report(self):
        """
        Generates the report for the particular model.
        Must be implemented by each concrete model.
        """

    @abstractmethod
    def _build_model(self) -> pd.DataFrame:
        """
        Builds the model and returns its result as a data frame.
        Must be implemented by each concrete model.
        """
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*--
3+
4+
# Copyright (c) 2023 Oracle and/or its affiliates.
5+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6+
7+
from ..constant import SupportedModels
8+
from ..operator_config import RecommenderOperatorConfig
9+
from .base_model import RecommenderOperatorBaseModel
10+
from .recommender_dataset import RecommenderDatasets
11+
from .svd import SVDOperatorModel
12+
13+
class UnSupportedModelError(Exception):
    """Raised when a model type has no registered implementation."""

    def __init__(self, model_type: str):
        """Build the error message for ``model_type``.

        Parameters
        ----------
        model_type: str
            The model type that was requested.
        """
        # Call ``values`` when it is callable so the message lists the
        # supported model names rather than the repr of a bound method.
        supported = SupportedModels.values
        if callable(supported):
            supported = supported()
        super().__init__(
            f"Model: `{model_type}` "
            f"is not supported. Supported models: {supported}"
        )
19+
20+
21+
class RecommenderOperatorModelFactory:
    """
    Factory that maps a model type to the operator model class implementing it.
    """

    # Registry of model type -> model class.
    _MAP = {SupportedModels.SVD: SVDOperatorModel}

    @classmethod
    def get_model(
        cls, operator_config: RecommenderOperatorConfig, datasets: RecommenderDatasets
    ) -> RecommenderOperatorBaseModel:
        """
        Instantiates the operator model.

        The model type is currently fixed to ``SupportedModels.SVD``; the
        config is passed through to the model but is not consulted to pick it.

        Parameters
        ----------
        operator_config: RecommenderOperatorConfig
            The recommender operator config.

        datasets: RecommenderDatasets
            The datasets handed to the model.

        Returns
        -------
        RecommenderOperatorBaseModel
            The instantiated operator model.

        Raises
        ------
        UnSupportedModelError
            If the model type has no entry in the registry.
        """
        model_type = SupportedModels.SVD
        model_cls = cls._MAP.get(model_type)
        if model_cls is None:
            raise UnSupportedModelError(model_type)
        return model_cls(config=operator_config, datasets=datasets)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*--
3+
4+
# Copyright (c) 2023 Oracle and/or its affiliates.
5+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6+
7+
import pandas as pd
8+
9+
from ..operator_config import RecommenderOperatorConfig
10+
11+
12+
class RecommenderDatasets:
    """Holds the interactions, users and items tables used by the operator."""

    def __init__(self, config: RecommenderOperatorConfig):
        """Read the three input CSV files named in the operator spec.

        Parameters
        ----------
        config: RecommenderOperatorConfig
            The operator config; ``config.spec`` supplies the URLs of the
            interactions, user and item data.
        """
        source = config.spec
        self.interactions: pd.DataFrame = pd.read_csv(source.interactions_data.url)
        self.users: pd.DataFrame = pd.read_csv(source.user_data.url)
        self.items: pd.DataFrame = pd.read_csv(source.item_data.url)
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/usr/bin/env python
2+
# -*- coding: utf-8 -*--
3+
4+
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
5+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6+
7+
import pandas as pd
8+
from .recommender_dataset import RecommenderDatasets
9+
from ..operator_config import RecommenderOperatorConfig
10+
from .factory import RecommenderOperatorBaseModel
11+
from surprise import Dataset, Reader
12+
from surprise.model_selection import train_test_split
13+
from surprise import SVD
14+
from surprise import accuracy
15+
16+
17+
class SVDOperatorModel(RecommenderOperatorBaseModel):
    """Class representing scikit surprise SVD operator model."""

    def __init__(self, config: RecommenderOperatorConfig, datasets: RecommenderDatasets):
        """Cache the input tables and the column names taken from the spec."""
        super().__init__(config, datasets)
        self.interactions = datasets.interactions
        self.users = datasets.users
        self.items = datasets.items
        self.user_id = config.spec.user_column_name
        self.item_id = config.spec.item_column_name
        self.rating_col = config.spec.ratings_column_name
        # Fraction of the interactions held out when the model is evaluated.
        self.test_size = 0.2

    def _get_recommendations(self, user_id, algo, items, n=10):
        """Return the ``n`` best-scoring items the user has no interaction with.

        Parameters
        ----------
        user_id
            Identifier of the user to recommend for.
        algo
            Fitted algorithm exposing ``predict(user_id, item_id)``.
        items: pd.DataFrame
            Item table; its ``self.item_id`` column lists the candidates.
        n: int
            Maximum number of recommendations to return.

        Returns
        -------
        list
            ``(item_id, estimated_rating)`` tuples, highest estimate first.
        """
        all_item_ids = items[self.item_id].unique()
        rated_items = self.interactions[self.interactions[self.user_id] == user_id][self.item_id]
        # A set makes each membership test constant time instead of a scan
        # over the user's rated items for every candidate.
        already_rated = set(rated_items.tolist())
        unrated_items = [item_id for item_id in all_item_ids if item_id not in already_rated]
        predictions = [algo.predict(user_id, item_id) for item_id in unrated_items]
        predictions.sort(key=lambda x: x.est, reverse=True)
        return [(pred.iid, pred.est) for pred in predictions[:n]]

    def _build_model(self) -> pd.DataFrame:
        """Fit SVD on the interactions and collect top-k items for every user.

        Returns
        -------
        pd.DataFrame
            One row per (user, recommended item) with the estimated rating,
            using the user, item and rating column names from the spec.
        """
        # The rating scale is taken from the observed minimum and maximum.
        min_rating = self.interactions[self.rating_col].min()
        max_rating = self.interactions[self.rating_col].max()
        reader = Reader(rating_scale=(min_rating, max_rating))
        data = Dataset.load_from_df(
            self.interactions[[self.user_id, self.item_id, self.rating_col]], reader
        )
        trainset, testset = train_test_split(data, test_size=self.test_size)
        algo = SVD()
        algo.fit(trainset)
        # RMSE is computed on the held-out split; the returned value is not kept.
        predictions = algo.test(testset)
        accuracy.rmse(predictions)

        all_recommendations = []
        for user_id in self.users[self.user_id]:
            recommendations = self._get_recommendations(
                user_id, algo, self.items, n=self.spec.top_k
            )
            for item_id, est_rating in recommendations:
                all_recommendations.append({
                    self.user_id: user_id,
                    self.item_id: item_id,
                    self.rating_col: est_rating
                })
        # Naming the columns keeps the headers even when nothing was recommended.
        return pd.DataFrame(
            all_recommendations,
            columns=[self.user_id, self.item_id, self.rating_col],
        )

    def _generate_report(self):
        """Satisfy the abstract hook declared on the base class.

        No report content is produced for SVD here. Without this override the
        class stays abstract and cannot be instantiated by the factory.
        """
        return None

0 commit comments

Comments
 (0)