Skip to content

Commit 68c0821

Browse files
committed
report added
1 parent 52ece37 commit 68c0821

File tree

5 files changed

+190
-29
lines changed

5 files changed

+190
-29
lines changed

ads/opctl/operator/lowcode/recommender/constant.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ class OutputColumns(str, metaclass=ExtendedEnumMeta):
1818
class SupportedMetrics(str, metaclass=ExtendedEnumMeta):
1919
"""Supported recommender evaluation metrics."""
2020
RMSE = "RMSE"  # root mean squared error
21+
MAE = "MAE"  # mean absolute error
2122

2223
class SupportedModels(str, metaclass=ExtendedEnumMeta):
2324
"""Supported recommender models."""

ads/opctl/operator/lowcode/recommender/model/base_model.py

Lines changed: 128 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,39 +5,144 @@
55
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
66

77
import os
8+
import tempfile
89
import time
910
from abc import ABC, abstractmethod
11+
from typing import Tuple, Dict
1012

13+
import fsspec
1114
import pandas as pd
15+
import report_creator as rc
1216

1317
from ads.common.object_storage_details import ObjectStorageDetails
1418
from ads.opctl import logger
1519
from ads.opctl.operator.lowcode.common.utils import default_signer
1620
from ads.opctl.operator.lowcode.common.utils import (
21+
human_time_friendly,
22+
enable_print,
23+
disable_print,
1724
write_data,
1825
)
26+
from .factory import SupportedModels
1927
from .recommender_dataset import RecommenderDatasets
2028
from ..operator_config import RecommenderOperatorConfig
29+
from plotly import graph_objects as go
30+
import matplotlib.pyplot as plt
2131

2232

2333
class RecommenderOperatorBaseModel(ABC):
2434
"""The base class for the recommender detection operator models."""
2535

2636
def __init__(self, config: RecommenderOperatorConfig, datasets: RecommenderDatasets):
27-
self.spec = config.spec
37+
self.config = config
38+
self.spec = self.config.spec
2839
self.datasets = datasets
2940

3041
def generate_report(self):
    """Build the model, optionally assemble the HTML report, and save the outputs.

    The model is trained through ``self._build_model()``, which returns the
    recommendations frame and a metrics mapping. When
    ``self.spec.generate_report`` is truthy, the report sections are built:
    header, user/item interaction histograms, a sample recommendation
    heatmap, the metrics table, the model-specific sections returned by
    ``self._generate_report()``, and the YAML configuration. Everything is
    then handed to ``self._save_report``.
    """
    item_col = self.spec.item_column
    user_col = self.spec.user_column
    interaction_col = self.spec.interaction_column

    start_time = time.time()
    result_df, metrics = self._build_model()
    elapsed_time = time.time() - start_time
    logger.info("Building the models completed in %s seconds", elapsed_time)

    # Bound before the conditional so that `_save_report` can always be
    # called; previously a disabled report raised NameError here.
    report_sections = []

    if self.spec.generate_report:
        # build the report
        model_description, other_sections = self._generate_report()

        # Use the configured model name rather than a hard-coded SVD so the
        # header stays correct for any model built on this base class.
        model_label = str(self.spec.model_name).upper()

        header_section = rc.Block(
            rc.Heading("Recommender Report", level=1),
            rc.Text(
                f"The recommendations was generated using {model_label}. {model_description}"
            ),
            rc.Group(
                rc.Metric(
                    heading="Recommendations was generated in ",
                    value=human_time_friendly(elapsed_time),
                ),
                rc.Metric(
                    heading="Num users",
                    value=len(self.datasets.users),
                ),
                rc.Metric(
                    heading="Num items",
                    value=len(self.datasets.items),
                ),
            ),
        )
        summary = rc.Block(header_section)

        # user and item distributions in interactions
        user_title = rc.Heading("User Statistics", level=2)
        user_rating_counts = self.datasets.interactions[user_col].value_counts()
        fig_user = go.Figure(data=[go.Histogram(x=user_rating_counts, nbinsx=100)])
        fig_user.update_layout(
            title=f'Distribution of the number of interactions by {user_col}',
            xaxis_title=f'Number of {interaction_col}',
            yaxis_title=f'Number of {user_col}',
            bargap=0.2,
        )

        item_title = rc.Heading("Item Statistics", level=2)
        item_rating_counts = self.datasets.interactions[item_col].value_counts()
        fig_item = go.Figure(data=[go.Histogram(x=item_rating_counts, nbinsx=100)])
        fig_item.update_layout(
            title=f'Distribution of the number of interactions by {item_col}',
            xaxis_title=f'Number of {interaction_col}',
            yaxis_title=f'Number of {item_col}',
            bargap=0.2,
        )

        result_heatmap_title = rc.Heading("Sample Recommendations", level=2)
        # Sample by item *value*: `.index` would yield row labels, which the
        # `isin` filter below would then compare against item ids.
        sample_items = result_df[item_col].head(100).unique()
        filtered_df = result_df[result_df[item_col].isin(sample_items)]
        data = filtered_df.pivot(index=user_col, columns=item_col, values=interaction_col)
        fig = go.Figure(
            data=go.Heatmap(
                z=data.values,
                x=data.columns,
                y=data.index,
                colorscale='Viridis',
            )
        )
        fig.update_layout(
            title='Recommendation heatmap of User-Item Interactions (sample)',
            width=1500,
            height=800,
            xaxis_title=item_col,
            yaxis_title=user_col,
            coloraxis_colorbar=dict(title=interaction_col),
        )
        plots = [
            user_title, rc.Widget(fig_user),
            item_title, rc.Widget(fig_item),
            result_heatmap_title, rc.Widget(fig),
        ]

        test_metrics_sections = [rc.DataTable(pd.DataFrame(metrics, index=[0]))]
        yaml_appendix_title = rc.Heading("Reference: YAML File", level=2)
        yaml_appendix = rc.Yaml(self.config.to_dict())
        report_sections = (
            [summary]
            + plots
            + test_metrics_sections
            + other_sections
            + [yaml_appendix_title, yaml_appendix]
        )

    # save the report and result CSV
    self._save_report(
        report_sections=report_sections,
        result_df=result_df,
    )
39138

40-
def _save_report(self, result_df):
139+
def _evaluation_metrics(self):
140+
pass
141+
142+
def _test_data_evaluate_metrics(self):
143+
pass
144+
145+
def _save_report(self, report_sections: Tuple, result_df: pd.DataFrame):
41146
"""Saves resulting reports to the given folder."""
42147

43148
unique_output_dir = self.spec.output_directory.url
@@ -47,7 +152,25 @@ def _save_report(self, result_df):
47152
else:
48153
storage_options = dict()
49154

50-
# forecast csv report
155+
# report-creator html report
156+
if self.spec.generate_report:
157+
with tempfile.TemporaryDirectory() as temp_dir:
158+
report_local_path = os.path.join(temp_dir, "___report.html")
159+
disable_print()
160+
with rc.ReportCreator("My Report") as report:
161+
report.save(rc.Block(*report_sections), report_local_path)
162+
enable_print()
163+
164+
report_path = os.path.join(unique_output_dir, self.spec.report_filename)
165+
with open(report_local_path) as f1:
166+
with fsspec.open(
167+
report_path,
168+
"w",
169+
**storage_options,
170+
) as f2:
171+
f2.write(f1.read())
172+
173+
# recommender csv report
51174
write_data(
52175
data=result_df,
53176
filename=os.path.join(unique_output_dir, self.spec.recommendations_filename),
@@ -68,7 +191,7 @@ def _generate_report(self):
68191
"""
69192

70193
@abstractmethod
71-
def _build_model(self) -> pd.DataFrame:
194+
def _build_model(self) -> [pd.DataFrame, Dict]:
72195
"""
73196
Build the model.
74197
The method that needs to be implemented on the particular model level.
Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,22 @@
11
#!/usr/bin/env python
22
# -*- coding: utf-8 -*--
3+
from typing import Tuple, Dict, Any
34

45
# Copyright (c) 2023, 2024 Oracle and/or its affiliates.
56
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
67

78
import pandas as pd
9+
from pandas import DataFrame
10+
811
from .recommender_dataset import RecommenderDatasets
912
from ..operator_config import RecommenderOperatorConfig
1013
from .factory import RecommenderOperatorBaseModel
1114
from surprise import Dataset, Reader
1215
from surprise.model_selection import train_test_split
1316
from surprise import SVD
14-
from surprise import accuracy
17+
from surprise.accuracy import rmse, mae
18+
import report_creator as rc
19+
from ..constant import SupportedMetrics
1520

1621

1722
class SVDOperatorModel(RecommenderOperatorBaseModel):
@@ -22,38 +27,62 @@ def __init__(self, config: RecommenderOperatorConfig, datasets: RecommenderDatas
2227
self.interactions = datasets.interactions
2328
self.users = datasets.users
2429
self.items = datasets.items
25-
self.user_id = config.spec.user_column_name
26-
self.item_id = config.spec.item_column_name
27-
self.rating_col = config.spec.ratings_column_name
30+
self.user_id = config.spec.user_column
31+
self.item_id = config.spec.item_column
32+
self.interaction_column = config.spec.interaction_column
2833
self.test_size = 0.2
34+
self.algo = SVD()
2935

30-
def _get_recommendations(self, user_id, algo, items, n=10):
31-
all_item_ids = items[self.item_id].unique()
36+
def _get_recommendations(self, user_id, n):
37+
all_item_ids = self.items[self.item_id].unique()
3238
rated_items = self.interactions[self.interactions[self.user_id] == user_id][self.item_id]
3339
unrated_items = [item_id for item_id in all_item_ids if item_id not in rated_items.values]
34-
predictions = [algo.predict(user_id, item_id) for item_id in unrated_items]
40+
predictions = [self.algo.predict(user_id, item_id) for item_id in unrated_items]
3541
predictions.sort(key=lambda x: x.est, reverse=True)
3642
top_n_recommendations = predictions[:n]
3743
return [(pred.iid, pred.est) for pred in top_n_recommendations]
3844

39-
def _build_model(self) -> Tuple[DataFrame, Dict]:
    """Fit the SVD model, score it on a hold-out split, and recommend for every user.

    The interactions are split into train/test sets using ``self.test_size``.
    ``self.algo`` is fitted on the train split only, and RMSE/MAE are computed
    on the test split. Top-``self.spec.top_k`` recommendations are then
    produced for each entry of ``self.users[self.user_id]``.

    Returns
    -------
    Tuple[DataFrame, Dict]
        The recommendations frame (user, item and estimated interaction
        columns) and a mapping of metric name to value.
    """
    ratings = self.interactions[self.interaction_column]
    reader = Reader(rating_scale=(ratings.min(), ratings.max()))
    data = Dataset.load_from_df(
        self.interactions[[self.user_id, self.item_id, self.interaction_column]],
        reader,
    )
    trainset, testset = train_test_split(data, test_size=self.test_size)
    self.algo.fit(trainset)
    predictions = self.algo.test(testset)

    metric = {
        SupportedMetrics.RMSE: rmse(predictions, verbose=True),
        SupportedMetrics.MAE: mae(predictions, verbose=True),
    }

    all_recommendations = []
    for user_id in self.users[self.user_id]:
        all_recommendations.extend(
            {
                self.user_id: user_id,
                self.item_id: item_id,
                self.interaction_column: est_rating,
            }
            for item_id, est_rating in self._get_recommendations(user_id, n=self.spec.top_k)
        )

    # Pin the columns so an empty result (no users / nothing to recommend)
    # still has the expected schema instead of a column-less frame that
    # breaks downstream column lookups.
    recommendations_df = pd.DataFrame(
        all_recommendations,
        columns=[self.user_id, self.item_id, self.interaction_column],
    )
    return recommendations_df, metric
68+
69+
def _generate_report(self):
    """Return the SVD description text and the model-specific report sections.

    The extra sections hold a heading and a table of the top
    ``self.spec.top_k`` recommendations computed for the placeholder user id
    ``"__new_user__"``.
    """
    model_description = """
Singular Value Decomposition (SVD) is a matrix factorization technique used in recommendation systems to
decompose a user-item interaction matrix into three constituent matrices. These matrices capture the
latent factors that explain the observed interactions.
"""
    placeholder_user = "__new_user__"
    new_recommendations = [
        {
            self.user_id: placeholder_user,
            self.item_id: item_id,
            self.interaction_column: est_rating,
        }
        for item_id, est_rating in self._get_recommendations(placeholder_user, self.spec.top_k)
    ]
    other_sections = [
        rc.Heading("Recommendations for new users", level=2),
        rc.DataTable(new_recommendations),
    ]
    return model_description, other_sections

ads/opctl/operator/lowcode/recommender/operator_config.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,16 +32,24 @@ class RecommenderOperatorSpec(DataClassSerializable):
3232
output_directory: OutputDirectory = field(default_factory=OutputDirectory)
3333
top_k: int = None
3434
model_name: str = None
35-
user_column_name: str = None
36-
item_column_name: str = None
37-
ratings_column_name: str = None
35+
user_column: str = None
36+
item_column: str = None
37+
interaction_column: str = None
3838
recommendations_filename: str = None
39+
generate_report: bool = None
40+
report_filename: str = None
41+
3942

4043
def __post_init__(self):
4144
"""Adjusts the specification details."""
4245
self.output_directory = self.output_directory or OutputDirectory(url=find_output_dirname(self.output_directory))
4346
self.model_name = self.model_name or SupportedModels.SVD
4447
self.recommendations_filename = self.recommendations_filename or "recommendations.csv"
48+
# For Report Generation. When user doesn't specify defaults to True
49+
self.generate_report = (
50+
self.generate_report if self.generate_report is not None else True
51+
)
52+
self.report_filename = self.report_filename or "report.html"
4553

4654

4755
@dataclass(repr=True)

ads/opctl/operator/lowcode/recommender/schema.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -246,17 +246,17 @@ spec:
246246
meta:
247247
description: "Placed into output_directory location. Defaults to report.html"
248248

249-
user_column_name:
249+
user_column:
250250
type: string
251251
required: true
252252
default: "user_id"
253253

254-
item_column_name:
254+
item_column:
255255
type: string
256256
required: true
257257
default: "item_id"
258258

259-
rating_column_name:
259+
interaction_column:
260260
type: string
261261
required: true
262262
default: "rating"

0 commit comments

Comments
 (0)