5
5
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
6
6
7
7
import os
8
+ import tempfile
8
9
import time
9
10
from abc import ABC , abstractmethod
11
+ from typing import Tuple , Dict
10
12
13
+ import fsspec
11
14
import pandas as pd
15
+ import report_creator as rc
12
16
13
17
from ads .common .object_storage_details import ObjectStorageDetails
14
18
from ads .opctl import logger
15
19
from ads .opctl .operator .lowcode .common .utils import default_signer
16
20
from ads .opctl .operator .lowcode .common .utils import (
21
+ human_time_friendly ,
22
+ enable_print ,
23
+ disable_print ,
17
24
write_data ,
18
25
)
26
+ from .factory import SupportedModels
19
27
from .recommender_dataset import RecommenderDatasets
20
28
from ..operator_config import RecommenderOperatorConfig
29
+ from plotly import graph_objects as go
30
+ import matplotlib .pyplot as plt
21
31
22
32
23
33
class RecommenderOperatorBaseModel (ABC ):
24
34
"""The base class for the recommender detection operator models."""
25
35
26
36
def __init__(self, config: RecommenderOperatorConfig, datasets: RecommenderDatasets):
    """Keep references to the operator configuration and the datasets.

    Parameters
    ----------
    config: RecommenderOperatorConfig
        The operator configuration. It is stored as ``self.config`` and its
        ``spec`` attribute is additionally exposed as ``self.spec``.
    datasets: RecommenderDatasets
        The datasets object, stored as ``self.datasets``.
    """
    self.datasets = datasets
    self.config = config
    # Shortcut used throughout the class instead of ``self.config.spec``.
    self.spec = config.spec
29
40
30
41
def generate_report(self):
    """Build the model, optionally assemble the report, and save the outputs.

    The model is built via ``self._build_model()``, which is expected to
    return the recommendations frame and a metrics mapping. When
    ``self.spec.generate_report`` is truthy, the report sections (summary,
    user/item interaction histograms, a sample recommendation heatmap, the
    metrics table, model specific sections and the YAML appendix) are
    assembled. ``self._save_report`` is always called so that the
    recommendations are written even when no report is requested.
    """
    item_col = self.spec.item_column
    user_col = self.spec.user_column
    interaction_col = self.spec.interaction_column

    start_time = time.time()
    result_df, metrics = self._build_model()
    elapsed_time = time.time() - start_time
    logger.info("Building the models completed in %s seconds", elapsed_time)

    # Bound before the conditional: `_save_report` is called unconditionally
    # below, so leaving this name unassigned when report generation is
    # disabled would raise UnboundLocalError and skip saving the results.
    report_sections = []

    if self.spec.generate_report:
        # build the report
        (
            model_description,
            other_sections,
        ) = self._generate_report()

        header_section = rc.Block(
            rc.Heading("Recommender Report", level=1),
            rc.Text(
                f"The recommendations was generated using {SupportedModels.SVD.upper()}. {model_description}"
            ),
            rc.Group(
                rc.Metric(
                    heading="Recommendations was generated in ",
                    value=human_time_friendly(elapsed_time),
                ),
                rc.Metric(
                    heading="Num users",
                    value=len(self.datasets.users),
                ),
                rc.Metric(
                    heading="Num items",
                    value=len(self.datasets.items),
                ),
            ),
        )

        summary = rc.Block(
            header_section,
        )

        # user and item distributions in interactions
        user_title = rc.Heading("User Statistics", level=2)
        user_rating_counts = self.datasets.interactions[user_col].value_counts()
        fig_user = go.Figure(data=[go.Histogram(x=user_rating_counts, nbinsx=100)])
        fig_user.update_layout(
            title=f'Distribution of the number of interactions by {user_col}',
            xaxis_title=f'Number of {interaction_col}',
            yaxis_title=f'Number of {user_col}',
            bargap=0.2,
        )

        item_title = rc.Heading("Item Statistics", level=2)
        item_rating_counts = self.datasets.interactions[item_col].value_counts()
        fig_item = go.Figure(data=[go.Histogram(x=item_rating_counts, nbinsx=100)])
        fig_item.update_layout(
            title=f'Distribution of the number of interactions by {item_col}',
            xaxis_title=f'Number of {interaction_col}',
            yaxis_title=f'Number of {item_col}',
            bargap=0.2,
        )

        result_heatmap_title = rc.Heading("Sample Recommendations", level=2)
        # Sample by item *values* taken from the first 100 rows. Using
        # `.index` here would yield row labels, which are unrelated to the
        # item identifiers that `isin` is matched against.
        sample_items = result_df[item_col].head(100)
        filtered_df = result_df[result_df[item_col].isin(sample_items)]
        data = filtered_df.pivot(index=user_col, columns=item_col, values=interaction_col)
        fig = go.Figure(data=go.Heatmap(
            z=data.values,
            x=data.columns,
            y=data.index,
            colorscale='Viridis',
        ))
        fig.update_layout(
            title='Recommendation heatmap of User-Item Interactions (sample)',
            width=1500,
            height=800,
            xaxis_title=item_col,
            yaxis_title=user_col,
            coloraxis_colorbar=dict(title=interaction_col),
        )
        plots = [
            user_title, rc.Widget(fig_user),
            item_title, rc.Widget(fig_item),
            result_heatmap_title, rc.Widget(fig),
        ]

        # Single-row table built from the metrics returned by `_build_model`.
        test_metrics_sections = [rc.DataTable(pd.DataFrame(metrics, index=[0]))]
        yaml_appendix_title = rc.Heading("Reference: YAML File", level=2)
        yaml_appendix = rc.Yaml(self.config.to_dict())
        report_sections = (
            [summary]
            + plots
            + test_metrics_sections
            + other_sections
            + [yaml_appendix_title, yaml_appendix]
        )

    # save the report and result CSV
    self._save_report(
        report_sections=report_sections,
        result_df=result_df,
    )
39
138
40
- def _save_report (self , result_df ):
139
+ def _evaluation_metrics (self ):
140
+ pass
141
+
142
+ def _test_data_evaluate_metrics (self ):
143
+ pass
144
+
145
+ def _save_report (self , report_sections : Tuple , result_df : pd .DataFrame ):
41
146
"""Saves resulting reports to the given folder."""
42
147
43
148
unique_output_dir = self .spec .output_directory .url
@@ -47,7 +152,25 @@ def _save_report(self, result_df):
47
152
else :
48
153
storage_options = dict ()
49
154
50
- # forecast csv report
155
+ # report-creator html report
156
+ if self .spec .generate_report :
157
+ with tempfile .TemporaryDirectory () as temp_dir :
158
+ report_local_path = os .path .join (temp_dir , "___report.html" )
159
+ disable_print ()
160
+ with rc .ReportCreator ("My Report" ) as report :
161
+ report .save (rc .Block (* report_sections ), report_local_path )
162
+ enable_print ()
163
+
164
+ report_path = os .path .join (unique_output_dir , self .spec .report_filename )
165
+ with open (report_local_path ) as f1 :
166
+ with fsspec .open (
167
+ report_path ,
168
+ "w" ,
169
+ ** storage_options ,
170
+ ) as f2 :
171
+ f2 .write (f1 .read ())
172
+
173
+ # recommender csv report
51
174
write_data (
52
175
data = result_df ,
53
176
filename = os .path .join (unique_output_dir , self .spec .recommendations_filename ),
@@ -68,7 +191,7 @@ def _generate_report(self):
68
191
"""
69
192
70
193
@abstractmethod
71
- def _build_model (self ) -> pd .DataFrame :
194
+ def _build_model (self ) -> [ pd .DataFrame , Dict ] :
72
195
"""
73
196
Build the model.
74
197
The method that needs to be implemented on the particular model level.
0 commit comments