@@ -28,6 +28,18 @@ def __init__(self, spec: AnomalyOperatorSpec):
        super().__init__(spec=spec, name="test_data")


+class ValidationData(AbstractData):
+    def __init__(self, spec: AnomalyOperatorSpec):
+        super().__init__(spec=spec, name="validation_data")
+
+    def _ingest_data(self, spec):
+        self.X_valid_dict = dict()
+        self.y_valid_dict = dict()
+        for s_id, df in self.get_dict_by_series().items():
+            self.X_valid_dict[s_id] = df.drop([OutputColumns.ANOMALY_COL], axis=1)
+            self.y_valid_dict[s_id] = df[OutputColumns.ANOMALY_COL]
+
+
class AnomalyDatasets:
    def __init__(self, spec: AnomalyOperatorSpec):
        """Instantiates the DataIO instance.
@@ -39,63 +51,23 @@ def __init__(self, spec: AnomalyOperatorSpec):
        """
        self._data = AnomalyData(spec)
        self.data = self._data.get_data_long()
-        # self.test_data = None
-        # self.target_columns = None
        self.full_data_dict = self._data.get_dict_by_series()
-        # self._load_data(spec)
-
-    # def _load_data(self, spec):
-    #     """Loads anomaly input data."""
-    #     try:
-    #         self.data = load_data(
-    #             filename=spec.input_data.url,
-    #             format=spec.input_data.format,
-    #             columns=spec.input_data.columns,
-    #         )
-    #     except InvalidParameterError as e:
-    #         e.args = e.args + ("Invalid Parameter: input_data",)
-    #         raise e
-    #     date_col = spec.datetime_column.name
-    #     self.data[date_col] = pd.to_datetime(self.data[date_col])
-    #     try:
-    #         spec.freq = get_frequency_of_datetime(self.data, spec)
-    #     except TypeError as e:
-    #         logger.warn(
-    #             f"Error determining frequency: {e.args}. Setting Frequency to None"
-    #         )
-    #         logger.debug(f"Full traceback: {e}")
-    #         spec.freq = None
-
-    #     if spec.target_category_columns is None:
-    #         if spec.target_column is None:
-    #             target_col = [
-    #                 col
-    #                 for col in self.data.columns
-    #                 if col not in [spec.datetime_column.name]
-    #             ]
-    #             spec.target_column = target_col[0]
-    #         self.full_data_dict = {spec.target_column: self.data}
-    #     else:
-    #         # Merge target category columns
-
-    #         self.data[OutputColumns.Series] = merge_category_columns(
-    #             self.data, spec.target_category_columns
-    #         )
-    #         unique_categories = self.data[OutputColumns.Series].unique()
-    #         self.full_data_dict = dict()
-
-    #         for cat in unique_categories:
-    #             data_by_cat = self.data[self.data[OutputColumns.Series] == cat].drop(
-    #                 spec.target_category_columns + [OutputColumns.Series], axis=1
-    #             )
-    #             self.full_data_dict[cat] = data_by_cat
+        if spec.validation_data is not None:
+            self.valid_data = ValidationData(spec)
+            self.X_valid_dict = self.valid_data.X_valid_dict
+            self.y_valid_dict = self.valid_data.y_valid_dict


class AnomalyOutput:
    def __init__(self, date_column):
        self.category_map = dict()
        self.date_column = date_column

+    def list_categories(self):
+        categories = list(self.category_map.keys())
+        categories.sort()
+        return categories
+
    def add_output(self, category: str, anomalies: pd.DataFrame, scores: pd.DataFrame):
        self.category_map[category] = (anomalies, scores)
@@ -126,7 +98,7 @@ def get_outliers_by_cat(self, category: str, data: pd.DataFrame):
    def get_inliers(self, data):
        inliers = pd.DataFrame()

-        for category in self.category_map.keys():
+        for category in self.list_categories():
            inliers = pd.concat(
                [
                    inliers,
@@ -145,7 +117,7 @@ def get_inliers(self, data):
    def get_outliers(self, data):
        outliers = pd.DataFrame()

-        for category in self.category_map.keys():
+        for category in self.list_categories():
            outliers = pd.concat(
                [
                    outliers,
@@ -163,10 +135,10 @@ def get_outliers(self, data):

    def get_scores(self, target_category_columns):
        if target_category_columns is None:
-            return self.get_scores_by_cat(list(self.category_map.keys())[0])
+            return self.get_scores_by_cat(self.list_categories()[0])

        scores = pd.DataFrame()
-        for category in self.category_map.keys():
+        for category in self.list_categories():
            score = self.get_scores_by_cat(category)
            score[target_category_columns[0]] = category
            scores = pd.concat([scores, score], axis=0, ignore_index=True)
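
A minimal, self-contained sketch of what the additions do: the per-series feature/label split performed by ValidationData._ingest_data and the deterministic ordering returned by AnomalyOutput.list_categories. The column name "anomaly" and the toy series ids below are illustrative stand-ins for OutputColumns.ANOMALY_COL and the real frames returned by get_dict_by_series(); they are not part of the change itself.

import pandas as pd

# Toy per-series validation frames keyed by series id (stand-in data).
series_dict = {
    "series_b": pd.DataFrame({"value": [1.0, 9.5, 1.1], "anomaly": [0, 1, 0]}),
    "series_a": pd.DataFrame({"value": [2.0, 2.1, 8.7], "anomaly": [0, 0, 1]}),
}

# Mirrors ValidationData._ingest_data: drop the label column to get features (X)
# and keep it separately as labels (y), per series.
X_valid_dict = {s_id: df.drop(["anomaly"], axis=1) for s_id, df in series_dict.items()}
y_valid_dict = {s_id: df["anomaly"] for s_id, df in series_dict.items()}

# Mirrors AnomalyOutput.list_categories: iterate categories in sorted order so
# downstream concatenation (get_inliers/get_outliers/get_scores) is deterministic.
for category in sorted(series_dict.keys()):
    print(category, X_valid_dict[category].shape, int(y_valid_dict[category].sum()))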