Skip to content

Commit d68ac6d

Browse files
committed
Optimised report loading for anomaly operator
1 parent 67e66f7 commit d68ac6d

File tree

4 files changed

+35
-6
lines changed

4 files changed

+35
-6
lines changed

ads/opctl/operator/lowcode/anomaly/model/base_model.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def generate_report(self):
7979
anomaly_output, test_data, elapsed_time
8080
)
8181
table_blocks = [
82-
rc.DataTable(df, label=col, index=True)
82+
rc.DataTable(df.head(1000) if self.spec.optimize_report and len(df) > 1000 else df, label=col, index=True)
8383
for col, df in self.datasets.full_data_dict.items()
8484
]
8585
data_table = rc.Select(blocks=table_blocks)
@@ -94,20 +94,35 @@ def generate_report(self):
9494
anomaly_col = anomaly_output.get_anomalies_by_cat(category=target)[
9595
OutputColumns.ANOMALY_COL
9696
]
97+
anomaly_indices = [i for i, index in enumerate(anomaly_col) if index == 1]
98+
downsampled_time_col = time_col
99+
selected_indices = list(range(len(time_col)))
100+
if self.spec.optimize_report:
101+
non_anomaly_indices = [i for i in range(len(time_col)) if i not in anomaly_indices]
102+
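# Anomalies are always kept; only non-anomalous points are thinned. When there are more than 1000 of them, keep every (count // 1000)-th one, which leaves between 1000 and 1999 points.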
103+
if len(non_anomaly_indices) > 1000:
104+
downsampled_non_anomaly_indices = non_anomaly_indices[::len(non_anomaly_indices)//1000]
105+
selected_indices = sorted(anomaly_indices + downsampled_non_anomaly_indices)
106+
downsampled_time_col = time_col[selected_indices]
107+
97108
columns = set(df.columns).difference({date_column})
98109
for col in columns:
99110
y = df[col].reset_index(drop=True)
111+
112+
downsampled_y = y[selected_indices]
113+
100114
fig, ax = plt.subplots(figsize=(8, 3), layout="constrained")
101115
ax.grid()
102-
ax.plot(time_col, y, color="black")
103-
for i, index in enumerate(anomaly_col):
104-
if index == 1:
105-
ax.scatter(time_col[i], y[i], color="red", marker="o")
116+
ax.plot(downsampled_time_col, downsampled_y, color="black")
117+
# Mark every anomaly using the full (non-downsampled) series so that no anomaly is dropped from the plot
118+
for i in anomaly_indices:
119+
ax.scatter(time_col[i], y[i], color="red", marker="o")
106120
plt.xlabel(date_column)
107121
plt.ylabel(col)
108122
plt.title(f"`{col}` with reference to anomalies")
109123
figure_blocks.append(rc.Widget(ax))
110-
blocks.append(rc.Group(*figure_blocks, label=target))
124+
125+
blocks.append(rc.Group(*figure_blocks, label=target))
111126
plots = rc.Select(blocks)
112127

113128
report_sections = []

ads/opctl/operator/lowcode/anomaly/operator_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ class AnomalyOperatorSpec(DataClassSerializable):
7777
model: str = None
7878
model_kwargs: Dict = field(default_factory=dict)
7979
contamination: float = None
80+
optimize_report: bool = None
8081

8182
def __post_init__(self):
8283
"""Adjusts the specification details."""

ads/opctl/operator/lowcode/anomaly/schema.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,4 +377,9 @@ spec:
377377
type: dict
378378
required: false
379379

380+
optimize_report:
381+
type: boolean
382+
required: false
383+
default: true
384+
380385
type: dict

anomaly_out/anomali_2_test.csv

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
PPG_Code,last_day_of_week,anomaly
2+
Product Group 107,2019-01-05,0
3+
Product Group 107,2019-01-12,0
4+
Product Group 107,2019-01-19,0
5+
Product Group 107,2019-01-26,0
6+
Product Group 107,2019-04-27,1
7+
Product Group 108,2019-05-04,1
8+
Product Group 108,2019-05-11,0

0 commit comments

Comments
 (0)