Skip to content

Commit 32d031e

Browse files
authored
Optimised report loading for anomaly operator (#897)
2 parents 18b69bc + d156f3b commit 32d031e

File tree

4 files changed

+29
-7
lines changed

4 files changed

+29
-7
lines changed

ads/opctl/operator/lowcode/anomaly/const.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,3 +94,4 @@ class OutputColumns(str, metaclass=ExtendedEnumMeta):
94 94

95 95

96 96
TODS_DEFAULT_MODEL = "ocsvm"
97+
SUBSAMPLE_THRESHOLD = 1000

ads/opctl/operator/lowcode/anomaly/model/base_model.py

Lines changed: 23 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
16 16

17 17
from ads.common.object_storage_details import ObjectStorageDetails
18 18
from ads.opctl import logger
19-
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns, SupportedMetrics
19+
from ads.opctl.operator.lowcode.anomaly.const import OutputColumns, SupportedMetrics, SUBSAMPLE_THRESHOLD
20 20
from ads.opctl.operator.lowcode.anomaly.utils import _build_metrics_df, default_signer
21 21
from ads.opctl.operator.lowcode.common.utils import (
22 22
disable_print,
@@ -79,7 +79,7 @@ def generate_report(self):
79 79
anomaly_output, test_data, elapsed_time
80 80
)
81 81
table_blocks = [
82-
rc.DataTable(df, label=col, index=True)
82+
rc.DataTable(df.head(SUBSAMPLE_THRESHOLD) if self.spec.subsample_report_data and len(df) > SUBSAMPLE_THRESHOLD else df, label=col, index=True)
83 83
for col, df in self.datasets.full_data_dict.items()
84 84
]
85 85
data_table = rc.Select(blocks=table_blocks)
@@ -94,20 +94,36 @@ def generate_report(self):
94 94
anomaly_col = anomaly_output.get_anomalies_by_cat(category=target)[
95 95
OutputColumns.ANOMALY_COL
96 96
]
97+
anomaly_indices = [i for i, index in enumerate(anomaly_col) if index == 1]
98+
downsampled_time_col = time_col
99+
selected_indices = list(range(len(time_col)))
100+
if self.spec.subsample_report_data:
101+
non_anomaly_indices = [i for i in range(len(time_col)) if i not in anomaly_indices]
102+
# Downsample non-anomalous data if it exceeds the threshold (1000)
103+
if len(non_anomaly_indices) > SUBSAMPLE_THRESHOLD:
104+
downsampled_non_anomaly_indices = non_anomaly_indices[::len(non_anomaly_indices)//SUBSAMPLE_THRESHOLD]
105+
selected_indices = anomaly_indices + downsampled_non_anomaly_indices
106+
selected_indices.sort()
107+
downsampled_time_col = time_col[selected_indices]
108+
97 109
columns = set(df.columns).difference({date_column})
98 110
for col in columns:
99 111
y = df[col].reset_index(drop=True)
112+
113+
downsampled_y = y[selected_indices]
114+
100 115
fig, ax = plt.subplots(figsize=(8, 3), layout="constrained")
101 116
ax.grid()
102-
ax.plot(time_col, y, color="black")
103-
for i, index in enumerate(anomaly_col):
104-
if index == 1:
105-
ax.scatter(time_col[i], y[i], color="red", marker="o")
117+
ax.plot(downsampled_time_col, downsampled_y, color="black")
118+
# Plot anomalies
119+
for i in anomaly_indices:
120+
ax.scatter(time_col[i], y[i], color="red", marker="o")
106 121
plt.xlabel(date_column)
107 122
plt.ylabel(col)
108 123
plt.title(f"`{col}` with reference to anomalies")
109 124
figure_blocks.append(rc.Widget(ax))
110-
blocks.append(rc.Group(*figure_blocks, label=target))
125+
126+
blocks.append(rc.Group(*figure_blocks, label=target))
111 127
plots = rc.Select(blocks)
112 128

113 129
report_sections = []

ads/opctl/operator/lowcode/anomaly/operator_config.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,7 @@ class AnomalyOperatorSpec(DataClassSerializable):
77 77
model: str = None
78 78
model_kwargs: Dict = field(default_factory=dict)
79 79
contamination: float = None
80+
subsample_report_data: bool = None
80 81

81 82
def __post_init__(self):
82 83
"""Adjusts the specification details."""

ads/opctl/operator/lowcode/anomaly/schema.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,4 +377,8 @@ spec:
377 377
type: dict
378 378
required: false
379 379

380+
subsample_report_data:
381+
type: boolean
382+
required: false
383+
380 384
type: dict

0 commit comments

Comments
 (0)