Skip to content

Commit 97c78b0

Browse files
committed
draft commit rcf
1 parent 077ec39 commit 97c78b0

File tree

1 file changed

+47
-17
lines changed

1 file changed

+47
-17
lines changed

ads/opctl/operator/lowcode/anomaly/model/randomcutforest.py

Lines changed: 47 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -30,32 +30,62 @@ def _build_model(self) -> AnomalyOutput:
3030

3131
model_kwargs = self.spec.model_kwargs
3232
# map the output as per anomaly dataset class, 1: outlier, 0: inlier
33-
self.outlier_map = {1: 0, -1: 1}
33+
# self.outlier_map = {1: 0, -1: 1}
3434

3535
anomaly_output = AnomalyOutput(date_column="index")
36-
#TODO: PDB
37-
import pdb
36+
# TODO: PDB
3837

39-
pdb.set_trace()
38+
# Set tree parameters
39+
num_trees = model_kwargs.get("num_trees", 200)
40+
shingle_size = model_kwargs.get("shingle_size", 1)
41+
tree_size = model_kwargs.get("tree_size", 1000)
4042

4143
for target, df in self.datasets.full_data_dict.items():
42-
model = RCTree(**model_kwargs)
43-
model.fit(df)
44-
y_pred = model.predict(df)
45-
y_pred = np.vectorize(self.outlier_map.get)(y_pred)
44+
df_values = df[self.spec.target_column].astype(float).values
45+
points = np.vstack(list(rrcf.shingle(df_values, size=4)))
4646

47-
scores = model.score_samples(df)
47+
sample_size_range = (1, 6)
48+
n = points.shape[0]
49+
avg_codisp = pd.Series(0.0, index=np.arange(n))
50+
index = np.zeros(n)
4851

49-
index_col = df.columns[0]
52+
forest = []
53+
while len(forest) < num_trees:
54+
ixs = np.random.choice(n, size=sample_size_range, replace=False)
55+
trees = [rrcf.RCTree(points[ix], index_labels=ix) for ix in ixs]
56+
forest.extend(trees)
57+
print(len(forest))
5058

51-
anomaly = pd.DataFrame(
52-
{index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred}
53-
).reset_index(drop=True)
54-
score = pd.DataFrame(
55-
{"index": df[index_col], OutputColumns.SCORE_COL: scores}
56-
).reset_index(drop=True)
59+
for tree in forest:
60+
codisp = pd.Series({leaf: tree.codisp(leaf) for leaf in tree.leaves})
61+
avg_codisp[codisp.index] += codisp
62+
np.add.at(index, codisp.index.values, 1)
5763

58-
anomaly_output.add_output(target, anomaly, score)
64+
avg_codisp /= index
65+
avg_codisp.index = df.iloc[(4 - 1) :].index
66+
avg_codisp = (avg_codisp - avg_codisp.min()) / (
67+
avg_codisp.max() - avg_codisp.min()
68+
)
69+
70+
y_pred = (avg_codisp > np.percentile(avg_codisp, 95)).astype(int)
71+
72+
import pdb
73+
74+
pdb.set_trace()
75+
print("Done")
76+
77+
# scores = model.score_samples(df)
78+
79+
# index_col = df.columns[0]
80+
81+
# anomaly = pd.DataFrame(
82+
# {index_col: df[index_col], OutputColumns.ANOMALY_COL: y_pred}
83+
# ).reset_index(drop=True)
84+
# score = pd.DataFrame(
85+
# {"index": df[index_col], OutputColumns.SCORE_COL: scores}
86+
# ).reset_index(drop=True)
87+
88+
# anomaly_output.add_output(target, anomaly, score)
5989

6090
return anomaly_output
6191

0 commit comments

Comments
 (0)