-
Notifications
You must be signed in to change notification settings - Fork 65
Open
Description
I am running the more-or-less vanilla GitHub example code for both batch and streaming; the only change is the definition of the window lengths.
The batch example runs through; however, the streaming example throws an error inside sklearn/numpy functions when scoring the test values.
Searching Stack Overflow for the error reveals a known and identifiable cause. I think the problem is rooted in the way the data array is passed to sklearn/numpy.
A reproducible example is available on Google Colab.
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
[<ipython-input-10-50dafe264e90>](https://localhost:8080/#) in <module>
1 scoring_data = data_test.copy()
----> 2 score, scored_window = model.score(scoring_data) # scoring_data is data over a time-window instead of a datapoint
8 frames
[/usr/local/lib/python3.8/dist-packages/luminaire/model/window_density.py](https://localhost:8080/#) in score(self, data, **kwargs)
710 agg_data = self._params['AggregatedData'][opt_timestamp]
711
--> 712 is_anomaly, prob_of_anomaly, attributes = self._call_scoring(df=data,
713 target_metric=target_metric,
714 anomaly_scores_gamma_alpha=anomaly_scores_gamma_alpha,
[/usr/local/lib/python3.8/dist-packages/luminaire/model/window_density.py](https://localhost:8080/#) in _call_scoring(self, df, target_metric, anomaly_scores_gamma_alpha, anomaly_scores_gamma_loc, anomaly_scores_gamma_beta, baseline, detrend_order, detrend_method, agg_data_model, detection_method, attributes, agg_data)
487 """
488
--> 489 is_anomaly, prob_of_anomaly = self._anomalous_region_detection(input_df=df, value_column=target_metric,
490 called_for="scoring",
491 anomaly_scores_gamma_alpha=anomaly_scores_gamma_alpha,
[/usr/local/lib/python3.8/dist-packages/luminaire/model/window_density.py](https://localhost:8080/#) in _anomalous_region_detection(self, input_df, window_length, value_column, called_for, anomaly_scores_gamma_alpha, anomaly_scores_gamma_loc, anomaly_scores_gamma_beta, detrend_order, baseline, detrend_method, agg_data_model, past_model, detection_method, agg_data)
776 elif called_for == "scoring":
777
--> 778 return self._get_result(input_df=input_df,
779 detrend_order=detrend_order,
780 agg_data_model=agg_data_model,
[/usr/local/lib/python3.8/dist-packages/luminaire/model/window_density.py](https://localhost:8080/#) in _get_result(self, input_df, detrend_order, agg_data_model, value_column, detrend_method, baseline_type, detection_method, baseline, anomaly_scores_gamma_alpha, anomaly_scores_gamma_loc, anomaly_scores_gamma_beta, agg_data)
620 baseline_execution_data.append(current_adjusted_data)
621 pca = PCA()
--> 622 scores = pca.fit_transform(StandardScaler().fit_transform(baseline_execution_data))
623 robust_cov = MinCovDet().fit(scores[:, :3])
624 mahalanobis_distance = robust_cov.mahalanobis(scores[:, :3]) # getting the top 3 dimensions
[/usr/local/lib/python3.8/dist-packages/sklearn/base.py](https://localhost:8080/#) in fit_transform(self, X, y, **fit_params)
850 if y is None:
851 # fit method of arity 1 (unsupervised transformation)
--> 852 return self.fit(X, **fit_params).transform(X)
853 else:
854 # fit method of arity 2 (supervised transformation)
[/usr/local/lib/python3.8/dist-packages/sklearn/preprocessing/_data.py](https://localhost:8080/#) in fit(self, X, y, sample_weight)
804 # Reset internal state before fitting
805 self._reset()
--> 806 return self.partial_fit(X, y, sample_weight)
807
808 def partial_fit(self, X, y=None, sample_weight=None):
[/usr/local/lib/python3.8/dist-packages/sklearn/preprocessing/_data.py](https://localhost:8080/#) in partial_fit(self, X, y, sample_weight)
839 """
840 first_call = not hasattr(self, "n_samples_seen_")
--> 841 X = self._validate_data(
842 X,
843 accept_sparse=("csr", "csc"),
[/usr/local/lib/python3.8/dist-packages/sklearn/base.py](https://localhost:8080/#) in _validate_data(self, X, y, reset, validate_separately, **check_params)
564 raise ValueError("Validation should be done on X, y or both.")
565 elif not no_val_X and no_val_y:
--> 566 X = check_array(X, **check_params)
567 out = X
568 elif no_val_X and not no_val_y:
[/usr/local/lib/python3.8/dist-packages/sklearn/utils/validation.py](https://localhost:8080/#) in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
744 array = array.astype(dtype, casting="unsafe", copy=False)
745 else:
--> 746 array = np.asarray(array, order=order, dtype=dtype)
747 except ComplexWarning as complex_warning:
748 raise ValueError(
ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (52,) + inhomogeneous part.
Metadata
Metadata
Assignees
Labels
No labels