
🐛 sklearn/numpy 'array has an inhomogeneous shape after 1 dimensions'  #127

@sebapehl

Description


Running the more or less vanilla GitHub example code (only the window lengths were changed) for batch/streaming.

Batch training runs through, but streaming throws an error when scoring the test values, coming from sklearn and numpy functions.
Searching Stack Overflow for the error message points to a known and identifiable cause; I think the problem is rooted in the way the data array is handed to sklearn/numpy (a minimal sketch of the suspected cause follows the traceback below).

A reproducible example is available on Google Colab.

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-10-50dafe264e90> in <module>
      1 scoring_data = data_test.copy()
----> 2 score, scored_window = model.score(scoring_data)    # scoring_data is data over a time-window instead of a datapoint

8 frames
/usr/local/lib/python3.8/dist-packages/luminaire/model/window_density.py in score(self, data, **kwargs)
    710         agg_data = self._params['AggregatedData'][opt_timestamp]
    711 
--> 712         is_anomaly, prob_of_anomaly, attributes = self._call_scoring(df=data,
    713                                                                      target_metric=target_metric,
    714                                                                      anomaly_scores_gamma_alpha=anomaly_scores_gamma_alpha,

/usr/local/lib/python3.8/dist-packages/luminaire/model/window_density.py in _call_scoring(self, df, target_metric, anomaly_scores_gamma_alpha, anomaly_scores_gamma_loc, anomaly_scores_gamma_beta, baseline, detrend_order, detrend_method, agg_data_model, detection_method, attributes, agg_data)
    487         """
    488 
--> 489         is_anomaly, prob_of_anomaly = self._anomalous_region_detection(input_df=df, value_column=target_metric,
    490                                                                        called_for="scoring",
    491                                                                        anomaly_scores_gamma_alpha=anomaly_scores_gamma_alpha,

/usr/local/lib/python3.8/dist-packages/luminaire/model/window_density.py in _anomalous_region_detection(self, input_df, window_length, value_column, called_for, anomaly_scores_gamma_alpha, anomaly_scores_gamma_loc, anomaly_scores_gamma_beta, detrend_order, baseline, detrend_method, agg_data_model, past_model, detection_method, agg_data)
    776         elif called_for == "scoring":
    777 
--> 778             return self._get_result(input_df=input_df,
    779                                     detrend_order=detrend_order,
    780                                     agg_data_model=agg_data_model,

/usr/local/lib/python3.8/dist-packages/luminaire/model/window_density.py in _get_result(self, input_df, detrend_order, agg_data_model, value_column, detrend_method, baseline_type, detection_method, baseline, anomaly_scores_gamma_alpha, anomaly_scores_gamma_loc, anomaly_scores_gamma_beta, agg_data)
    620                     baseline_execution_data.append(current_adjusted_data)
    621                     pca = PCA()
--> 622                     scores = pca.fit_transform(StandardScaler().fit_transform(baseline_execution_data))
    623                     robust_cov = MinCovDet().fit(scores[:, :3])
    624                     mahalanobis_distance = robust_cov.mahalanobis(scores[:, :3])        # getting the top 3 dimensions

/usr/local/lib/python3.8/dist-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
    850         if y is None:
    851             # fit method of arity 1 (unsupervised transformation)
--> 852             return self.fit(X, **fit_params).transform(X)
    853         else:
    854             # fit method of arity 2 (supervised transformation)

/usr/local/lib/python3.8/dist-packages/sklearn/preprocessing/_data.py in fit(self, X, y, sample_weight)
    804         # Reset internal state before fitting
    805         self._reset()
--> 806         return self.partial_fit(X, y, sample_weight)
    807 
    808     def partial_fit(self, X, y=None, sample_weight=None):

/usr/local/lib/python3.8/dist-packages/sklearn/preprocessing/_data.py in partial_fit(self, X, y, sample_weight)
    839         """
    840         first_call = not hasattr(self, "n_samples_seen_")
--> 841         X = self._validate_data(
    842             X,
    843             accept_sparse=("csr", "csc"),

/usr/local/lib/python3.8/dist-packages/sklearn/base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
    564             raise ValueError("Validation should be done on X, y or both.")
    565         elif not no_val_X and no_val_y:
--> 566             X = check_array(X, **check_params)
    567             out = X
    568         elif no_val_X and not no_val_y:

/usr/local/lib/python3.8/dist-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
    744                     array = array.astype(dtype, casting="unsafe", copy=False)
    745                 else:
--> 746                     array = np.asarray(array, order=order, dtype=dtype)
    747             except ComplexWarning as complex_warning:
    748                 raise ValueError(

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (52,) + inhomogeneous part.
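For reference, the same ValueError can be reproduced outside Luminaire whenever rows of unequal length are handed to sklearn, since they cannot be stacked into a regular 2-D array. Below is a minimal sketch (my own illustration, not code from the repository), assuming the baseline windows and the scored window end up with different lengths and a recent numpy (>= 1.24) is installed:

import numpy as np
from sklearn.preprocessing import StandardScaler

# Hypothetical ragged input: two windows of length 24 plus one window of
# length 20, analogous to baseline_execution_data in window_density.py
# when the configured window lengths disagree.
baseline_execution_data = [
    np.random.rand(24),
    np.random.rand(24),
    np.random.rand(20),   # different length -> ragged list of rows
]

# sklearn's check_array() calls np.asarray() on the ragged list, which raises:
# ValueError: setting an array element with a sequence. The requested array
# has an inhomogeneous shape after 1 dimensions. ...
StandardScaler().fit_transform(baseline_execution_data)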
