
🐛 sklearn/numpy 'array has an inhomogeneous shape after 1 dimensions'  #127

@sebapehl

Description


Running the more or less vanilla GitHub example code (only the window lengths were changed) for batch/streaming.

Batch training runs through, but streaming throws an error when scoring the test values, coming from sklearn and numpy functions.
Searching Stack Overflow for the error message points to a known and identifiable cause; I think the problem is rooted in the way the data array is handed to sklearn/numpy (a minimal sketch of the suspected cause follows the traceback below).

A reproducible example is available on Google Colab.

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-10-50dafe264e90> in <module>
      1 scoring_data = data_test.copy()
----> 2 score, scored_window = model.score(scoring_data)    # scoring_data is data over a time-window instead of a datapoint

8 frames
/usr/local/lib/python3.8/dist-packages/luminaire/model/window_density.py in score(self, data, **kwargs)
    710         agg_data = self._params['AggregatedData'][opt_timestamp]
    711 
--> 712         is_anomaly, prob_of_anomaly, attributes = self._call_scoring(df=data,
    713                                                                      target_metric=target_metric,
    714                                                                      anomaly_scores_gamma_alpha=anomaly_scores_gamma_alpha,

/usr/local/lib/python3.8/dist-packages/luminaire/model/window_density.py in _call_scoring(self, df, target_metric, anomaly_scores_gamma_alpha, anomaly_scores_gamma_loc, anomaly_scores_gamma_beta, baseline, detrend_order, detrend_method, agg_data_model, detection_method, attributes, agg_data)
    487         """
    488 
--> 489         is_anomaly, prob_of_anomaly = self._anomalous_region_detection(input_df=df, value_column=target_metric,
    490                                                                        called_for="scoring",
    491                                                                        anomaly_scores_gamma_alpha=anomaly_scores_gamma_alpha,

/usr/local/lib/python3.8/dist-packages/luminaire/model/window_density.py in _anomalous_region_detection(self, input_df, window_length, value_column, called_for, anomaly_scores_gamma_alpha, anomaly_scores_gamma_loc, anomaly_scores_gamma_beta, detrend_order, baseline, detrend_method, agg_data_model, past_model, detection_method, agg_data)
    776         elif called_for == "scoring":
    777 
--> 778             return self._get_result(input_df=input_df,
    779                                     detrend_order=detrend_order,
    780                                     agg_data_model=agg_data_model,

/usr/local/lib/python3.8/dist-packages/luminaire/model/window_density.py in _get_result(self, input_df, detrend_order, agg_data_model, value_column, detrend_method, baseline_type, detection_method, baseline, anomaly_scores_gamma_alpha, anomaly_scores_gamma_loc, anomaly_scores_gamma_beta, agg_data)
    620                     baseline_execution_data.append(current_adjusted_data)
    621                     pca = PCA()
--> 622                     scores = pca.fit_transform(StandardScaler().fit_transform(baseline_execution_data))
    623                     robust_cov = MinCovDet().fit(scores[:, :3])
    624                     mahalanobis_distance = robust_cov.mahalanobis(scores[:, :3])        # getting the top 3 dimensions

/usr/local/lib/python3.8/dist-packages/sklearn/base.py in fit_transform(self, X, y, **fit_params)
    850         if y is None:
    851             # fit method of arity 1 (unsupervised transformation)
--> 852             return self.fit(X, **fit_params).transform(X)
    853         else:
    854             # fit method of arity 2 (supervised transformation)

/usr/local/lib/python3.8/dist-packages/sklearn/preprocessing/_data.py in fit(self, X, y, sample_weight)
    804         # Reset internal state before fitting
    805         self._reset()
--> 806         return self.partial_fit(X, y, sample_weight)
    807 
    808     def partial_fit(self, X, y=None, sample_weight=None):

/usr/local/lib/python3.8/dist-packages/sklearn/preprocessing/_data.py in partial_fit(self, X, y, sample_weight)
    839         """
    840         first_call = not hasattr(self, "n_samples_seen_")
--> 841         X = self._validate_data(
    842             X,
    843             accept_sparse=("csr", "csc"),

/usr/local/lib/python3.8/dist-packages/sklearn/base.py in _validate_data(self, X, y, reset, validate_separately, **check_params)
    564             raise ValueError("Validation should be done on X, y or both.")
    565         elif not no_val_X and no_val_y:
--> 566             X = check_array(X, **check_params)
    567             out = X
    568         elif no_val_X and not no_val_y:

/usr/local/lib/python3.8/dist-packages/sklearn/utils/validation.py in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
    744                     array = array.astype(dtype, casting="unsafe", copy=False)
    745                 else:
--> 746                     array = np.asarray(array, order=order, dtype=dtype)
    747             except ComplexWarning as complex_warning:
    748                 raise ValueError(

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (52,) + inhomogeneous part.
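For reference, the same ValueError can be reproduced outside Luminaire whenever rows of unequal length are handed to sklearn, since they cannot be stacked into a regular 2-D array. Below is a minimal sketch (my own illustration, not code from the repository), assuming the baseline windows and the scored window end up with different lengths and a recent numpy (>= 1.24) is installed:

import numpy as np
from sklearn.preprocessing import StandardScaler

# Hypothetical ragged input: two windows of length 24 plus one window of
# length 20, analogous to baseline_execution_data in window_density.py
# when the configured window lengths disagree.
baseline_execution_data = [
    np.random.rand(24),
    np.random.rand(24),
    np.random.rand(20),   # different length -> ragged list of rows
]

# sklearn's check_array() calls np.asarray() on the ragged list, which raises:
# ValueError: setting an array element with a sequence. The requested array
# has an inhomogeneous shape after 1 dimensions. ...
StandardScaler().fit_transform(baseline_execution_data)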
