4
4
5
5
import numpy as np
6
6
import pytest
7
- from scipy .sparse import coo_matrix , csc_matrix , csr_matrix , dok_matrix , lil_matrix
8
7
9
8
from sklearn import datasets
10
9
from sklearn .base import BaseEstimator , clone
22
21
assert_array_equal ,
23
22
assert_array_less ,
24
23
)
24
+ from sklearn .utils .fixes import (
25
+ COO_CONTAINERS ,
26
+ CSC_CONTAINERS ,
27
+ CSR_CONTAINERS ,
28
+ DOK_CONTAINERS ,
29
+ LIL_CONTAINERS ,
30
+ )
25
31
26
32
# Common random state
27
33
rng = np .random .RandomState (0 )
@@ -308,7 +314,20 @@ def test_sample_weights_infinite():
308
314
clf .fit (iris .data , iris .target )
309
315
310
316
311
- def test_sparse_classification ():
317
+ @pytest .mark .parametrize (
318
+ "sparse_container, expected_internal_type" ,
319
+ zip (
320
+ [
321
+ * CSC_CONTAINERS ,
322
+ * CSR_CONTAINERS ,
323
+ * LIL_CONTAINERS ,
324
+ * COO_CONTAINERS ,
325
+ * DOK_CONTAINERS ,
326
+ ],
327
+ CSC_CONTAINERS + 4 * CSR_CONTAINERS ,
328
+ ),
329
+ )
330
+ def test_sparse_classification (sparse_container , expected_internal_type ):
312
331
# Check classification with sparse input.
313
332
314
333
class CustomSVC (SVC ):
@@ -328,80 +347,92 @@ def fit(self, X, y, sample_weight=None):
328
347
329
348
X_train , X_test , y_train , y_test = train_test_split (X , y , random_state = 0 )
330
349
331
- for sparse_format in [csc_matrix , csr_matrix , lil_matrix , coo_matrix , dok_matrix ]:
332
- X_train_sparse = sparse_format (X_train )
333
- X_test_sparse = sparse_format (X_test )
334
-
335
- # Trained on sparse format
336
- sparse_classifier = AdaBoostClassifier (
337
- estimator = CustomSVC (probability = True ),
338
- random_state = 1 ,
339
- algorithm = "SAMME" ,
340
- ).fit (X_train_sparse , y_train )
341
-
342
- # Trained on dense format
343
- dense_classifier = AdaBoostClassifier (
344
- estimator = CustomSVC (probability = True ),
345
- random_state = 1 ,
346
- algorithm = "SAMME" ,
347
- ).fit (X_train , y_train )
348
-
349
- # predict
350
- sparse_results = sparse_classifier .predict (X_test_sparse )
351
- dense_results = dense_classifier .predict (X_test )
352
- assert_array_equal (sparse_results , dense_results )
353
-
354
- # decision_function
355
- sparse_results = sparse_classifier .decision_function (X_test_sparse )
356
- dense_results = dense_classifier .decision_function (X_test )
357
- assert_array_almost_equal (sparse_results , dense_results )
358
-
359
- # predict_log_proba
360
- sparse_results = sparse_classifier .predict_log_proba (X_test_sparse )
361
- dense_results = dense_classifier .predict_log_proba (X_test )
362
- assert_array_almost_equal (sparse_results , dense_results )
363
-
364
- # predict_proba
365
- sparse_results = sparse_classifier .predict_proba (X_test_sparse )
366
- dense_results = dense_classifier .predict_proba (X_test )
367
- assert_array_almost_equal (sparse_results , dense_results )
368
-
369
- # score
370
- sparse_results = sparse_classifier .score (X_test_sparse , y_test )
371
- dense_results = dense_classifier .score (X_test , y_test )
372
- assert_array_almost_equal (sparse_results , dense_results )
373
-
374
- # staged_decision_function
375
- sparse_results = sparse_classifier .staged_decision_function (X_test_sparse )
376
- dense_results = dense_classifier .staged_decision_function (X_test )
377
- for sprase_res , dense_res in zip (sparse_results , dense_results ):
378
- assert_array_almost_equal (sprase_res , dense_res )
379
-
380
- # staged_predict
381
- sparse_results = sparse_classifier .staged_predict (X_test_sparse )
382
- dense_results = dense_classifier .staged_predict (X_test )
383
- for sprase_res , dense_res in zip (sparse_results , dense_results ):
384
- assert_array_equal (sprase_res , dense_res )
385
-
386
- # staged_predict_proba
387
- sparse_results = sparse_classifier .staged_predict_proba (X_test_sparse )
388
- dense_results = dense_classifier .staged_predict_proba (X_test )
389
- for sprase_res , dense_res in zip (sparse_results , dense_results ):
390
- assert_array_almost_equal (sprase_res , dense_res )
391
-
392
- # staged_score
393
- sparse_results = sparse_classifier .staged_score (X_test_sparse , y_test )
394
- dense_results = dense_classifier .staged_score (X_test , y_test )
395
- for sprase_res , dense_res in zip (sparse_results , dense_results ):
396
- assert_array_equal (sprase_res , dense_res )
397
-
398
- # Verify sparsity of data is maintained during training
399
- types = [i .data_type_ for i in sparse_classifier .estimators_ ]
400
-
401
- assert all ([(t == csc_matrix or t == csr_matrix ) for t in types ])
402
-
403
-
404
- def test_sparse_regression ():
350
+ X_train_sparse = sparse_container (X_train )
351
+ X_test_sparse = sparse_container (X_test )
352
+
353
+ # Trained on sparse format
354
+ sparse_classifier = AdaBoostClassifier (
355
+ estimator = CustomSVC (probability = True ),
356
+ random_state = 1 ,
357
+ algorithm = "SAMME" ,
358
+ ).fit (X_train_sparse , y_train )
359
+
360
+ # Trained on dense format
361
+ dense_classifier = AdaBoostClassifier (
362
+ estimator = CustomSVC (probability = True ),
363
+ random_state = 1 ,
364
+ algorithm = "SAMME" ,
365
+ ).fit (X_train , y_train )
366
+
367
+ # predict
368
+ sparse_clf_results = sparse_classifier .predict (X_test_sparse )
369
+ dense_clf_results = dense_classifier .predict (X_test )
370
+ assert_array_equal (sparse_clf_results , dense_clf_results )
371
+
372
+ # decision_function
373
+ sparse_clf_results = sparse_classifier .decision_function (X_test_sparse )
374
+ dense_clf_results = dense_classifier .decision_function (X_test )
375
+ assert_array_almost_equal (sparse_clf_results , dense_clf_results )
376
+
377
+ # predict_log_proba
378
+ sparse_clf_results = sparse_classifier .predict_log_proba (X_test_sparse )
379
+ dense_clf_results = dense_classifier .predict_log_proba (X_test )
380
+ assert_array_almost_equal (sparse_clf_results , dense_clf_results )
381
+
382
+ # predict_proba
383
+ sparse_clf_results = sparse_classifier .predict_proba (X_test_sparse )
384
+ dense_clf_results = dense_classifier .predict_proba (X_test )
385
+ assert_array_almost_equal (sparse_clf_results , dense_clf_results )
386
+
387
+ # score
388
+ sparse_clf_results = sparse_classifier .score (X_test_sparse , y_test )
389
+ dense_clf_results = dense_classifier .score (X_test , y_test )
390
+ assert_array_almost_equal (sparse_clf_results , dense_clf_results )
391
+
392
+ # staged_decision_function
393
+ sparse_clf_results = sparse_classifier .staged_decision_function (X_test_sparse )
394
+ dense_clf_results = dense_classifier .staged_decision_function (X_test )
395
+ for sparse_clf_res , dense_clf_res in zip (sparse_clf_results , dense_clf_results ):
396
+ assert_array_almost_equal (sparse_clf_res , dense_clf_res )
397
+
398
+ # staged_predict
399
+ sparse_clf_results = sparse_classifier .staged_predict (X_test_sparse )
400
+ dense_clf_results = dense_classifier .staged_predict (X_test )
401
+ for sparse_clf_res , dense_clf_res in zip (sparse_clf_results , dense_clf_results ):
402
+ assert_array_equal (sparse_clf_res , dense_clf_res )
403
+
404
+ # staged_predict_proba
405
+ sparse_clf_results = sparse_classifier .staged_predict_proba (X_test_sparse )
406
+ dense_clf_results = dense_classifier .staged_predict_proba (X_test )
407
+ for sparse_clf_res , dense_clf_res in zip (sparse_clf_results , dense_clf_results ):
408
+ assert_array_almost_equal (sparse_clf_res , dense_clf_res )
409
+
410
+ # staged_score
411
+ sparse_clf_results = sparse_classifier .staged_score (X_test_sparse , y_test )
412
+ dense_clf_results = dense_classifier .staged_score (X_test , y_test )
413
+ for sparse_clf_res , dense_clf_res in zip (sparse_clf_results , dense_clf_results ):
414
+ assert_array_equal (sparse_clf_res , dense_clf_res )
415
+
416
+ # Verify sparsity of data is maintained during training
417
+ types = [i .data_type_ for i in sparse_classifier .estimators_ ]
418
+
419
+ assert all ([t == expected_internal_type for t in types ])
420
+
421
+
422
+ @pytest .mark .parametrize (
423
+ "sparse_container, expected_internal_type" ,
424
+ zip (
425
+ [
426
+ * CSC_CONTAINERS ,
427
+ * CSR_CONTAINERS ,
428
+ * LIL_CONTAINERS ,
429
+ * COO_CONTAINERS ,
430
+ * DOK_CONTAINERS ,
431
+ ],
432
+ CSC_CONTAINERS + 4 * CSR_CONTAINERS ,
433
+ ),
434
+ )
435
+ def test_sparse_regression (sparse_container , expected_internal_type ):
405
436
# Check regression with sparse input.
406
437
407
438
class CustomSVR (SVR ):
@@ -419,34 +450,33 @@ def fit(self, X, y, sample_weight=None):
419
450
420
451
X_train , X_test , y_train , y_test = train_test_split (X , y , random_state = 0 )
421
452
422
- for sparse_format in [csc_matrix , csr_matrix , lil_matrix , coo_matrix , dok_matrix ]:
423
- X_train_sparse = sparse_format (X_train )
424
- X_test_sparse = sparse_format (X_test )
453
+ X_train_sparse = sparse_container (X_train )
454
+ X_test_sparse = sparse_container (X_test )
425
455
426
- # Trained on sparse format
427
- sparse_classifier = AdaBoostRegressor (
428
- estimator = CustomSVR (), random_state = 1
429
- ). fit ( X_train_sparse , y_train )
456
+ # Trained on sparse format
457
+ sparse_regressor = AdaBoostRegressor ( estimator = CustomSVR (), random_state = 1 ). fit (
458
+ X_train_sparse , y_train
459
+ )
430
460
431
- # Trained on dense format
432
- dense_classifier = dense_results = AdaBoostRegressor (
433
- estimator = CustomSVR (), random_state = 1
434
- ). fit ( X_train , y_train )
461
+ # Trained on dense format
462
+ dense_regressor = AdaBoostRegressor ( estimator = CustomSVR (), random_state = 1 ). fit (
463
+ X_train , y_train
464
+ )
435
465
436
- # predict
437
- sparse_results = sparse_classifier .predict (X_test_sparse )
438
- dense_results = dense_classifier .predict (X_test )
439
- assert_array_almost_equal (sparse_results , dense_results )
466
+ # predict
467
+ sparse_regr_results = sparse_regressor .predict (X_test_sparse )
468
+ dense_regr_results = dense_regressor .predict (X_test )
469
+ assert_array_almost_equal (sparse_regr_results , dense_regr_results )
440
470
441
- # staged_predict
442
- sparse_results = sparse_classifier .staged_predict (X_test_sparse )
443
- dense_results = dense_classifier .staged_predict (X_test )
444
- for sprase_res , dense_res in zip (sparse_results , dense_results ):
445
- assert_array_almost_equal (sprase_res , dense_res )
471
+ # staged_predict
472
+ sparse_regr_results = sparse_regressor .staged_predict (X_test_sparse )
473
+ dense_regr_results = dense_regressor .staged_predict (X_test )
474
+ for sparse_regr_res , dense_regr_res in zip (sparse_regr_results , dense_regr_results ):
475
+ assert_array_almost_equal (sparse_regr_res , dense_regr_res )
446
476
447
- types = [i .data_type_ for i in sparse_classifier .estimators_ ]
477
+ types = [i .data_type_ for i in sparse_regressor .estimators_ ]
448
478
449
- assert all ([( t == csc_matrix or t == csr_matrix ) for t in types ])
479
+ assert all ([t == expected_internal_type for t in types ])
450
480
451
481
452
482
def test_sample_weight_adaboost_regressor ():
0 commit comments