@@ -371,6 +371,40 @@ def test_select_heuristics_regression():
371
371
assert_less (np .sum (support [5 :] == 1 ), 3 )
372
372
373
373
374
def test_boundary_case_ch2():
    """Check chi2 and the univariate selectors on a tiny boundary case.

    The data has three samples and two features.  The chi2 scores and
    p-values are pinned first; afterwards every selector is configured so
    that it is expected to keep exactly one feature (the first one).
    """
    features = np.array([[10, 20], [20, 20], [20, 30]])
    target = np.array([[1], [0], [0]])

    # Pin the raw statistics that all selectors below are driven by.
    chi2_scores, chi2_pvalues = chi2(features, target)
    assert_array_almost_equal(chi2_scores, np.array([4., 0.71428571]))
    assert_array_almost_equal(
        chi2_pvalues, np.array([0.04550026, 0.39802472]))

    # Each selector is built, fitted and checked in turn; all of them must
    # retain the first feature and drop the second.
    selector_configs = (
        (SelectFdr, {"alpha": 0.1}),
        (SelectKBest, {"k": 1}),
        (SelectPercentile, {"percentile": 50}),
        (SelectFpr, {"alpha": 0.1}),
        (SelectFwe, {"alpha": 0.1}),
    )
    for selector_cls, params in selector_configs:
        selector = selector_cls(chi2, **params)
        selector.fit(features, target)
        assert_array_equal(selector.get_support(), np.array([True, False]))
406
+
407
+
374
408
def test_select_fdr_regression ():
375
409
# Test that fdr heuristic actually has low FDR.
376
410
def single_fdr (alpha , n_informative , random_state ):
@@ -404,7 +438,7 @@ def single_fdr(alpha, n_informative, random_state):
404
438
# FDR = E(FP / (TP + FP)) <= alpha
405
439
false_discovery_rate = np .mean ([single_fdr (alpha , n_informative ,
406
440
random_state ) for
407
- random_state in range (30 )])
441
+ random_state in range (100 )])
408
442
assert_greater_equal (alpha , false_discovery_rate )
409
443
410
444
# Make sure that the empirical false discovery rate increases
0 commit comments