Skip to content

Commit ac2ea19

Browse files
fixed some more unit tests
1 parent 221443e commit ac2ea19

File tree

5 files changed

+336
-367
lines changed

5 files changed

+336
-367
lines changed

diffxpy/testing/tests.py

Lines changed: 60 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -479,17 +479,6 @@ def wald(
479479
`training_strategy(estimator)`.
480480
- list of keyword dicts containing method arguments: Will call Estimator.train() once with each dict of
481481
method arguments.
482-
483-
Example:
484-
485-
.. code-block:: python
486-
487-
[
488-
{"learning_rate": 0.5, },
489-
{"learning_rate": 0.05, },
490-
]
491-
492-
This will run training first with learning rate = 0.5 and then with learning rate = 0.05.
493482
:param quick_scale: Depending on the optimizer, `scale` will be fitted faster and maybe less accurate.
494483
495484
Useful in scenarios where fitting the exact `scale` is not absolutely necessary.
@@ -731,7 +720,7 @@ def two_sample(
731720
The exact tests are as follows (assuming the group labels
732721
are saved in a column named "group"):
733722
734-
- lrt(log-likelihood ratio test):
723+
- "lrt" - log-likelihood ratio test:
735724
Requires the fitting of 2 generalized linear models (full and reduced).
736725
The models are automatically assembled as follows, use the de.test.lrt()
737726
function if you would like to perform a different test.
@@ -740,15 +729,15 @@ def two_sample(
740729
* full model scale parameter: ~ 1
741730
* reduced model location parameter: ~ 1
742731
* reduced model scale parameter: ~ 1
743-
- Wald test:
732+
- "wald" - Wald test:
744733
Requires the fitting of 1 generalized linear models.
745734
model location parameter: ~ 1 + group
746735
model scale parameter: ~ 1 + group
747736
Test the group coefficient of the location parameter model against 0.
748-
- t-test:
737+
- "t-test" - Welch's t-test:
749738
Doesn't require fitting of generalized linear models.
750739
Welch's t-test between both observation groups.
751-
- wilcoxon:
740+
- "rank" - Wilcoxon rank sum (Mann-Whitney U) test:
752741
Doesn't require fitting of generalized linear models.
753742
Wilcoxon rank sum (Mann-Whitney U) test between both observation groups.
754743
@@ -769,33 +758,22 @@ def two_sample(
769758
- 'wald': default
770759
- 'lrt'
771760
- 't-test'
772-
- 'wilcoxon'
761+
- 'rank'
773762
:param gene_names: optional list/array of gene names which will be used if `data` does not implicitly store these
774763
:param sample_description: optional pandas.DataFrame containing sample annotations
764+
:param size_factors: 1D array of transformed library size factors for each cell in the
765+
same order as in data
775766
:param noise_model: str, noise model to use in model-based tests. Possible options:
776767
777768
- 'nb': default
778-
:param size_factors: 1D array of transformed library size factors for each cell in the
779-
same order as in data
780-
:param batch_size: the batch size to use for the estimator
769+
:param batch_size: The batch size to use for the estimator.
781770
:param training_strategy: {str, function, list} training strategy to use. Can be:
782771
783772
- str: will use Estimator.TrainingStrategy[training_strategy] to train
784773
- function: Can be used to implement a custom training function; it will be called as
785774
`training_strategy(estimator)`.
786775
- list of keyword dicts containing method arguments: Will call Estimator.train() once with each dict of
787776
method arguments.
788-
789-
Example:
790-
791-
.. code-block:: python
792-
793-
[
794-
{"learning_rate": 0.5, },
795-
{"learning_rate": 0.05, },
796-
]
797-
798-
This will run training first with learning rate = 0.5 and then with learning rate = 0.05.
799777
:param quick_scale: Depending on the optimizer, `scale` will be fitted faster and maybe less accurate.
800778
801779
Useful in scenarios where fitting the exact `scale` is not absolutely necessary.
@@ -804,8 +782,8 @@ def two_sample(
804782
Should be "float32" for single precision or "float64" for double precision.
805783
:param kwargs: [Debugging] Additional arguments will be passed to the _fit method.
806784
"""
807-
if test in ['t-test', 'wilcoxon'] and noise_model is not None:
808-
raise ValueError('base.two_sample(): Do not specify `noise_model` if using test t-test or wilcoxon: ' +
785+
if test in ['t-test', 'rank'] and noise_model is not None:
786+
raise ValueError('base.two_sample(): Do not specify `noise_model` if using test t-test or rank_test: ' +
809787
'The t-test is based on a gaussian noise model and wilcoxon is model free.')
810788

811789
gene_names = parse_gene_names(data, gene_names)
@@ -849,9 +827,9 @@ def two_sample(
849827
if noise_model is None:
850828
raise ValueError("Please specify noise_model")
851829
full_formula_loc = '~ 1 + grouping'
852-
full_formula_scale = '~ 1 + grouping'
830+
full_formula_scale = '~ 1'
853831
reduced_formula_loc = '~ 1'
854-
reduced_formula_scale = '~ 1 + grouping'
832+
reduced_formula_scale = '~ 1'
855833
de_test = lrt(
856834
data=X,
857835
full_formula_loc=full_formula_loc,
@@ -876,7 +854,7 @@ def two_sample(
876854
grouping=grouping,
877855
dtype=dtype
878856
)
879-
elif test.lower() == 'wilcoxon':
857+
elif test.lower() == 'rank':
880858
de_test = rank_test(
881859
data=X,
882860
gene_names=gene_names,
@@ -898,12 +876,12 @@ def pairwise(
898876
gene_names: Union[np.ndarray, list] = None,
899877
sample_description: pd.DataFrame = None,
900878
noise_model: str = None,
901-
pval_correction: str = "global",
902879
size_factors: np.ndarray = None,
903880
batch_size: int = None,
904881
training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
905882
quick_scale: bool = None,
906883
dtype="float64",
884+
pval_correction: str = "global",
907885
keep_full_test_objs: bool = False,
908886
**kwargs
909887
):
@@ -922,22 +900,22 @@ def pairwise(
922900
on the subset of the data that only contains observations of a given
923901
pair of groups:
924902
925-
- lrt(log-likelihood ratio test):
903+
- "lrt" - log-likelihood ratio test:
926904
Requires the fitting of 2 generalized linear models (full and reduced).
927905
928906
* full model location parameter: ~ 1 + group
929907
* full model scale parameter: ~ 1 + group
930908
* reduced model location parameter: ~ 1
931909
* reduced model scale parameter: ~ 1 + group
932-
- Wald test:
910+
- "wald" - Wald test:
933911
Requires the fitting of 1 generalized linear models.
934912
model location parameter: ~ 1 + group
935913
model scale parameter: ~ 1 + group
936914
Test the group coefficient of the location parameter model against 0.
937-
- t-test:
915+
- "t-test" - Welch's t-test:
938916
Doesn't require fitting of generalized linear models.
939917
Welch's t-test between both observation groups.
940-
- wilcoxon:
918+
- "rank" - Wilcoxon rank sum (Mann-Whitney U) test:
941919
Doesn't require fitting of generalized linear models.
942920
Wilcoxon rank sum (Mann-Whitney U) test between both observation groups.
943921
@@ -959,7 +937,7 @@ def pairwise(
959937
- 'wald'
960938
- 'lrt'
961939
- 't-test'
962-
- 'wilcoxon'
940+
- 'rank'
963941
:param lazy: bool, whether to enable lazy results evaluation.
964942
This is only possible if test=="ztest" and yields an output object which computes
965943
p-values etc. only upon request of certain pairs. This makes sense if the entire
@@ -968,41 +946,30 @@ def pairwise(
968946
a certain subset of the pairwise comparisons is desired anyway.
969947
:param gene_names: optional list/array of gene names which will be used if `data` does not implicitly store these
970948
:param sample_description: optional pandas.DataFrame containing sample annotations
949+
:param size_factors: 1D array of transformed library size factors for each cell in the
950+
same order as in data
971951
:param noise_model: str, noise model to use in model-based tests. Possible options:
972952
973953
- 'nb': default
974-
:param pval_correction: Choose between global and test-wise correction.
975-
Can be:
976-
977-
- "global": correct all p-values in one operation
978-
- "by_test": correct the p-values of each test individually
979-
:param size_factors: 1D array of transformed library size factors for each cell in the
980-
same order as in data
981-
:param batch_size: the batch size to use for the estimator
954+
:param batch_size: The batch size to use for the estimator.
982955
:param training_strategy: {str, function, list} training strategy to use. Can be:
983956
984957
- str: will use Estimator.TrainingStrategy[training_strategy] to train
985958
- function: Can be used to implement a custom training function; it will be called as
986959
`training_strategy(estimator)`.
987960
- list of keyword dicts containing method arguments: Will call Estimator.train() once with each dict of
988961
method arguments.
989-
990-
Example:
991-
992-
.. code-block:: python
993-
994-
[
995-
{"learning_rate": 0.5, },
996-
{"learning_rate": 0.05, },
997-
]
998-
999-
This will run training first with learning rate = 0.5 and then with learning rate = 0.05.
1000962
:param quick_scale: Depending on the optimizer, `scale` will be fitted faster and maybe less accurate.
1001963
1002964
Useful in scenarios where fitting the exact `scale` is not absolutely necessary.
1003965
:param dtype: Allows specifying the precision which should be used to fit data.
1004966
1005967
Should be "float32" for single precision or "float64" for double precision.
968+
:param pval_correction: Choose between global and test-wise correction.
969+
Can be:
970+
971+
- "global": correct all p-values in one operation
972+
- "by_test": correct the p-values of each test individually
1006973
:param keep_full_test_objs: [Debugging] keep the individual test objects; currently valid for test != "z-test"
1007974
:param kwargs: [Debugging] Additional arguments will be passed to the _fit method.
1008975
"""
@@ -1115,12 +1082,12 @@ def versus_rest(
11151082
gene_names: Union[np.ndarray, list] = None,
11161083
sample_description: pd.DataFrame = None,
11171084
noise_model: str = None,
1118-
pval_correction: str = "global",
11191085
size_factors: np.ndarray = None,
11201086
batch_size: int = None,
11211087
training_strategy: Union[str, List[Dict[str, object]], Callable] = "AUTO",
11221088
quick_scale: bool = None,
11231089
dtype="float64",
1090+
pval_correction: str = "global",
11241091
keep_full_test_objs: bool = False,
11251092
**kwargs
11261093
):
@@ -1140,22 +1107,22 @@ def versus_rest(
11401107
is one group and the remaining groups are allocated to the second reference
11411108
group):
11421109
1143-
- lrt(log-likelihood ratio test):
1110+
- "lrt" - log-likelihood ratio test:
11441111
Requires the fitting of 2 generalized linear models (full and reduced).
11451112
11461113
* full model location parameter: ~ 1 + group
11471114
* full model scale parameter: ~ 1 + group
11481115
* reduced model location parameter: ~ 1
11491116
* reduced model scale parameter: ~ 1 + group
1150-
- Wald test:
1117+
- "wald" - Wald test:
11511118
Requires the fitting of 1 generalized linear models.
11521119
model location parameter: ~ 1 + group
11531120
model scale parameter: ~ 1 + group
11541121
Test the group coefficient of the location parameter model against 0.
1155-
- t-test:
1122+
- "t-test" - Welch's t-test:
11561123
Doesn't require fitting of generalized linear models.
11571124
Welch's t-test between both observation groups.
1158-
- wilcoxon:
1125+
- "rank" - Wilcoxon rank sum (Mann-Whitney U) test:
11591126
Doesn't require fitting of generalized linear models.
11601127
Wilcoxon rank sum (Mann-Whitney U) test between both observation groups.
11611128
@@ -1171,50 +1138,43 @@ def versus_rest(
11711138
which do not correspond to one-hot encoded discrete factors.
11721139
This makes sense for number of genes, time, pseudotime or space
11731140
for example.
1174-
:param test: str, statistical test to use. Possible options:
1141+
:param test: str, statistical test to use. Possible options (see function description):
11751142
11761143
- 'wald'
11771144
- 'lrt'
11781145
- 't-test'
1179-
- 'wilcoxon'
1146+
- 'rank'
11801147
:param gene_names: optional list/array of gene names which will be used if `data` does not implicitly store these
11811148
:param sample_description: optional pandas.DataFrame containing sample annotations
1182-
:param noise_model: str, noise model to use in model-based unit_test. Possible options:
1183-
1184-
- 'nb': default
11851149
:param pval_correction: Choose between global and test-wise correction.
11861150
Can be:
11871151
11881152
- "global": correct all p-values in one operation
11891153
- "by_test": correct the p-values of each test individually
11901154
:param size_factors: 1D array of transformed library size factors for each cell in the
11911155
same order as in data
1192-
:param batch_size: the batch size to use for the estimator
1156+
:param noise_model: str, noise model to use in model-based tests. Possible options:
1157+
1158+
- 'nb': default
1159+
:param batch_size: The batch size to use for the estimator.
11931160
:param training_strategy: {str, function, list} training strategy to use. Can be:
11941161
11951162
- str: will use Estimator.TrainingStrategy[training_strategy] to train
11961163
- function: Can be used to implement a custom training function; it will be called as
11971164
`training_strategy(estimator)`.
11981165
- list of keyword dicts containing method arguments: Will call Estimator.train() once with each dict of
11991166
method arguments.
1200-
1201-
Example:
1202-
1203-
.. code-block:: python
1204-
1205-
[
1206-
{"learning_rate": 0.5, },
1207-
{"learning_rate": 0.05, },
1208-
]
1209-
1210-
This will run training first with learning rate = 0.5 and then with learning rate = 0.05.
12111167
:param quick_scale: Depending on the optimizer, `scale` will be fitted faster and maybe less accurate.
12121168
1213-
Useful in scenarios where fitting the exact `scale` is not
1169+
Useful in scenarios where fitting the exact `scale` is not absolutely necessary.
12141170
:param dtype: Allows specifying the precision which should be used to fit data.
12151171
12161172
Should be "float32" for single precision or "float64" for double precision.
1217-
:param keep_full_test_objs: [Debugging] keep the individual test objects; currently valid for test != "z-test"
1173+
:param pval_correction: Choose between global and test-wise correction.
1174+
Can be:
1175+
1176+
- "global": correct all p-values in one operation
1177+
- "by_test": correct the p-values of each test individually
12181178
:param kwargs: [Debugging] Additional arguments will be passed to the _fit method.
12191179
"""
12201180
if len(kwargs) != 0:
@@ -1350,7 +1310,9 @@ def two_sample(
13501310
**kwargs
13511311
) -> _DifferentialExpressionTestMulti:
13521312
"""
1353-
See annotation of de.test.two_sample()
1313+
Performs a two-sample test within each partition of a data set.
1314+
1315+
See also annotation of de.test.two_sample()
13541316
13551317
:param grouping: str
13561318
@@ -1366,7 +1328,7 @@ def two_sample(
13661328
- 'wald': default
13671329
- 'lrt'
13681330
- 't-test'
1369-
- 'wilcoxon'
1331+
- 'rank'
13701332
:param size_factors: 1D array of transformed library size factors for each cell in the
13711333
same order as in data
13721334
:param noise_model: str, noise model to use in model-based tests. Possible options:
@@ -1410,7 +1372,9 @@ def t_test(
14101372
dtype="float64"
14111373
):
14121374
"""
1413-
See annotation of de.test.t_test()
1375+
Performs a Welch's t-test within each partition of a data set.
1376+
1377+
See also annotation of de.test.t_test()
14141378
14151379
:param grouping: str
14161380
@@ -1442,7 +1406,9 @@ def rank_test(
14421406
dtype="float64"
14431407
):
14441408
"""
1445-
See annotation of de.test.wilcoxon()
1409+
Performs a Wilcoxon rank sum test within each partition of a data set.
1410+
1411+
See also annotation of de.test.rank_test()
14461412
14471413
:param grouping: str, array
14481414
@@ -1481,7 +1447,9 @@ def lrt(
14811447
**kwargs
14821448
):
14831449
"""
1484-
See annotation of de.test.lrt()
1450+
Performs a likelihood-ratio test within each partition of a data set.
1451+
1452+
See also annotation of de.test.lrt()
14851453
14861454
:param full_formula_loc: formula
14871455
Full model formula for location parameter model.
@@ -1577,8 +1545,9 @@ def wald(
15771545
**kwargs
15781546
):
15791547
"""
1580-
This function performs a wald test within each partition of a data set.
1581-
See annotation of de.test.wald()
1548+
Performs a Wald test within each partition of a data set.
1549+
1550+
See also annotation of de.test.wald()
15821551
15831552
:param factor_loc_totest: str, list of strings
15841553
List of factors of formula to test with Wald test.

0 commit comments

Comments
 (0)