@@ -207,7 +207,7 @@ def lrt(
207
207
gene_names : Union [np .ndarray , list ] = None ,
208
208
sample_description : pd .DataFrame = None ,
209
209
noise_model = "nb" ,
210
- size_factors : np .ndarray = None ,
210
+ size_factors : Union [ np .ndarray , pd . core . series . Series , np . ndarray ] = None ,
211
211
batch_size : int = None ,
212
212
training_strategy : Union [str , List [Dict [str , object ]], Callable ] = "DEFAULT" ,
213
213
quick_scale : bool = False ,
@@ -220,8 +220,7 @@ def lrt(
220
220
Note that lrt() does not support constraints in its current form. Please
221
221
use wald() for constraints.
222
222
223
- :param data: Array-like, xr.DataArray, xr.Dataset or anndata.Anndata object containing observations.
224
- Input data matrix (observations x features) or (cells x genes).
223
+ :param data: Input data matrix (observations x features) or (cells x genes).
225
224
:param full_formula_loc: formula
226
225
Full model formula for location parameter model.
227
226
If not specified, `full_formula` will be used instead.
@@ -264,7 +263,8 @@ def lrt(
264
263
265
264
- 'nb': default
266
265
:param size_factors: 1D array of transformed library size factors for each cell in the
267
- same order as in data
266
+ same order as in data or string-type column identifier of size-factor containing
267
+ column in sample description.
268
268
:param batch_size: the batch size to use for the estimator
269
269
:param training_strategy: {str, function, list} training strategy to use. Can be:
270
270
@@ -302,27 +302,35 @@ def lrt(
302
302
gene_names = parse_gene_names (data , gene_names )
303
303
X = parse_data (data , gene_names )
304
304
sample_description = parse_sample_description (data , sample_description )
305
- size_factors = parse_size_factors (size_factors = size_factors , data = X )
305
+ size_factors = parse_size_factors (
306
+ size_factors = size_factors ,
307
+ data = X ,
308
+ sample_description = sample_description
309
+ )
306
310
307
311
full_design_loc = data_utils .design_matrix (
308
312
sample_description = sample_description ,
309
313
formula = full_formula_loc ,
310
- as_categorical = [False if x in as_numeric else True for x in sample_description .columns .values ]
314
+ as_categorical = [False if x in as_numeric else True for x in sample_description .columns .values ],
315
+ return_type = "patsy"
311
316
)
312
317
reduced_design_loc = data_utils .design_matrix (
313
318
sample_description = sample_description ,
314
319
formula = reduced_formula_loc ,
315
- as_categorical = [False if x in as_numeric else True for x in sample_description .columns .values ]
320
+ as_categorical = [False if x in as_numeric else True for x in sample_description .columns .values ],
321
+ return_type = "patsy"
316
322
)
317
323
full_design_scale = data_utils .design_matrix (
318
324
sample_description = sample_description ,
319
325
formula = full_formula_scale ,
320
- as_categorical = [False if x in as_numeric else True for x in sample_description .columns .values ]
326
+ as_categorical = [False if x in as_numeric else True for x in sample_description .columns .values ],
327
+ return_type = "patsy"
321
328
)
322
329
reduced_design_scale = data_utils .design_matrix (
323
330
sample_description = sample_description ,
324
331
formula = reduced_formula_scale ,
325
- as_categorical = [False if x in as_numeric else True for x in sample_description .columns .values ]
332
+ as_categorical = [False if x in as_numeric else True for x in sample_description .columns .values ],
333
+ return_type = "patsy"
326
334
)
327
335
328
336
reduced_model = _fit (
@@ -388,7 +396,7 @@ def wald(
388
396
constraints_loc : np .ndarray = None ,
389
397
constraints_scale : np .ndarray = None ,
390
398
noise_model : str = "nb" ,
391
- size_factors : np .ndarray = None ,
399
+ size_factors : Union [ np .ndarray , pd . core . series . Series , str ] = None ,
392
400
batch_size : int = None ,
393
401
training_strategy : Union [str , List [Dict [str , object ]], Callable ] = "AUTO" ,
394
402
quick_scale : bool = False ,
@@ -417,7 +425,7 @@ def wald(
417
425
:param as_numeric:
418
426
Which columns of sample_description to treat as numeric and
419
427
not as categorical. This yields columns in the design matrix
420
- which do not correpond to one-hot encoded discrete factors.
428
+ which do not correspond to one-hot encoded discrete factors.
421
429
This makes sense for number of genes, time, pseudotime or space
422
430
for example.
423
431
:param init_a: (Optional) Low-level initial values for a.
@@ -465,7 +473,8 @@ def wald(
465
473
are indicated by a 1. It is highly recommended to only use this option
466
474
together with prebuilt design matrix for the scale model, dmat_scale.
467
475
:param size_factors: 1D array of transformed library size factors for each cell in the
468
- same order as in data
476
+ same order as in data or string-type column identifier of size-factor containing
477
+ column in sample description.
469
478
:param noise_model: str, noise model to use in model-based unit_test. Possible options:
470
479
471
480
- 'nb': default
@@ -488,10 +497,14 @@ def wald(
488
497
if len (kwargs ) != 0 :
489
498
logging .getLogger ("diffxpy" ).debug ("additional kwargs: %s" , str (kwargs ))
490
499
491
- if dmat_loc is None and formula_loc is None :
492
- raise ValueError ("Supply either dmat_loc or formula_loc or formula." )
493
- if dmat_scale is None and formula_scale is None :
494
- raise ValueError ("Supply either dmat_loc or formula_loc or formula." )
500
+ if (dmat_loc is None and formula_loc is None ) or \
501
+ (dmat_loc is not None and formula_loc is not None ):
502
+ raise ValueError ("Supply either dmat_loc or formula_loc." )
503
+ if (dmat_scale is None and formula_scale is None ) or \
504
+ (dmat_scale is not None and formula_scale != "~1" ):
505
+ raise ValueError ("Supply either dmat_scale or formula_scale." )
506
+ if dmat_loc is not None and factor_loc_totest is not None :
507
+ raise ValueError ("Supply coef_to_test and not factor_loc_totest if dmat_loc is supplied." )
495
508
# Check that factor_loc_totest and coef_to_test are lists and not single strings:
496
509
if isinstance (factor_loc_totest , str ):
497
510
factor_loc_totest = [factor_loc_totest ]
@@ -505,13 +518,18 @@ def wald(
505
518
X = parse_data (data , gene_names )
506
519
if dmat_loc is None and dmat_scale is None :
507
520
sample_description = parse_sample_description (data , sample_description )
508
- size_factors = parse_size_factors (size_factors = size_factors , data = X )
521
+ size_factors = parse_size_factors (
522
+ size_factors = size_factors ,
523
+ data = X ,
524
+ sample_description = sample_description
525
+ )
509
526
510
527
if dmat_loc is None :
511
528
design_loc = data_utils .design_matrix (
512
529
sample_description = sample_description ,
513
530
formula = formula_loc ,
514
- as_categorical = [False if x in as_numeric else True for x in sample_description .columns .values ]
531
+ as_categorical = [False if x in as_numeric else True for x in sample_description .columns .values ],
532
+ return_type = "patsy"
515
533
)
516
534
# Check that closed-form is not used if numeric predictors are used and model is not "norm".
517
535
if isinstance (init_a , str ):
@@ -533,7 +551,8 @@ def wald(
533
551
design_scale = data_utils .design_matrix (
534
552
sample_description = sample_description ,
535
553
formula = formula_scale ,
536
- as_categorical = [False if x in as_numeric else True for x in sample_description .columns .values ]
554
+ as_categorical = [False if x in as_numeric else True for x in sample_description .columns .values ],
555
+ return_type = "patsy"
537
556
)
538
557
# Check that closed-form is not used if numeric predictors are used and model is not "norm".
539
558
if isinstance (init_b , str ):
@@ -1645,10 +1664,8 @@ def continuous_1d(
1645
1664
init_b : Union [np .ndarray , str ] = "standard" ,
1646
1665
gene_names : Union [np .ndarray , list ] = None ,
1647
1666
sample_description = None ,
1648
- dmat_loc : Union [patsy .design_info .DesignMatrix , xr .Dataset ] = None ,
1649
- dmat_scale : Union [patsy .design_info .DesignMatrix , xr .Dataset ] = None ,
1650
- constraints_loc : np .ndarray = None ,
1651
- constraints_scale : np .ndarray = None ,
1667
+ constraints_loc : Union [Tuple [str ], List [str ]] = (),
1668
+ constraints_scale : Union [Tuple [str ], List [str ]] = (),
1652
1669
noise_model : str = 'nb' ,
1653
1670
size_factors : np .ndarray = None ,
1654
1671
batch_size : int = None ,
@@ -1696,11 +1713,10 @@ def continuous_1d(
1696
1713
this will be propagated across all coefficients which represent this covariate
1697
1714
in the spline basis space.
1698
1715
:param as_numeric:
1699
- Which columns of sample_description to treat as numeric and
1700
- not as categorical. This yields columns in the design matrix
1701
- which do not correpond to one-hot encoded discrete factors.
1702
- This makes sense for number of genes, time, pseudotime or space
1703
- for example.
1716
+ Which columns of sample_description to treat as numeric and not as categorical.
1717
+ This yields columns in the design matrix which do not correspond to one-hot encoded discrete factors.
1718
+ This makes sense for library depth for example. Do not use this for the covariate that you
1719
+ want to extrapolate using a spline-basis!
1704
1720
:param test: str, statistical test to use. Possible options:
1705
1721
1706
1722
- 'wald': default
@@ -1719,34 +1735,33 @@ def continuous_1d(
1719
1735
* "auto": automatically choose best initialization
1720
1736
* "standard": initialize with zeros
1721
1737
- np.ndarray: direct initialization of 'b'
1722
- :param gene_names: optional list/array of gene names which will be used if `data` does not implicitly store these
1738
+ :param gene_names: optional list/array of gene names which will be used if `data` does
1739
+ not implicitly store these
1723
1740
:param sample_description: optional pandas.DataFrame containing sample annotations
1724
- :param dmat_loc: Pre-built location model design matrix.
1725
- This over-rides formula_loc and sample description information given in
1726
- data or sample_description.
1727
- :param dmat_scale: Pre-built scale model design matrix.
1728
- This over-rides formula_scale and sample description information given in
1729
- data or sample_description.
1730
- :param constraints_loc: : Constraints for location model.
1731
- Array with constraints in rows and model parameters in columns.
1732
- Each constraint contains non-zero entries for the a of parameters that
1733
- has to sum to zero. This constraint is enforced by binding one parameter
1734
- to the negative sum of the other parameters, effectively representing that
1735
- parameter as a function of the other parameters. This dependent
1736
- parameter is indicated by a -1 in this array, the independent parameters
1737
- of that constraint (which may be dependent at an earlier constraint)
1738
- are indicated by a 1. It is highly recommended to only use this option
1739
- together with prebuilt design matrix for the location model, dmat_loc.
1740
- :param constraints_scale: : Constraints for scale model.
1741
- Array with constraints in rows and model parameters in columns.
1742
- Each constraint contains non-zero entries for the a of parameters that
1743
- has to sum to zero. This constraint is enforced by binding one parameter
1744
- to the negative sum of the other parameters, effectively representing that
1745
- parameter as a function of the other parameters. This dependent
1746
- parameter is indicated by a -1 in this array, the independent parameters
1747
- of that constraint (which may be dependent at an earlier constraint)
1748
- are indicated by a 1. It is highly recommended to only use this option
1749
- together with prebuilt design matrix for the scale model, dmat_scale.
1741
+ :param constraints_loc: Grouped factors to enforce equality constraints on for location model.
1742
+ Every element of the iterable corresponds to one set of equality constraints.
1743
+ Each set has to be a dictionary of the form {x: y} where x is the factor to be constrained
1744
+ and y is a factor by which levels of x are grouped and then constrained. Set y="1" to constrain
1745
+ all levels of x to sum to zero, a single equality constraint.
1746
+
1747
+ E.g.: {"batch": "condition"} Batch levels within each condition are constrained to sum to
1748
+ zero. This is applicable if repeats of an experiment within each condition
1749
+ are independent so that the set-up ~1+condition+batch is perfectly confounded.
1750
+
1751
+ Can only group by non-constrained effects right now, use constraint_matrix_from_string
1752
+ for other cases.
1753
+ :param constraints_scale: Grouped factors to enforce equality constraints on for scale model.
1754
+ Every element of the iterable corresponds to one set of equality constraints.
1755
+ Each set has to be a dictionary of the form {x: y} where x is the factor to be constrained
1756
+ and y is a factor by which levels of x are grouped and then constrained. Set y="1" to constrain
1757
+ all levels of x to sum to zero, a single equality constraint.
1758
+
1759
+ E.g.: {"batch": "condition"} Batch levels within each condition are constrained to sum to
1760
+ zero. This is applicable if repeats of an experiment within each condition
1761
+ are independent so that the set-up ~1+condition+batch is perfectly confounded.
1762
+
1763
+ Can only group by non-constrained effects right now, use constraint_matrix_from_string
1764
+ for other cases.
1750
1765
:param noise_model: str, noise model to use in model-based unit_test. Possible options:
1751
1766
1752
1767
- 'nb': default
@@ -1757,9 +1772,9 @@ def continuous_1d(
1757
1772
1758
1773
- str: will use Estimator.TrainingStrategy[training_strategy] to train
1759
1774
- function: Can be used to implement custom training function will be called as
1760
- `training_strategy(estimator)`.
1761
- - list of keyword dicts containing method arguments: Will call Estimator.train() once with each dict of
1762
- method arguments.
1775
+ `training_strategy(estimator)`.
1776
+ - list of keyword dicts containing method arguments: Will call Estimator.train()
1777
+ once with each dict of method arguments.
1763
1778
1764
1779
Example:
1765
1780
0 commit comments