diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index c4f9dcf..077e5ce 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -21,16 +21,13 @@ jobs: - name: Checkout source uses: actions/checkout@v2 - - name: Setup R - uses: r-lib/actions/setup-r@v2 - with: - r-version: '4.2.0' + - name: Setup r2u + uses: eddelbuettel/github-actions/r2u-setup@master - - name: install fwildclusterboot for testing - run: Rscript -e 'install.packages("fwildclusterboot", repos="https://cloud.r-project.org")' + - name: install R packages + run: Rscript -e 'install.packages(c("fwildclusterboot"))' shell: bash - - name: Setup python uses: actions/setup-python@v2 with: diff --git a/pyproject.toml b/pyproject.toml index dfec8b6..f934802 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ pymdown-extensions = ">=10.0" mkdocstrings-python-legacy = "^0.2.3" mkdocstrings = {version = "^0.19.0", extras = ["python"], optional = true } pymdown-extensions = ">=10.0" +rpy2 = "^3.5.16" [build-system] diff --git a/tests/test_seeds.py b/tests/test_seeds.py index 26ea201..abe6cab 100644 --- a/tests/test_seeds.py +++ b/tests/test_seeds.py @@ -44,3 +44,16 @@ def test_results_from_same_seed(data): np.random.seed(123) b2 = wildboottest(model, param = "X1", cluster = x, B= 999) pd.testing.assert_frame_equal(a2,b2) + +def test_seeds_and_rng(data): + model = sm.ols(formula='Y ~ X1 + X2', data=data) + + cluster_list = [data.cluster, None] + + for x in cluster_list: + + # specifying seed and rng with that seed -> same results + a = wildboottest(model, param = "X1", cluster = x, B= 999, seed=876587) + rng = np.random.default_rng(seed=876587) + b = wildboottest(model, param = "X1", cluster = x, B= 999, seed=rng) + pd.testing.assert_frame_equal(a,b) \ No newline at end of file diff --git a/tests/test_weights.py b/tests/test_weights.py index 090af40..002217b 100644 --- a/tests/test_weights.py +++ b/tests/test_weights.py @@ -4,7 +4,6 @@ 
import numpy as np import pandas as pd -np.random.seed(89756) ts = list(wild_draw_fun_dict.keys()) full_enum = [True, False] @@ -13,6 +12,7 @@ @pytest.fixture def data(): + np.random.seed(12315) N = 100 k = 2 G= 20 @@ -46,9 +46,11 @@ def test_different_weights(data): X, y, cluster, bootcluster, R, B = data results_dict = {} + + rng = np.random.default_rng(seed=0) for w in ts: - boot = WildboottestCL(X = X, Y = y, cluster = cluster, bootcluster = bootcluster, R = R, B = 99999, seed = 12341) + boot = WildboottestCL(X = X, Y = y, cluster = cluster, bootcluster = bootcluster, R = R, B = 99999, seed = rng) boot.get_scores(bootstrap_type = "11", impose_null = True) boot.get_weights(weights_type = w) boot.get_numer() @@ -60,7 +62,9 @@ def test_different_weights(data): results_dict[w] = boot.pvalue results_series = pd.Series(results_dict) + print(results_series) mapd = (results_series - results_series.mean()).abs().mean() / results_series.mean() + print(mapd) assert mapd <= .1# make sure mean absolute percentage deviation is less than 10% (ad hoc) \ No newline at end of file diff --git a/wildboottest/wildboottest.py b/wildboottest/wildboottest.py index 4733c11..9805ddd 100644 --- a/wildboottest/wildboottest.py +++ b/wildboottest/wildboottest.py @@ -5,6 +5,13 @@ from wildboottest.weights import draw_weights import warnings from typing import Union, Tuple, Callable +from numpy.random import Generator +from statsmodels.regression.linear_model import OLS + + +_allowed_models = ( + OLS, +) class WildDrawFunctionException(Exception): pass @@ -55,7 +62,7 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series], R : Union[np.ndarray, pd.DataFrame], r: Union[np.ndarray, float], B: int, - seed: Union[int, None] = None) -> None: + seed: Union[int, Generator, None] = None) -> None: """Initializes the Heteroskedastic Wild Bootstrap Class @@ -64,7 +71,9 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series], Y (Union[np.ndarray, pd.DataFrame, pd.Series]): 
Endogenous variable array or dataframe R (Union[np.ndarray, pd.DataFrame]): Constraint matrix for running bootstrap B (int): bootstrap iterations - seed (Union[int, None], optional): Random seed for random weight types. Defaults to None. + seed (Union[int, Generator, None], optional): Random seed for random weight types. + If an integer, will be used as a seed in a numpy default random generator, or a numpy random generator + can also be specified and used. Defaults to None. Raises: TypeError: Raise if input arrays are lists @@ -85,10 +94,12 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series], else: self.Y = Y - if seed is None: - seed = np.random.randint(low = 1, high = (2**32 - 1), size = 1, dtype=np.int64) - - self.rng = np.random.default_rng(seed = seed) + if isinstance(seed, (int, np.integer)): + self.rng = np.random.default_rng(seed=seed) + elif isinstance(seed, Generator): + self.rng = seed + else: + self.rng = np.random.default_rng() self.N = X.shape[0] self.k = X.shape[1] @@ -274,7 +285,7 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series], R : Union[np.ndarray, pd.DataFrame], B: int, bootcluster: Union[np.ndarray, pd.DataFrame, pd.Series, None] = None, - seed: Union[int, None] = None, + seed: Union[int, Generator, None] = None, parallel: bool = True) -> None: """Initializes the Wild Cluster Bootstrap Class @@ -285,7 +296,9 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series], R (Union[np.ndarray, pd.DataFrame]): Constraint matrix for running bootstrap B (int): bootstrap iterations bootcluster (Union[np.ndarray, pd.DataFrame, pd.Series, None], optional): Sub-cluster array. Defaults to None. - seed (Union[int, None], optional): Random seed for random weight types. Defaults to None. + seed (Union[int, Generator, None], optional): Random seed for random weight types. + If an integer, will be used as a seed in a numpy default random generator, or a numpy random generator + can also be specified and used. Defaults to None. 
parallel (bool, optional): Whether to run the bootstrap in parallel. Defaults to True. Raises: TypeError: Raise if input arrays are lists @@ -326,11 +339,13 @@ def __init__(self, X : Union[np.ndarray, pd.DataFrame, pd.Series], self.bootclustid = np.unique(bootcluster) self.bootcluster = bootcluster - if seed is None: - seed = np.random.randint(low = 1, high = (2**32 - 1), size = 1, dtype=np.int64) - - self.rng = np.random.default_rng(seed = seed) - + if isinstance(seed, (int, np.integer)): + self.rng = np.random.default_rng(seed=seed) + elif isinstance(seed, Generator): + self.rng = seed + else: + self.rng = np.random.default_rng() + self.N_G_bootcluster = len(self.bootclustid) self.G = len(self.clustid) @@ -640,14 +655,14 @@ def get_pvalue(self, pval_type = "two-tailed"): self.pvalue = np.mean(self.t_stat > self.t_boot) -def wildboottest(model : 'OLS', +def wildboottest(model : OLS, B:int, cluster : Union[np.ndarray, pd.Series, pd.DataFrame, None] = None, param : Union[str, None] = None, weights_type: str = 'rademacher', impose_null: bool = True, bootstrap_type: str = '11', - seed: Union[str, None] = None, + seed: Union[int, Generator, None] = None, adj: bool = True, cluster_adj: bool = True, parallel: bool = True, @@ -666,7 +681,9 @@ def wildboottest(model : 'OLS', Defaults to True. bootstrap_type (str, optional):A string of length one. Allows to choose the bootstrap type to be run. Either '11', '31', '13' or '33'. '11' by default. Defaults to '11'. - seed (Union[str, None], optional): Option to provide a random seed. Defaults to None. + seed (Union[int, Generator, None], optional): Random seed for random weight types. + If an integer, will be used as a seed in a numpy default random generator, or a numpy random generator + can also be specified and used. Defaults to None. adj (bool, optional): Whether to adjust for small sample. Defaults to True. cluster_adj (bool, optional): Whether to do a cluster-robust small sample correction. Defaults to True. 
parallel (bool, optional): Whether to run the bootstrap in parallel. Defaults to True. @@ -702,6 +719,9 @@ def wildboottest(model : 'OLS', >>> wildboottest(model, param = "X1", cluster = cluster, B = 9999) >>> wildboottest(model, cluster = cluster, B = 9999) """ + + if not isinstance(model, _allowed_models): + raise NotImplementedError(f"Only allow models of type {', '.join([str(i) for i in _allowed_models])}") # does model.exog already exclude missing values? X = model.exog