diff --git a/linearmodels/panel/results.py b/linearmodels/panel/results.py index 9e48922a0e..2bca27b963 100644 --- a/linearmodels/panel/results.py +++ b/linearmodels/panel/results.py @@ -1,7 +1,8 @@ from linearmodels.compat.statsmodels import Summary import datetime as dt -from typing import Dict, List, Optional, Union +from typing import Dict, List, Optional, Union, Tuple +import warnings import numpy as np from pandas import DataFrame, Series, concat @@ -11,7 +12,12 @@ from linearmodels.iv.results import default_txt_fmt, stub_concat, table_concat from linearmodels.shared.base import _ModelComparison, _SummaryStr -from linearmodels.shared.hypotheses import WaldTestStatistic, quadratic_form_test +from linearmodels.shared.hypotheses import ( + WaldTestStatistic, + InvalidTestStatistic, + quadratic_form_test, +) +from linearmodels.panel.covariance import ClusteredCovariance, HeteroskedasticCovariance from linearmodels.shared.io import _str, pval_format from linearmodels.shared.utility import AttrDict from linearmodels.typing import NDArray, OptionalArrayLike @@ -790,6 +796,128 @@ def theta(self) -> DataFrame: """Values used in generalized demeaning""" return self._theta + def wu_hausman( + self, + other: PanelResults, + include_constant: bool = False, + sigmamore: bool = False, + sigmaless: bool = False, + ) -> Tuple[Union[InvalidTestStatistic, WaldTestStatistic], DataFrame]: + r""" + Perform Hausman specification test against other regression results. + + Parameters + ---------- + other : PanelResults + Result from panel regression known to be consistent. Typically + fixed effects regression. + include_constant : bool, optional + Flag indicating whether to include the constant term in the comparison. + sigmamore : bool, optional + Flag indicating whether to base the test on the estimated parameter + covariance from the efficient model. + sigmaless : bool, optional + Flag indicating whether to base the test on the estimated parameter + covariance from the consistent model. + + Returns + ------- + WaldTestStatistic + Object containing test statistic, p-value, distribution and null + DataFrame + Overview of coefficients used in the test, and their differences and standard errors + + Notes + ----- + The test is computed by + .. math:: + H=(b_{1}-b_{0})'\big(\operatorname{Var}(b_{0})-\operatorname{Var}(b_{1})\big)^{-1}(b_{1}-b_{0}) + + where :math:`b_{1}` is the array of coefficients from the model known to be consistent, + and :math:`b_{1}` is the array of coefficients from the model known to be efficient + (this model). + + """ + + def alt_cov(res: PanelResults, s2: float) -> DataFrame: + """ + Calculate covariance using the supplied error variance. Based on + https://github.com/bashtage/linearmodels/blob/4.17/linearmodels/panel/covariance.py#L119 + """ + cov_obj = res._deferred_cov.__self__ + x = cov_obj._x + out = s2 * np.linalg.inv(x.T @ x) + out = (out + out.T) / 2 + return DataFrame( + out, columns=res.model.exog.vars, index=res.model.exog.vars + ) + + def matrix_positive_definite(mat: Union[NDArray, DataFrame]) -> bool: + """ + Check if matrix is positive definite. + """ + if np.array_equal(mat, mat.T): + try: + np.linalg.cholesky(mat) + return True + except np.linalg.LinAlgError: + pass + return False + + if sigmamore and sigmaless: + raise ValueError("Conflicting test parameters") + + invalid_cov = (ClusteredCovariance, HeteroskedasticCovariance) + if any( + isinstance(res._deferred_cov.__self__, invalid_cov) for res in (other, self) + ): + raise TypeError( + "Hausman test cannot be used with clustered or robust covariance" + ) + + common_cols = set(other.params.index) & set(self.params.index) + if not include_constant: + if other.model.has_constant: + common_cols.discard(other.model.exog.vars[other.model._constant_index]) + if self.model.has_constant: + common_cols.discard(self.model.exog.vars[self.model._constant_index]) + + b0 = other.params[common_cols] + b1 = self.params[common_cols] + var0 = (other.cov if not sigmamore else alt_cov(other, s2=self.s2)).loc[ + common_cols, common_cols + ] + var1 = (self.cov if not sigmaless else alt_cov(self, s2=other.s2)).loc[ + common_cols, common_cols + ] + + var_diff = var0 - var1 + b_diff = b0 - b1 + std_errors = Series(np.sqrt(np.diagonal(var_diff)), index=var0.index) + estimates = DataFrame( + data={"b0": b0, "b1": b1, "b0-b1": b_diff, "Std. Err.": std_errors} + ) + if not matrix_positive_definite(var_diff): + warnings.warn("(Var(b0) - Var(b1) is not positive definite)") + inv = np.linalg.inv + else: + inv = np.linalg.pinv + test_stat = b_diff.T @ inv(var_diff) @ b_diff + + test: Union[InvalidTestStatistic, WaldTestStatistic] + if test_stat >= 0: + test = WaldTestStatistic( + test_stat, + null="No systematic difference in coefficients between models", + df=b0.size, + name="Hausman specification test", + ) + else: + test = InvalidTestStatistic( + "chi2<0. Model does not meet the assumptions of the Hausman test." + ) + return test, estimates + PanelModelResults = Union[PanelEffectsResults, PanelResults, RandomEffectsResults] diff --git a/linearmodels/tests/panel/_utility.py b/linearmodels/tests/panel/_utility.py index e46b5eb525..fd83f9a2a9 100644 --- a/linearmodels/tests/panel/_utility.py +++ b/linearmodels/tests/panel/_utility.py @@ -236,7 +236,7 @@ def assert_frame_similar(result, expected): def access_attributes(result): d = dir(result) for key in d: - if not key.startswith("_") and key not in ("wald_test",): + if not key.startswith("_") and key not in ("wald_test", "wu_hausman"): val = getattr(result, key) if callable(val): val() diff --git a/linearmodels/tests/panel/test_results.py b/linearmodels/tests/panel/test_results.py index 3f0876c2af..ce9e5405e8 100644 --- a/linearmodels/tests/panel/test_results.py +++ b/linearmodels/tests/panel/test_results.py @@ -170,3 +170,40 @@ def test_wald_test(data): with pytest.raises(ValueError): res.wald_test(restriction, np.zeros(2), formula=formula) + + +@pytest.mark.parametrize("constant", (False, True), ids=("", "constant")) +@pytest.mark.parametrize("sigmamore", (False, True), ids=("", "sigmamore")) +@pytest.mark.parametrize("sigmaless", (False, True), ids=("", "sigmaless")) +def test_wu_hausman(recwarn, data, constant, sigmamore, sigmaless): + if sigmamore and sigmaless: + return + data = data.set_index(["nr", "year"]) + dependent = data["hours"] + exog = add_constant(data[["exper", "expersq"]]) + re_res = RandomEffects(dependent, exog).fit() + fe_res = PanelOLS(dependent, exog, entity_effects=True).fit() + opts = { + "include_constant": constant, + "sigmamore": sigmamore, + "sigmaless": sigmaless, + } + wald, estimates = re_res.wu_hausman(other=fe_res, **opts) + if constant: + warnings = {str(warn.message) for warn in recwarn} + assert "(Var(b0) - Var(b1) is not positive definite)" in warnings + assert estimates.shape == (3, 4) + else: + assert estimates.shape == (2, 4) + stata_results = { + "": (7.190854126884934, 0.0274489582259534), + "include_constant": (7.190854126885962, 0.0660570908629669), + "sigmaless": (6.953506564342694, 0.0309075965524561), + "include_constant-sigmaless": (6.953506564340507, 0.0733945334224529), + "sigmamore": (6.945610047252053, 0.0310298689573541), + "include_constant-sigmamore": (6.94561004725098, 0.0736517192483979), + } + test_id = "-".join([key for key, val in opts.items() if val is True]) + expected_stat, expected_pval = stata_results[test_id] + assert wald.stat == pytest.approx(expected_stat, abs=1e-9) + assert wald.pval == pytest.approx(expected_pval, abs=1e-9)