feat: change qmu API and remove test_stat return (#712)

lukasheinrich · matthewfeickert · commit ed91b0a25ee4 · 2020-01-02T19:14:05.000-06:00
* Make 'pyhf.infer.test_statistics.qmu' return a scalar
* Remove the 'return_test_statistics' option from 'pyhf.infer.hypotest' (returning test statistics from a hypothesis test only makes sense for asymptotics)
diff --git a/docs/examples/notebooks/binderexample/StatisticalAnalysis.ipynb b/docs/examples/notebooks/binderexample/StatisticalAnalysis.ipynb
diff --git a/docs/examples/notebooks/hello-world.ipynb b/docs/examples/notebooks/hello-world.ipynb
@@ -41,7 +41,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Observed: [0.05290116], Expected: [0.06445521]\n"
+      "Observed: 0.05290116224852556, Expected: 0.06445521290832805\n"
      ]
     }
    ],
@@ -67,7 +67,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Observed CL_s: [0.05290116], CL_sb: [0.0236], CL_b: [0.44611493]\n"
+      "Observed CL_s: 0.05290116224852556, CL_sb: 0.023599998519978738, CL_b: 0.4461149342826869\n"
      ]
     }
    ],
@@ -120,13 +120,13 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Observed CL_s: [0.05290116]\n",
+      "Observed CL_s: 0.05290116224852556\n",
       "\n",
-      "Expected CL_s(-2 σ): [0.00260641]\n",
-      "Expected CL_s(-1 σ): [0.01382066]\n",
-      "Expected CL_s      : [0.06445521]\n",
-      "Expected CL_s(1 σ): [0.23526104]\n",
-      "Expected CL_s(2 σ): [0.57304182]\n"
+      "Expected CL_s(-2 σ): 0.0026064088679947964\n",
+      "Expected CL_s(-1 σ): 0.013820657528619273\n",
+      "Expected CL_s      : 0.06445521290832805\n",
+      "Expected CL_s(1 σ): 0.23526103626937836\n",
+      "Expected CL_s(2 σ): 0.5730418174887743\n"
      ]
     }
    ],
@@ -136,31 +136,6 @@
     "for p_value, n_sigma in enumerate(np.arange(-2,3)):\n",
     "    print('Expected CL_s{}: {}'.format('      ' if n_sigma==0 else '({} σ)'.format(n_sigma),CLs_exp_band[p_value]))"
    ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "**Returning the test statistics for the observed and Asimov data**"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "q_mu: [3.93824492], Asimov q_mu: [3.41886758]\n"
-     ]
-    }
-   ],
-   "source": [
-    "CLs_obs, test_statistics = pyhf.infer.hypotest(1.0, [51, 48] + pdf.config.auxdata, pdf, return_test_statistics=True)\n",
-    "print('q_mu: {}, Asimov q_mu: {}'.format(test_statistics[0], test_statistics[1]))"
-   ]
   }
  ],
  "metadata": {
@@ -179,7 +154,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.6.6"
+   "version": "3.7.5"
   }
  },
  "nbformat": 4,
diff --git a/src/pyhf/infer/__init__.py b/src/pyhf/infer/__init__.py
@@ -27,7 +27,6 @@ def hypotest(
         return_tail_probs (bool): Bool for returning :math:`\textrm{CL}_{s+b}` and :math:`\textrm{CL}_{b}`
         return_expected (bool): Bool for returning :math:`\textrm{CL}_{\textrm{exp}}`
         return_expected_set (bool): Bool for returning the :math:`(-2,-1,0,1,2)\sigma` :math:`\textrm{CL}_{\textrm{exp}}` --- the "Brazil band"
-        return_test_statistics (bool): Bool for returning :math:`q_{\mu}` and :math:`q_{\mu,A}`
 
     Returns:
         Tuple of Floats and lists of Floats:
@@ -74,8 +73,6 @@ def hypotest(
 
             for :math:`\mu'=0` and :math:`N \in \left\{-2, -1, 0, 1, 2\right\}`. These values define the boundaries of an uncertainty band sometimes referred to as the "Brazil band". Only returned when ``return_expected_set`` is ``True``.
 
-            - :math:`\left[q_{\mu}, q_{\mu,A}\right]`: The test statistics for the observed and Asimov datasets respectively. Only returned when ``return_test_statistics`` is ``True``.
-
     """
     init_pars = init_pars or pdf.config.suggested_init()
     par_bounds = par_bounds or pdf.config.suggested_bounds()
@@ -84,14 +81,10 @@ def hypotest(
     asimov_mu = 0.0
     asimov_data = generate_asimov_data(asimov_mu, data, pdf, init_pars, par_bounds)
 
-    qmu_v = tensorlib.clip(
-        qmu(poi_test, data, pdf, init_pars, par_bounds), 0, max_value=None
-    )
+    qmu_v = qmu(poi_test, data, pdf, init_pars, par_bounds)
     sqrtqmu_v = tensorlib.sqrt(qmu_v)
 
-    qmuA_v = tensorlib.clip(
-        qmu(poi_test, asimov_data, pdf, init_pars, par_bounds), 0, max_value=None
-    )
+    qmuA_v = qmu(poi_test, asimov_data, pdf, init_pars, par_bounds)
     sqrtqmuA_v = tensorlib.sqrt(qmuA_v)
 
     CLsb, CLb, CLs = pvals_from_teststat(sqrtqmu_v, sqrtqmuA_v, qtilde=qtilde)
@@ -109,8 +102,6 @@ def hypotest(
         _returns.append(CLs_exp)
     elif kwargs.get('return_expected'):
         _returns.append(pvals_from_teststat_expected(sqrtqmuA_v)[-1])
-    if kwargs.get('return_test_statistics'):
-        _returns.append([qmu_v, qmuA_v])
     # Enforce a consistent return type of the observed CLs
     return tuple(_returns) if len(_returns) > 1 else _returns[0]
 
diff --git a/src/pyhf/infer/test_statistics.py b/src/pyhf/infer/test_statistics.py
@@ -38,6 +38,6 @@ def qmu(mu, data, pdf, init_pars, par_bounds):
     )
     qmu = fixed_poi_fit_lhood_val - unconstrained_fit_lhood_val
     qmu = tensorlib.where(
-        muhatbhat[pdf.config.poi_index] > mu, tensorlib.astensor([0]), qmu
-    )
-    return qmu
+        muhatbhat[pdf.config.poi_index] > mu, tensorlib.astensor(0.0), qmu
+    )[0]
+    return tensorlib.clip(qmu, 0, max_value=None)
diff --git a/src/pyhf/infer/utils.py b/src/pyhf/infer/utils.py
@@ -55,12 +55,16 @@ def _false_case():
             return nullval, altval
 
         nullval, altval = tensorlib.conditional(
-            (sqrtqmu_v < sqrtqmuA_v)[0], _true_case, _false_case
+            (sqrtqmu_v < sqrtqmuA_v), _true_case, _false_case
         )
     CLsb = 1 - tensorlib.normal_cdf(nullval)
     CLb = 1 - tensorlib.normal_cdf(altval)
     CLs = CLsb / CLb
-    return CLsb, CLb, CLs
+    return (
+        tensorlib.reshape(CLsb, (1,)),
+        tensorlib.reshape(CLb, (1,)),
+        tensorlib.reshape(CLs, (1,)),
+    )
 
 
 def pvals_from_teststat_expected(sqrtqmuA_v, nsigma=0):
@@ -86,4 +90,8 @@ def pvals_from_teststat_expected(sqrtqmuA_v, nsigma=0):
     CLsb = tensorlib.normal_cdf(nsigma - sqrtqmuA_v)
     CLb = tensorlib.normal_cdf(nsigma)
     CLs = CLsb / CLb
-    return CLsb, CLb, CLs
+    return (
+        tensorlib.reshape(CLsb, (1,)),
+        tensorlib.reshape(CLb, (1,)),
+        tensorlib.reshape(CLs, (1,)),
+    )
diff --git a/tests/benchmarks/test_benchmark.py b/tests/benchmarks/test_benchmark.py
@@ -62,7 +62,6 @@ def hypotest(pdf, data):
         return_tail_probs=True,
         return_expected=True,
         return_expected_set=True,
-        return_test_statistics=True,
     )
 
 
diff --git a/tests/test_backend_consistency.py b/tests/test_backend_consistency.py
@@ -117,14 +117,9 @@ def test_hypotest_q_mu(
             backend.session = tf.compat.v1.Session()
         pyhf.set_backend(backend)
 
-        q_mu = pyhf.infer.hypotest(
-            1.0,
-            data,
-            pdf,
-            pdf.config.suggested_init(),
-            pdf.config.suggested_bounds(),
-            return_test_statistics=True,
-        )[-1][0]
+        q_mu = pyhf.infer.test_statistics.qmu(
+            1.0, data, pdf, pdf.config.suggested_init(), pdf.config.suggested_bounds(),
+        )
         test_statistic.append(pyhf.tensorlib.tolist(q_mu))
 
     # compare to NumPy/SciPy
diff --git a/tests/test_infer.py b/tests/test_infer.py
@@ -86,29 +86,3 @@ def test_hypotest_return_expected_set(tmpdir, hypotest_args):
     assert isinstance(result[2], type(tb.astensor(result[2])))
     assert len(result[3]) == 5
     assert check_uniform_type(result[3])
-
-
-def test_hypotest_return_test_statistics(tmpdir, hypotest_args):
-    """
-    Check that the return structure of pyhf.infer.hypotest with the
-    additon of the return_test_statistics keyword arg is as expected
-    """
-    tb = pyhf.tensorlib
-
-    kwargs = {
-        'return_tail_probs': True,
-        'return_expected': True,
-        'return_expected_set': True,
-        'return_test_statistics': True,
-    }
-    result = pyhf.infer.hypotest(*hypotest_args, **kwargs)
-    # CLs_obs, [CLsb, CLb], CLs_exp, CLs_exp @[-2, -1, 0, +1, +2]sigma, [q_mu, q_mu_Asimov]
-    assert len(list(result)) == 5
-    assert isinstance(result[0], type(tb.astensor(result[0])))
-    assert len(result[1]) == 2
-    assert check_uniform_type(result[1])
-    assert isinstance(result[2], type(tb.astensor(result[2])))
-    assert len(result[3]) == 5
-    assert check_uniform_type(result[3])
-    assert len(result[4]) == 2
-    assert check_uniform_type(result[4])
diff --git a/tests/test_public_api.py b/tests/test_public_api.py
@@ -73,7 +73,6 @@ def test_hypotest(backend, model_setup):
         init_pars,
         model.config.suggested_bounds(),
         return_expected_set=True,
-        return_test_statistics=True,
     )
 
 

Original file line number	Diff line number	Diff line change
`@@ -62,7 +62,6 @@ def hypotest(pdf, data):`
`62`	`62`	`return_tail_probs=True,`
`63`	`63`	`return_expected=True,`
`64`	`64`	`return_expected_set=True,`
`65`		`- return_test_statistics=True,`
`66`	`65`	`)`
`67`	`66`
`68`	`67`
Original file line number	Diff line number	Diff line change
`@@ -73,7 +73,6 @@ def test_hypotest(backend, model_setup):`
`73`	`73`	`init_pars,`
`74`	`74`	`model.config.suggested_bounds(),`
`75`	`75`	`return_expected_set=True,`
`76`		`- return_test_statistics=True,`
`77`	`76`	`)`
`78`	`77`
`79`	`78`