Skip to content

Commit b11535a

Browse files
committed
Add some tests
1 parent 0b2b9b2 commit b11535a

File tree

6 files changed

+403
-3
lines changed

6 files changed

+403
-3
lines changed

src/skstats/__init__.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
from skstats.log_uniform import *
2+
from skstats.normal import *
3+
from skstats.uniform import *
4+
5+
__all__ = [
6+
"LogUniform",
7+
"Normal",
8+
"StandardNormal",
9+
"Uniform",
10+
]

src/skstats/log_uniform.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,14 @@
1010
)
1111

1212

13+
__all__ = ["LogUniform"]
14+
15+
1316
def _log_diff(log_p, log_q):
1417
return special.logsumexp([log_p, log_q + np.pi * 1j], axis=0)
1518

1619

17-
class _LogUniform(ContinuousDistribution):
20+
class LogUniform(ContinuousDistribution):
1821
r"""Log-uniform distribution.
1922
2023
The probability density function of the log-uniform distribution is:

src/skstats/uniform.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99
)
1010

1111

12+
__all__ = ["Uniform"]
13+
14+
1215
class Uniform(ContinuousDistribution):
1316
r"""Uniform distribution.
1417

tests/reference_distributions.py

Lines changed: 294 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,294 @@
1+
import numpy as np
2+
import mpmath
3+
from mpmath import mp
4+
5+
6+
class ReferenceDistribution:
7+
"""Minimalist distribution infrastructure for generating reference data.
8+
9+
The purpose is to generate reference values for unit tests of SciPy
10+
distribution accuracy and robustness.
11+
12+
Handles array input with standard broadcasting rules, and method
13+
implementations are easily compared against their mathematical definitions.
14+
No attempt is made to handle edge cases or be fast, and arbitrary precision
15+
arithmetic is trusted for accuracy rather than making the method
16+
implementations "smart".
17+
18+
Notes
19+
-----
20+
21+
In this infrastructure, distributions families are classes, and
22+
fully-specified distributions (i.e. with definite values of all family
23+
parameters) are instances of these classes. Typically, the public methods
24+
accept as input only the argument at which the at which the function is to
25+
be evaluated. Unlike SciPy distributions, they never accept values of
26+
distribution family shape, location, or scale parameters. A few
27+
other parameters are noteworthy:
28+
29+
- All methods accept `dtype` to control the output data type. The default
30+
is `np.float64`, but `object` or `mp.mpf` may be
31+
specified to output the full `mpf`.
32+
- `ppf`/`isf` accept a `guess` because they use a scalar rootfinder
33+
to invert the `cdf`/`sf`. This is passed directly into the `x0` method
34+
of `mpmath.findroot`; see its documentation for details.
35+
- moment accepts `order`, an integer that specifies the order of the (raw)
36+
moment, and `center`, which is the value about which the moment is
37+
taken. The default is to calculate the mean and use it to calculate
38+
central moments; passing ``0`` results in a noncentral moment. For
39+
efficiency, the mean can be passed explicitly if it is already known.
40+
41+
Follow the example of SkewNormal to generate new reference distributions,
42+
overriding only `__init__` and `_pdf`*. Use the reference distributions to
43+
generate reference values for unit tests of SciPy distribution method
44+
precision and robustness (e.g. for extreme arguments). If the a SciPy
45+
methods implementation is independent and yet the output matches reference
46+
values generated with this infrastructure, it is unlikely that the SciPy
47+
and reference values are both inaccurate.
48+
49+
* If the SciPy output *doesn't* match and the cause appears to be
50+
inaccuracy of the reference values (e.g. due to numerical issues that
51+
mpmath's arbitrary precision arithmetic doesn't handle), then it may be
52+
appropriate to override a method of the reference distribution rather than
53+
relying on the generic implementation. Otherwise, hesitate to override
54+
methods: the generic implementations are mathematically correct and easy
55+
to verify, whereas an override introduces many possibilities of mistakes,
56+
requires more time to write, and requires more time to review.
57+
58+
In general, do not create custom unit tests to ensure that
59+
SciPy distribution methods are *correct* (in the sense of being consistent
60+
with the rest of the distribution methods); generic tests take care of
61+
that.
62+
"""
63+
64+
def __init__(self, **kwargs):
65+
try:
66+
if mpmath.dps is not None:
67+
message = (
68+
"`mpmath.dps` has been assigned. This is not "
69+
"intended usage; instead, assign the desired "
70+
"precision to `mpmath.mp.dps` (e.g. `from mpmath "
71+
"as mp; mp.dps = 50."
72+
)
73+
raise RuntimeError(message)
74+
except AttributeError:
75+
mpmath.dps = None
76+
77+
if mp.dps <= 15:
78+
message = (
79+
"`mpmath.mp.dps <= 15`. Set a higher precision (e.g."
80+
"`50`) to use this distribution."
81+
)
82+
raise RuntimeError(message)
83+
84+
self._params = {key: self._make_mpf_array(val) for key, val in kwargs.items()}
85+
86+
def _make_mpf_array(self, x):
87+
shape = np.shape(x)
88+
x = np.asarray(x, dtype=np.float64).ravel()
89+
return np.asarray([mp.mpf(xi) for xi in x]).reshape(shape)[()]
90+
91+
def _pdf(self, x):
92+
raise NotImplementedError("_pdf must be overridden.")
93+
94+
def _cdf(self, x, **kwargs):
95+
if (self._cdf.__func__ is ReferenceDistribution._cdf) and (
96+
self._sf.__func__ is not ReferenceDistribution._sf
97+
):
98+
return mp.one - self._sf(x, **kwargs)
99+
100+
a, b = self._support(**kwargs)
101+
res = mp.quad(lambda x: self._pdf(x, **kwargs), (a, x))
102+
res = res if res < 0.5 else mp.one - self._sf(x, **kwargs)
103+
return res
104+
105+
def _sf(self, x, **kwargs):
106+
if (self._sf.__func__ is ReferenceDistribution._sf) and (
107+
self._cdf.__func__ is not ReferenceDistribution._cdf
108+
):
109+
return mp.one - self._cdf(x, **kwargs)
110+
111+
a, b = self._support(**kwargs)
112+
res = mp.quad(lambda x: self._pdf(x, **kwargs), (x, b))
113+
res = res if res < 0.5 else mp.one - self._cdf(x, **kwargs)
114+
return res
115+
116+
def _ppf(self, p, guess=0, **kwargs):
117+
if (self._ppf.__func__ is ReferenceDistribution._ppf) and (
118+
self._isf.__func__ is not ReferenceDistribution._isf
119+
):
120+
return self._isf(mp.one - p, guess, **kwargs)
121+
122+
def f(x):
123+
return self._cdf(x, **kwargs) - p
124+
125+
return mp.findroot(f, guess)
126+
127+
def _isf(self, p, guess=0, **kwargs):
128+
if (self._isf.__func__ is ReferenceDistribution._isf) and (
129+
self._ppf.__func__ is not ReferenceDistribution._ppf
130+
):
131+
return self._ppf(mp.one - p, guess, **kwargs)
132+
133+
def f(x):
134+
return self._sf(x, **kwargs) - p
135+
136+
return mp.findroot(f, guess)
137+
138+
def _logpdf(self, x, **kwargs):
139+
return mp.log(self._pdf(x, **kwargs))
140+
141+
def _logcdf(self, x, **kwargs):
142+
return mp.log(self._cdf(x, **kwargs))
143+
144+
def _logsf(self, x, **kwargs):
145+
return mp.log(self._sf(x, **kwargs))
146+
147+
def _support(self, **kwargs):
148+
return -mp.inf, mp.inf
149+
150+
def _entropy(self, **kwargs):
151+
def integrand(x):
152+
logpdf = self._logpdf(x, **kwargs)
153+
pdf = mp.exp(logpdf)
154+
return -pdf * logpdf
155+
156+
a, b = self._support(**kwargs)
157+
return mp.quad(integrand, (a, b))
158+
159+
def _mean(self, **kwargs):
160+
return self._moment(order=1, center=0, **kwargs)
161+
162+
def _var(self, **kwargs):
163+
mu = self._mean(**kwargs)
164+
return self._moment(order=2, center=mu, **kwargs)
165+
166+
def _skew(self, **kwargs):
167+
mu = self._mean(**kwargs)
168+
u2 = self._moment(order=2, center=mu, **kwargs)
169+
sigma = mp.sqrt(u2)
170+
u3 = self._moment(order=3, center=mu, **kwargs)
171+
return u3 / sigma**3
172+
173+
def _kurtosis(self, **kwargs):
174+
mu = self._mean(**kwargs)
175+
u2 = self._moment(order=2, center=mu, **kwargs)
176+
u4 = self._moment(order=4, center=mu, **kwargs)
177+
return u4 / u2**2 - 3
178+
179+
def _moment(self, order, center, **kwargs):
180+
def integrand(x):
181+
return self._pdf(x, **kwargs) * (x - center) ** order
182+
183+
if center is None:
184+
center = self._mean(**kwargs)
185+
186+
a, b = self._support(**kwargs)
187+
return mp.quad(integrand, (a, b))
188+
189+
def pdf(self, x, dtype=np.float64):
190+
fun = np.vectorize(self._pdf)
191+
x = self._make_mpf_array(x)
192+
res = fun(x, **self._params)
193+
return np.asarray(res, dtype=dtype)[()]
194+
195+
def cdf(self, x, dtype=np.float64):
196+
fun = np.vectorize(self._cdf)
197+
x = self._make_mpf_array(x)
198+
res = fun(x, **self._params)
199+
return np.asarray(res, dtype=dtype)[()]
200+
201+
def sf(self, x, dtype=np.float64):
202+
fun = np.vectorize(self._sf)
203+
x = self._make_mpf_array(x)
204+
res = fun(x, **self._params)
205+
return np.asarray(res, dtype=dtype)[()]
206+
207+
def ppf(self, x, guess=0, dtype=np.float64):
208+
fun = np.vectorize(self._ppf, excluded={1}) # don't vectorize guess
209+
x = self._make_mpf_array(x)
210+
res = fun(x, guess, **self._params)
211+
return np.asarray(res, dtype=dtype)[()]
212+
213+
def isf(self, x, guess=0, dtype=np.float64):
214+
fun = np.vectorize(self._isf, excluded={1}) # don't vectorize guess
215+
x = self._make_mpf_array(x)
216+
res = fun(x, guess, **self._params)
217+
return np.asarray(res, dtype=dtype)[()]
218+
219+
def logpdf(self, x, dtype=np.float64):
220+
fun = np.vectorize(self._logpdf)
221+
x = self._make_mpf_array(x)
222+
res = fun(x, **self._params)
223+
return np.asarray(res, dtype=dtype)[()]
224+
225+
def logcdf(self, x, dtype=np.float64):
226+
fun = np.vectorize(self._logcdf)
227+
x = self._make_mpf_array(x)
228+
res = fun(x, **self._params)
229+
return np.asarray(res, dtype=dtype)[()]
230+
231+
def logsf(self, x, dtype=np.float64):
232+
fun = np.vectorize(self._logsf)
233+
x = self._make_mpf_array(x)
234+
res = fun(x, **self._params)
235+
return np.asarray(res, dtype=dtype)[()]
236+
237+
def support(self, dtype=np.float64):
238+
fun = np.vectorize(self._support)
239+
res = fun(**self._params)
240+
return np.asarray(res, dtype=dtype)[()]
241+
242+
def entropy(self, dtype=np.float64):
243+
fun = np.vectorize(self._entropy)
244+
res = fun(**self._params)
245+
return np.asarray(res, dtype=dtype)[()]
246+
247+
def mean(self, dtype=np.float64):
248+
fun = np.vectorize(self._mean)
249+
res = fun(**self._params)
250+
return np.asarray(res, dtype=dtype)[()]
251+
252+
def var(self, dtype=np.float64):
253+
fun = np.vectorize(self._var)
254+
res = fun(**self._params)
255+
return np.asarray(res, dtype=dtype)[()]
256+
257+
def skew(self, dtype=np.float64):
258+
fun = np.vectorize(self._skew)
259+
res = fun(**self._params)
260+
return np.asarray(res, dtype=dtype)[()]
261+
262+
def kurtosis(self, dtype=np.float64):
263+
fun = np.vectorize(self._kurtosis)
264+
res = fun(**self._params)
265+
return np.asarray(res, dtype=dtype)[()]
266+
267+
def moment(self, order, center=None, dtype=np.float64):
268+
fun = np.vectorize(self._moment)
269+
order = self._make_mpf_array(order)
270+
res = fun(order, **self._params)
271+
return np.asarray(res, dtype=dtype)[()]
272+
273+
274+
class LogNormal(ReferenceDistribution):
275+
def __init__(self, *, s):
276+
super().__init__(s=s)
277+
278+
def _support(self, s):
279+
return 0, mp.inf
280+
281+
def _pdf(self, x, s):
282+
return (
283+
mp.one
284+
/ (s * x * mp.sqrt(2 * mp.pi))
285+
* mp.exp(-mp.one / 2 * (mp.log(x) / s) ** 2)
286+
)
287+
288+
def _cdf(self, x, s):
289+
return mp.ncdf(mp.log(x) / s)
290+
291+
292+
class Normal(ReferenceDistribution):
293+
def _pdf(self, x):
294+
return mp.npdf(x)

tests/test_basics.py

Lines changed: 0 additions & 2 deletions
This file was deleted.

0 commit comments

Comments
 (0)