Skip to content

Commit f97b64c

Browse files
authored
Add correction keyword argument to dpnp.std and dpnp.var (#2300)
This PR adds the `correction` keyword argument to the statistics functions `dpnp.std`, `dpnp.var`, `dpnp.nanstd`, and `dpnp.nanvar`, and to the `dpnp.ndarray.std` and `dpnp.ndarray.var` methods. The keyword is mandated by the Python array API standard. The corresponding tests, previously muted, are now enabled in the Python array API compliance scope. This PR also improves the docstrings of functions in the statistics interface.
1 parent 63ce858 commit f97b64c

File tree

7 files changed

+229
-20
lines changed

7 files changed

+229
-20
lines changed

.github/workflows/array-api-skips.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,3 @@ array_api_tests/test_operators_and_elementwise_functions.py::test_clip
2929
# unexpected result is returned - unmute when dpctl-1986 is resolved
3030
array_api_tests/test_operators_and_elementwise_functions.py::test_asin
3131
array_api_tests/test_operators_and_elementwise_functions.py::test_asinh
32-
33-
# missing 'correction' keyword argument
34-
array_api_tests/test_signatures.py::test_func_signature[std]
35-
array_api_tests/test_signatures.py::test_func_signature[var]

dpnp/dpnp_array.py

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1732,6 +1732,7 @@ def std(
17321732
*,
17331733
where=True,
17341734
mean=None,
1735+
correction=None,
17351736
):
17361737
"""
17371738
Returns the standard deviation of the array elements, along given axis.
@@ -1741,7 +1742,15 @@ def std(
17411742
"""
17421743

17431744
return dpnp.std(
1744-
self, axis, dtype, out, ddof, keepdims, where=where, mean=mean
1745+
self,
1746+
axis,
1747+
dtype,
1748+
out,
1749+
ddof,
1750+
keepdims,
1751+
where=where,
1752+
mean=mean,
1753+
correction=correction,
17451754
)
17461755

17471756
@property
@@ -1942,6 +1951,7 @@ def var(
19421951
*,
19431952
where=True,
19441953
mean=None,
1954+
correction=None,
19451955
):
19461956
"""
19471957
Returns the variance of the array elements, along given axis.
@@ -1951,7 +1961,15 @@ def var(
19511961
"""
19521962

19531963
return dpnp.var(
1954-
self, axis, dtype, out, ddof, keepdims, where=where, mean=mean
1964+
self,
1965+
axis,
1966+
dtype,
1967+
out,
1968+
ddof,
1969+
keepdims,
1970+
where=where,
1971+
mean=mean,
1972+
correction=correction,
19551973
)
19561974

19571975

dpnp/dpnp_iface_nanfunctions.py

Lines changed: 73 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -113,15 +113,18 @@ def nanargmax(a, axis=None, out=None, *, keepdims=False):
113113
Input array.
114114
axis : {None, int}, optional
115115
Axis along which to operate. By default flattened input is used.
116+
116117
Default: ``None``.
117118
out : {None, dpnp.ndarray, usm_ndarray}, optional
118119
If provided, the result will be inserted into this array. It should be
119120
of the appropriate shape and dtype.
121+
120122
Default: ``None``.
121123
keepdims : {None, bool}, optional
122124
If this is set to ``True``, the axes which are reduced are left in the
123125
result as dimensions with size one. With this option, the result will
124126
broadcast correctly against the array.
127+
125128
Default: ``False``.
126129
127130
Returns
@@ -184,15 +187,18 @@ def nanargmin(a, axis=None, out=None, *, keepdims=False):
184187
Input array.
185188
axis : {None, int}, optional
186189
Axis along which to operate. By default flattened input is used.
190+
187191
Default: ``None``.
188192
out : {None, dpnp.ndarray, usm_ndarray}, optional
189193
If provided, the result will be inserted into this array. It should be
190194
of the appropriate shape and dtype.
195+
191196
Default: ``None``.
192197
keepdims : {None, bool}, optional
193198
If this is set to ``True``, the axes which are reduced are left in the
194199
result as dimensions with size one. With this option, the result will
195200
broadcast correctly against the array.
201+
196202
Default: ``False``.
197203
198204
Returns
@@ -257,19 +263,24 @@ def nancumprod(a, axis=None, dtype=None, out=None):
257263
Input array.
258264
axis : {None, int}, optional
259265
Axis along which the cumulative product is computed. The default
260-
(``None``) is to compute the cumulative product over the flattened
261-
array.
266+
is to compute the cumulative product over the flattened array.
267+
268+
Default: ``None``.
262269
dtype : {None, dtype}, optional
263270
Type of the returned array and of the accumulator in which the elements
264271
are summed. If `dtype` is not specified, it defaults to the dtype of
265272
`a`, unless `a` has an integer dtype with a precision less than that of
266273
the default platform integer. In that case, the default platform
267274
integer is used.
275+
276+
Default: ``None``.
268277
out : {None, dpnp.ndarray, usm_ndarray}, optional
269278
Alternative output array in which to place the result. It must have the
270279
same shape and buffer length as the expected output but the type will
271280
be cast if necessary.
272281
282+
Default: ``None``.
283+
273284
Returns
274285
-------
275286
out : dpnp.ndarray
@@ -321,19 +332,25 @@ def nancumsum(a, axis=None, dtype=None, out=None):
321332
a : {dpnp.ndarray, usm_ndarray}
322333
Input array.
323334
axis : {None, int}, optional
324-
Axis along which the cumulative sum is computed. The default (``None``)
325-
is to compute the cumulative sum over the flattened array.
335+
Axis along which the cumulative sum is computed. The default is to
336+
compute the cumulative sum over the flattened array.
337+
338+
Default: ``None``.
326339
dtype : {None, dtype}, optional
327340
Type of the returned array and of the accumulator in which the elements
328341
are summed. If `dtype` is not specified, it defaults to the dtype of
329342
`a`, unless `a` has an integer dtype with a precision less than that of
330343
the default platform integer. In that case, the default platform
331344
integer is used.
345+
346+
Default: ``None``.
332347
out : {None, dpnp.ndarray, usm_ndarray}, optional
333348
Alternative output array in which to place the result. It must have the
334349
same shape and buffer length as the expected output but the type will
335350
be cast if necessary.
336351
352+
Default: ``None``.
353+
337354
Returns
338355
-------
339356
out : dpnp.ndarray
@@ -386,15 +403,19 @@ def nanmax(a, axis=None, out=None, keepdims=False, initial=None, where=True):
386403
Axis or axes along which maximum values must be computed. By default,
387404
the maximum value must be computed over the entire array. If a tuple
388405
of integers, maximum values must be computed over multiple axes.
406+
389407
Default: ``None``.
390408
out : {None, dpnp.ndarray, usm_ndarray}, optional
391409
If provided, the result will be inserted into this array. It should
392410
be of the appropriate shape and dtype.
411+
412+
Default: ``None``.
393413
keepdims : {None, bool}, optional
394414
If ``True``, the reduced axes (dimensions) must be included in the
395415
result as singleton dimensions, and, accordingly, the result must be
396416
compatible with the input array. Otherwise, if ``False``, the reduced
397417
axes (dimensions) must not be included in the result.
418+
398419
Default: ``False``.
399420
400421
Returns
@@ -476,6 +497,7 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False, *, where=True):
476497
Axis or axes along which the arithmetic means must be computed. If
477498
a tuple of unique integers, the means are computed over multiple
478499
axes. If ``None``, the mean is computed over the entire array.
500+
479501
Default: ``None``.
480502
dtype : {None, dtype}, optional
481503
Type to use in computing the mean. By default, if `a` has a
@@ -484,16 +506,22 @@ def nanmean(a, axis=None, dtype=None, out=None, keepdims=False, *, where=True):
484506
If `a` has a boolean or integral data type, the returned array
485507
will have the default floating point data type for the device
486508
where input array `a` is allocated.
509+
510+
Default: ``None``.
487511
out : {None, dpnp.ndarray, usm_ndarray}, optional
488512
Alternative output array in which to place the result. It must have
489513
the same shape as the expected output but the type (of the calculated
490-
values) will be cast if necessary. Default: ``None``.
514+
values) will be cast if necessary.
515+
516+
Default: ``None``.
491517
keepdims : {None, bool}, optional
492518
If ``True``, the reduced axes (dimensions) are included in the result
493519
as singleton dimensions, so that the returned array remains
494520
compatible with the input array according to Array Broadcasting
495521
rules. Otherwise, if ``False``, the reduced axes are not included in
496-
the returned array. Default: ``False``.
522+
the returned array.
523+
524+
Default: ``False``.
497525
498526
Returns
499527
-------
@@ -588,25 +616,29 @@ def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False):
588616
the array. If a sequence of axes, the array is first flattened along
589617
the given axes, then the median is computed along the resulting
590618
flattened axis.
619+
591620
Default: ``None``.
592621
out : {None, dpnp.ndarray, usm_ndarray}, optional
593622
Alternative output array in which to place the result. It must have
594623
the same shape as the expected output but the type (of the calculated
595624
values) will be cast if necessary.
625+
596626
Default: ``None``.
597627
overwrite_input : bool, optional
598628
If ``True``, then allow use of memory of input array `a` for
599629
calculations. The input array will be modified by the call to
600630
:obj:`dpnp.nanmedian`. This will save memory when you do not need to
601631
preserve the contents of the input array. Treat the input as undefined,
602632
but it will probably be fully or partially sorted.
633+
603634
Default: ``False``.
604635
keepdims : bool, optional
605636
If ``True``, the reduced axes (dimensions) are included in the result
606637
as singleton dimensions, so that the returned array remains
607638
compatible with the input array according to Array Broadcasting
608639
rules. Otherwise, if ``False``, the reduced axes are not included in
609640
the returned array.
641+
610642
Default: ``False``.
611643
612644
Returns
@@ -687,15 +719,19 @@ def nanmin(a, axis=None, out=None, keepdims=False, initial=None, where=True):
687719
Axis or axes along which minimum values must be computed. By default,
688720
the minimum value must be computed over the entire array. If a tuple
689721
of integers, minimum values must be computed over multiple axes.
722+
690723
Default: ``None``.
691724
out : {None, dpnp.ndarray, usm_ndarray}, optional
692725
If provided, the result will be inserted into this array. It should
693726
be of the appropriate shape and dtype.
727+
728+
Default: ``None``.
694729
keepdims : {None, bool}, optional
695730
If ``True``, the reduced axes (dimensions) must be included in the
696731
result as singleton dimensions, and, accordingly, the result must be
697732
compatible with the input array. Otherwise, if ``False``, the reduced
698733
axes (dimensions) must not be included in the result.
734+
699735
Default: ``False``.
700736
701737
Returns
@@ -785,6 +821,7 @@ def nanprod(
785821
axis : {None, int or tuple of ints}, optional
786822
Axis or axes along which the product is computed. The default is to
787823
compute the product of the flattened array.
824+
788825
Default: ``None``.
789826
dtype : {None, dtype}, optional
790827
The type of the returned array and of the accumulator in which the
@@ -793,17 +830,20 @@ def nanprod(
793830
the platform (u)intp. In that case, the default will be either (u)int32
794831
or (u)int64 depending on whether the platform is 32 or 64 bits. For
795832
inexact inputs, dtype must be inexact.
833+
796834
Default: ``None``.
797835
out : {None, dpnp.ndarray, usm_ndarray}, optional
798836
Alternate output array in which to place the result. If provided, it
799837
must have the same shape as the expected output, but the type will be
800838
cast if necessary. The casting of NaN to integer
801839
can yield unexpected results.
840+
802841
Default: ``None``.
803842
keepdims : {None, bool}, optional
804843
If ``True``, the axes which are reduced are left in the result as
805844
dimensions with size one. With this option, the result will broadcast
806845
correctly against the original `a`.
846+
807847
Default: ``False``.
808848
809849
Returns
@@ -878,6 +918,7 @@ def nansum(
878918
axis : {None, int or tuple of ints}, optional
879919
Axis or axes along which the sum is computed. The default is to compute
880920
the sum of the flattened array.
921+
881922
Default: ``None``.
882923
dtype : {None, dtype}, optional
883924
The type of the returned array and of the accumulator in which the
@@ -886,17 +927,20 @@ def nansum(
886927
(u)intp. In that case, the default will be either (u)int32 or (u)int64
887928
depending on whether the platform is 32 or 64 bits. For inexact inputs,
888929
dtype must be inexact.
930+
889931
Default: ``None``.
890932
out : {None, dpnp.ndarray, usm_ndarray}, optional
891933
Alternate output array in which to place the result. If provided, it
892934
must have the same shape as the expected output, but the type will be
893935
cast if necessary. The casting of NaN to integer can yield unexpected
894936
results.
937+
895938
Default: ``None``.
896939
keepdims : {None, bool}, optional
897940
If this is set to ``True``, the axes which are reduced are left in the
898941
result as dimensions with size one. With this option, the result will
899942
broadcast correctly against the original `a`.
943+
900944
Default: ``False``.
901945
902946
Returns
@@ -966,6 +1010,7 @@ def nanstd(
9661010
*,
9671011
where=True,
9681012
mean=None,
1013+
correction=None,
9691014
):
9701015
"""
9711016
Compute the standard deviation along the specified axis,
@@ -1018,6 +1063,12 @@ def nanstd(
10181063
10191064
Default: ``None``.
10201065
1066+
correction : {None, int, float}, optional
1067+
Array API compatible name for the `ddof` parameter. Only one of them
1068+
can be provided at the same time.
1069+
1070+
Default: ``None``.
1071+
10211072
Returns
10221073
-------
10231074
out : dpnp.ndarray
@@ -1094,6 +1145,7 @@ def nanstd(
10941145
keepdims=keepdims,
10951146
where=where,
10961147
mean=mean,
1148+
correction=correction,
10971149
)
10981150
return dpnp.sqrt(res, out=res)
10991151

@@ -1108,6 +1160,7 @@ def nanvar(
11081160
*,
11091161
where=True,
11101162
mean=None,
1163+
correction=None,
11111164
):
11121165
"""
11131166
Compute the variance along the specified axis, while ignoring NaNs.
@@ -1158,6 +1211,12 @@ def nanvar(
11581211
11591212
Default: ``None``.
11601213
1214+
correction : {None, int, float}, optional
1215+
Array API compatible name for the `ddof` parameter. Only one of them
1216+
can be provided at the same time.
1217+
1218+
Default: ``None``.
1219+
11611220
Returns
11621221
-------
11631222
out : dpnp.ndarray
@@ -1231,6 +1290,7 @@ def nanvar(
12311290
ddof=ddof,
12321291
keepdims=keepdims,
12331292
where=where,
1293+
correction=correction,
12341294
)
12351295

12361296
if dtype is not None:
@@ -1243,6 +1303,13 @@ def nanvar(
12431303
if not dpnp.issubdtype(out.dtype, dpnp.inexact):
12441304
raise TypeError("If input is inexact, then out must be inexact.")
12451305

1306+
if correction is not None:
1307+
if ddof != 0:
1308+
raise ValueError(
1309+
"ddof and correction can't be provided simultaneously."
1310+
)
1311+
ddof = correction
1312+
12461313
# Compute mean
12471314
cnt = dpnp.sum(
12481315
~mask, axis=axis, dtype=dpnp.intp, keepdims=True, where=where

0 commit comments

Comments
 (0)