Skip to content

Commit 0fd57d4

Browse files
authored
Implement dpnp.allclose() for a device without fp64 aspect (#1536)
* Added support of dpnp.allclose() for a device without fp64 aspect * Added tests for SYCL queue and USM type * Handled a corner case with abs(MIN_INT) * Increased test coverage * Fixed typos * Addressed review commets
1 parent 5a2913f commit 0fd57d4

File tree

12 files changed

+272
-82
lines changed

12 files changed

+272
-82
lines changed

.github/workflows/conda-package.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,11 @@ env:
2424
test_random_state.py
2525
test_sort.py
2626
test_special.py
27+
test_sycl_queue.py
2728
test_umath.py
2829
test_usm_type.py
2930
third_party/cupy/linalg_tests/test_product.py
31+
third_party/cupy/logic_tests/test_comparison.py
3032
third_party/cupy/logic_tests/test_truth.py
3133
third_party/cupy/manipulation_tests/test_basic.py
3234
third_party/cupy/manipulation_tests/test_join.py

dpnp/backend/kernels/dpnp_krnl_logic.cpp

Lines changed: 89 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ DPCTLSyclEventRef dpnp_all_c(DPCTLSyclQueueRef q_ref,
7474
sycl::nd_range<1> gws(gws_range, lws_range);
7575

7676
auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) {
77-
auto gr = nd_it.get_group();
77+
auto gr = nd_it.get_sub_group();
7878
const auto max_gr_size = gr.get_max_local_range()[0];
7979
const size_t start =
8080
vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) +
@@ -127,8 +127,79 @@ DPCTLSyclEventRef (*dpnp_all_ext_c)(DPCTLSyclQueueRef,
127127
const DPCTLEventVectorRef) =
128128
dpnp_all_c<_DataType, _ResultType>;
129129

130-
template <typename _DataType1, typename _DataType2, typename _ResultType>
131-
class dpnp_allclose_c_kernel;
130+
template <typename _DataType1, typename _DataType2, typename _TolType>
131+
class dpnp_allclose_kernel;
132+
133+
template <typename _DataType1, typename _DataType2, typename _TolType>
134+
static sycl::event dpnp_allclose(sycl::queue &q,
135+
const _DataType1 *array1,
136+
const _DataType2 *array2,
137+
bool *result,
138+
const size_t size,
139+
const _TolType rtol_val,
140+
const _TolType atol_val)
141+
{
142+
sycl::event fill_event = q.fill(result, true, 1);
143+
if (!size) {
144+
return fill_event;
145+
}
146+
147+
constexpr size_t lws = 64;
148+
constexpr size_t vec_sz = 8;
149+
150+
auto gws_range =
151+
sycl::range<1>(((size + lws * vec_sz - 1) / (lws * vec_sz)) * lws);
152+
auto lws_range = sycl::range<1>(lws);
153+
sycl::nd_range<1> gws(gws_range, lws_range);
154+
155+
auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) {
156+
auto gr = nd_it.get_sub_group();
157+
const auto max_gr_size = gr.get_max_local_range()[0];
158+
const auto gr_size = gr.get_local_linear_range();
159+
const size_t start =
160+
vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) +
161+
gr.get_group_linear_id() * max_gr_size);
162+
const size_t end = sycl::min(start + vec_sz * gr_size, size);
163+
164+
// each work-item iterates over "vec_sz" elements in the input arrays
165+
bool partial = true;
166+
167+
for (size_t i = start + gr.get_local_linear_id(); i < end; i += gr_size)
168+
{
169+
if constexpr (std::is_floating_point_v<_DataType1> &&
170+
std::is_floating_point_v<_DataType2>)
171+
{
172+
if (std::isinf(array1[i]) || std::isinf(array2[i])) {
173+
partial &= (array1[i] == array2[i]);
174+
continue;
175+
}
176+
}
177+
178+
// casting integeral to floating type to avoid bad behavior
179+
// on abs(MIN_INT), which leads to undefined result
180+
using _Arr2Type = std::conditional_t<std::is_integral_v<_DataType2>,
181+
_TolType, _DataType2>;
182+
_Arr2Type arr2 = static_cast<_Arr2Type>(array2[i]);
183+
184+
partial &= (std::abs(array1[i] - arr2) <=
185+
(atol_val + rtol_val * std::abs(arr2)));
186+
}
187+
partial = sycl::all_of_group(gr, partial);
188+
189+
if (gr.leader() && (partial == false)) {
190+
result[0] = false;
191+
}
192+
};
193+
194+
auto kernel_func = [&](sycl::handler &cgh) {
195+
cgh.depends_on(fill_event);
196+
cgh.parallel_for<
197+
class dpnp_allclose_kernel<_DataType1, _DataType2, _TolType>>(
198+
gws, kernel_parallel_for_func);
199+
};
200+
201+
return q.submit(kernel_func);
202+
}
132203

133204
template <typename _DataType1, typename _DataType2, typename _ResultType>
134205
DPCTLSyclEventRef dpnp_allclose_c(DPCTLSyclQueueRef q_ref,
@@ -140,6 +211,9 @@ DPCTLSyclEventRef dpnp_allclose_c(DPCTLSyclQueueRef q_ref,
140211
double atol_val,
141212
const DPCTLEventVectorRef dep_event_vec_ref)
142213
{
214+
static_assert(std::is_same_v<_ResultType, bool>,
215+
"Boolean result type is required");
216+
143217
// avoid warning unused variable
144218
(void)dep_event_vec_ref;
145219

@@ -152,40 +226,21 @@ DPCTLSyclEventRef dpnp_allclose_c(DPCTLSyclQueueRef q_ref,
152226
sycl::queue q = *(reinterpret_cast<sycl::queue *>(q_ref));
153227
sycl::event event;
154228

155-
DPNPC_ptr_adapter<_DataType1> input1_ptr(q_ref, array1_in, size);
156-
DPNPC_ptr_adapter<_DataType2> input2_ptr(q_ref, array2_in, size);
157-
DPNPC_ptr_adapter<_ResultType> result1_ptr(q_ref, result1, 1, true, true);
158-
const _DataType1 *array1 = input1_ptr.get_ptr();
159-
const _DataType2 *array2 = input2_ptr.get_ptr();
160-
_ResultType *result = result1_ptr.get_ptr();
161-
162-
result[0] = true;
229+
const _DataType1 *array1 = static_cast<const _DataType1 *>(array1_in);
230+
const _DataType2 *array2 = static_cast<const _DataType2 *>(array2_in);
231+
bool *result = static_cast<bool *>(result1);
163232

164-
if (!size) {
165-
return event_ref;
233+
if (q.get_device().has(sycl::aspect::fp64)) {
234+
event =
235+
dpnp_allclose(q, array1, array2, result, size, rtol_val, atol_val);
236+
}
237+
else {
238+
float rtol = static_cast<float>(rtol_val);
239+
float atol = static_cast<float>(atol_val);
240+
event = dpnp_allclose(q, array1, array2, result, size, rtol, atol);
166241
}
167-
168-
sycl::range<1> gws(size);
169-
auto kernel_parallel_for_func = [=](sycl::id<1> global_id) {
170-
size_t i = global_id[0];
171-
172-
if (std::abs(array1[i] - array2[i]) >
173-
(atol_val + rtol_val * std::abs(array2[i])))
174-
{
175-
result[0] = false;
176-
}
177-
};
178-
179-
auto kernel_func = [&](sycl::handler &cgh) {
180-
cgh.parallel_for<
181-
class dpnp_allclose_c_kernel<_DataType1, _DataType2, _ResultType>>(
182-
gws, kernel_parallel_for_func);
183-
};
184-
185-
event = q.submit(kernel_func);
186242

187243
event_ref = reinterpret_cast<DPCTLSyclEventRef>(&event);
188-
189244
return DPCTLEvent_Copy(event_ref);
190245
}
191246

@@ -269,7 +324,7 @@ DPCTLSyclEventRef dpnp_any_c(DPCTLSyclQueueRef q_ref,
269324
sycl::nd_range<1> gws(gws_range, lws_range);
270325

271326
auto kernel_parallel_for_func = [=](sycl::nd_item<1> nd_it) {
272-
auto gr = nd_it.get_group();
327+
auto gr = nd_it.get_sub_group();
273328
const auto max_gr_size = gr.get_max_local_range()[0];
274329
const size_t start =
275330
vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) +

dpnp/dpnp_iface.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@
6666
"get_normalized_queue_device",
6767
"get_usm_ndarray",
6868
"get_usm_ndarray_or_scalar",
69+
"is_supported_array_or_scalar",
6970
"is_supported_array_type",
7071
]
7172

@@ -453,14 +454,36 @@ def get_usm_ndarray_or_scalar(a):
453454
return a if isscalar(a) else get_usm_ndarray(a)
454455

455456

457+
def is_supported_array_or_scalar(a):
458+
"""
459+
Return ``True`` if `a` is a scalar or an array of either
460+
:class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray` type,
461+
``False`` otherwise.
462+
463+
Parameters
464+
----------
465+
a : {scalar, dpnp_array, usm_ndarray}
466+
An input scalar or an array to check the type of.
467+
468+
Returns
469+
-------
470+
out : bool
471+
``True`` if input `a` is a scalar or an array of supported type,
472+
``False`` otherwise.
473+
474+
"""
475+
476+
return isscalar(a) or is_supported_array_type(a)
477+
478+
456479
def is_supported_array_type(a):
457480
"""
458481
Return ``True`` if an array of either type :class:`dpnp.ndarray`
459482
or :class:`dpctl.tensor.usm_ndarray` type, ``False`` otherwise.
460483
461484
Parameters
462485
----------
463-
a : array
486+
a : {dpnp_array, usm_ndarray}
464487
An input array to check the type.
465488
466489
Returns

dpnp/dpnp_iface_linearalgebra.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -358,14 +358,15 @@ def outer(x1, x2, out=None):
358358
[1, 2, 3]])
359359
360360
"""
361+
361362
x1_is_scalar = dpnp.isscalar(x1)
362363
x2_is_scalar = dpnp.isscalar(x2)
363364

364365
if x1_is_scalar and x2_is_scalar:
365366
pass
366-
elif not (x1_is_scalar or dpnp.is_supported_array_type(x1)):
367+
elif not dpnp.is_supported_array_or_scalar(x1):
367368
pass
368-
elif not (x2_is_scalar or dpnp.is_supported_array_type(x2)):
369+
elif not dpnp.is_supported_array_or_scalar(x2):
369370
pass
370371
else:
371372
x1_in = (

dpnp/dpnp_iface_logic.py

Lines changed: 69 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -152,42 +152,94 @@ def all(x, /, axis=None, out=None, keepdims=False, *, where=True):
152152
)
153153

154154

155-
def allclose(x1, x2, rtol=1.0e-5, atol=1.0e-8, **kwargs):
155+
def allclose(a, b, rtol=1.0e-5, atol=1.0e-8, **kwargs):
156156
"""
157157
Returns True if two arrays are element-wise equal within a tolerance.
158158
159159
For full documentation refer to :obj:`numpy.allclose`.
160160
161+
Returns
162+
-------
163+
out : dpnp.ndarray
164+
A boolean 0-dim array. If its value is ``True``,
165+
two arrays are element-wise equal within a tolerance.
166+
161167
Limitations
162168
-----------
163-
Parameters `x1` and `x2` are supported as either :class:`dpnp.ndarray` or scalar.
169+
Parameters `a` and `b` are supported either as :class:`dpnp.ndarray`,
170+
:class:`dpctl.tensor.usm_ndarray` or scalars, but both `a` and `b`
171+
can not be scalars at the same time.
164172
Keyword argument `kwargs` is currently unsupported.
165173
Otherwise the functions will be executed sequentially on CPU.
166-
Input array data types are limited by supported DPNP :ref:`Data types`.
174+
Parameters `rtol` and `atol` are supported as scalars. Otherwise
175+
``TypeError`` exeption will be raised.
176+
Input array data types are limited by supported integer and
177+
floating DPNP :ref:`Data types`.
178+
179+
See Also
180+
--------
181+
:obj:`dpnp.isclose` : Test whether two arrays are element-wise equal.
182+
:obj:`dpnp.all` : Test whether all elements evaluate to True.
183+
:obj:`dpnp.any` : Test whether any element evaluates to True.
184+
:obj:`dpnp.equal` : Return (x1 == x2) element-wise.
167185
168186
Examples
169187
--------
170188
>>> import dpnp as np
171-
>>> np.allclose([1e10,1e-7], [1.00001e10,1e-8])
172-
>>> False
189+
>>> a = np.array([1e10, 1e-7])
190+
>>> b = np.array([1.00001e10, 1e-8])
191+
>>> np.allclose(a, b)
192+
array([False])
193+
194+
>>> a = np.array([1.0, np.nan])
195+
>>> b = np.array([1.0, np.nan])
196+
>>> np.allclose(a, b)
197+
array([False])
198+
199+
>>> a = np.array([1.0, np.inf])
200+
>>> b = np.array([1.0, np.inf])
201+
>>> np.allclose(a, b)
202+
array([ True])
173203
174204
"""
175205

176-
rtol_is_scalar = dpnp.isscalar(rtol)
177-
atol_is_scalar = dpnp.isscalar(atol)
178-
x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False)
179-
x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False)
206+
if dpnp.isscalar(a) and dpnp.isscalar(b):
207+
# at least one of inputs has to be an array
208+
pass
209+
elif not (
210+
dpnp.is_supported_array_or_scalar(a)
211+
and dpnp.is_supported_array_or_scalar(b)
212+
):
213+
pass
214+
elif kwargs:
215+
pass
216+
else:
217+
if not dpnp.isscalar(rtol):
218+
raise TypeError(
219+
"An argument `rtol` must be a scalar, but got {}".format(
220+
type(rtol)
221+
)
222+
)
223+
elif not dpnp.isscalar(atol):
224+
raise TypeError(
225+
"An argument `atol` must be a scalar, but got {}".format(
226+
type(atol)
227+
)
228+
)
180229

181-
if x1_desc and x2_desc and not kwargs:
182-
if not rtol_is_scalar or not atol_is_scalar:
183-
pass
184-
else:
185-
result_obj = dpnp_allclose(x1_desc, x2_desc, rtol, atol).get_pyobj()
186-
result = dpnp.convert_single_elem_array_to_scalar(result_obj)
230+
if dpnp.isscalar(a):
231+
a = dpnp.full_like(b, fill_value=a)
232+
elif dpnp.isscalar(b):
233+
b = dpnp.full_like(a, fill_value=b)
234+
elif a.shape != b.shape:
235+
a, b = dpt.broadcast_arrays(a.get_array(), b.get_array())
187236

188-
return result
237+
a_desc = dpnp.get_dpnp_descriptor(a, copy_when_nondefault_queue=False)
238+
b_desc = dpnp.get_dpnp_descriptor(b, copy_when_nondefault_queue=False)
239+
if a_desc and b_desc:
240+
return dpnp_allclose(a_desc, b_desc, rtol, atol).get_pyobj()
189241

190-
return call_origin(numpy.allclose, x1, x2, rtol=rtol, atol=atol, **kwargs)
242+
return call_origin(numpy.allclose, a, b, rtol=rtol, atol=atol, **kwargs)
191243

192244

193245
def any(x, /, axis=None, out=None, keepdims=False, *, where=True):

tests/skipped_tests.tbl

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -438,11 +438,7 @@ tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transpose
438438
tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_int_axes
439439
tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_tensordot_with_list_axes
440440
tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_reversed_vdot
441-
tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_array_scalar
442-
tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_finite
443-
tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_infinite
444-
tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_infinite_equal_nan
445-
tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_min_int
441+
446442
tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_broadcast_not_allowed
447443
tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_diff_dtypes_is_equal
448444
tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_diff_dtypes_not_equal

tests/skipped_tests_gpu.tbl

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -584,11 +584,7 @@ tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transpose
584584
tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_tensordot_zero_dim
585585
tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_dot_with_out_f_contiguous
586586
tests/third_party/cupy/linalg_tests/test_product.py::TestProduct::test_transposed_multidim_vdot
587-
tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_array_scalar
588-
tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_finite
589-
tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_infinite
590-
tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_infinite_equal_nan
591-
tests/third_party/cupy/logic_tests/test_comparison.py::TestAllclose::test_allclose_min_int
587+
592588
tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_broadcast_not_allowed
593589
tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_diff_dtypes_is_equal
594590
tests/third_party/cupy/logic_tests/test_comparison.py::TestArrayEqual::test_array_equal_diff_dtypes_not_equal

tests/skipped_tests_gpu_no_fp64.tbl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -443,8 +443,8 @@ tests/test_sycl_queue.py::test_array_creation[opencl:gpu:0-arange-arg0-kwargs0]
443443
tests/test_sycl_queue.py::test_array_creation[level_zero:gpu:0-arange-arg0-kwargs0]
444444
tests/test_sycl_queue.py::test_1in_1out[opencl:gpu:0-gradient-data10]
445445
tests/test_sycl_queue.py::test_1in_1out[level_zero:gpu:0-gradient-data10]
446-
tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-power-data112-data212]
447-
tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-power-data112-data212]
446+
tests/test_sycl_queue.py::test_2in_1out[opencl:gpu:0-power-data113-data213]
447+
tests/test_sycl_queue.py::test_2in_1out[level_zero:gpu:0-power-data113-data213]
448448
tests/test_sycl_queue.py::test_out_2in_1out[opencl:gpu:0-power-data19-data29]
449449
tests/test_sycl_queue.py::test_out_2in_1out[level_zero:gpu:0-power-data19-data29]
450450
tests/test_sycl_queue.py::test_eig[opencl:gpu:0]

0 commit comments

Comments
 (0)