Skip to content

Commit acfc678

Browse files
authored
Add missing MKL function from VM namespace (#2445)
The PR adds missing MKL functions from the VM namespace to the dedicated extension: - `arg()` to be used by `dpnp.angle` - `copysign()` - `i0()` - `inv()` to be used by `dpnp.reciprocal` Note, `remainder()` is available as an MKL VM function but follows C's modulo operator, so it can't be used to implement `dpnp.remainder`, which has to follow Python's modulo operator according to the Python array API [specification](https://data-apis.org/array-api/2024.12/API_specification/generated/array_api.remainder.html): > This function is equivalent to the Python modulus operator x1_i % x2_i. Note, `sycl::half` is not intended to be used for VM functions due to accuracy reasons and value mismatches with numpy in corner cases.
1 parent d75c842 commit acfc678

File tree

14 files changed

+744
-0
lines changed

14 files changed

+744
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ This release achieves 100% compliance with Python Array API specification (revis
2929
* Removed `einsum_call` keyword from `dpnp.einsum_path` signature [#2421](https://github.com/IntelPython/dpnp/pull/2421)
3030
* Changed `"max dimensions"` to `None` in array API capabilities [#2432](https://github.com/IntelPython/dpnp/pull/2432)
3131
* Updated kernel header `i0.hpp` to expose `cyl_bessel_i0` function depending on build target [#2440](https://github.com/IntelPython/dpnp/pull/2440)
32+
* Added MKL functions `arg`, `copysign`, `i0`, and `inv` from VM namespace to be used by implementation of the appropriate element-wise functions [#2445](https://github.com/IntelPython/dpnp/pull/2445)
3233

3334
### Fixed
3435

dpnp/backend/extensions/vm/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ if(NOT _use_onemkl_interfaces)
2929
${CMAKE_CURRENT_SOURCE_DIR}/acos.cpp
3030
${CMAKE_CURRENT_SOURCE_DIR}/acosh.cpp
3131
${CMAKE_CURRENT_SOURCE_DIR}/add.cpp
32+
${CMAKE_CURRENT_SOURCE_DIR}/arg.cpp
3233
${CMAKE_CURRENT_SOURCE_DIR}/asin.cpp
3334
${CMAKE_CURRENT_SOURCE_DIR}/asinh.cpp
3435
${CMAKE_CURRENT_SOURCE_DIR}/atan.cpp
@@ -37,6 +38,7 @@ if(NOT _use_onemkl_interfaces)
3738
${CMAKE_CURRENT_SOURCE_DIR}/cbrt.cpp
3839
${CMAKE_CURRENT_SOURCE_DIR}/ceil.cpp
3940
${CMAKE_CURRENT_SOURCE_DIR}/conj.cpp
41+
${CMAKE_CURRENT_SOURCE_DIR}/copysign.cpp
4042
${CMAKE_CURRENT_SOURCE_DIR}/cos.cpp
4143
${CMAKE_CURRENT_SOURCE_DIR}/cosh.cpp
4244
${CMAKE_CURRENT_SOURCE_DIR}/div.cpp
@@ -48,6 +50,8 @@ if(NOT _use_onemkl_interfaces)
4850
${CMAKE_CURRENT_SOURCE_DIR}/fmin.cpp
4951
${CMAKE_CURRENT_SOURCE_DIR}/fmod.cpp
5052
${CMAKE_CURRENT_SOURCE_DIR}/hypot.cpp
53+
${CMAKE_CURRENT_SOURCE_DIR}/i0.cpp
54+
${CMAKE_CURRENT_SOURCE_DIR}/inv.cpp
5155
${CMAKE_CURRENT_SOURCE_DIR}/ln.cpp
5256
${CMAKE_CURRENT_SOURCE_DIR}/log10.cpp
5357
${CMAKE_CURRENT_SOURCE_DIR}/log1p.cpp

dpnp/backend/extensions/vm/arg.cpp

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
//*****************************************************************************
2+
// Copyright (c) 2025, Intel Corporation
3+
// All rights reserved.
4+
//
5+
// Redistribution and use in source and binary forms, with or without
6+
// modification, are permitted provided that the following conditions are met:
7+
// - Redistributions of source code must retain the above copyright notice,
8+
// this list of conditions and the following disclaimer.
9+
// - Redistributions in binary form must reproduce the above copyright notice,
10+
// this list of conditions and the following disclaimer in the documentation
11+
// and/or other materials provided with the distribution.
12+
//
13+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16+
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
17+
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18+
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19+
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20+
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21+
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22+
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23+
// THE POSSIBILITY OF SUCH DAMAGE.
24+
//*****************************************************************************
25+
26+
#include <oneapi/mkl.hpp>
27+
#include <sycl/sycl.hpp>
28+
29+
#include "dpctl4pybind11.hpp"
30+
31+
#include "arg.hpp"
32+
#include "common.hpp"
33+
34+
// include a local copy of elementwise common header from dpctl tensor:
35+
// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp
36+
// TODO: replace by including dpctl header once available
37+
#include "../elementwise_functions/elementwise_functions.hpp"
38+
39+
// dpctl tensor headers
40+
#include "kernels/elementwise_functions/common.hpp"
41+
#include "utils/type_dispatch.hpp"
42+
#include "utils/type_utils.hpp"
43+
44+
namespace dpnp::extensions::vm
45+
{
46+
namespace py = pybind11;
47+
namespace py_int = dpnp::extensions::py_internal;
48+
namespace td_ns = dpctl::tensor::type_dispatch;
49+
50+
namespace impl
51+
{
52+
namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
53+
namespace mkl_vm = oneapi::mkl::vm; // OneMKL namespace with VM functions
54+
namespace tu_ns = dpctl::tensor::type_utils;
55+
56+
/**
57+
* @brief A factory to define pairs of supported types for which
58+
* MKL VM library provides support in oneapi::mkl::vm::arg<T> function.
59+
*
60+
* @tparam T Type of input vector `a`. The result vector `y` has the type
* selected by `value_type` below: `double` is paired with
* `std::complex<double>` and `float` with `std::complex<float>`.
61+
*/
62+
template <typename T>
63+
struct OutputType
64+
{
65+
// The first matching entry selects the result type; the trailing default
// entry is the fallback for every other T (presumably marking T as
// unsupported -- confirm against dpctl's type_dispatch utilities).
using value_type = typename std::disjunction<
66+
td_ns::TypeMapResultEntry<T, std::complex<double>, double>,
67+
td_ns::TypeMapResultEntry<T, std::complex<float>, float>,
68+
td_ns::DefaultResultEntry<void>>::result_type;
69+
};
70+
71+
// Calls oneMKL VM `arg` on a contiguous input of `in_n` elements of type T.
//
// `in_a` and `out_y` arrive as untyped byte pointers and are reinterpreted
// as `const T *` and `OutputType<T>::value_type *` respectively. `depends`
// is forwarded to the MKL call unchanged, and the event returned by that
// call is handed back to the caller.
template <typename T>
72+
static sycl::event arg_contig_impl(sycl::queue &exec_q,
73+
std::size_t in_n,
74+
const char *in_a,
75+
char *out_y,
76+
const std::vector<sycl::event> &depends)
77+
{
78+
// NOTE(review): presumably rejects T when the queue's device cannot handle
// it -- confirm against dpctl's type_utils.
tu_ns::validate_type_for_device<T>(exec_q);
79+
80+
// The element count is passed to MKL as a signed 64-bit integer.
std::int64_t n = static_cast<std::int64_t>(in_n);
81+
const T *a = reinterpret_cast<const T *>(in_a);
82+
83+
// The output element type differs from T (see OutputType above).
using resTy = typename OutputType<T>::value_type;
84+
resTy *y = reinterpret_cast<resTy *>(out_y);
85+
86+
return mkl_vm::arg(exec_q,
87+
n, // number of elements to be calculated
88+
a, // pointer `a` containing input vector of size n
89+
y, // pointer `y` to the output vector of size n
90+
depends);
91+
}
92+
93+
// Function-pointer types for unary element-wise kernels, taken from dpctl.
using ew_cmn_ns::unary_contig_impl_fn_ptr_t;
94+
using ew_cmn_ns::unary_strided_impl_fn_ptr_t;
95+
96+
// Per-type lookup tables with `td_ns::num_types` entries each; both are
// passed to the py_internal helpers from init_arg().
static int output_typeid_vector[td_ns::num_types];
97+
static unary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types];
98+
99+
// NOTE(review): the macro is defined outside this file; it presumably
// generates populate_dispatch_vectors(), which init_arg() calls to fill the
// two tables above -- confirm in common.hpp.
MACRO_POPULATE_DISPATCH_VECTORS(arg);
100+
} // namespace impl
101+
102+
// Registers the Python bindings for the oneMKL VM `arg` function on module
// `m`: `_arg` runs the computation and `_mkl_arg_to_call` reports whether the
// MKL implementation can be used for the given arguments.
void init_arg(py::module_ m)
103+
{
104+
using arrayT = dpctl::tensor::usm_ndarray;
105+
using event_vecT = std::vector<sycl::event>;
106+
107+
// The dispatch tables are filled before the bindings that read them are
// registered.
impl::populate_dispatch_vectors();
108+
using impl::contig_dispatch_vector;
109+
using impl::output_typeid_vector;
110+
111+
// Binding body for `_arg`: only the contiguous dispatch vector is supplied,
// while the strided slot receives a table of null pointers.
auto arg_pyapi = [&](sycl::queue &exec_q, const arrayT &src,
112+
const arrayT &dst, const event_vecT &depends = {}) {
113+
return py_int::py_unary_ufunc(
114+
src, dst, exec_q, depends, output_typeid_vector,
115+
contig_dispatch_vector,
116+
// no support of strided implementation in OneMKL
117+
td_ns::NullPtrVector<impl::unary_strided_impl_fn_ptr_t>{});
118+
};
119+
// `depends` is optional on the Python side and defaults to an empty list.
m.def("_arg", arg_pyapi,
120+
"Call `arg` function from OneMKL VM library to compute "
121+
"the argument of vector elements",
122+
py::arg("sycl_queue"), py::arg("src"), py::arg("dst"),
123+
py::arg("depends") = py::list());
124+
125+
// Binding body for `_mkl_arg_to_call`: forwards the queue, both arrays and
// the two dispatch tables to the helper and returns its answer.
// `py_internal` here names the same namespace as the `py_int` alias used
// above.
auto arg_need_to_call_pyapi = [&](sycl::queue &exec_q, const arrayT &src,
126+
const arrayT &dst) {
127+
return py_internal::need_to_call_unary_ufunc(
128+
exec_q, src, dst, output_typeid_vector, contig_dispatch_vector);
129+
};
130+
m.def("_mkl_arg_to_call", arg_need_to_call_pyapi,
131+
"Check input arguments to answer if `arg` function from "
132+
"OneMKL VM library can be used",
133+
py::arg("sycl_queue"), py::arg("src"), py::arg("dst"));
134+
}
135+
} // namespace dpnp::extensions::vm

dpnp/backend/extensions/vm/arg.hpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
//*****************************************************************************
2+
// Copyright (c) 2025, Intel Corporation
3+
// All rights reserved.
4+
//
5+
// Redistribution and use in source and binary forms, with or without
6+
// modification, are permitted provided that the following conditions are met:
7+
// - Redistributions of source code must retain the above copyright notice,
8+
// this list of conditions and the following disclaimer.
9+
// - Redistributions in binary form must reproduce the above copyright notice,
10+
// this list of conditions and the following disclaimer in the documentation
11+
// and/or other materials provided with the distribution.
12+
//
13+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16+
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
17+
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18+
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19+
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20+
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21+
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22+
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23+
// THE POSSIBILITY OF SUCH DAMAGE.
24+
//*****************************************************************************
25+
26+
#pragma once
27+
28+
#include <pybind11/pybind11.h>
29+
30+
namespace py = pybind11;
31+
32+
namespace dpnp::extensions::vm
33+
{
34+
// Registers the `_arg` and `_mkl_arg_to_call` Python bindings on module `m`;
// the definition lives in arg.cpp.
void init_arg(py::module_ m);
35+
} // namespace dpnp::extensions::vm
Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
//*****************************************************************************
2+
// Copyright (c) 2025, Intel Corporation
3+
// All rights reserved.
4+
//
5+
// Redistribution and use in source and binary forms, with or without
6+
// modification, are permitted provided that the following conditions are met:
7+
// - Redistributions of source code must retain the above copyright notice,
8+
// this list of conditions and the following disclaimer.
9+
// - Redistributions in binary form must reproduce the above copyright notice,
10+
// this list of conditions and the following disclaimer in the documentation
11+
// and/or other materials provided with the distribution.
12+
//
13+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16+
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
17+
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18+
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19+
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20+
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21+
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22+
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23+
// THE POSSIBILITY OF SUCH DAMAGE.
24+
//*****************************************************************************
25+
26+
#include <stdexcept>
27+
28+
#include <oneapi/mkl.hpp>
29+
#include <sycl/sycl.hpp>
30+
31+
#include "dpctl4pybind11.hpp"
32+
33+
#include "common.hpp"
34+
#include "copysign.hpp"
35+
36+
// include a local copy of elementwise common header from dpctl tensor:
37+
// dpctl/tensor/libtensor/source/elementwise_functions/elementwise_functions.hpp
38+
// TODO: replace by including dpctl header once available
39+
#include "../elementwise_functions/elementwise_functions.hpp"
40+
41+
// dpctl tensor headers
42+
#include "kernels/elementwise_functions/common.hpp"
43+
#include "utils/type_dispatch.hpp"
44+
#include "utils/type_utils.hpp"
45+
46+
namespace dpnp::extensions::vm
47+
{
48+
namespace py = pybind11;
49+
namespace py_int = dpnp::extensions::py_internal;
50+
namespace td_ns = dpctl::tensor::type_dispatch;
51+
52+
namespace impl
53+
{
54+
namespace ew_cmn_ns = dpctl::tensor::kernels::elementwise_common;
55+
namespace mkl_vm = oneapi::mkl::vm; // OneMKL namespace with VM functions
56+
namespace tu_ns = dpctl::tensor::type_utils;
57+
58+
/**
59+
* @brief A factory to define pairs of supported types for which
60+
* MKL VM library provides support in oneapi::mkl::vm::copysign<T> function.
61+
*
62+
* @tparam T1 Type of the first input vector `a`.
* @tparam T2 Type of the second input vector `b`. Only the combinations
* listed in `value_type` below (`double`/`double` and `float`/`float`)
* are paired with a result type, which is then the same type as the inputs.
63+
*/
64+
template <typename T1, typename T2>
65+
struct OutputType
66+
{
67+
// The first matching entry selects the result type; the trailing default
// entry is the fallback for every other (T1, T2) pair (presumably marking
// the pair as unsupported -- confirm against dpctl's type_dispatch
// utilities).
using value_type = typename std::disjunction<
68+
td_ns::BinaryTypeMapResultEntry<T1, double, T2, double, double>,
69+
td_ns::BinaryTypeMapResultEntry<T1, float, T2, float, float>,
70+
td_ns::DefaultResultEntry<void>>::result_type;
71+
};
72+
73+
// Calls oneMKL VM `copysign` on two contiguous inputs of `in_n` elements.
//
// `in_a`, `in_b` and `out_y` arrive as untyped byte pointers and are
// reinterpreted as `const T1 *`, `const T2 *` and
// `OutputType<T1, T2>::value_type *`. The three offsets are not applied to
// the pointers: any non-zero offset is rejected with std::runtime_error.
// `depends` is forwarded to the MKL call unchanged, and the event returned
// by that call is handed back to the caller.
template <typename T1, typename T2>
74+
static sycl::event copysign_contig_impl(sycl::queue &exec_q,
75+
std::size_t in_n,
76+
const char *in_a,
77+
py::ssize_t a_offset,
78+
const char *in_b,
79+
py::ssize_t b_offset,
80+
char *out_y,
81+
py::ssize_t out_offset,
82+
const std::vector<sycl::event> &depends)
83+
{
84+
// NOTE(review): presumably rejects a type when the queue's device cannot
// handle it -- confirm against dpctl's type_utils.
tu_ns::validate_type_for_device<T1>(exec_q);
85+
tu_ns::validate_type_for_device<T2>(exec_q);
86+
87+
// The pointers below are used as passed in, so offsets must all be zero.
if ((a_offset != 0) || (b_offset != 0) || (out_offset != 0)) {
88+
throw std::runtime_error("Arrays offsets have to be equals to 0");
89+
}
90+
91+
// The element count is passed to MKL as a signed 64-bit integer.
std::int64_t n = static_cast<std::int64_t>(in_n);
92+
const T1 *a = reinterpret_cast<const T1 *>(in_a);
93+
const T2 *b = reinterpret_cast<const T2 *>(in_b);
94+
95+
using resTy = typename OutputType<T1, T2>::value_type;
96+
resTy *y = reinterpret_cast<resTy *>(out_y);
97+
98+
return mkl_vm::copysign(
99+
exec_q,
100+
n, // number of elements to be calculated
101+
a, // pointer `a` containing 1st input vector of size n
102+
b, // pointer `b` containing 2nd input vector of size n
103+
y, // pointer `y` to the output vector of size n
104+
depends);
105+
}
106+
107+
// Function-pointer types for binary element-wise kernels, taken from dpctl.
using ew_cmn_ns::binary_contig_impl_fn_ptr_t;
108+
using ew_cmn_ns::binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t;
109+
using ew_cmn_ns::binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t;
110+
using ew_cmn_ns::binary_strided_impl_fn_ptr_t;
111+
112+
// Two-dimensional lookup tables of size `td_ns::num_types` in each
// dimension; both are passed to the py_internal helpers from
// init_copysign().
static int output_typeid_vector[td_ns::num_types][td_ns::num_types];
113+
static binary_contig_impl_fn_ptr_t contig_dispatch_vector[td_ns::num_types]
114+
[td_ns::num_types];
115+
116+
// NOTE(review): the macro is defined outside this file; it presumably
// generates populate_dispatch_tables(), which init_copysign() calls to fill
// the two tables above -- confirm in common.hpp.
MACRO_POPULATE_DISPATCH_TABLES(copysign);
117+
} // namespace impl
118+
119+
// Registers the Python bindings for the oneMKL VM `copysign` function on
// module `m`: `_copysign` runs the computation and `_mkl_copysign_to_call`
// reports whether the MKL implementation can be used for the given
// arguments.
void init_copysign(py::module_ m)
120+
{
121+
using arrayT = dpctl::tensor::usm_ndarray;
122+
using event_vecT = std::vector<sycl::event>;
123+
124+
// The dispatch tables are filled before the bindings that read them are
// registered.
impl::populate_dispatch_tables();
125+
using impl::contig_dispatch_vector;
126+
using impl::output_typeid_vector;
127+
128+
// Binding body for `_copysign`: only the contiguous dispatch table is
// supplied, while the strided and both broadcast slots receive tables of
// null pointers.
auto copysign_pyapi = [&](sycl::queue &exec_q, const arrayT &src1,
129+
const arrayT &src2, const arrayT &dst,
130+
const event_vecT &depends = {}) {
131+
return py_int::py_binary_ufunc(
132+
src1, src2, dst, exec_q, depends, output_typeid_vector,
133+
contig_dispatch_vector,
134+
// no support of strided implementation in OneMKL
135+
td_ns::NullPtrTable<impl::binary_strided_impl_fn_ptr_t>{},
136+
// no support of C-contig row with broadcasting in OneMKL
137+
td_ns::NullPtrTable<
138+
impl::
139+
binary_contig_matrix_contig_row_broadcast_impl_fn_ptr_t>{},
140+
td_ns::NullPtrTable<
141+
impl::
142+
binary_contig_row_contig_matrix_broadcast_impl_fn_ptr_t>{});
143+
};
144+
// `depends` is optional on the Python side and defaults to an empty list.
m.def(
145+
"_copysign", copysign_pyapi,
146+
"Call `copysign` function from OneMKL VM library to return `dst` with "
147+
"the elements of `src1` with the sign changed to match the sign "
148+
"of the corresponding elements of `src2`",
149+
py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"), py::arg("dst"),
150+
py::arg("depends") = py::list());
151+
152+
// Binding body for `_mkl_copysign_to_call`: forwards the queue, the three
// arrays and the two dispatch tables to the helper and returns its answer.
// `py_internal` here names the same namespace as the `py_int` alias used
// above.
auto copysign_need_to_call_pyapi = [&](sycl::queue &exec_q,
153+
const arrayT &src1,
154+
const arrayT &src2,
155+
const arrayT &dst) {
156+
return py_internal::need_to_call_binary_ufunc(exec_q, src1, src2, dst,
157+
output_typeid_vector,
158+
contig_dispatch_vector);
159+
};
160+
m.def("_mkl_copysign_to_call", copysign_need_to_call_pyapi,
161+
"Check input arguments to answer if `copysign` function from "
162+
"OneMKL VM library can be used",
163+
py::arg("sycl_queue"), py::arg("src1"), py::arg("src2"),
164+
py::arg("dst"));
165+
}
166+
} // namespace dpnp::extensions::vm

0 commit comments

Comments
 (0)