Skip to content

[enhancement] enable array_api return values from from_table #2441

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 38 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
402f498
startup
icfaust Apr 17, 2025
f779aee
refactor from_table
icfaust Apr 17, 2025
048cf58
Update _data_conversion.py
icfaust Apr 17, 2025
8039919
Update _data_conversion.py
icfaust Apr 17, 2025
81b454f
Update data_conversion.hpp
icfaust Apr 17, 2025
7b2c8a5
Update _data_conversion.py
icfaust Apr 17, 2025
4524be9
Update _data_conversion.py
icfaust Apr 17, 2025
6aec73a
Update dbscan.py
icfaust Apr 17, 2025
62a7966
Update dbscan.py
icfaust Apr 17, 2025
227d857
Update _data_conversion.py
icfaust Apr 17, 2025
06761b7
Update _data_conversion.py
icfaust Apr 17, 2025
044441a
Update _data_conversion.py
icfaust Apr 17, 2025
44d2c99
Update base.py
icfaust Apr 17, 2025
e6b7532
Update covariance.py
icfaust Apr 17, 2025
278054a
Update linear_model.py
icfaust Apr 17, 2025
9bcb63a
Update logistic_regression.py
icfaust Apr 17, 2025
3b1d96d
Update forest.py
icfaust Apr 18, 2025
9b35c20
Update incremental_linear_model.py
icfaust Apr 18, 2025
8b2f186
Update incremental_linear_model.py
icfaust Apr 18, 2025
a9bd2cf
formatting
icfaust Apr 18, 2025
230a4ab
Update __init__.py
icfaust Apr 18, 2025
29d3db8
Update forest.py
icfaust Apr 18, 2025
0714ede
Update covariance.py
icfaust Apr 18, 2025
1053040
Update test_data.py
icfaust Apr 18, 2025
dc5ca8d
Update data_conversion.cpp
icfaust Apr 19, 2025
8cc801b
Update _data_conversion.py
icfaust Apr 19, 2025
9bfcb8a
Update data_conversion.cpp
icfaust Apr 19, 2025
8287874
Update data_conversion.cpp
icfaust Apr 19, 2025
5519888
Update _data_conversion.py
icfaust Apr 20, 2025
7db4432
Update data_conversion.cpp
icfaust Apr 20, 2025
a55c723
Update _data_conversion.py
icfaust Apr 20, 2025
dbfb85c
Update test_data.py
icfaust Apr 20, 2025
be918d7
Update test_data.py
icfaust Apr 20, 2025
08fc37a
Update test_data.py
icfaust Apr 20, 2025
2873b6d
Update test_data.py
icfaust Apr 20, 2025
a4338b8
formatting
icfaust Apr 22, 2025
486a2c8
clang-formatting
icfaust Apr 22, 2025
3b9ba91
Update _data_conversion.py
icfaust Apr 22, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions onedal/cluster/dbscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,14 @@ def fit(self, X, y=None, sample_weight=None, queue=None):
params = self._get_onedal_params(X_table.dtype)
result = self.compute(params, X_table, sample_weight_table)

self.labels_ = from_table(result.responses, sycl_queue=queue).ravel()
self.labels_ = from_table(result.responses, like=X).ravel()
if (
result.core_observation_indices is not None
and not result.core_observation_indices.kind == "empty"
):
self.core_sample_indices_ = from_table(
result.core_observation_indices,
sycl_queue=queue,
like=X,
).ravel()
else:
# construct keyword arguments for different namespaces (dptcl takes sycl_queue)
Expand Down
1 change: 1 addition & 0 deletions onedal/common/sycl_interfaces.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ sycl::queue get_queue_from_python(const py::object& syclobj);

using dp_policy_t = detail::data_parallel_policy;

std::uint32_t get_device_id(const sycl::queue& queue);
std::uint32_t get_device_id(const dp_policy_t& policy);
std::size_t get_used_memory(const py::object& syclobj);
std::string get_device_name(const dp_policy_t& policy);
Expand Down
16 changes: 7 additions & 9 deletions onedal/covariance/covariance.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,23 +108,21 @@ def fit(self, X, y=None, queue=None):

if not use_raw_input:
X = _check_array(X, dtype=[np.float64, np.float32])
X = to_table(X, queue=queue)
X_t = to_table(X, queue=queue)

params = self._get_onedal_params(X.dtype)
params = self._get_onedal_params(X_t.dtype)
hparams = get_hyperparameters("covariance", "compute")
if hparams is not None and not hparams.is_default:
result = self.compute(params, hparams.backend, X)
result = self.compute(params, hparams.backend, X_t)
else:
result = self.compute(params, X)
result = self.compute(params, X_t)
if daal_check_version((2024, "P", 1)) or (not self.bias):
self.covariance_ = from_table(result.cov_matrix, sycl_queue=queue)
self.covariance_ = from_table(result.cov_matrix, like=X)
else:
self.covariance_ = (
from_table(result.cov_matrix, sycl_queue=queue)
* (X.shape[0] - 1)
/ X.shape[0]
from_table(result.cov_matrix, like=X) * (X.shape[0] - 1) / X.shape[0]
)

self.location_ = from_table(result.means, sycl_queue=queue).ravel()
self.location_ = from_table(result.means, like=X).ravel()

return self
4 changes: 2 additions & 2 deletions onedal/datatypes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@
# limitations under the License.
# ==============================================================================

from ._data_conversion import from_table, to_table
from ._data_conversion import from_table, return_type_constructor, to_table

__all__ = ["from_table", "to_table"]
__all__ = ["from_table", "return_type_constructor", "to_table"]
158 changes: 91 additions & 67 deletions onedal/datatypes/_data_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,84 +34,108 @@ def to_table(*args, queue=None):
"""Create oneDAL tables from scalars and/or arrays.

Note: this implementation can be used with scipy.sparse, numpy ndarrays,
DPCTL/DPNP usm_ndarrays and scalars. Tables will use pointers to the
original array data. Scalars and non-contiguous arrays will be copies.
Arrays may be modified in-place by oneDAL during computation. This works
for data located on CPU and SYCL-enabled Intel GPUs. Each array may only
be of a single datatype (i.e. each must be homogeneous).
dpctl/dpnp usm_ndarrays, array API standard arrays, and scalars. Tables
will use pointers to the original array data. Scalars and non-contiguous
arrays will be copies. Arrays may be modified in-place by oneDAL during
computation. This works for data located on CPU and SYCL-enabled Intel GPUs.
Each array may only be of a single datatype (i.e. each must be homogeneous).

Parameters
----------
*args : {scalar, numpy array, sycl_usm_ndarray, csr_matrix, or csr_array}
*args : scalar, numpy array, sycl_usm_ndarray, array API standard array,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

csr_matrix, or csr_array
arg1, arg2... The arrays should be given as arguments.

queue : SyclQueue or None, default=None
A dpctl or oneDAL backend python representation of a SYCL Queue or None

Returns
-------
tables: {oneDAL homogeneous tables}
tables: oneDAL homogeneous tables
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Format here would need to be:

name_of_returned_variable : type_of_returned variable
    Description about it.

"""
return _apply_and_pass(_convert_one_to_table, *args, queue=queue)


if backend.is_dpc:

try:
# try/catch is used here instead of dpep_helpers because
# of circular import issues of _data_conversion.py and
# utils/validation.py. This is a temporary fix until the
# issue with dpnp is addressed, at which point this can
# be removed entirely.
import dpnp

def _table_to_array(table, xp=None):
# By default DPNP ndarray created with a copy.
# TODO:
# investigate why dpnp.array(table, copy=False) doesn't work.
# Work around with using dpctl.tensor.asarray.
if xp == dpnp:
return dpnp.array(dpnp.dpctl.tensor.asarray(table), copy=False)
else:
return xp.asarray(table)

except ImportError:

def _table_to_array(table, xp=None):
return xp.asarray(table)

def convert_one_from_table(table, sycl_queue=None, sua_iface=None, xp=None):
# Currently only `__sycl_usm_array_interface__` protocol used to
# convert into dpnp/dpctl tensors.
if sua_iface:
if (
sycl_queue
and sycl_queue.sycl_device.is_cpu
and table.__sycl_usm_array_interface__["syclobj"] is None
):
# oneDAL returns tables with None sycl queue for CPU sycl queue inputs.
# This workaround is necessary for the functional preservation
# of the compute-follows-data execution.
# Host tables first converted into numpy.narrays and then to array from xp
# namespace.
return xp.asarray(
backend.from_table(table), usm_type="device", sycl_queue=sycl_queue
)
else:
return _table_to_array(table, xp=xp)

return backend.from_table(table)

else:

def convert_one_from_table(table, sycl_queue=None, sua_iface=None, xp=None):
# Currently only `__sycl_usm_array_interface__` protocol used to
# convert into dpnp/dpctl tensors.
if sua_iface:
raise RuntimeError(
"SYCL usm array conversion from table requires the DPC backend"
def return_type_constructor(array):
    """Generate a function for converting oneDAL tables to arrays.

    Note: this implementation will convert any table to numpy ndarrays,
    scipy csr_arrays, dpctl/dpnp usm_ndarrays, and array API standard
    arrays of designated type. By default, from_table will return numpy
    arrays and can only return other types when necessary object
    attributes exist (i.e. ``__sycl_usm_array_interface__`` or
    ``__array_namespace__``).

    Parameters
    ----------
    array : array-like or None
        Python object representing an array instance of the return type
        for converting oneDAL tables. Arrays are queried for conversion
        namespace when of sycl_usm_array type or array API standard type.
        When set to None, will return numpy arrays or scipy csr arrays.

    Returns
    -------
    func : callable
        A function which takes in a single table input and returns an array.
    """
    func = backend.from_table
    if isinstance(array, np.ndarray) or array is None:
        # numpy (or unspecified) targets use the backend converter directly
        pass
    elif hasattr(array, "__sycl_usm_array_interface__"):
        # oneDAL returns tables without sycl queues for CPU sycl queue inputs.
        # This workaround is necessary for the functional preservation
        # of the compute-follows-data execution.
        device = array.sycl_queue
        # It is important to note why the __sycl_usm_array_interface__ is
        # prioritized: it provides finer-grained control of SYCL queues and the
        # related SYCL devices which are generally unavailable via DLPack
        # representations (such as SYCL contexts, SYCL sub-devices, etc.).
        if hasattr(array, "__array_namespace__"):
            xp = array.__array_namespace__()
            func = lambda x: (
                xp.asarray(x)
                if hasattr(x, "__sycl_usm_array_interface__")
                else xp.asarray(backend.from_table(x), device=device)
            )
        elif hasattr(array, "_create_from_usm_ndarray"):  # signifier of dpnp < 0.19
            xp = array._array_obj.__array_namespace__()
            from_usm = array._create_from_usm_ndarray
            func = lambda x: from_usm(
                xp.asarray(x)
                if hasattr(x, "__sycl_usm_array_interface__")
                else xp.asarray(backend.from_table(x), device=device)
            )
    elif hasattr(array, "__array_namespace__"):
        # generic array API standard arrays are constructed via DLPack
        func = array.__array_namespace__().from_dlpack
    return func


def from_table(*args, sycl_queue=None, sua_iface=None, xp=None):
return _apply_and_pass(
convert_one_from_table, *args, sycl_queue=sycl_queue, sua_iface=sua_iface, xp=xp
)
def from_table(*args, like=None):
    """Create 2 dimensional arrays from oneDAL tables.

    Note: this implementation will convert any table to numpy ndarrays,
    scipy csr_arrays, dpctl/dpnp usm_ndarrays, and array API standard
    arrays of designated type. By default, from_table will return numpy
    arrays and can only return other types when necessary object
    attributes exist (i.e. ``__sycl_usm_array_interface__`` or
    ``__array_namespace__``).

    Parameters
    ----------
    *args : single or multiple python oneDAL tables
        arg1, arg2... The arrays should be given as arguments.

    like : callable, array-like or None, default=None
        Python object representing an array instance of the return type
        or function capable of converting oneDAL tables into arrays of
        desired type. Arrays are queried for conversion namespace when
        of sycl_usm_array type or array API standard type. When set to
        None, will return numpy arrays or scipy csr arrays.

    Returns
    -------
    arrays : numpy arrays, sycl_usm_ndarrays, or array API standard arrays
    """
    # A callable ``like`` is used as the table converter directly; anything
    # else (an array instance or None) is first turned into one.
    if callable(like):
        converter = like
    else:
        converter = return_type_constructor(like)
    return _apply_and_pass(converter, *args)
99 changes: 99 additions & 0 deletions onedal/datatypes/dlpack/data_conversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,105 @@ dal::table convert_to_table(py::object obj, py::object q_obj, bool recursed) {
return res;
}

DLDevice get_dlpack_device(const dal::array<byte_t>& array) {
#ifdef ONEDAL_DATA_PARALLEL
    // get_queue() yields std::optional<sycl::queue>: present when the data
    // lives in SYCL memory, absent for plain host allocations.
    if (const auto queue = array.get_queue(); queue.has_value()) {
        return DLDevice{ kDLOneAPI, static_cast<std::int32_t>(get_device_id(queue.value())) };
    }
    return DLDevice{ kDLCPU, std::int32_t(0) };
#else
    // Builds without SYCL support always report host (CPU) memory.
    return DLDevice{ kDLCPU, std::int32_t(0) };
#endif //ONEDAL_DATA_PARALLEL
}

DLDevice get_dlpack_device(const dal::table& input) {
    // Derive the DLPack device from the backing array of a homogeneous
    // table; all other table kinds are reported as host (CPU) memory.
    if (input.get_kind() == dal::homogen_table::kind()) {
        // static_cast is the appropriate named cast for this kind-checked
        // downcast within a known class hierarchy (reinterpret_cast hides
        // the relationship and bypasses pointer adjustment).
        const auto& homogen_input = static_cast<const dal::homogen_table&>(input);
        dal::array<byte_t> array = dal::detail::get_original_data(homogen_input);
        return get_dlpack_device(array);
    }
    else {
        return DLDevice{ kDLCPU, std::int32_t(0) };
    }
}

DLTensor construct_dlpack_tensor(const dal::array<byte_t>& array,
                                 std::int64_t row_count,
                                 std::int64_t column_count,
                                 const dal::data_type& dtype,
                                 const dal::data_layout& layout) {
    // Describe a 2d oneDAL homogeneous table as a DLPack tensor. The tensor
    // does not own the data; the caller is responsible for keeping `array`
    // alive and for delete[]-ing the shape allocation made below.
    DLTensor tensor;
    tensor.data = const_cast<byte_t*>(array.get_data());
    tensor.device = get_dlpack_device(array);
    tensor.ndim = std::int32_t(2);
    tensor.dtype = convert_dal_to_dlpack_type(dtype);
    tensor.byte_offset = std::uint64_t(0);

    // Take the strategy from dpctl tensors of using a single allocation for
    // both shape (slots 0-1) and strides (slots 2-3). std::int64_t matches
    // the homogen table's row/column count type.
    const bool row_major = (layout == dal::data_layout::row_major);
    std::int64_t* dims = new std::int64_t[4];
    dims[0] = row_count;
    dims[1] = column_count;
    dims[2] = row_major ? column_count : std::int64_t(1);
    dims[3] = row_major ? std::int64_t(1) : row_count;
    tensor.shape = dims;
    tensor.strides = &dims[2];

    return tensor;
}

static void free_capsule(PyObject* cap) {
    // Capsule destructor: run the tensor deleter only if the capsule still
    // carries the "dltensor" name — a DLPack consumer that takes ownership
    // renames the capsule (to "used_dltensor") and frees it itself.
    if (!PyCapsule_IsValid(cap, "dltensor")) {
        return;
    }
    auto* dlm = static_cast<DLManagedTensor*>(PyCapsule_GetPointer(cap, "dltensor"));
    if (dlm->deleter) {
        dlm->deleter(dlm);
    }
}

py::capsule construct_dlpack(const dal::table& input) {
    // Wrap a oneDAL homogeneous table in a DLPack capsule without copying.
    // DLManagedTensor is used instead of DLManagedTensorVersioned
    // due to major frameworks not yet supporting the latter.

    // Check the table type before any allocation: the original order
    // (allocate, then throw) leaked the DLManagedTensor on unsupported input.
    if (input.get_kind() != dal::homogen_table::kind())
        throw pybind11::type_error("Unsupported table type for dlpack conversion");

    const auto& homogen_input = static_cast<const dal::homogen_table&>(input);
    dal::array<byte_t> array = dal::detail::get_original_data(homogen_input);

    DLManagedTensor* dlm = new DLManagedTensor;
    // Retain a heap copy of the (reference-counted) array so the table's
    // data stays alive until the consumer invokes the deleter.
    dlm->manager_ctx = static_cast<void*>(new dal::array<byte_t>(array));

    // set tensor description (shape and strides share one allocation)
    dlm->dl_tensor = construct_dlpack_tensor(array,
                                             homogen_input.get_row_count(),
                                             homogen_input.get_column_count(),
                                             homogen_input.get_metadata().get_data_type(0),
                                             homogen_input.get_data_layout());

    // deleter releases the retained array, the shape/strides block, and the
    // managed tensor itself
    dlm->deleter = [](struct DLManagedTensor* self) -> void {
        auto stored_array = static_cast<dal::array<byte_t>*>(self->manager_ctx);
        if (stored_array) {
            delete stored_array;
        }
        delete[] self->dl_tensor.shape;
        delete self;
    };

    // hand ownership to Python; free_capsule runs the deleter if the capsule
    // is destroyed without being consumed
    py::capsule capsule(reinterpret_cast<void*>(dlm), "dltensor", free_capsule);
    return capsule;
}

py::object dlpack_memory_order(py::object obj) {
DLManagedTensor* dlm;
DLManagedTensorVersioned* dlmv;
Expand Down
3 changes: 3 additions & 0 deletions onedal/datatypes/dlpack/data_conversion.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,8 @@ namespace py = pybind11;

dal::table convert_to_table(py::object obj, py::object q_obj = py::none(), bool recursed = false);

DLDevice get_dlpack_device(const dal::table& input);
py::capsule construct_dlpack(const dal::table& input);

py::object dlpack_memory_order(py::object obj);
} // namespace oneapi::dal::python::dlpack
Loading
Loading