Skip to content

Commit eec1594

Browse files
Merge pull request #1142 from IntelPython/refactor-indexers-utilities
2 parents 3352c3d + 3ccb307 commit eec1594

19 files changed

+807
-878
lines changed

dpctl/tensor/libtensor/include/kernels/boolean_advanced_indexing.hpp

Lines changed: 11 additions & 154 deletions
Original file line number | Diff line number | Diff line change
@@ -31,7 +31,7 @@
3131
#include <utility>
3232
#include <vector>
3333

34-
#include "utils/strided_iters.hpp"
34+
#include "utils/offset_utils.hpp"
3535
#include "utils/type_dispatch.hpp"
3636

3737
namespace dpctl
@@ -45,6 +45,8 @@ namespace indexing
4545

4646
namespace py = pybind11;
4747

48+
using namespace dpctl::tensor::offset_utils;
49+
4850
template <typename T> T ceiling_quotient(T n, T m)
4951
{
5052
return (n + m - 1) / m;
@@ -67,82 +69,6 @@ template <typename inputT,
6769
typename TransformerT>
6870
class inclusive_scan_rec_chunk_update_krn;
6971

70-
struct NoOpIndexer
71-
{
72-
size_t operator()(size_t gid) const
73-
{
74-
return gid;
75-
}
76-
};
77-
78-
struct StridedIndexer
79-
{
80-
StridedIndexer(int _nd,
81-
py::ssize_t _offset,
82-
py::ssize_t const *_packed_shape_strides)
83-
: nd(_nd), starting_offset(_offset),
84-
shape_strides(_packed_shape_strides)
85-
{
86-
}
87-
88-
size_t operator()(size_t gid) const
89-
{
90-
CIndexer_vector _ind(nd);
91-
py::ssize_t relative_offset(0);
92-
_ind.get_displacement<const py::ssize_t *, const py::ssize_t *>(
93-
static_cast<py::ssize_t>(gid),
94-
shape_strides, // shape ptr
95-
shape_strides + nd, // strides ptr
96-
relative_offset);
97-
return starting_offset + relative_offset;
98-
}
99-
100-
private:
101-
int nd;
102-
py::ssize_t starting_offset;
103-
py::ssize_t const *shape_strides;
104-
};
105-
106-
struct Strided1DIndexer
107-
{
108-
Strided1DIndexer(py::ssize_t _offset, py::ssize_t _size, py::ssize_t _step)
109-
: offset(_offset), size(static_cast<size_t>(_size)), step(_step)
110-
{
111-
}
112-
113-
size_t operator()(size_t gid) const
114-
{
115-
// ensure 0 <= gid < size
116-
return static_cast<size_t>(offset +
117-
std::min<size_t>(gid, size - 1) * step);
118-
}
119-
120-
private:
121-
py::ssize_t offset = 0;
122-
size_t size = 1;
123-
py::ssize_t step = 1;
124-
};
125-
126-
struct Strided1DCyclicIndexer
127-
{
128-
Strided1DCyclicIndexer(py::ssize_t _offset,
129-
py::ssize_t _size,
130-
py::ssize_t _step)
131-
: offset(_offset), size(static_cast<size_t>(_size)), step(_step)
132-
{
133-
}
134-
135-
size_t operator()(size_t gid) const
136-
{
137-
return static_cast<size_t>(offset + (gid % size) * step);
138-
}
139-
140-
private:
141-
py::ssize_t offset = 0;
142-
size_t size = 1;
143-
py::ssize_t step = 1;
144-
};
145-
14672
template <typename inputT, typename outputT> struct NonZeroIndicator
14773
{
14874
NonZeroIndicator() {}
@@ -200,9 +126,9 @@ sycl::event inclusive_scan_rec(sycl::queue exec_q,
200126

201127
slmT slm_iscan_tmp(lws, cgh);
202128

203-
cgh.parallel_for<class inclusive_scan_rec_local_scan_krn<inputT, outputT, n_wi, IndexerT, decltype(transformer)>>(
204-
sycl::nd_range<1>(gws, lws),
205-
[=](sycl::nd_item<1> it)
129+
cgh.parallel_for<class inclusive_scan_rec_local_scan_krn<
130+
inputT, outputT, n_wi, IndexerT, decltype(transformer)>>(
131+
sycl::nd_range<1>(gws, lws), [=](sycl::nd_item<1> it)
206132
{
207133
auto chunk_gid = it.get_global_id(0);
208134
auto lid = it.get_local_id(0);
@@ -245,8 +171,7 @@ sycl::event inclusive_scan_rec(sycl::queue exec_q,
245171
for (size_t m_wi = 0; m_wi < n_wi && i + m_wi < n_elems; ++m_wi) {
246172
output[i + m_wi] = local_isum[m_wi];
247173
}
248-
}
249-
);
174+
});
250175
});
251176

252177
sycl::event out_event = inc_scan_phase1_ev;
@@ -266,15 +191,14 @@ sycl::event inclusive_scan_rec(sycl::queue exec_q,
266191
// output[ chunk_size * (i + 1) + j] += temp[i]
267192
auto e3 = exec_q.submit([&](sycl::handler &cgh) {
268193
cgh.depends_on(e2);
269-
cgh.parallel_for<class inclusive_scan_rec_chunk_update_krn<inputT, outputT, IndexerT, decltype(transformer)>>(
270-
{n_elems},
271-
[=](auto wiid)
194+
cgh.parallel_for<class inclusive_scan_rec_chunk_update_krn<
195+
inputT, outputT, IndexerT, decltype(transformer)>>(
196+
{n_elems}, [=](auto wiid)
272197
{
273198
auto gid = wiid[0];
274199
auto i = (gid / chunk_size);
275200
output[gid] += (i > 0) ? temp[i - 1] : 0;
276-
}
277-
);
201+
});
278202
});
279203

280204
sycl::event e4 = exec_q.submit([&](sycl::handler &cgh) {
@@ -289,73 +213,6 @@ sycl::event inclusive_scan_rec(sycl::queue exec_q,
289213
return out_event;
290214
}
291215

292-
template <typename displacementT> struct TwoOffsets
293-
{
294-
TwoOffsets() : first_offset(0), second_offset(0) {}
295-
TwoOffsets(displacementT first_offset_, displacementT second_offset_)
296-
: first_offset(first_offset_), second_offset(second_offset_)
297-
{
298-
}
299-
300-
displacementT get_first_offset() const
301-
{
302-
return first_offset;
303-
}
304-
displacementT get_second_offset() const
305-
{
306-
return second_offset;
307-
}
308-
309-
private:
310-
displacementT first_offset = 0;
311-
displacementT second_offset = 0;
312-
};
313-
314-
struct TwoOffsets_StridedIndexer
315-
{
316-
TwoOffsets_StridedIndexer(int common_nd,
317-
py::ssize_t first_offset_,
318-
py::ssize_t second_offset_,
319-
py::ssize_t const *_packed_shape_strides)
320-
: nd(common_nd), starting_first_offset(first_offset_),
321-
starting_second_offset(second_offset_),
322-
shape_strides(_packed_shape_strides)
323-
{
324-
}
325-
326-
TwoOffsets<py::ssize_t> operator()(py::ssize_t gid) const
327-
{
328-
CIndexer_vector _ind(nd);
329-
py::ssize_t relative_first_offset(0);
330-
py::ssize_t relative_second_offset(0);
331-
_ind.get_displacement<const py::ssize_t *, const py::ssize_t *>(
332-
gid,
333-
shape_strides, // shape ptr
334-
shape_strides + nd, // src strides ptr
335-
shape_strides + 2 * nd, // src strides ptr
336-
relative_first_offset, relative_second_offset);
337-
return TwoOffsets<py::ssize_t>(
338-
starting_first_offset + relative_first_offset,
339-
starting_second_offset + relative_second_offset);
340-
}
341-
342-
private:
343-
int nd;
344-
py::ssize_t starting_first_offset;
345-
py::ssize_t starting_second_offset;
346-
py::ssize_t const *shape_strides;
347-
};
348-
349-
struct TwoZeroOffsets_Indexer
350-
{
351-
TwoZeroOffsets_Indexer() {}
352-
353-
TwoOffsets<py::ssize_t> operator()(py::ssize_t) const
354-
{
355-
return TwoOffsets<py::ssize_t>();
356-
}
357-
};
358-
359216
template <typename OrthogIndexerT,
360217
typename MaskedSrcIndexerT,
361218
typename MaskedDstIndexerT,

dpctl/tensor/libtensor/include/kernels/constructors.hpp

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
//===----------------------------------------------------------------------===//
2525

2626
#pragma once
27+
#include "utils/offset_utils.hpp"
2728
#include "utils/strided_iters.hpp"
2829
#include "utils/type_utils.hpp"
2930
#include <CL/sycl.hpp>
@@ -48,6 +49,7 @@ template <typename Ty, typename wTy> class linear_sequence_affine_kernel;
4849
template <typename Ty> class eye_kernel;
4950

5051
namespace py = pybind11;
52+
using namespace dpctl::tensor::offset_utils;
5153

5254
/* =========== Unboxing Python scalar =============== */
5355

@@ -601,16 +603,17 @@ sycl::event tri_impl(sycl::queue exec_q,
601603
sycl::event tri_ev = exec_q.submit([&](sycl::handler &cgh) {
602604
cgh.depends_on(depends);
603605
cgh.depends_on(additional_depends);
606+
604607
cgh.parallel_for<tri_kernel<Ty, upper>>(
605608
sycl::range<1>(inner_range * outer_range), [=](sycl::id<1> idx) {
606609
py::ssize_t outer_gid = idx[0] / inner_range;
607610
py::ssize_t inner_gid = idx[0] - inner_range * outer_gid;
608611

609-
py::ssize_t src_inner_offset, dst_inner_offset;
610-
bool to_copy;
612+
py::ssize_t src_inner_offset = 0, dst_inner_offset = 0;
613+
bool to_copy(true);
611614

612615
{
613-
// py::ssize_t inner_gid = idx.get_id(0);
616+
using dpctl::tensor::strides::CIndexer_array;
614617
CIndexer_array<d2, py::ssize_t> indexer_i(
615618
{shape_and_strides[nd_2], shape_and_strides[nd_1]});
616619
indexer_i.set(inner_gid);
@@ -631,7 +634,7 @@ sycl::event tri_impl(sycl::queue exec_q,
631634
py::ssize_t src_offset = 0;
632635
py::ssize_t dst_offset = 0;
633636
{
634-
// py::ssize_t outer_gid = idx.get_id(1);
637+
using dpctl::tensor::strides::CIndexer_vector;
635638
CIndexer_vector<py::ssize_t> outer(nd - d2);
636639
outer.get_displacement(
637640
outer_gid, shape_and_strides, shape_and_strides + src_s,

0 commit comments

Comments
 (0)