Skip to content

Commit 52e2e1d

Browse files
authored
SYCL memory type detector. experimental (#872)
* SYCL memory type detector. experimental
1 parent 3b91baf commit 52e2e1d

File tree

3 files changed

+179
-11
lines changed

3 files changed

+179
-11
lines changed

dpnp/backend/include/dpnp_iface.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,12 +234,12 @@ INP_DLLEXPORT void
234234
* @ingroup BACKEND_API
235235
* @brief absolute function.
236236
*
237-
* @param [in] array1_in Input array.
237+
* @param [in] input1_in Input array.
238238
* @param [out] result1 Output array.
239239
* @param [in] size Number of elements in input arrays.
240240
*/
241241
template <typename _DataType>
242-
INP_DLLEXPORT void dpnp_elemwise_absolute_c(void* array1_in, void* result1, size_t size);
242+
INP_DLLEXPORT void dpnp_elemwise_absolute_c(const void* input1_in, void* result1, size_t size);
243243

244244
/**
245245
* @ingroup BACKEND_API

dpnp/backend/kernels/dpnp_krnl_mathematical.cpp

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,11 @@
2828
#include <vector>
2929

3030
#include <dpnp_iface.hpp>
31+
3132
#include "dpnp_fptr.hpp"
3233
#include "dpnp_iterator.hpp"
3334
#include "dpnp_utils.hpp"
35+
#include "dpnpc_memory_adapter.hpp"
3436
#include "queue_sycl.hpp"
3537

3638
template <typename _KernelNameSpecialization>
@@ -78,15 +80,16 @@ template <typename _KernelNameSpecialization>
7880
class dpnp_elemwise_absolute_c_kernel;
7981

8082
template <typename _DataType>
81-
void dpnp_elemwise_absolute_c(void* array1_in, void* result1, size_t size)
83+
void dpnp_elemwise_absolute_c(const void* input1_in, void* result1, size_t size)
8284
{
8385
if (!size)
8486
{
8587
return;
8688
}
8789

8890
cl::sycl::event event;
89-
_DataType* array1 = reinterpret_cast<_DataType*>(array1_in);
91+
DPNPC_ptr_adapter<_DataType> input1_ptr(input1_in, size);
92+
_DataType* array1 = input1_ptr.get_ptr();
9093
_DataType* result = reinterpret_cast<_DataType*>(result1);
9194

9295
if constexpr (std::is_same<_DataType, double>::value || std::is_same<_DataType, float>::value)
@@ -120,10 +123,10 @@ void dpnp_elemwise_absolute_c(void* array1_in, void* result1, size_t size)
120123
event.wait();
121124
}
122125

123-
template void dpnp_elemwise_absolute_c<double>(void* array1_in, void* result1, size_t size);
124-
template void dpnp_elemwise_absolute_c<float>(void* array1_in, void* result1, size_t size);
125-
template void dpnp_elemwise_absolute_c<long>(void* array1_in, void* result1, size_t size);
126-
template void dpnp_elemwise_absolute_c<int>(void* array1_in, void* result1, size_t size);
126+
// template void dpnp_elemwise_absolute_c<double>(void* array1_in, void* result1, size_t size);
127+
// template void dpnp_elemwise_absolute_c<float>(void* array1_in, void* result1, size_t size);
128+
// template void dpnp_elemwise_absolute_c<long>(void* array1_in, void* result1, size_t size);
129+
// template void dpnp_elemwise_absolute_c<int>(void* array1_in, void* result1, size_t size);
127130

128131
template <typename _DataType_output, typename _DataType_input1, typename _DataType_input2>
129132
void dpnp_cross_c(void* result_out,
@@ -145,9 +148,12 @@ void dpnp_cross_c(void* result_out,
145148
(void)input2_shape_ndim;
146149
(void)where;
147150

148-
const _DataType_input1* input1 = reinterpret_cast<const _DataType_input1*>(input1_in);
149-
const _DataType_input2* input2 = reinterpret_cast<const _DataType_input2*>(input2_in);
150-
_DataType_output* result = reinterpret_cast<_DataType_output*>(result_out);
151+
DPNPC_ptr_adapter<_DataType_input1> input1_ptr(input1_in, input1_size, true);
152+
DPNPC_ptr_adapter<_DataType_input2> input2_ptr(input2_in, input2_size, true);
153+
DPNPC_ptr_adapter<_DataType_output> result_ptr(result_out, input1_size, true, true);
154+
const _DataType_input1* input1 = input1_ptr.get_ptr();
155+
const _DataType_input2* input2 = input2_ptr.get_ptr();
156+
_DataType_output* result = result_ptr.get_ptr();
151157

152158
result[0] = input1[1] * input2[2] - input1[2] * input2[1];
153159

Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
//*****************************************************************************
2+
// Copyright (c) 2016-2020, Intel Corporation
3+
// All rights reserved.
4+
//
5+
// Redistribution and use in source and binary forms, with or without
6+
// modification, are permitted provided that the following conditions are met:
7+
// - Redistributions of source code must retain the above copyright notice,
8+
// this list of conditions and the following disclaimer.
9+
// - Redistributions in binary form must reproduce the above copyright notice,
10+
// this list of conditions and the following disclaimer in the documentation
11+
// and/or other materials provided with the distribution.
12+
//
13+
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
14+
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15+
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16+
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
17+
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
18+
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
19+
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
20+
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
21+
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
22+
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
23+
// THE POSSIBILITY OF SUCH DAMAGE.
24+
//*****************************************************************************
25+
26+
#pragma once
27+
#ifndef DPNP_MEMORY_ADAPTER_H // Cython compatibility
28+
#define DPNP_MEMORY_ADAPTER_H
29+
30+
#include "queue_sycl.hpp"
31+
32+
/**
33+
* @ingroup BACKEND_UTILS
34+
* @brief Adapter for the memory given by parameters in the DPNPC functions
35+
*
36+
* This type should be used to accommodate memory in the function. For example,
37+
* if the kernel must be executed in "queue_1" which is host based, but
38+
* input arrays are located on other "queue_2" or unknown place.
39+
*
40+
* Also, some functions completely host based and has no SYCL environment.
41+
*
42+
*/
43+
template <typename _DataType>
44+
class DPNPC_ptr_adapter final
45+
{
46+
void* aux_ptr = nullptr; /**< pointer to allocated memory by this adapter */
47+
void* orig_ptr = nullptr; /**< original pointer to memory given by parameters */
48+
size_t size_in_bytes = 0; /**< size of bytes of the memory */
49+
bool allocated = false; /**< True if the memory allocated by this procedure and needs to be free */
50+
bool target_no_queue = false; /**< Indicates that original memory will be accessed from non SYCL environment */
51+
bool copy_back = false; /**< If the memory is 'result' it needs to be copied back to original */
52+
const bool verbose = false;
53+
54+
public:
55+
DPNPC_ptr_adapter() = delete;
56+
57+
DPNPC_ptr_adapter(const void* src_ptr,
58+
const size_t size,
59+
bool target_no_sycl = false,
60+
bool copy_back_request = false)
61+
{
62+
target_no_queue = target_no_sycl;
63+
copy_back = copy_back_request;
64+
orig_ptr = const_cast<void*>(src_ptr);
65+
size_in_bytes = size * sizeof(_DataType);
66+
67+
// enum class alloc { host = 0, device = 1, shared = 2, unknown = 3 };
68+
cl::sycl::usm::alloc src_ptr_type = cl::sycl::usm::alloc::unknown;
69+
src_ptr_type = cl::sycl::get_pointer_type(src_ptr, DPNP_QUEUE.get_context());
70+
if (verbose)
71+
{
72+
std::cerr << "DPNPC_ptr_converter:";
73+
std::cerr << "\n\t target_no_queue=" << target_no_queue;
74+
std::cerr << "\n\t copy_back=" << copy_back;
75+
std::cerr << "\n\t pointer=" << src_ptr;
76+
std::cerr << "\n\t size=" << size;
77+
std::cerr << "\n\t size_in_bytes=" << size_in_bytes;
78+
std::cerr << "\n\t pointer type=" << (long)src_ptr_type;
79+
std::cerr << "\n\t queue inorder=" << DPNP_QUEUE.is_in_order();
80+
std::cerr << "\n\t queue is_host=" << DPNP_QUEUE.is_host();
81+
std::cerr << "\n\t queue device is_host=" << DPNP_QUEUE.get_device().is_host();
82+
std::cerr << "\n\t queue device is_cpu=" << DPNP_QUEUE.get_device().is_cpu();
83+
std::cerr << "\n\t queue device is_gpu=" << DPNP_QUEUE.get_device().is_gpu();
84+
std::cerr << "\n\t queue device is_accelerator=" << DPNP_QUEUE.get_device().is_accelerator();
85+
std::cerr << std::endl;
86+
}
87+
88+
if (is_memcpy_required(src_ptr_type))
89+
{
90+
aux_ptr = dpnp_memory_alloc_c(size_in_bytes);
91+
dpnp_memory_memcpy_c(aux_ptr, src_ptr, size_in_bytes);
92+
allocated = true;
93+
if (verbose)
94+
{
95+
std::cerr << "DPNPC_ptr_converter::alloc and copy memory"
96+
<< " from=" << src_ptr
97+
<< " to=" << aux_ptr
98+
<< std::endl;
99+
}
100+
}
101+
else
102+
{
103+
aux_ptr = const_cast<void*>(src_ptr);
104+
}
105+
}
106+
107+
~DPNPC_ptr_adapter()
108+
{
109+
if (allocated)
110+
{
111+
if (verbose)
112+
{
113+
std::cerr << "DPNPC_ptr_converter::free_memory at=" << aux_ptr << std::endl;
114+
}
115+
116+
if (copy_back)
117+
{
118+
copy_data_back();
119+
}
120+
121+
dpnp_memory_free_c(aux_ptr);
122+
}
123+
}
124+
125+
bool is_memcpy_required(cl::sycl::usm::alloc src_ptr_type)
126+
{
127+
if (target_no_queue || DPNP_QUEUE.get_device().is_gpu())
128+
{
129+
if (src_ptr_type == cl::sycl::usm::alloc::unknown)
130+
{
131+
return true;
132+
}
133+
else if (target_no_queue && src_ptr_type == cl::sycl::usm::alloc::device)
134+
{
135+
return true;
136+
}
137+
}
138+
139+
return false;
140+
}
141+
142+
_DataType* get_ptr() const
143+
{
144+
return reinterpret_cast<_DataType*>(aux_ptr);
145+
}
146+
147+
void copy_data_back() const
148+
{
149+
if (verbose)
150+
{
151+
std::cerr << "DPNPC_ptr_converter::copy_data_back:"
152+
<< " from=" << aux_ptr
153+
<< " to=" << orig_ptr
154+
<< " size_in_bytes=" << size_in_bytes
155+
<< std::endl;
156+
}
157+
158+
dpnp_memory_memcpy_c(orig_ptr, aux_ptr, size_in_bytes);
159+
}
160+
};
161+
162+
#endif // DPNP_MEMORY_ADAPTER_H

0 commit comments

Comments
 (0)