Skip to content

Commit 8c03253

Browse files
refactor kernel start functions
- remove macro `__cudaKernel` - add kernel wrapper `Kernel<>` - add hernel creation function `kernel()` - add helper macro `PMACC_KERNEL()` and `PMACC_TYPEKERNEL` - add cuda entry function `gpuEntryFunction()`
1 parent e2e21fd commit 8c03253

File tree

3 files changed

+357
-39
lines changed

3 files changed

+357
-39
lines changed

src/libPMacc/include/eventSystem/events/kernelEvents.hpp

Lines changed: 244 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,17 @@
2222

2323
#pragma once
2424

25+
26+
#include "pmacc_types.hpp"
2527
#include "dimensions/DataSpace.hpp"
28+
#include "traits/GetNComponents.hpp"
2629
#include "eventSystem/EventSystem.hpp"
27-
#include "ppFunctions.hpp"
28-
#include "pmacc_types.hpp"
30+
#include "Environment.hpp"
31+
#include "nvidia/gpuEntryFunction.hpp"
32+
33+
#include <string>
34+
2935

30-
#include <boost/preprocessor/control/if.hpp>
3136

3237
/* No namespace in this file since we only declare macro defines */
3338

@@ -41,43 +46,243 @@
4146
#define CUDA_CHECK_KERNEL_MSG(...) ;
4247
#endif
4348

44-
/** Call activate kernel from taskKernel.
45-
* If PMACC_SYNC_KERNEL is 1 cudaDeviceSynchronize() is called before
46-
* and after activation.
47-
*/
48-
#define PMACC_ACTIVATE_KERNEL \
49-
CUDA_CHECK_KERNEL_MSG(cudaGetLastError( ),"Last error after kernel launch"); \
50-
CUDA_CHECK_KERNEL_MSG(cudaDeviceSynchronize(),"Crash after kernel launch"); \
51-
taskKernel->activateChecks(); \
52-
CUDA_CHECK_KERNEL_MSG(cudaDeviceSynchronize(),"Crash after kernel activation");
5349

54-
/**
55-
* Appends kernel arguments to generated code and activates kernel task.
56-
*
57-
* @param ... parameters to pass to kernel
58-
*/
59-
#define PMACC_CUDAPARAMS(...) (__VA_ARGS__); \
60-
PMACC_ACTIVATE_KERNEL \
61-
} /*this is used if call is EventTask.waitforfinished();*/
50+
namespace PMacc
51+
{
52+
/** configured kernel object
53+
*
54+
* this objects contains the functor and the starting parameter
55+
*
56+
* @tparam T_Kernel pmacc Kernel object
57+
* @tparam T_VectorGrid type which defines the grid extents (type must be cast able to CUDA dim3)
58+
* @tparam T_VectorBlock type which defines the block extents (type must be cast able to CUDA dim3)
59+
*/
60+
template<
61+
typename T_Kernel,
62+
typename T_VectorGrid,
63+
typename T_VectorBlock
64+
>
65+
struct KernelStarter;
66+
67+
/** wrapper for the user kernel functor
68+
*
69+
* contains debug information like filename and lien of the kernel call
70+
*/
71+
template< typename T_KernelFunctor >
72+
struct Kernel
73+
{
74+
/** functor */
75+
T_KernelFunctor const m_kernelFunctor;
76+
/** file name from where the kernel is called */
77+
std::string const m_file;
78+
/** line number in the file */
79+
size_t const m_line;
6280

63-
/**
64-
* Configures block and grid sizes and shared memory for the kernel.
65-
*
66-
* @param grid sizes of grid on gpu
67-
* @param block sizes of block on gpu
68-
* @param ... amount of shared memory for the kernel (optional)
69-
*/
70-
#define PMACC_CUDAKERNELCONFIG(grid,block,...) <<<(grid),(block), \
71-
/*we need +0 if VA_ARGS is empty, because we must put in a value*/ \
72-
__VA_ARGS__+0, \
73-
taskKernel->getCudaStream()>>> PMACC_CUDAPARAMS
81+
/**
82+
*
83+
* @param gridExtent grid extent configuration for the kernel
84+
* @param blockExtent block extent configuration for the kernel
85+
* @param sharedMemByte dynamic shared memory used by the kernel (in byte )
86+
* @return
87+
*/
88+
HINLINE Kernel(
89+
T_KernelFunctor const & kernelFunctor,
90+
std::string const & file = std::string(),
91+
size_t const line = 0
92+
) :
93+
m_kernelFunctor( kernelFunctor ),
94+
m_file( file ),
95+
m_line( line )
96+
{
7497

75-
/**
76-
* Calls a CUDA kernel and creates an EventTask which represents the kernel.
77-
*
78-
* @param kernelname name of the CUDA kernel (can also used with templates etc. myKernel<1>)
98+
}
99+
100+
/** configured kernel object
101+
*
102+
* this objects contains the functor and the starting parameter
103+
*
104+
* @tparam T_VectorGrid type which defines the grid extents (type must be cast able to CUDA dim3)
105+
* @tparam T_VectorBlock type which defines the block extents (type must be cast able to CUDA dim3)
106+
*
107+
* @param gridExtent grid extent configuration for the kernel
108+
* @param blockExtent block extent configuration for the kernel
109+
* @param sharedMemByte dynamic shared memory used by the kernel (in byte )
110+
*/
111+
template<
112+
typename T_VectorGrid,
113+
typename T_VectorBlock
114+
>
115+
HINLINE
116+
auto
117+
operator()(
118+
T_VectorGrid const & gridExtent,
119+
T_VectorBlock const & blockExtent,
120+
size_t const sharedMemByte = 0
121+
) const
122+
-> KernelStarter<
123+
Kernel,
124+
T_VectorGrid,
125+
T_VectorBlock
126+
>;
127+
};
128+
129+
130+
template<
131+
typename T_Kernel,
132+
typename T_VectorGrid,
133+
typename T_VectorBlock
134+
>
135+
struct KernelStarter
136+
{
137+
/** kernel functor */
138+
T_Kernel const m_kernel;
139+
/** grid extents for the kernel */
140+
T_VectorGrid const m_gridExtent;
141+
/** block extents for the kernel */
142+
T_VectorBlock const m_blockExtent;
143+
/** dynamic shared memory consumed by the kernel (in byte)*/
144+
size_t const m_sharedMemByte;
145+
146+
/** kernel starter object
147+
*
148+
* @param kernel pmacc Kernel
149+
*/
150+
HINLINE KernelStarter(
151+
T_Kernel const & kernel,
152+
T_VectorGrid const & gridExtent,
153+
T_VectorBlock const & blockExtent,
154+
size_t const sharedMemByte
155+
) :
156+
m_kernel( kernel ),
157+
m_gridExtent( gridExtent ),
158+
m_blockExtent( blockExtent ),
159+
m_sharedMemByte( sharedMemByte )
160+
{
161+
162+
}
163+
164+
/** execute the kernel functor
165+
*
166+
* @tparam T_Args types of the arguments
167+
* @param args arguments for the kernel functor
168+
*
169+
* @{
170+
*/
171+
template<
172+
typename ... T_Args
173+
>
174+
HINLINE
175+
void
176+
operator()(
177+
T_Args const & ... args
178+
) const
179+
{
180+
181+
std::string const kernelName = typeid( m_kernel.m_kernelFunctor ).name();
182+
std::string const kernelInfo = kernelName +
183+
std::string( " [" ) + m_kernel.m_file + std::string( ":" ) +
184+
std::to_string( m_kernel.m_line ) + std::string( " ]" );
185+
186+
CUDA_CHECK_KERNEL_MSG(
187+
cudaDeviceSynchronize( ),
188+
std::string( "Crash before kernel call " ) + kernelInfo
189+
);
190+
191+
PMacc::TaskKernel* taskKernel = PMacc::Environment<>::get().Factory().createTaskKernel(
192+
typeid( kernelName ).name()
193+
);
194+
195+
DataSpace<
196+
traits::GetNComponents<
197+
T_VectorGrid
198+
>::value
199+
> gridExtent( m_gridExtent );
200+
201+
DataSpace<
202+
traits::GetNComponents<
203+
T_VectorBlock
204+
>::value
205+
> blockExtent( m_blockExtent );
206+
207+
nvidia::gpuEntryFunction<<<
208+
gridExtent,
209+
blockExtent,
210+
m_sharedMemByte,
211+
taskKernel->getCudaStream()
212+
>>>(
213+
m_kernel.m_kernelFunctor,
214+
args ...
215+
);
216+
CUDA_CHECK_KERNEL_MSG(
217+
cudaGetLastError( ),
218+
std::string( "Last error after kernel launch " ) + kernelInfo
219+
);
220+
CUDA_CHECK_KERNEL_MSG(
221+
cudaDeviceSynchronize( ),
222+
std::string( "Crash after kernel launch " ) + kernelInfo
223+
);
224+
taskKernel->activateChecks( );
225+
CUDA_CHECK_KERNEL_MSG(
226+
cudaDeviceSynchronize( ),
227+
std::string( "Crash after kernel activation" ) + kernelInfo
228+
);
229+
}
230+
231+
template<
232+
typename ... T_Args
233+
>
234+
HINLINE
235+
void
236+
operator()(
237+
T_Args const &... args
238+
)
239+
{
240+
return static_cast< const KernelStarter >(*this)( args ... );
241+
}
242+
243+
/** @} */
244+
245+
};
246+
247+
248+
/** creates a kernel object
249+
*
250+
* @tparam T_KernelFunctor type of the kernel functor
251+
* @param kernelFunctor instance of the functor
252+
* @param file file name (for debug)
253+
* @param line line number in the file (for debug)
254+
*/
255+
template< typename T_KernelFunctor >
256+
auto kernel(
257+
T_KernelFunctor const & kernelFunctor,
258+
std::string const & file = std::string(),
259+
size_t const line = 0
260+
) -> PMacc::Kernel< T_KernelFunctor >
261+
{
262+
return PMacc::Kernel< T_KernelFunctor >(
263+
kernelFunctor,
264+
file,
265+
line
266+
);
267+
}
268+
269+
} // namespace PMacc
270+
271+
272+
/** create a kernel object out of a functor instance
273+
*
274+
* this macro add the current filename and line number to the kernel object
275+
*
276+
* @param ... instance of kernel functor
79277
*/
80-
#define __cudaKernel(kernelname) { \
81-
CUDA_CHECK_KERNEL_MSG(cudaDeviceSynchronize(),"Crash before kernel call"); \
82-
PMacc::TaskKernel *taskKernel = PMacc::Environment<>::get().Factory().createTaskKernel(#kernelname); \
83-
kernelname PMACC_CUDAKERNELCONFIG
278+
#define PMACC_KERNEL( ... ) PMacc::kernel( __VA_ARGS__, __FILE__, static_cast< size_t >( __LINE__ ) )
279+
280+
/** create a kernel object out of a functor type name
281+
*
282+
* this macro add the current filename and line number to the kernel object
283+
*
284+
* @param ... type of the kernel functor
285+
*/
286+
#define PMACC_TYPEKERNEL( ... ) PMacc::kernel( __VA_ARGS__{}, __FILE__, static_cast< size_t >( __LINE__ ) )
287+
288+
#include "eventSystem/events/kernelEvents.tpp"
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/**
2+
* Copyright 2016 Rene Widera
3+
*
4+
* This file is part of libPMacc.
5+
*
6+
* libPMacc is free software: you can redistribute it and/or modify
7+
* it under the terms of either the GNU General Public License or
8+
* the GNU Lesser General Public License as published by
9+
* the Free Software Foundation, either version 3 of the License, or
10+
* (at your option) any later version.
11+
*
12+
* libPMacc is distributed in the hope that it will be useful,
13+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
14+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15+
* GNU General Public License and the GNU Lesser General Public License
16+
* for more details.
17+
*
18+
* You should have received a copy of the GNU General Public License
19+
* and the GNU Lesser General Public License along with libPMacc.
20+
* If not, see <http://www.gnu.org/licenses/>.
21+
*/
22+
23+
#pragma once
24+
25+
26+
#include "pmacc_types.hpp"
27+
#include "eventSystem/events/kernelEvents.hpp"
28+
29+
30+
namespace PMacc
31+
{
32+
template< typename T_KernelFunctor >
33+
template<
34+
typename T_VectorGrid,
35+
typename T_VectorBlock
36+
>
37+
HINLINE
38+
auto
39+
Kernel< T_KernelFunctor >::operator()(
40+
T_VectorGrid const & gridExtent,
41+
T_VectorBlock const & blockExtent,
42+
size_t const sharedMemByte
43+
) const
44+
-> KernelStarter<
45+
Kernel,
46+
T_VectorGrid,
47+
T_VectorBlock
48+
>
49+
{
50+
return KernelStarter<
51+
Kernel,
52+
T_VectorGrid,
53+
T_VectorBlock
54+
>(
55+
*this,
56+
gridExtent,
57+
blockExtent,
58+
sharedMemByte
59+
);
60+
}
61+
} // namespace PMacc

0 commit comments

Comments
 (0)