Skip to content

Commit d0aa883

Browse files
[gol] transform kernel functions to functors
- use `PMACC_TYPEKERNEL` macro to start a kernel - transform global kernel function to a functor
1 parent 9a97a20 commit d0aa883

File tree

1 file changed

+73
-67
lines changed

1 file changed

+73
-67
lines changed

src/libPMacc/examples/gameOfLife2D/include/Evolution.hpp

Lines changed: 73 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -38,73 +38,79 @@ namespace gol
3838
{
3939
using namespace PMacc;
4040

41-
template<class BoxReadOnly, class BoxWriteOnly, class Mapping>
42-
__global__ void evolution(BoxReadOnly buffRead,
43-
BoxWriteOnly buffWrite,
44-
uint32_t rule,
45-
Mapping mapper)
41+
struct evolution
4642
{
47-
typedef typename BoxReadOnly::ValueType Type;
48-
typedef SuperCellDescription<
49-
typename Mapping::SuperCellSize,
50-
math::CT::Int< 1, 1 >,
51-
math::CT::Int< 1, 1 >
52-
> BlockArea;
53-
PMACC_AUTO(cache, CachedBox::create < 0, Type > (BlockArea()));
54-
55-
const Space block(mapper.getSuperCellIndex(Space(blockIdx)));
56-
const Space blockCell = block * Mapping::SuperCellSize::toRT();
57-
const Space threadIndex(threadIdx);
58-
PMACC_AUTO(buffRead_shifted, buffRead.shift(blockCell));
59-
60-
ThreadCollective<BlockArea> collective(threadIndex);
61-
62-
nvidia::functors::Assign assign;
63-
collective(
64-
assign,
65-
cache,
66-
buffRead_shifted
67-
);
68-
__syncthreads();
69-
70-
Type neighbors = 0;
71-
for (uint32_t i = 1; i < 9; ++i)
43+
template<class BoxReadOnly, class BoxWriteOnly, class Mapping>
44+
DINLINE void operator()(BoxReadOnly buffRead,
45+
BoxWriteOnly buffWrite,
46+
uint32_t rule,
47+
Mapping mapper) const
7248
{
73-
Space offset(Mask::getRelativeDirections<DIM2 > (i));
74-
neighbors += cache(threadIndex + offset);
49+
typedef typename BoxReadOnly::ValueType Type;
50+
typedef SuperCellDescription<
51+
typename Mapping::SuperCellSize,
52+
math::CT::Int< 1, 1 >,
53+
math::CT::Int< 1, 1 >
54+
> BlockArea;
55+
PMACC_AUTO(cache, CachedBox::create < 0, Type > (BlockArea()));
56+
57+
const Space block(mapper.getSuperCellIndex(Space(blockIdx)));
58+
const Space blockCell = block * Mapping::SuperCellSize::toRT();
59+
const Space threadIndex(threadIdx);
60+
PMACC_AUTO(buffRead_shifted, buffRead.shift(blockCell));
61+
62+
ThreadCollective<BlockArea> collective(threadIndex);
63+
64+
nvidia::functors::Assign assign;
65+
collective(
66+
assign,
67+
cache,
68+
buffRead_shifted
69+
);
70+
__syncthreads();
71+
72+
Type neighbors = 0;
73+
for (uint32_t i = 1; i < 9; ++i)
74+
{
75+
Space offset(Mask::getRelativeDirections<DIM2 > (i));
76+
neighbors += cache(threadIndex + offset);
77+
}
78+
79+
Type isLife = cache(threadIndex);
80+
isLife = (bool)(((!isLife)*(1 << (neighbors + 9))) & rule) +
81+
(bool)(((isLife)*(1 << (neighbors))) & rule);
82+
83+
buffWrite(blockCell + threadIndex) = isLife;
7584
}
76-
77-
Type isLife = cache(threadIndex);
78-
isLife = (bool)(((!isLife)*(1 << (neighbors + 9))) & rule) +
79-
(bool)(((isLife)*(1 << (neighbors))) & rule);
80-
81-
buffWrite(blockCell + threadIndex) = isLife;
82-
}
83-
84-
template<class BoxWriteOnly, class Mapping>
85-
__global__ void randomInit(BoxWriteOnly buffWrite,
86-
uint32_t seed,
87-
float fraction,
88-
Mapping mapper)
85+
};
86+
87+
struct randomInit
8988
{
90-
/* get position in grid in units of SuperCells from blockID */
91-
const Space block(mapper.getSuperCellIndex(Space(blockIdx)));
92-
/* convert position in unit of cells */
93-
const Space blockCell = block * Mapping::SuperCellSize::toRT();
94-
/* convert CUDA dim3 to DataSpace<DIM3> */
95-
const Space threadIndex(threadIdx);
96-
const uint32_t cellIdx = DataSpaceOperations<DIM2>::map(
97-
mapper.getGridSuperCells() * Mapping::SuperCellSize::toRT(),
98-
blockCell + threadIndex);
99-
100-
/* get uniform random number from seed */
101-
PMACC_AUTO(rng, nvidia::rng::create(
102-
nvidia::rng::methods::Xor(seed, cellIdx),
103-
nvidia::rng::distributions::Uniform_float()));
104-
105-
/* write 1(white) if uniform random number 0<rng<1 is smaller than 'fraction' */
106-
buffWrite(blockCell + threadIndex) = (rng() <= fraction);
107-
}
89+
template<class BoxWriteOnly, class Mapping>
90+
DINLINE void operator()(BoxWriteOnly buffWrite,
91+
uint32_t seed,
92+
float fraction,
93+
Mapping mapper) const
94+
{
95+
/* get position in grid in units of SuperCells from blockID */
96+
const Space block(mapper.getSuperCellIndex(Space(blockIdx)));
97+
/* convert position in unit of cells */
98+
const Space blockCell = block * Mapping::SuperCellSize::toRT();
99+
/* convert CUDA dim3 to DataSpace<DIM3> */
100+
const Space threadIndex(threadIdx);
101+
const uint32_t cellIdx = DataSpaceOperations<DIM2>::map(
102+
mapper.getGridSuperCells() * Mapping::SuperCellSize::toRT(),
103+
blockCell + threadIndex);
104+
105+
/* get uniform random number from seed */
106+
PMACC_AUTO(rng, nvidia::rng::create(
107+
nvidia::rng::methods::Xor(seed, cellIdx),
108+
nvidia::rng::distributions::Uniform_float()));
109+
110+
/* write 1(white) if uniform random number 0<rng<1 is smaller than 'fraction' */
111+
buffWrite(blockCell + threadIndex) = (rng() <= fraction);
112+
}
113+
};
108114
}
109115

110116
template<class MappingDesc>
@@ -130,8 +136,8 @@ namespace gol
130136
GridController<DIM2>& gc = Environment<DIM2>::get().GridController();
131137
uint32_t seed = gc.getGlobalSize() + gc.getGlobalRank();
132138

133-
__cudaKernel(kernel::randomInit)
134-
(mapper.getGridDim(), MappingDesc::SuperCellSize::toRT().toDim3())
139+
PMACC_TYPEKERNEL(kernel::randomInit)
140+
(mapper.getGridDim(), MappingDesc::SuperCellSize::toRT())
135141
(
136142
writeBox,
137143
seed,
@@ -143,8 +149,8 @@ namespace gol
143149
void run(const DBox& readBox, const DBox & writeBox)
144150
{
145151
AreaMapping < Area, MappingDesc > mapper(mapping);
146-
__cudaKernel(kernel::evolution)
147-
(mapper.getGridDim(), MappingDesc::SuperCellSize::toRT().toDim3())
152+
PMACC_TYPEKERNEL(kernel::evolution)
153+
(mapper.getGridDim(), MappingDesc::SuperCellSize::toRT())
148154
(readBox,
149155
writeBox,
150156
rule,

0 commit comments

Comments
 (0)