@@ -38,73 +38,79 @@ namespace gol
38
38
{
39
39
using namespace PMacc ;
40
40
41
- template <class BoxReadOnly , class BoxWriteOnly , class Mapping >
42
- __global__ void evolution (BoxReadOnly buffRead,
43
- BoxWriteOnly buffWrite,
44
- uint32_t rule,
45
- Mapping mapper)
41
+ struct evolution
46
42
{
47
- typedef typename BoxReadOnly::ValueType Type;
48
- typedef SuperCellDescription<
49
- typename Mapping::SuperCellSize,
50
- math::CT::Int< 1 , 1 >,
51
- math::CT::Int< 1 , 1 >
52
- > BlockArea;
53
- PMACC_AUTO (cache, CachedBox::create < 0 , Type > (BlockArea ()));
54
-
55
- const Space block (mapper.getSuperCellIndex (Space (blockIdx)));
56
- const Space blockCell = block * Mapping::SuperCellSize::toRT ();
57
- const Space threadIndex (threadIdx);
58
- PMACC_AUTO (buffRead_shifted, buffRead.shift (blockCell));
59
-
60
- ThreadCollective<BlockArea> collective (threadIndex);
61
-
62
- nvidia::functors::Assign assign;
63
- collective (
64
- assign,
65
- cache,
66
- buffRead_shifted
67
- );
68
- __syncthreads ();
69
-
70
- Type neighbors = 0 ;
71
- for (uint32_t i = 1 ; i < 9 ; ++i)
43
+ template <class BoxReadOnly , class BoxWriteOnly , class Mapping >
44
+ DINLINE void operator ()(BoxReadOnly buffRead,
45
+ BoxWriteOnly buffWrite,
46
+ uint32_t rule,
47
+ Mapping mapper) const
72
48
{
73
- Space offset (Mask::getRelativeDirections<DIM2 > (i));
74
- neighbors += cache (threadIndex + offset);
49
+ typedef typename BoxReadOnly::ValueType Type;
50
+ typedef SuperCellDescription<
51
+ typename Mapping::SuperCellSize,
52
+ math::CT::Int< 1 , 1 >,
53
+ math::CT::Int< 1 , 1 >
54
+ > BlockArea;
55
+ PMACC_AUTO (cache, CachedBox::create < 0 , Type > (BlockArea ()));
56
+
57
+ const Space block (mapper.getSuperCellIndex (Space (blockIdx)));
58
+ const Space blockCell = block * Mapping::SuperCellSize::toRT ();
59
+ const Space threadIndex (threadIdx);
60
+ PMACC_AUTO (buffRead_shifted, buffRead.shift (blockCell));
61
+
62
+ ThreadCollective<BlockArea> collective (threadIndex);
63
+
64
+ nvidia::functors::Assign assign;
65
+ collective (
66
+ assign,
67
+ cache,
68
+ buffRead_shifted
69
+ );
70
+ __syncthreads ();
71
+
72
+ Type neighbors = 0 ;
73
+ for (uint32_t i = 1 ; i < 9 ; ++i)
74
+ {
75
+ Space offset (Mask::getRelativeDirections<DIM2 > (i));
76
+ neighbors += cache (threadIndex + offset);
77
+ }
78
+
79
+ Type isLife = cache (threadIndex);
80
+ isLife = (bool )(((!isLife)*(1 << (neighbors + 9 ))) & rule) +
81
+ (bool )(((isLife)*(1 << (neighbors))) & rule);
82
+
83
+ buffWrite (blockCell + threadIndex) = isLife;
75
84
}
76
-
77
- Type isLife = cache (threadIndex);
78
- isLife = (bool )(((!isLife)*(1 << (neighbors + 9 ))) & rule) +
79
- (bool )(((isLife)*(1 << (neighbors))) & rule);
80
-
81
- buffWrite (blockCell + threadIndex) = isLife;
82
- }
83
-
84
- template <class BoxWriteOnly , class Mapping >
85
- __global__ void randomInit (BoxWriteOnly buffWrite,
86
- uint32_t seed,
87
- float fraction,
88
- Mapping mapper)
85
+ };
86
+
87
+ struct randomInit
89
88
{
90
- /* get position in grid in units of SuperCells from blockID */
91
- const Space block (mapper.getSuperCellIndex (Space (blockIdx)));
92
- /* convert position in unit of cells */
93
- const Space blockCell = block * Mapping::SuperCellSize::toRT ();
94
- /* convert CUDA dim3 to DataSpace<DIM3> */
95
- const Space threadIndex (threadIdx);
96
- const uint32_t cellIdx = DataSpaceOperations<DIM2>::map (
97
- mapper.getGridSuperCells () * Mapping::SuperCellSize::toRT (),
98
- blockCell + threadIndex);
99
-
100
- /* get uniform random number from seed */
101
- PMACC_AUTO (rng, nvidia::rng::create (
102
- nvidia::rng::methods::Xor (seed, cellIdx),
103
- nvidia::rng::distributions::Uniform_float ()));
104
-
105
- /* write 1(white) if uniform random number 0<rng<1 is smaller than 'fraction' */
106
- buffWrite (blockCell + threadIndex) = (rng () <= fraction);
107
- }
89
+ template <class BoxWriteOnly , class Mapping >
90
+ DINLINE void operator ()(BoxWriteOnly buffWrite,
91
+ uint32_t seed,
92
+ float fraction,
93
+ Mapping mapper) const
94
+ {
95
+ /* get position in grid in units of SuperCells from blockID */
96
+ const Space block (mapper.getSuperCellIndex (Space (blockIdx)));
97
+ /* convert position in unit of cells */
98
+ const Space blockCell = block * Mapping::SuperCellSize::toRT ();
99
+ /* convert CUDA dim3 to DataSpace<DIM3> */
100
+ const Space threadIndex (threadIdx);
101
+ const uint32_t cellIdx = DataSpaceOperations<DIM2>::map (
102
+ mapper.getGridSuperCells () * Mapping::SuperCellSize::toRT (),
103
+ blockCell + threadIndex);
104
+
105
+ /* get uniform random number from seed */
106
+ PMACC_AUTO (rng, nvidia::rng::create (
107
+ nvidia::rng::methods::Xor (seed, cellIdx),
108
+ nvidia::rng::distributions::Uniform_float ()));
109
+
110
+ /* write 1(white) if uniform random number 0<rng<1 is smaller than 'fraction' */
111
+ buffWrite (blockCell + threadIndex) = (rng () <= fraction);
112
+ }
113
+ };
108
114
}
109
115
110
116
template <class MappingDesc >
@@ -130,8 +136,8 @@ namespace gol
130
136
GridController<DIM2>& gc = Environment<DIM2>::get ().GridController ();
131
137
uint32_t seed = gc.getGlobalSize () + gc.getGlobalRank ();
132
138
133
- __cudaKernel (kernel::randomInit)
134
- (mapper.getGridDim (), MappingDesc::SuperCellSize::toRT (). toDim3 () )
139
+ PMACC_TYPEKERNEL (kernel::randomInit)
140
+ (mapper.getGridDim (), MappingDesc::SuperCellSize::toRT ())
135
141
(
136
142
writeBox,
137
143
seed,
@@ -143,8 +149,8 @@ namespace gol
143
149
void run (const DBox& readBox, const DBox & writeBox)
144
150
{
145
151
AreaMapping < Area, MappingDesc > mapper (mapping);
146
- __cudaKernel (kernel::evolution)
147
- (mapper.getGridDim (), MappingDesc::SuperCellSize::toRT (). toDim3 () )
152
+ PMACC_TYPEKERNEL (kernel::evolution)
153
+ (mapper.getGridDim (), MappingDesc::SuperCellSize::toRT ())
148
154
(readBox,
149
155
writeBox,
150
156
rule,
0 commit comments