[gol] transform kernel functions to functors

psychocoderHPC · psychocoderHPC · commit d0aa883dcdf9 · 2016-12-08T10:52:37.000+01:00
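- use the `PMACC_TYPEKERNEL` macro instead of `__cudaKernel` to launch the kernels
- transform the `__global__` kernel functions `evolution` and `randomInit` into functor structs with a templated `operator()`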
diff --git a/src/libPMacc/examples/gameOfLife2D/include/Evolution.hpp b/src/libPMacc/examples/gameOfLife2D/include/Evolution.hpp
@@ -38,73 +38,79 @@ namespace gol
     {
         using namespace PMacc;
 
-        template<class BoxReadOnly, class BoxWriteOnly, class Mapping>
-        __global__ void evolution(BoxReadOnly buffRead,
-                                  BoxWriteOnly buffWrite,
-                                  uint32_t rule,
-                                  Mapping mapper)
+        struct evolution
         {
-            typedef typename BoxReadOnly::ValueType Type;
-            typedef SuperCellDescription<
-                    typename Mapping::SuperCellSize,
-                    math::CT::Int< 1, 1 >,
-                    math::CT::Int< 1, 1 >
-                    > BlockArea;
-            PMACC_AUTO(cache, CachedBox::create < 0, Type > (BlockArea()));
-
-            const Space block(mapper.getSuperCellIndex(Space(blockIdx)));
-            const Space blockCell = block * Mapping::SuperCellSize::toRT();
-            const Space threadIndex(threadIdx);
-            PMACC_AUTO(buffRead_shifted, buffRead.shift(blockCell));
-
-            ThreadCollective<BlockArea> collective(threadIndex);
-
-            nvidia::functors::Assign assign;
-            collective(
-                      assign,
-                      cache,
-                      buffRead_shifted
-                      );
-            __syncthreads();
-
-            Type neighbors = 0;
-            for (uint32_t i = 1; i < 9; ++i)
+            template<class BoxReadOnly, class BoxWriteOnly, class Mapping>
+            DINLINE void operator()(BoxReadOnly buffRead,
+                                      BoxWriteOnly buffWrite,
+                                      uint32_t rule,
+                                      Mapping mapper) const
             {
-                Space offset(Mask::getRelativeDirections<DIM2 > (i));
-                neighbors += cache(threadIndex + offset);
+                typedef typename BoxReadOnly::ValueType Type;
+                typedef SuperCellDescription<
+                        typename Mapping::SuperCellSize,
+                        math::CT::Int< 1, 1 >,
+                        math::CT::Int< 1, 1 >
+                        > BlockArea;
+                PMACC_AUTO(cache, CachedBox::create < 0, Type > (BlockArea()));
+
+                const Space block(mapper.getSuperCellIndex(Space(blockIdx)));
+                const Space blockCell = block * Mapping::SuperCellSize::toRT();
+                const Space threadIndex(threadIdx);
+                PMACC_AUTO(buffRead_shifted, buffRead.shift(blockCell));
+
+                ThreadCollective<BlockArea> collective(threadIndex);
+
+                nvidia::functors::Assign assign;
+                collective(
+                          assign,
+                          cache,
+                          buffRead_shifted
+                          );
+                __syncthreads();
+
+                Type neighbors = 0;
+                for (uint32_t i = 1; i < 9; ++i)
+                {
+                    Space offset(Mask::getRelativeDirections<DIM2 > (i));
+                    neighbors += cache(threadIndex + offset);
+                }
+
+                Type isLife = cache(threadIndex);
+                isLife = (bool)(((!isLife)*(1 << (neighbors + 9))) & rule) +
+                        (bool)(((isLife)*(1 << (neighbors))) & rule);
+
+                buffWrite(blockCell + threadIndex) = isLife;
             }
-
-            Type isLife = cache(threadIndex);
-            isLife = (bool)(((!isLife)*(1 << (neighbors + 9))) & rule) +
-                    (bool)(((isLife)*(1 << (neighbors))) & rule);
-
-            buffWrite(blockCell + threadIndex) = isLife;
-        }
-
-        template<class BoxWriteOnly, class Mapping>
-        __global__ void randomInit(BoxWriteOnly buffWrite,
-                                   uint32_t seed,
-                                   float fraction,
-                                   Mapping mapper)
+        };
+        
+        struct randomInit
         {
-            /* get position in grid in units of SuperCells from blockID */
-            const Space block(mapper.getSuperCellIndex(Space(blockIdx)));
-            /* convert position in unit of cells */
-            const Space blockCell = block * Mapping::SuperCellSize::toRT();
-            /* convert CUDA dim3 to DataSpace<DIM3> */
-            const Space threadIndex(threadIdx);
-            const uint32_t cellIdx = DataSpaceOperations<DIM2>::map(
-                    mapper.getGridSuperCells() * Mapping::SuperCellSize::toRT(),
-                    blockCell + threadIndex);
-
-            /* get uniform random number from seed  */
-            PMACC_AUTO(rng, nvidia::rng::create(
-                                nvidia::rng::methods::Xor(seed, cellIdx),
-                                nvidia::rng::distributions::Uniform_float()));
-
-            /* write 1(white) if uniform random number 0<rng<1 is smaller than 'fraction' */
-            buffWrite(blockCell + threadIndex) = (rng() <= fraction);
-        }
+            template<class BoxWriteOnly, class Mapping>
+            DINLINE void operator()(BoxWriteOnly buffWrite,
+                                       uint32_t seed,
+                                       float fraction,
+                                       Mapping mapper) const
+            {
+                /* get position in grid in units of SuperCells from blockID */
+                const Space block(mapper.getSuperCellIndex(Space(blockIdx)));
+                /* convert position in unit of cells */
+                const Space blockCell = block * Mapping::SuperCellSize::toRT();
+                /* convert CUDA dim3 to DataSpace<DIM3> */
+                const Space threadIndex(threadIdx);
+                const uint32_t cellIdx = DataSpaceOperations<DIM2>::map(
+                        mapper.getGridSuperCells() * Mapping::SuperCellSize::toRT(),
+                        blockCell + threadIndex);
+
+                /* get uniform random number from seed  */
+                PMACC_AUTO(rng, nvidia::rng::create(
+                                    nvidia::rng::methods::Xor(seed, cellIdx),
+                                    nvidia::rng::distributions::Uniform_float()));
+
+                /* write 1 (white) if the uniform random number (0<rng<1) is less than or equal to 'fraction', otherwise 0 */
+                buffWrite(blockCell + threadIndex) = (rng() <= fraction);
+            }
+        };
     }
 
     template<class MappingDesc>
@@ -130,8 +136,8 @@ namespace gol
             GridController<DIM2>& gc = Environment<DIM2>::get().GridController();
             uint32_t seed = gc.getGlobalSize() + gc.getGlobalRank();
 
-            __cudaKernel(kernel::randomInit)
-                    (mapper.getGridDim(), MappingDesc::SuperCellSize::toRT().toDim3())
+            PMACC_TYPEKERNEL(kernel::randomInit)
+                    (mapper.getGridDim(), MappingDesc::SuperCellSize::toRT())
                     (
                      writeBox,
                      seed,
@@ -143,8 +149,8 @@ namespace gol
         void run(const DBox& readBox, const DBox & writeBox)
         {
             AreaMapping < Area, MappingDesc > mapper(mapping);
-            __cudaKernel(kernel::evolution)
-                    (mapper.getGridDim(), MappingDesc::SuperCellSize::toRT().toDim3())
+            PMACC_TYPEKERNEL(kernel::evolution)
+                    (mapper.getGridDim(), MappingDesc::SuperCellSize::toRT())
                     (readBox,
                      writeBox,
                      rule,