Skip to content

Commit 6ce7693

Browse files
use PMACC_TYPEKERNEL
- use `PMACC_TYPEKERNEL` macro to start a kernel
- transform global kernel function to a functor
1 parent 8c03253 commit 6ce7693

File tree

2 files changed

+26
-18
lines changed

2 files changed

+26
-18
lines changed

src/libPMacc/test/particles/IdProvider.hpp

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,15 +41,20 @@ namespace bmpl = boost::mpl;
4141

4242
namespace
4343
{
44-
template<class T_IdProvider, class T_Box>
45-
__global__ void generateIds(T_Box outputbox, uint32_t numThreads, uint32_t numIdsPerThread)
44+
template< typename T_IdProvider >
45+
struct generateIds
4646
{
47-
const uint32_t localId = blockIdx.x * blockDim.x + threadIdx.x;
48-
if(localId >= numThreads)
49-
return;
50-
for(uint32_t i=0; i<numIdsPerThread; i++)
51-
outputbox(i * numThreads + localId) = T_IdProvider::getNewId();
52-
}
47+
template<class T_Box>
48+
DINLINE void operator()(T_Box outputbox, uint32_t numThreads, uint32_t numIdsPerThread) const
49+
{
50+
const uint32_t localId = blockIdx.x * blockDim.x + threadIdx.x;
51+
if(localId < numThreads)
52+
{
53+
for(uint32_t i=0; i<numIdsPerThread; i++)
54+
outputbox(i * numThreads + localId) = T_IdProvider::getNewId();
55+
}
56+
}
57+
};
5358
}
5459

5560
/**
@@ -115,7 +120,7 @@ struct IdProviderTest
115120
BOOST_REQUIRE_EQUAL(IdProvider::getNewId(), state.nextId);
116121
// Generate the same IDs on the device
117122
PMacc::HostDeviceBuffer<uint64_t, 1> idBuf(numIds);
118-
__cudaKernel(generateIds<IdProvider>)(numBlocks, numThreadsPerBlock)
123+
PMACC_TYPEKERNEL(generateIds<IdProvider>)(numBlocks, numThreadsPerBlock)
119124
(idBuf.getDeviceBuffer().getDataBox(), numThreads, numIdsPerThread);
120125
idBuf.deviceToHost();
121126
BOOST_REQUIRE_EQUAL(numIds, ids.size());

src/libPMacc/test/random/2DDistribution.cu

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -38,17 +38,20 @@
3838
typedef PMacc::DataSpace<DIM2> Space2D;
3939
typedef PMacc::DataSpace<DIM3> Space3D;
4040

41-
template<class T_DataBox, class T_Random>
42-
__global__ void RandomFiller(T_DataBox box, Space2D boxSize, T_Random rand, uint32_t numSamples)
41+
struct RandomFiller
4342
{
44-
const Space3D ownIdx = Space3D(threadIdx) + Space3D(blockIdx) * Space3D(blockDim);
45-
rand.init(ownIdx.shrink<2>());
46-
for(uint32_t i=0; i<numSamples; i++)
43+
template<class T_DataBox, class T_Random>
44+
DINLINE void operator()(T_DataBox box, Space2D boxSize, T_Random rand, uint32_t numSamples) const
4745
{
48-
Space2D idx = rand(boxSize);
49-
atomicAdd(&box(idx), 1);
46+
const Space3D ownIdx = Space3D(threadIdx) + Space3D(blockIdx) * Space3D(blockDim);
47+
rand.init(ownIdx.shrink<2>());
48+
for(uint32_t i=0; i<numSamples; i++)
49+
{
50+
Space2D idx = rand(boxSize);
51+
atomicAdd(&box(idx), 1);
52+
}
5053
}
51-
}
54+
};
5255

5356
template<class T_RNGProvider>
5457
struct GetRandomIdx
@@ -130,7 +133,7 @@ void generateRandomNumbers(const Space2D& rngSize, uint32_t numSamples, T_Device
130133
Space2D gridSize(rngSize / blockSize);
131134

132135
CUDA_CHECK(cudaEventRecord(start));
133-
__cudaKernel(RandomFiller)(gridSize, blockSize)(buffer.getDataBox(), buffer.getDataSpace(), rand, numSamples);
136+
PMACC_TYPEKERNEL(RandomFiller)(gridSize, blockSize)(buffer.getDataBox(), buffer.getDataSpace(), rand, numSamples);
134137
CUDA_CHECK(cudaEventRecord(stop));
135138
CUDA_CHECK(cudaEventSynchronize(stop));
136139
float milliseconds = 0;

0 commit comments

Comments
 (0)