|
38 | 38 | typedef PMacc::DataSpace<DIM2> Space2D;
|
39 | 39 | typedef PMacc::DataSpace<DIM3> Space3D;
|
40 | 40 |
|
41 |
| -template<class T_DataBox, class T_Random> |
42 |
| -__global__ void RandomFiller(T_DataBox box, Space2D boxSize, T_Random rand, uint32_t numSamples) |
| 41 | +struct RandomFiller |
43 | 42 | {
|
44 |
| - const Space3D ownIdx = Space3D(threadIdx) + Space3D(blockIdx) * Space3D(blockDim); |
45 |
| - rand.init(ownIdx.shrink<2>()); |
46 |
| - for(uint32_t i=0; i<numSamples; i++) |
| 43 | + template<class T_DataBox, class T_Random> |
| 44 | + DINLINE void operator()(T_DataBox box, Space2D boxSize, T_Random rand, uint32_t numSamples) const |
47 | 45 | {
|
48 |
| - Space2D idx = rand(boxSize); |
49 |
| - atomicAdd(&box(idx), 1); |
| 46 | + const Space3D ownIdx = Space3D(threadIdx) + Space3D(blockIdx) * Space3D(blockDim); |
| 47 | + rand.init(ownIdx.shrink<2>()); |
| 48 | + for(uint32_t i=0; i<numSamples; i++) |
| 49 | + { |
| 50 | + Space2D idx = rand(boxSize); |
| 51 | + atomicAdd(&box(idx), 1); |
| 52 | + } |
50 | 53 | }
|
51 |
| -} |
| 54 | +}; |
52 | 55 |
|
53 | 56 | template<class T_RNGProvider>
|
54 | 57 | struct GetRandomIdx
|
@@ -130,7 +133,7 @@ void generateRandomNumbers(const Space2D& rngSize, uint32_t numSamples, T_Device
|
130 | 133 | Space2D gridSize(rngSize / blockSize);
|
131 | 134 |
|
132 | 135 | CUDA_CHECK(cudaEventRecord(start));
|
133 |
| - __cudaKernel(RandomFiller)(gridSize, blockSize)(buffer.getDataBox(), buffer.getDataSpace(), rand, numSamples); |
| 136 | + PMACC_TYPEKERNEL(RandomFiller)(gridSize, blockSize)(buffer.getDataBox(), buffer.getDataSpace(), rand, numSamples); |
134 | 137 | CUDA_CHECK(cudaEventRecord(stop));
|
135 | 138 | CUDA_CHECK(cudaEventSynchronize(stop));
|
136 | 139 | float milliseconds = 0;
|
|
0 commit comments