@@ -14,3 +14,75 @@ inline void gpuAssert(cudaError_t code, const char* file, int line)
14
14
Log::log ().debugLogError (buffer);
15
15
}
16
16
}
17
+
18
+
19
+ // / <summary>
20
+ // / Get thedim grid to use for a dispatch, from a multiple of
21
+ // / dim block that are used by the kernel, and the number of
22
+ // / calculation that has to be done.
23
+ // / </summary>
24
+ // / <param name="dimBlock">Number of threads per block
25
+ // / </param>
26
+ // / <param name="numCalculation">Number of calculation
27
+ // / to do on kernel (eg. if we make calculation on a 1024x1024 texture, and
28
+ // / we only want to compute a value on the first 528x528 pixels , then
29
+ // / numCalculation = 528,528,1)
30
+ // / </param>
31
+ // / <param name="getUp">If true will get the
32
+ // / upper multiple of dimBlock, else will get the lower multiple. By
33
+ // / default its true.
34
+ // / </param>
35
+ // / <param name="mustDoAllCalculation">if true
36
+ // / imply that dimBlock must be multiple of numCalculation
37
+ // / </param>
38
+ // / <returns>The dim of grid to use in dispatch</returns>
39
+ inline dim3 calculateDimGrid (dim3 dimBlock, dim3 numCalculation, bool getUp = true ,
40
+ bool mustDoAllCalculation = false )
41
+ {
42
+ int addFactor = getUp ? 1 : 0 ;
43
+ float invDimBlockX = 1 .0f / dimBlock.x ;
44
+ float invDimBlockY = 1 .0f / dimBlock.y ;
45
+ float invDimBlockZ = 1 .0f / dimBlock.z ;
46
+
47
+ if (mustDoAllCalculation)
48
+ {
49
+ if (numCalculation.x % dimBlock.x != 0 ||
50
+ numCalculation.y % dimBlock.y != 0 ||
51
+ numCalculation.z % dimBlock.z != 0 )
52
+ {
53
+ Log::log ().debugLogError (
54
+ " Number of threads per block (" + std::to_string (dimBlock.x ) +
55
+ " , " + std::to_string (dimBlock.y ) + " , " +
56
+ std::to_string (dimBlock.z ) +
57
+ " )"
58
+ " is not a multiple of (" +
59
+ std::to_string (numCalculation.x ) + " , " +
60
+ std::to_string (numCalculation.y ) + " , " +
61
+ std::to_string (numCalculation.z ) +
62
+ " )"
63
+ " , therefore the compute shader will not compute on all data." );
64
+ }
65
+ }
66
+
67
+ unsigned int multipleDimBlockX =
68
+ dimBlock.x * ((int )(numCalculation.x * invDimBlockX) + addFactor);
69
+ unsigned int dimGridX = multipleDimBlockX / dimBlock.x ;
70
+
71
+ unsigned int multipleDimBlockY =
72
+ dimBlock.y * ((int )(numCalculation.y * invDimBlockY) + addFactor);
73
+ unsigned int dimGridY = multipleDimBlockY / dimBlock.y ;
74
+
75
+ unsigned int multipleDimBlockZ =
76
+ dimBlock.z * ((int )(numCalculation.z * invDimBlockZ) + addFactor);
77
+ unsigned int dimGridZ = multipleDimBlockZ / dimBlock.z ;
78
+
79
+ if (dimGridX < 1 || dimGridY < 1 || dimGridZ <1 )
80
+ {
81
+ Log::log ().debugLogError (
82
+ " Threads group size " + std::to_string (dimGridX) +
83
+ std::to_string (dimGridY) + std::to_string (dimGridZ) +
84
+ " must be above zero." );
85
+ }
86
+
87
+ return dim3{dimGridX, dimGridY, dimGridZ};
88
+ }
0 commit comments