1
1
#pragma once
2
2
#include "cuda_runtime.h"
#include "log.h"

#include <cstdio>
#include <string>
5
4
6
5
// use this macro if you want to check cuda function
/**
 * @brief Evaluate a CUDA runtime call and, if it failed, return its error
 * code from the enclosing function.
 *
 * The result of the call is handed to cudaAssert() together with the current
 * file and line. When cudaAssert() returns a non-zero value, the enclosing
 * function returns that value, so the macro can only be used inside functions
 * whose return type can be initialised from an int.
 *
 * The expansion is a plain brace block, so existing call sites compile with
 * or without a trailing semicolon. It must not be used as the unbraced body
 * of an `if` that has an `else`.
 *
 * The local result variable uses a deliberately unusual name: `ans` is
 * expanded inside the variable's own scope, so a common name such as `ret`
 * would silently capture any `ret` mentioned in the checked expression.
 *
 * @param ans A function that returns a cudaError_t
 *
 */
#define CUDA_CHECK_RETURN(ans)                                                  \
    {                                                                           \
        const int cudaCheckReturnCode_ = cudaAssert((ans), __FILE__, __LINE__); \
        if (cudaCheckReturnCode_ != 0) return cudaCheckReturnCode_;             \
    }
32
30
33
31
/**
 * @brief Check if a cuda function has succeeded. If it has not, log the cuda
 * error, log the caller-supplied message and return from the enclosing
 * function.
 *
 * The result of the call is handed to cudaAssert() together with the current
 * file and line. When cudaAssert() returns a non-zero value, `msg` is passed
 * to Log::log().debugLogError() and the enclosing function returns without a
 * value, so the macro is meant for functions returning void. `msg` is only
 * evaluated on the failure path.
 *
 * The expansion is a plain brace block, so existing call sites compile with
 * or without a trailing semicolon. The local result variable uses a
 * deliberately unusual name so that it cannot capture an identifier used
 * inside `ans` or `msg`.
 *
 * @param ans A function that returns a cudaError_t
 * @param msg A string to log
 *
 */
#define CUDA_CHECK_RETURN_VOID(ans, msg)                                        \
    {                                                                           \
        const int cudaCheckReturnCode_ = cudaAssert((ans), __FILE__, __LINE__); \
        if (cudaCheckReturnCode_ != 0)                                          \
        {                                                                       \
            Log::log().debugLogError(msg);                                      \
            return;                                                             \
        }                                                                       \
    }
50
49
51
50
/**
 * @brief Check if a cufft function has succeeded. If it has not, log the
 * cufft error and return its code from the enclosing function.
 *
 * The result of the call is converted to int and handed to cufftAssert()
 * together with the current file and line. When cufftAssert() returns a
 * non-zero value, the enclosing function returns that value, so the macro can
 * only be used inside functions whose return type can be initialised from an
 * int.
 *
 * The expansion is a plain brace block, so existing call sites compile with
 * or without a trailing semicolon. The local result variable uses a
 * deliberately unusual name so that it cannot capture an identifier used
 * inside `ans`.
 *
 * @param ans A function of cufft library that returns an int
 *
 */
#define CUFFT_CHECK_RETURN(ans)                                                   \
    {                                                                             \
        const int cufftCheckReturnCode_ =                                         \
            cufftAssert(static_cast<int>(ans), __FILE__, __LINE__);               \
        if (cufftCheckReturnCode_ != 0) return cufftCheckReturnCode_;             \
    }
75
73
76
74
/* *
77
- * @brief Log an error associated to cuda library if there has been an error
75
+ * @brief Log an error associated to cuda library if there has been an error
78
76
* during a cuda function
79
77
*
80
78
* @param[in] code A return code of a function of cuda library
83
81
*
84
82
* @return the return code
85
83
*/
86
- inline int cudaAssert (cudaError_t code, const char *file, int line) {
87
- if (code != cudaSuccess) {
88
- char buffer[2048 ];
89
- sprintf_s (buffer, " Cuda error: %i %s %s %d\n " , code,
90
- cudaGetErrorString (code), file, line);
91
- std::string strError (buffer);
92
- Log::log ().debugLogError (buffer);
93
- }
94
- return (int )code;
84
+ inline int cudaAssert (cudaError_t code, const char * file, int line)
85
+ {
86
+ if (code != cudaSuccess)
87
+ {
88
+ char buffer[2048 ];
89
+ sprintf_s (buffer, " Cuda error: %i %s %s %d\n " , code, cudaGetErrorString (code), file, line);
90
+ std::string strError (buffer);
91
+ Log::log ().debugLogError (buffer);
92
+ }
93
+ return (int )code;
95
94
}
96
95
97
96
/* *
98
- * @brief Log an error associated to cufft library if there has been an error
97
+ * @brief Log an error associated to cufft library if there has been an error
99
98
* during a cufft function
100
99
*
101
100
* @param[in] cufftResult A return code of a function of cufft library
@@ -104,75 +103,44 @@ inline int cudaAssert(cudaError_t code, const char *file, int line) {
104
103
*
105
104
* @return the return code
106
105
*/
107
- inline int cufftAssert (int cufftResult, const char *file, int line) {
108
-
109
- if (cufftResult != 0 ) {
110
- std::string cufftInterpret;
111
- switch (cufftResult) {
112
- case (0 ):
113
- cufftInterpret = " The cuFFT operation was successful" ;
114
- break ;
115
- case (1 ):
116
- cufftInterpret = " cuFFT was passed an invalid plan handle" ;
117
- break ;
118
- case (2 ):
119
- cufftInterpret = " cuFFT failed to allocate GPU or CPU memory" ;
120
- break ;
121
- case (3 ):
122
- cufftInterpret = " No longer used" ;
123
- break ;
124
- case (4 ):
125
- cufftInterpret = " User specified an invalid pointer or parameter" ;
126
- break ;
127
- case (5 ):
128
- cufftInterpret = " Driver or internal cuFFT library error" ;
129
- break ;
130
- case (6 ):
131
- cufftInterpret = " Failed to execute an FFT on the GPU" ;
132
- break ;
133
- case (7 ):
134
- cufftInterpret = " The cuFFT library failed to initialize" ;
135
- break ;
136
- case (8 ):
137
- cufftInterpret = " User specified an invalid transform size" ;
138
- break ;
139
- case (9 ):
140
- cufftInterpret = " No longer used" ;
141
- break ;
142
- case (10 ):
143
- cufftInterpret = " Missing parameters in call" ;
144
- break ;
145
- case (11 ):
146
- cufftInterpret =
147
- " Execution of a plan was on different GPU than plan creation" ;
148
- break ;
149
- case (12 ):
150
- cufftInterpret = " Internal plan database error" ;
151
- break ;
152
- case (13 ):
153
- cufftInterpret = " No workspace has been provided prior to plan execution" ;
154
- break ;
155
- case (14 ):
156
- cufftInterpret =
157
- " Function does not implement functionality for parameters given." ;
158
- break ;
159
- case (15 ):
160
- cufftInterpret = " Used in previous versions." ;
161
- break ;
162
- case (16 ):
163
- cufftInterpret = " Operation is not supported for parameters given." ;
164
- break ;
165
- default :
166
- cufftInterpret = " Unknown error." ;
167
- break ;
106
+ inline int cufftAssert (int cufftResult, const char * file, int line)
107
+ {
108
+
109
+ if (cufftResult != 0 )
110
+ {
111
+ std::string cufftInterpret;
112
+ switch (cufftResult)
113
+ {
114
+ case (0 ): cufftInterpret = " The cuFFT operation was successful" ; break ;
115
+ case (1 ): cufftInterpret = " cuFFT was passed an invalid plan handle" ; break ;
116
+ case (2 ): cufftInterpret = " cuFFT failed to allocate GPU or CPU memory" ; break ;
117
+ case (3 ): cufftInterpret = " No longer used" ; break ;
118
+ case (4 ): cufftInterpret = " User specified an invalid pointer or parameter" ; break ;
119
+ case (5 ): cufftInterpret = " Driver or internal cuFFT library error" ; break ;
120
+ case (6 ): cufftInterpret = " Failed to execute an FFT on the GPU" ; break ;
121
+ case (7 ): cufftInterpret = " The cuFFT library failed to initialize" ; break ;
122
+ case (8 ): cufftInterpret = " User specified an invalid transform size" ; break ;
123
+ case (9 ): cufftInterpret = " No longer used" ; break ;
124
+ case (10 ): cufftInterpret = " Missing parameters in call" ; break ;
125
+ case (11 ):
126
+ cufftInterpret = " Execution of a plan was on different GPU than plan creation" ;
127
+ break ;
128
+ case (12 ): cufftInterpret = " Internal plan database error" ; break ;
129
+ case (13 ): cufftInterpret = " No workspace has been provided prior to plan execution" ; break ;
130
+ case (14 ):
131
+ cufftInterpret = " Function does not implement functionality for parameters given." ;
132
+ break ;
133
+ case (15 ): cufftInterpret = " Used in previous versions." ; break ;
134
+ case (16 ): cufftInterpret = " Operation is not supported for parameters given." ; break ;
135
+ default : cufftInterpret = " Unknown error." ; break ;
136
+ }
137
+ char buffer[2048 ];
138
+ sprintf_s (buffer, " Cufft error: %i %s %s %d\n " , cufftResult, cufftInterpret.c_str (), file,
139
+ line);
140
+ std::string strError (buffer);
141
+ Log::log ().debugLogError (buffer);
168
142
}
169
- char buffer[2048 ];
170
- sprintf_s (buffer, " Cufft error: %i %s %s %d\n " , cufftResult,
171
- cufftInterpret.c_str (), file, line);
172
- std::string strError (buffer);
173
- Log::log ().debugLogError (buffer);
174
- }
175
- return cufftResult;
143
+ return cufftResult;
176
144
}
177
145
178
146
/* *
@@ -181,63 +149,63 @@ inline int cufftAssert(int cufftResult, const char *file, int line) {
181
149
* calculation that has to be done.
182
150
*
183
151
* @param[in] dimBlock Number of threads per block
184
- * @param[in] numCalculation Number of calculation
152
+ * @param[in] numCalculation Number of calculation
185
153
* to do on kernel (eg. if we make calculation on a 1024x1024 texture, and
186
154
* we only want to compute a value on the first 528x528 pixels , then
187
155
* numCalculation = 528,528,1)
188
- * @param[in] getUp If true will get the upper multiple of
156
+ * @param[in] getUp If true will get the upper multiple of
189
157
* dimBlock, else will get the lower multiple. By default its true.
190
- * @param[in] mustDoAllCalculation Imply that dimBlock must
158
+ * @param[in] mustDoAllCalculation Imply that dimBlock must
191
159
* be multiple of numCalculation
192
160
*
193
161
* @return The dim of grid to use in dispatch
194
162
*/
195
- inline dim3 calculateDimGrid (dim3 dimBlock, dim3 numCalculation,
196
- bool getUp = true ,
197
- bool mustDoAllCalculation = false ) {
198
- int addFactor = getUp ? 1 : 0 ;
199
- float invDimBlockX = 1 .0f / dimBlock.x ;
200
- float invDimBlockY = 1 .0f / dimBlock.y ;
201
- float invDimBlockZ = 1 .0f / dimBlock.z ;
202
-
203
- if (mustDoAllCalculation) {
204
- if (numCalculation. x % dimBlock. x != 0 ||
205
- numCalculation.y % dimBlock.y != 0 ||
206
- numCalculation.z % dimBlock.z != 0 ) {
207
- Log::log (). debugLogError (
208
- " Number of threads per block ( " + std::to_string (dimBlock. x ) + " , " +
209
- std::to_string (dimBlock. y ) + " , " + std::to_string (dimBlock.z ) +
210
- " ) "
211
- " is not a multiple of ( " +
212
- std::to_string (numCalculation. x ) + " , " +
213
- std::to_string (numCalculation.y ) + " , " +
214
- std::to_string (numCalculation.z ) +
215
- " )"
216
- " , therefore the compute shader will not compute on all data." );
217
- }
218
- }
219
-
220
- unsigned int multipleDimBlockX =
221
- dimBlock.x * ((int )(numCalculation.x * invDimBlockX) + addFactor);
222
- // unsigned int multipleDimBlockX =
223
- // dimBlock.x * (numCalculation.x / dimBlock.x) + addFactor);
224
- // TODO remove dimBlock.x above and bellow
225
- unsigned int dimGridX = multipleDimBlockX / dimBlock.x ;
226
-
227
- unsigned int multipleDimBlockY =
228
- dimBlock.y * ((int )(numCalculation.y * invDimBlockY) + addFactor);
229
- unsigned int dimGridY = multipleDimBlockY / dimBlock.y ;
230
-
231
- unsigned int multipleDimBlockZ =
232
- dimBlock.z * ((int )(numCalculation.z * invDimBlockZ) + addFactor);
233
- unsigned int dimGridZ = multipleDimBlockZ / dimBlock.z ;
234
-
235
- if (dimGridX < 1 || dimGridY < 1 || dimGridZ < 1 ) {
236
- Log::log (). debugLogError ( " Threads group size " + std::to_string (dimGridX) +
237
- std::to_string (dimGridY ) +
238
- std::to_string (dimGridZ) +
239
- " must be above zero." );
240
- }
241
-
242
- return dim3{dimGridX, dimGridY, dimGridZ};
243
- }
163
+ inline dim3 calculateDimGrid (dim3 dimBlock, dim3 numCalculation, bool getUp = true ,
164
+ bool mustDoAllCalculation = false )
165
+ {
166
+ int addFactor = getUp ? 1 : 0 ;
167
+ float invDimBlockX = 1 .0f / dimBlock.x ;
168
+ float invDimBlockY = 1 .0f / dimBlock.y ;
169
+ float invDimBlockZ = 1 .0f / dimBlock.z ;
170
+
171
+ if (mustDoAllCalculation)
172
+ {
173
+ if (numCalculation. x % dimBlock. x != 0 || numCalculation.y % dimBlock.y != 0 ||
174
+ numCalculation.z % dimBlock.z != 0 )
175
+ {
176
+ Log::log (). debugLogError (
177
+ " Number of threads per block ( " + std::to_string (dimBlock.x ) + " , " +
178
+ std::to_string (dimBlock. y ) + " , " + std::to_string (dimBlock. z ) +
179
+ " ) "
180
+ " is not a multiple of ( " +
181
+ std::to_string (numCalculation. x ) + " , " + std::to_string (numCalculation.y ) + " , " +
182
+ std::to_string (numCalculation.z ) +
183
+ " )"
184
+ " , therefore the compute shader will not compute on all data." );
185
+ }
186
+ }
187
+
188
+ unsigned int multipleDimBlockX =
189
+ dimBlock.x * ((int )(numCalculation.x * invDimBlockX) + addFactor);
190
+ // unsigned int multipleDimBlockX =
191
+ // dimBlock.x * (numCalculation.x / dimBlock.x) + addFactor);
192
+ // TODO remove dimBlock.x above and bellow
193
+ unsigned int dimGridX = multipleDimBlockX / dimBlock.x ;
194
+
195
+ unsigned int multipleDimBlockY =
196
+ dimBlock.y * ((int )(numCalculation.y * invDimBlockY) + addFactor);
197
+ unsigned int dimGridY = multipleDimBlockY / dimBlock.y ;
198
+
199
+ unsigned int multipleDimBlockZ =
200
+ dimBlock.z * ((int )(numCalculation.z * invDimBlockZ) + addFactor);
201
+ unsigned int dimGridZ = multipleDimBlockZ / dimBlock.z ;
202
+
203
+ if (dimGridX < 1 || dimGridY < 1 || dimGridZ < 1 )
204
+ {
205
+ Log::log (). debugLogError ( " Threads group size " + std::to_string (dimGridX ) +
206
+ std::to_string (dimGridY) + std::to_string (dimGridZ) +
207
+ " must be above zero." );
208
+ }
209
+
210
+ return dim3{dimGridX, dimGridY, dimGridZ};
211
+ }
0 commit comments