File tree Expand file tree Collapse file tree 3 files changed +30
-25
lines changed Expand file tree Collapse file tree 3 files changed +30
-25
lines changed Original file line number Diff line number Diff line change @@ -176,17 +176,20 @@ static const char * cu_get_error_str(CUresult err) {
176
176
#endif
177
177
178
178
#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && !defined(GGML_USE_MUSA)
179
- #define CUDA_SET_SHARED_MEMORY_LIMIT (kernel, nbytes ) \
180
- do { \
181
- static bool shared_memory_limit_raised[GGML_CUDA_MAX_DEVICES] = {false }; \
182
- const int id = ggml_cuda_get_device (); \
183
- if (!shared_memory_limit_raised[id]) { \
184
- CUDA_CHECK (cudaFuncSetAttribute (kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, nbytes)); \
185
- shared_memory_limit_raised[id] = true ; \
186
- } \
187
- } while (0 )
179
+ # define CUDA_SET_SHARED_MEMORY_LIMIT (kernel, nbytes ) \
180
+ do { \
181
+ static bool shared_memory_limit_raised[GGML_CUDA_MAX_DEVICES] = { false }; \
182
+ const int id = ggml_cuda_get_device (); \
183
+ if (!shared_memory_limit_raised[id]) { \
184
+ CUDA_CHECK (cudaFuncSetAttribute (kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, nbytes)); \
185
+ shared_memory_limit_raised[id] = true ; \
186
+ } \
187
+ } while (0 )
188
188
#else
189
- #define CUDA_SET_SHARED_MEMORY_LIMIT (kernel, nbytes ) do {} while (0 )
189
+ # define CUDA_SET_SHARED_MEMORY_LIMIT (kernel, nbytes ) \
190
+ do { \
191
+ GGML_UNUSED (nbytes); \
192
+ } while (0 )
190
193
#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && !defined(GGML_USE_MUSA)
191
194
192
195
#if CUDART_VERSION >= 11010 || defined(GGML_USE_MUSA)
Original file line number Diff line number Diff line change @@ -299,14 +299,14 @@ static __global__ void flash_attn_tile_ext_f32(
299
299
GGML_UNUSED (dst); GGML_UNUSED (dst_meta); GGML_UNUSED (scale);
300
300
GGML_UNUSED (max_bias); GGML_UNUSED (m0); GGML_UNUSED (m1);
301
301
GGML_UNUSED (n_head_log2); GGML_UNUSED (logit_softcap);
302
- GGML_UNUSED (ne00); GGML_UNUSED (ne01); GGML_UNUSED (ne02);
303
- GGML_UNUSED (ne03 ); GGML_UNUSED (ne10 ); GGML_UNUSED (ne11 );
304
- GGML_UNUSED (ne12 ); GGML_UNUSED (ne13); GGML_UNUSED (ne31 );
305
- GGML_UNUSED (nb31); GGML_UNUSED (nb01); GGML_UNUSED (nb02 );
306
- GGML_UNUSED (nb03 ); GGML_UNUSED (nb11 ); GGML_UNUSED (nb12 );
307
- GGML_UNUSED (nb13 ); GGML_UNUSED (nb21 ); GGML_UNUSED (nb22 );
308
- GGML_UNUSED (nb23 ); GGML_UNUSED (ne0 ); GGML_UNUSED (ne1 );
309
- GGML_UNUSED (ne2); GGML_UNUSED (ne3);
302
+ GGML_UNUSED (ne00); GGML_UNUSED (ne01); GGML_UNUSED (ne02); GGML_UNUSED (ne03);
303
+ GGML_UNUSED (ne10 ); GGML_UNUSED (ne11 ); GGML_UNUSED (ne12); GGML_UNUSED (ne13 );
304
+ GGML_UNUSED (ne31 ); GGML_UNUSED (ne32 );
305
+ GGML_UNUSED (nb31); GGML_UNUSED (nb32 );
306
+ GGML_UNUSED (nb01 ); GGML_UNUSED (nb02 ); GGML_UNUSED (nb03 );
307
+ GGML_UNUSED (nb11 ); GGML_UNUSED (nb12 ); GGML_UNUSED (nb13 );
308
+ GGML_UNUSED (nb21 ); GGML_UNUSED (nb22 ); GGML_UNUSED (nb23 );
309
+ GGML_UNUSED (ne0); GGML_UNUSED (ne1); GGML_UNUSED ( ne2); GGML_UNUSED (ne3);
310
310
NO_DEVICE_CODE;
311
311
#endif // FLASH_ATTN_AVAILABLE
312
312
}
Original file line number Diff line number Diff line change @@ -337,13 +337,15 @@ static __global__ void flash_attn_vec_ext_f32(
337
337
GGML_UNUSED (Q); GGML_UNUSED (K); GGML_UNUSED (V); GGML_UNUSED (mask);
338
338
GGML_UNUSED (dst); GGML_UNUSED (dst_meta); GGML_UNUSED (scale);
339
339
GGML_UNUSED (max_bias); GGML_UNUSED (m0); GGML_UNUSED (m1);
340
- GGML_UNUSED (n_head_log2); GGML_UNUSED (logit_softcap); GGML_UNUSED (ne00);
341
- GGML_UNUSED (ne01); GGML_UNUSED (ne02); GGML_UNUSED (ne03); GGML_UNUSED (ne10);
342
- GGML_UNUSED (ne11); GGML_UNUSED (ne12); GGML_UNUSED (ne13); GGML_UNUSED (ne31);
343
- GGML_UNUSED (nb31); GGML_UNUSED (nb01); GGML_UNUSED (nb02); GGML_UNUSED (nb03);
344
- GGML_UNUSED (nb11); GGML_UNUSED (nb12); GGML_UNUSED (nb13); GGML_UNUSED (nb21);
345
- GGML_UNUSED (nb22); GGML_UNUSED (nb23); GGML_UNUSED (ne0); GGML_UNUSED (ne1);
346
- GGML_UNUSED (ne2); GGML_UNUSED (ne3);
340
+ GGML_UNUSED (n_head_log2); GGML_UNUSED (logit_softcap);
341
+ GGML_UNUSED (ne00); GGML_UNUSED (ne01); GGML_UNUSED (ne02); GGML_UNUSED (ne03);
342
+ GGML_UNUSED (ne10); GGML_UNUSED (ne11); GGML_UNUSED (ne12); GGML_UNUSED (ne13);
343
+ GGML_UNUSED (ne31); GGML_UNUSED (ne32);
344
+ GGML_UNUSED (nb31); GGML_UNUSED (nb32);
345
+ GGML_UNUSED (nb01); GGML_UNUSED (nb02); GGML_UNUSED (nb03);
346
+ GGML_UNUSED (nb11); GGML_UNUSED (nb12); GGML_UNUSED (nb13);
347
+ GGML_UNUSED (nb21); GGML_UNUSED (nb22); GGML_UNUSED (nb23);
348
+ GGML_UNUSED (ne0); GGML_UNUSED (ne1); GGML_UNUSED (ne2); GGML_UNUSED (ne3);
347
349
NO_DEVICE_CODE;
348
350
#endif // FLASH_ATTN_AVAILABLE
349
351
}
You can’t perform that action at this time.
0 commit comments