add CUDA_GLU_BLOCK_SIZE [no ci]

CISC · qnixsynapse · commit cfa9c7a47a1e · 2025-06-22T10:37:25.000+05:30
diff --git a/ggml/src/ggml-cuda/unary.cu b/ggml/src/ggml-cuda/unary.cu
@@ -213,8 +213,8 @@ static __global__ void unary_gated_op_kernel(const T * x, T * dst, const int k,
 
 template <float (*op)(float), typename T>
 static void unary_gated_cuda(const T * x, T * dst, const int k, const int n, const int o, cudaStream_t stream) {
-    const int num_blocks = (k + CUDA_NEG_BLOCK_SIZE - 1) / CUDA_NEG_BLOCK_SIZE;
-    unary_gated_op_kernel<op><<<num_blocks, CUDA_NEG_BLOCK_SIZE, 0, stream>>>(x, dst, k, n, o);
+    const int num_blocks = (k + CUDA_GLU_BLOCK_SIZE - 1) / CUDA_GLU_BLOCK_SIZE;
+    unary_gated_op_kernel<op><<<num_blocks, CUDA_GLU_BLOCK_SIZE, 0, stream>>>(x, dst, k, n, o);
 }
 
 template <float (*op)(float)>
diff --git a/ggml/src/ggml-cuda/unary.cuh b/ggml/src/ggml-cuda/unary.cuh
@@ -15,6 +15,7 @@
 #define CUDA_SQRT_BLOCK_SIZE 256
 #define CUDA_SIN_BLOCK_SIZE 256
 #define CUDA_COS_BLOCK_SIZE 256
+#define CUDA_GLU_BLOCK_SIZE 256
 
 void ggml_cuda_op_abs(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
 

Original file line number	Diff line number	Diff line change
`@@ -213,8 +213,8 @@ static __global__ void unary_gated_op_kernel(const T * x, T * dst, const int k,`
`213`	`213`
`214`	`214`	`template <float (*op)(float), typename T>`
`215`	`215`	`static void unary_gated_cuda(const T * x, T * dst, const int k, const int n, const int o, cudaStream_t stream) {`
`216`		`- const int num_blocks = (k + CUDA_NEG_BLOCK_SIZE - 1) / CUDA_NEG_BLOCK_SIZE;`
`217`		`- unary_gated_op_kernel<op><<<num_blocks, CUDA_NEG_BLOCK_SIZE, 0, stream>>>(x, dst, k, n, o);`
	`216`	`+ const int num_blocks = (k + CUDA_GLU_BLOCK_SIZE - 1) / CUDA_GLU_BLOCK_SIZE;`
	`217`	`+ unary_gated_op_kernel<op><<<num_blocks, CUDA_GLU_BLOCK_SIZE, 0, stream>>>(x, dst, k, n, o);`
`218`	`218`	`}`
`219`	`219`
`220`	`220`	`template <float (*op)(float)>`