Skip to content

Commit cfa9c7a

Browse files
CISC and qnixsynapse
authored and committed
add CUDA_GLU_BLOCK_SIZE [no ci]
1 parent d9ddeb9 commit cfa9c7a

File tree

2 files changed

+3
-2
lines changed

2 files changed

+3
-2
lines changed

ggml/src/ggml-cuda/unary.cu

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -213,8 +213,8 @@ static __global__ void unary_gated_op_kernel(const T * x, T * dst, const int k,
213213

214214
template <float (*op)(float), typename T>
215215
static void unary_gated_cuda(const T * x, T * dst, const int k, const int n, const int o, cudaStream_t stream) {
216-
const int num_blocks = (k + CUDA_NEG_BLOCK_SIZE - 1) / CUDA_NEG_BLOCK_SIZE;
217-
unary_gated_op_kernel<op><<<num_blocks, CUDA_NEG_BLOCK_SIZE, 0, stream>>>(x, dst, k, n, o);
216+
const int num_blocks = (k + CUDA_GLU_BLOCK_SIZE - 1) / CUDA_GLU_BLOCK_SIZE;
217+
unary_gated_op_kernel<op><<<num_blocks, CUDA_GLU_BLOCK_SIZE, 0, stream>>>(x, dst, k, n, o);
218218
}
219219

220220
template <float (*op)(float)>

ggml/src/ggml-cuda/unary.cuh

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@
1515
#define CUDA_SQRT_BLOCK_SIZE 256
1616
#define CUDA_SIN_BLOCK_SIZE 256
1717
#define CUDA_COS_BLOCK_SIZE 256
18+
#define CUDA_GLU_BLOCK_SIZE 256
1819

1920
void ggml_cuda_op_abs(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
2021

0 commit comments

Comments
 (0)