Skip to content

Commit 878aa4f

Browse files
authored
Apply suggestions from code review
These changes, plus `#define cublasGemmBatchedEx hipblasGemmBatchedEx`, are needed to compile with ROCm. I haven't done performance testing, but it seems to work. I couldn't figure out how to propose a change for lines outside what the pull request changed. Also, this is my first time trying to create a multi-part review, so please forgive me if I mess something up.
1 parent c13fcfb commit 878aa4f

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

ggml-cuda.cu

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7154,9 +7154,9 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
71547154
}
71557155

71567156
// allocate device memory for pointers
7157-
void ** src0_ptrs_as = nullptr;
7158-
void ** src1_ptrs_as = nullptr;
7159-
void ** dst_ptrs_as = nullptr;
7157+
const void ** src0_ptrs_as = nullptr;
7158+
const void ** src1_ptrs_as = nullptr;
7159+
void ** dst_ptrs_as = nullptr;
71607160

71617161
CUDA_CHECK(cudaMalloc(&src0_ptrs_as, ne23*sizeof(void *)));
71627162
CUDA_CHECK(cudaMalloc(&src1_ptrs_as, ne23*sizeof(void *)));
@@ -7170,9 +7170,9 @@ static void ggml_cuda_mul_mat_mat_batched_cublas(const ggml_tensor * src0, const
71707170
CUBLAS_CHECK(
71717171
cublasGemmBatchedEx(g_cublas_handles[id], CUBLAS_OP_T, CUBLAS_OP_N,
71727172
ne01, ne11, ne10,
7173-
&alpha_f16, (void **) src0_ptrs_as, CUDA_R_16F, nb01/sizeof(half),
7174-
(void **) src1_ptrs_as, CUDA_R_16F, nb11/sizeof(float),
7175-
&beta_f16, (void **) dst_ptrs_as, CUDA_R_16F, ne01,
7173+
&alpha_f16, (const void **) src0_ptrs_as, CUDA_R_16F, nb01/sizeof(half),
7174+
(const void **) src1_ptrs_as, CUDA_R_16F, nb11/sizeof(float),
7175+
&beta_f16, ( void **) dst_ptrs_as, CUDA_R_16F, ne01,
71767176
ne23,
71777177
CUBLAS_COMPUTE_16F,
71787178
CUBLAS_GEMM_DEFAULT_TENSOR_OP));

0 commit comments

Comments
 (0)