Skip to content

Commit adaa0ef

Browse files
-ngl 35 works
1 parent 9fb224c commit adaa0ef

File tree

1 file changed

+9
-4
lines changed

1 file changed

+9
-4
lines changed

ggml-cuda.cu

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6334,7 +6334,6 @@ void ggml_cuda_mul_mat_vec_p021(const ggml_tensor * src0, const ggml_tensor * sr
63346334
GGML_ASSERT(src0->backend != GGML_BACKEND_GPU_SPLIT);
63356335
GGML_ASSERT(src0->nb[0] <= src0->nb[1] && src0->nb[2] <= src0->nb[3]); // 0213 permutation
63366336
GGML_ASSERT(src1->nb[0] <= src1->nb[1] && src1->nb[2] <= src1->nb[3]); // 0213 permutation
6337-
GGML_ASSERT(src0->type == GGML_TYPE_F16);
63386337
GGML_ASSERT(src1->type == GGML_TYPE_F32);
63396338

63406339
const int64_t ne00 = src0->ne[0];
@@ -6347,15 +6346,21 @@ void ggml_cuda_mul_mat_vec_p021(const ggml_tensor * src0, const ggml_tensor * sr
63476346
cudaStream_t cudaStream_main = g_cudaStreams_main[g_main_device];
63486347

63496348
struct ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) src0->extra;
6350-
void * src0_ddq = src0_extra->data_device[g_main_device];
6349+
char * src0_ddq = (char *) src0_extra->data_device[g_main_device];
63516350

63526351
struct ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu *) src1->extra;
63536352
float * src1_ddf = (float *) src1_extra->data_device[g_main_device];
63546353

63556354
struct ggml_tensor_extra_gpu * dst_extra = (ggml_tensor_extra_gpu *) dst->extra;
63566355
float * dst_ddf = (float *) dst_extra->data_device[g_main_device];
63576356

6358-
ggml_mul_mat_p021_f16_f32_cuda(src0_ddq, src1_ddf, dst_ddf, ne00, ne01, ne02, ne12, cudaStream_main);
6357+
if (src0->type == GGML_TYPE_F16) {
6358+
ggml_mul_mat_p021_f16_f32_cuda(src0_ddq, src1_ddf, dst_ddf, ne00, ne01, ne02, ne12, cudaStream_main);
6359+
} else if (ggml_is_quantized(src0->type)) {
6360+
ggml_cuda_op_mul_mat_vec(src0, src1, dst, src0_ddq, nullptr, src1_ddf, dst_ddf, 0, 0, ne01, 0, cudaStream_main);
6361+
} else {
6362+
GGML_ASSERT(false);
6363+
}
63596364
}
63606365

63616366
void ggml_cuda_mul_mat_vec_nc(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst){
@@ -6402,7 +6407,7 @@ void ggml_cuda_mul_mat(const ggml_tensor * src0, const ggml_tensor * src1, ggml_
64026407
src1->backend == GGML_BACKEND_GPU && dst->backend == GGML_BACKEND_GPU;
64036408
const bool src0_is_quantized = ggml_is_quantized(src0->type);
64046409

6405-
if (all_on_device && !src0_is_quantized && ggml_is_permuted(src0) && ggml_is_permuted(src1) && src1->ne[1] == 1) {
6410+
if (all_on_device && ggml_is_permuted(src0) && ggml_is_permuted(src1) && src1->ne[1] == 1) {
64066411
ggml_cuda_mul_mat_vec_p021(src0, src1, dst);
64076412
} else if (all_on_device && !ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && src1->ne[1] == 1) {
64086413
ggml_cuda_mul_mat_vec_nc(src0, src1, dst);

0 commit comments

Comments
 (0)