Skip to content

Commit 69c487f

Browse files
CUDA: MMQ code deduplication + iquant support (#8495)
* CUDA: MMQ code deduplication + iquant support * 1 less parallel job for CI build
1 parent 07283b1 commit 69c487f

File tree

11 files changed

+809
-648
lines changed

11 files changed

+809
-648
lines changed

.github/workflows/build.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -860,7 +860,7 @@ jobs:
860860
mkdir build
861861
cd build
862862
cmake .. -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON
863-
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
863+
cmake --build . --config Release -j $((${env:NUMBER_OF_PROCESSORS} - 1))
864864
865865
- name: Determine tag name
866866
id: tag

ggml/src/ggml-cuda/mmq.cu

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,24 @@ void ggml_cuda_op_mul_mat_q(
5959
case GGML_TYPE_Q6_K:
6060
mul_mat_q_case<GGML_TYPE_Q6_K>(ctx, args, stream);
6161
break;
62+
case GGML_TYPE_IQ2_XXS:
63+
mul_mat_q_case<GGML_TYPE_IQ2_XXS>(ctx, args, stream);
64+
break;
65+
case GGML_TYPE_IQ2_XS:
66+
mul_mat_q_case<GGML_TYPE_IQ2_XS>(ctx, args, stream);
67+
break;
68+
case GGML_TYPE_IQ2_S:
69+
mul_mat_q_case<GGML_TYPE_IQ2_S>(ctx, args, stream);
70+
break;
71+
case GGML_TYPE_IQ3_XXS:
72+
mul_mat_q_case<GGML_TYPE_IQ3_XXS>(ctx, args, stream);
73+
break;
74+
case GGML_TYPE_IQ3_S:
75+
mul_mat_q_case<GGML_TYPE_IQ3_S>(ctx, args, stream);
76+
break;
77+
case GGML_TYPE_IQ1_S:
78+
mul_mat_q_case<GGML_TYPE_IQ1_S>(ctx, args, stream);
79+
break;
6280
case GGML_TYPE_IQ4_XS:
6381
mul_mat_q_case<GGML_TYPE_IQ4_XS>(ctx, args, stream);
6482
break;
@@ -93,6 +111,12 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11) {
93111
case GGML_TYPE_Q4_K:
94112
case GGML_TYPE_Q5_K:
95113
case GGML_TYPE_Q6_K:
114+
case GGML_TYPE_IQ2_XXS:
115+
case GGML_TYPE_IQ2_XS:
116+
case GGML_TYPE_IQ2_S:
117+
case GGML_TYPE_IQ3_XXS:
118+
case GGML_TYPE_IQ3_S:
119+
case GGML_TYPE_IQ1_S:
96120
case GGML_TYPE_IQ4_XS:
97121
case GGML_TYPE_IQ4_NL:
98122
mmq_supported = true;

ggml/src/ggml-cuda/mmq.cuh

Lines changed: 731 additions & 646 deletions
Large diffs are not rendered by default.

ggml/src/ggml-cuda/template-instances/generate_cu_files.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@
2323
TYPES_MMQ = [
2424
"GGML_TYPE_Q4_0", "GGML_TYPE_Q4_1", "GGML_TYPE_Q5_0", "GGML_TYPE_Q5_1", "GGML_TYPE_Q8_0",
2525
"GGML_TYPE_Q2_K", "GGML_TYPE_Q3_K", "GGML_TYPE_Q4_K", "GGML_TYPE_Q5_K", "GGML_TYPE_Q6_K",
26-
"GGML_TYPE_IQ4_NL", "GGML_TYPE_IQ4_XS"
26+
"GGML_TYPE_IQ2_XXS", "GGML_TYPE_IQ2_XS", "GGML_TYPE_IQ2_S", "GGML_TYPE_IQ3_XXS", "GGML_TYPE_IQ3_S",
27+
"GGML_TYPE_IQ1_S", "GGML_TYPE_IQ4_NL", "GGML_TYPE_IQ4_XS"
2728
]
2829

2930
SOURCE_MMQ = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
2+
3+
#include "../mmq.cuh"
4+
5+
DECL_MMQ_CASE(GGML_TYPE_IQ1_S);
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
2+
3+
#include "../mmq.cuh"
4+
5+
DECL_MMQ_CASE(GGML_TYPE_IQ2_S);
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
2+
3+
#include "../mmq.cuh"
4+
5+
DECL_MMQ_CASE(GGML_TYPE_IQ2_XS);
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
2+
3+
#include "../mmq.cuh"
4+
5+
DECL_MMQ_CASE(GGML_TYPE_IQ2_XXS);
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
2+
3+
#include "../mmq.cuh"
4+
5+
DECL_MMQ_CASE(GGML_TYPE_IQ3_S);
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// This file has been autogenerated by generate_cu_files.py, do not edit manually.
2+
3+
#include "../mmq.cuh"
4+
5+
DECL_MMQ_CASE(GGML_TYPE_IQ3_XXS);

0 commit comments

Comments
 (0)