Commit 872d314

Revert "Use cuBLAS for large batches and quants with block size 16 (ikawrakow#559)"
This reverts commit 31bd318.
1 parent 627c6fc · commit 872d314

File tree

1 file changed: +16, -18 lines

ggml/src/ggml-cuda/mmq.cu

Lines changed: 16 additions & 18 deletions
@@ -163,33 +163,20 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11) {
     bool mmq_supported;
 
     switch (type) {
-        case GGML_TYPE_Q2_K: mmq_supported = ne11 < 384; break;
-        case GGML_TYPE_Q3_K:
-        case GGML_TYPE_Q6_K:
-        case GGML_TYPE_IQ2_XS:
-        case GGML_TYPE_IQ2_S:
-            mmq_supported = ne11 < 1536;
-            break;
-        case GGML_TYPE_IQ2_K:
-        case GGML_TYPE_IQ3_K:
-        case GGML_TYPE_IQ4_K:
-        case GGML_TYPE_IQ5_K:
-        case GGML_TYPE_IQ6_K:
-        case GGML_TYPE_IQ2_K_R4:
-        case GGML_TYPE_IQ3_K_R4:
-        case GGML_TYPE_IQ4_K_R4:
-        case GGML_TYPE_IQ5_K_R4:
-            mmq_supported = ne11 < 1024;
-            break;
         case GGML_TYPE_Q4_0:
         case GGML_TYPE_Q4_1:
         case GGML_TYPE_Q5_0:
         case GGML_TYPE_Q5_1:
         case GGML_TYPE_Q6_0:
         case GGML_TYPE_Q8_0:
+        case GGML_TYPE_Q2_K:
+        case GGML_TYPE_Q3_K:
         case GGML_TYPE_Q4_K:
         case GGML_TYPE_Q5_K:
+        case GGML_TYPE_Q6_K:
         case GGML_TYPE_IQ2_XXS:
+        case GGML_TYPE_IQ2_XS:
+        case GGML_TYPE_IQ2_S:
         case GGML_TYPE_IQ3_XXS:
         case GGML_TYPE_IQ3_S:
         case GGML_TYPE_IQ1_S:
@@ -201,11 +188,22 @@ bool ggml_cuda_should_use_mmq(enum ggml_type type, int cc, int64_t ne11) {
         case GGML_TYPE_IQ5_KS:
         case GGML_TYPE_IQ5_KS_R4:
         case GGML_TYPE_IQ2_KS:
+        case GGML_TYPE_IQ2_K:
+        case GGML_TYPE_IQ3_K:
+        case GGML_TYPE_IQ4_K:
+        case GGML_TYPE_IQ5_K:
+        case GGML_TYPE_IQ6_K:
         case GGML_TYPE_IQ2_KT:
         case GGML_TYPE_IQ3_KT:
         case GGML_TYPE_IQ4_KT:
             mmq_supported = true;
             break;
+        case GGML_TYPE_IQ2_K_R4:
+        case GGML_TYPE_IQ3_K_R4:
+        case GGML_TYPE_IQ4_K_R4:
+        case GGML_TYPE_IQ5_K_R4:
+            mmq_supported = ne11 < 1024;
+            break;
         default:
             mmq_supported = false;
             break;
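
For orientation, below is a minimal C++ sketch of the dispatch as it stands after this revert, condensed to a few representative quant types from the diff. The enum values and should_use_mmq_sketch are hypothetical stand-ins, not the real API; the actual function is ggml_cuda_should_use_mmq in ggml/src/ggml-cuda/mmq.cu, and its compute-capability (cc) checks sit outside this hunk and are elided here.

// Hedged sketch, not the actual source: the effective MMQ-vs-cuBLAS choice
// after the revert, for a handful of the types named in the diff above.
#include <cstdint>
#include <cstdio>

// Hypothetical stand-ins for the ggml_type values in the diff.
enum sketch_type { T_Q2_K, T_Q6_K, T_IQ2_S, T_IQ4_K, T_IQ4_K_R4, T_OTHER };

static bool should_use_mmq_sketch(sketch_type type, int64_t ne11) {
    switch (type) {
        // Post-revert: these block-size-16 quants pick MMQ at any batch size,
        // rather than falling back to cuBLAS once ne11 crosses the reverted
        // thresholds (384 for Q2_K, 1536 for Q6_K/IQ2_S, 1024 for IQ4_K).
        case T_Q2_K:
        case T_Q6_K:
        case T_IQ2_S:
        case T_IQ4_K:
            return true;
        // The row-interleaved _R4 variants keep the ne11 < 1024 cutoff.
        case T_IQ4_K_R4:
            return ne11 < 1024;
        default:
            return false;
    }
}

int main() {
    // Example: at ne11 = 512, the reverted code would have sent Q2_K to
    // cuBLAS (512 >= 384); after this revert it stays on MMQ.
    std::printf("Q2_K     ne11=512:  mmq=%d\n", should_use_mmq_sketch(T_Q2_K, 512));
    std::printf("IQ4_K_R4 ne11=2048: mmq=%d\n", should_use_mmq_sketch(T_IQ4_K_R4, 2048));
    return 0;
}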
