|
5 | 5 | #include "ggml-quants.h"
|
6 | 6 | #include "quants.h"
|
7 | 7 |
|
8 |
| -#if defined(__APPLE__) |
9 |
| -#include "apple-fallback.h" |
10 |
| -#endif |
| 8 | +#include "arch-fallback.h" |
11 | 9 |
|
12 | 10 | #include <string.h>
|
13 | 11 | #include <assert.h>
|
@@ -42,12 +40,10 @@ void quantize_row_q5_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, in
|
42 | 40 | void quantize_row_q8_0_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
|
43 | 41 | quantize_row_q8_0_ref(x, y, k);
|
44 | 42 | }
|
45 |
| -GGML_CPU_NATIVE_IMPL(quantize_row_q8_0) |
46 | 43 |
|
47 | 44 | void quantize_row_q8_1_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
|
48 | 45 | quantize_row_q8_1_ref(x, y, k);
|
49 | 46 | }
|
50 |
| -GGML_CPU_NATIVE_IMPL(quantize_row_q8_1) |
51 | 47 |
|
52 | 48 | //
|
53 | 49 | // 2-6 bit quantization in super-blocks
|
@@ -108,7 +104,6 @@ void quantize_row_tq2_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy,
|
108 | 104 | void quantize_row_q8_K_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
|
109 | 105 | quantize_row_q8_K_ref(x, y, k);
|
110 | 106 | }
|
111 |
| -GGML_CPU_NATIVE_IMPL(quantize_row_q8_K) |
112 | 107 |
|
113 | 108 | //===================================== Dot products =================================
|
114 | 109 |
|
@@ -147,7 +142,6 @@ void ggml_vec_dot_q4_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
147 | 142 |
|
148 | 143 | *s = sumf;
|
149 | 144 | }
|
150 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q4_0_q8_0) |
151 | 145 |
|
152 | 146 | // TODO: add WASM SIMD
|
153 | 147 | void ggml_vec_dot_q4_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
@@ -185,7 +179,6 @@ void ggml_vec_dot_q4_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
185 | 179 |
|
186 | 180 | *s = sumf;
|
187 | 181 | }
|
188 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q4_1_q8_1) |
189 | 182 |
|
190 | 183 | void ggml_vec_dot_q5_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
191 | 184 | const int qk = QK8_0;
|
@@ -229,7 +222,6 @@ void ggml_vec_dot_q5_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
229 | 222 |
|
230 | 223 | *s = sumf;
|
231 | 224 | }
|
232 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q5_0_q8_0) |
233 | 225 |
|
234 | 226 | void ggml_vec_dot_q5_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
235 | 227 | const int qk = QK8_1;
|
@@ -273,7 +265,6 @@ void ggml_vec_dot_q5_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
273 | 265 |
|
274 | 266 | *s = sumf;
|
275 | 267 | }
|
276 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q5_1_q8_1) |
277 | 268 |
|
278 | 269 | void ggml_vec_dot_q8_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
279 | 270 | const int qk = QK8_0;
|
@@ -304,7 +295,6 @@ void ggml_vec_dot_q8_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
304 | 295 |
|
305 | 296 | *s = sumf;
|
306 | 297 | }
|
307 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q8_0_q8_0) |
308 | 298 |
|
309 | 299 | void ggml_vec_dot_tq1_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
310 | 300 | assert(nrc == 1);
|
@@ -357,7 +347,6 @@ void ggml_vec_dot_tq1_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
|
357 | 347 |
|
358 | 348 | *s = sumf;
|
359 | 349 | }
|
360 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_tq1_0_q8_K) |
361 | 350 |
|
362 | 351 | void ggml_vec_dot_tq2_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
363 | 352 | assert(nrc == 1);
|
@@ -390,7 +379,6 @@ void ggml_vec_dot_tq2_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
|
390 | 379 |
|
391 | 380 | *s = sumf;
|
392 | 381 | }
|
393 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_tq2_0_q8_K) |
394 | 382 |
|
395 | 383 | void ggml_vec_dot_q2_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
396 | 384 | assert(nrc == 1);
|
@@ -443,7 +431,6 @@ void ggml_vec_dot_q2_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
443 | 431 | }
|
444 | 432 | *s = sumf;
|
445 | 433 | }
|
446 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q2_K_q8_K) |
447 | 434 |
|
448 | 435 | void ggml_vec_dot_q3_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
449 | 436 | assert(n % QK_K == 0);
|
@@ -523,7 +510,6 @@ void ggml_vec_dot_q3_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
523 | 510 | for (int l = 0; l < 8; ++l) sumf += sums[l];
|
524 | 511 | *s = sumf;
|
525 | 512 | }
|
526 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q3_K_q8_K) |
527 | 513 |
|
528 | 514 | void ggml_vec_dot_q4_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
529 | 515 | assert(n % QK_K == 0);
|
@@ -599,7 +585,6 @@ void ggml_vec_dot_q4_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
599 | 585 | for (int l = 0; l < 8; ++l) sumf += sums[l];
|
600 | 586 | *s = sumf;
|
601 | 587 | }
|
602 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q4_K_q8_K) |
603 | 588 |
|
604 | 589 | void ggml_vec_dot_q5_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
605 | 590 | assert(n % QK_K == 0);
|
@@ -680,7 +665,6 @@ void ggml_vec_dot_q5_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
680 | 665 | for (int l = 0; l < 8; ++l) sumf += sums[l];
|
681 | 666 | *s = sumf;
|
682 | 667 | }
|
683 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q5_K_q8_K) |
684 | 668 |
|
685 | 669 | void ggml_vec_dot_q6_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
686 | 670 | assert(n % QK_K == 0);
|
@@ -736,7 +720,6 @@ void ggml_vec_dot_q6_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c
|
736 | 720 | for (int l = 0; l < 8; ++l) sumf += sums[l];
|
737 | 721 | *s = sumf;
|
738 | 722 | }
|
739 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q6_K_q8_K) |
740 | 723 |
|
741 | 724 | void ggml_vec_dot_iq2_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
742 | 725 | assert(n % QK_K == 0);
|
@@ -779,7 +762,6 @@ void ggml_vec_dot_iq2_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs
|
779 | 762 | }
|
780 | 763 | *s = 0.125f * sumf;
|
781 | 764 | }
|
782 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq2_xxs_q8_K) |
783 | 765 |
|
784 | 766 | void ggml_vec_dot_iq2_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
785 | 767 | assert(n % QK_K == 0);
|
@@ -830,7 +812,6 @@ void ggml_vec_dot_iq2_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
|
830 | 812 | }
|
831 | 813 | *s = 0.125f * sumf;
|
832 | 814 | }
|
833 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq2_xs_q8_K) |
834 | 815 |
|
835 | 816 | void ggml_vec_dot_iq2_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
836 | 817 | assert(n % QK_K == 0);
|
@@ -883,7 +864,6 @@ void ggml_vec_dot_iq2_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
|
883 | 864 |
|
884 | 865 | *s = 0.125f * sumf;
|
885 | 866 | }
|
886 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq2_s_q8_K) |
887 | 867 |
|
888 | 868 | void ggml_vec_dot_iq3_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
889 | 869 | assert(n % QK_K == 0);
|
@@ -928,7 +908,6 @@ void ggml_vec_dot_iq3_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs
|
928 | 908 | }
|
929 | 909 | *s = 0.25f * sumf;
|
930 | 910 | }
|
931 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq3_xxs_q8_K) |
932 | 911 |
|
933 | 912 | void ggml_vec_dot_iq3_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
934 | 913 | assert(n % QK_K == 0);
|
@@ -985,7 +964,6 @@ void ggml_vec_dot_iq3_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
|
985 | 964 | }
|
986 | 965 | *s = sumf;
|
987 | 966 | }
|
988 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq3_s_q8_K) |
989 | 967 |
|
990 | 968 | void ggml_vec_dot_iq1_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
991 | 969 | assert(n % QK_K == 0);
|
@@ -1029,7 +1007,6 @@ void ggml_vec_dot_iq1_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
|
1029 | 1007 |
|
1030 | 1008 | *s = sumf;
|
1031 | 1009 | }
|
1032 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq1_s_q8_K) |
1033 | 1010 |
|
1034 | 1011 | void ggml_vec_dot_iq1_m_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
1035 | 1012 | assert(n % QK_K == 0);
|
@@ -1091,7 +1068,6 @@ void ggml_vec_dot_iq1_m_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
|
1091 | 1068 |
|
1092 | 1069 | *s = sumf;
|
1093 | 1070 | }
|
1094 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq1_m_q8_K) |
1095 | 1071 |
|
1096 | 1072 | void ggml_vec_dot_iq4_nl_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
1097 | 1073 | assert(nrc == 1);
|
@@ -1121,7 +1097,6 @@ void ggml_vec_dot_iq4_nl_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs,
|
1121 | 1097 | }
|
1122 | 1098 | *s = sumf;
|
1123 | 1099 | }
|
1124 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq4_nl_q8_0) |
1125 | 1100 |
|
1126 | 1101 | void ggml_vec_dot_iq4_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) {
|
1127 | 1102 | assert(nrc == 1);
|
@@ -1168,7 +1143,6 @@ void ggml_vec_dot_iq4_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs,
|
1168 | 1143 | }
|
1169 | 1144 | *s = sumf;
|
1170 | 1145 | }
|
1171 |
| -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq4_xs_q8_K) |
1172 | 1146 |
|
1173 | 1147 | // ============================ 4-bit non-linear quants
|
1174 | 1148 |
|
|
0 commit comments