Skip to content

Commit 7c5f444

Browse files
committed
Convergence LCPP/IKL and cleanup part 2
1 parent c7a3665 commit 7c5f444

File tree

16 files changed

+123
-116
lines changed

16 files changed

+123
-116
lines changed

ggml/src/ggml-cpu/ggml-cpu-quants.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
#include "ggml-cpu-impl.h"
88
#include "ggml-cpu.h"
99

10+
#include "iqk_croco/iqk_quantize_croco.h"
11+
1012
#include <math.h>
1113
#include <string.h>
1214
#include <assert.h>

ggml/src/ggml-cuda/convert.cu

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
#include "convert.cuh"
22
#include "dequantize.cuh"
33

4+
#include <cstdint>
5+
46
#define CUDA_Q8_0_NE_ALIGN 2048
57

68
#define UNUSED GGML_UNUSED
@@ -1464,10 +1466,10 @@ to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type) {
14641466
return dequantize_row_iq1_s_cuda;
14651467
case GGML_TYPE_IQ1_M:
14661468
return dequantize_row_iq1_m_cuda;
1467-
// case GGML_TYPE_IQ1_BN:
1468-
// return dequantize_row_iq1_bn_cuda;
1469-
// case GGML_TYPE_IQ2_BN:
1470-
// return dequantize_row_iq2_bn_cuda;
1469+
case GGML_TYPE_IQ1_BN:
1470+
return dequantize_row_iq1_bn_cuda;
1471+
case GGML_TYPE_IQ2_BN:
1472+
return dequantize_row_iq2_bn_cuda;
14711473
case GGML_TYPE_IQ4_NL:
14721474
return dequantize_row_iq4_nl_cuda;
14731475
case GGML_TYPE_IQ4_XS:
@@ -1548,10 +1550,10 @@ to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type) {
15481550
return dequantize_row_iq1_s_cuda;
15491551
case GGML_TYPE_IQ1_M:
15501552
return dequantize_row_iq1_m_cuda;
1551-
// case GGML_TYPE_IQ1_BN:
1552-
// return dequantize_row_iq1_bn_cuda;
1553-
// case GGML_TYPE_IQ2_BN:
1554-
// return dequantize_row_iq2_bn_cuda;
1553+
case GGML_TYPE_IQ1_BN:
1554+
return dequantize_row_iq1_bn_cuda;
1555+
case GGML_TYPE_IQ2_BN:
1556+
return dequantize_row_iq2_bn_cuda;
15551557
case GGML_TYPE_IQ4_NL:
15561558
return dequantize_row_iq4_nl_cuda;
15571559
case GGML_TYPE_IQ4_XS:

ggml/src/ggml-cuda/fattn-tile-f16.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ void ggml_cuda_flash_attn_ext_tile_f16(ggml_backend_cuda_context & ctx, ggml_ten
329329
const ggml_tensor * Q = dst->src[0];
330330

331331
const int32_t precision = KQV->op_params[3];
332-
// GGML_ASSERT_CONTINUE(precision == GGML_PREC_DEFAULT);
332+
GGML_ASSERT_CONTINUE(precision == GGML_PREC_DEFAULT);
333333

334334
float logit_softcap;
335335
memcpy(&logit_softcap, (const float *) KQV->op_params + 2, sizeof(float));

ggml/src/ggml-cuda/fattn-vec-f16.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -370,7 +370,7 @@ void ggml_cuda_flash_attn_ext_vec_f16_case(ggml_backend_cuda_context & ctx, ggml
370370
const ggml_tensor * V = dst->src[2];
371371

372372
const int32_t precision = KQV->op_params[3];
373-
// GGML_ASSERT_CONTINUE(precision == GGML_PREC_DEFAULT);
373+
GGML_ASSERT_CONTINUE(precision == GGML_PREC_DEFAULT);
374374

375375
GGML_ASSERT(K->type == type_K);
376376
GGML_ASSERT(V->type == type_V);

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -466,6 +466,7 @@ struct ggml_cuda_pool_vmm : public ggml_cuda_pool {
466466
#if defined(GGML_USE_HIP)
467467
mappings.push_back({start_ptr, reserve_size});
468468
#endif
469+
469470
// the memory allocation handle is no longer needed after mapping
470471
CU_CHECK(cuMemRelease(handle));
471472

ggml/src/ggml-cuda/iqk_mmvq.cu

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -919,3 +919,4 @@ void mul_mat_vec_iq2_bn_q8_1_cuda(
919919

920920
iqk_mul_mat_vec_q_cuda<GGML_TYPE_IQ2_BN, 1, vec_dot_iq2_bn_q8_1>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst, stream);
921921
}
922+

ggml/src/ggml-cuda/mma.cuh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,6 @@ namespace ggml_cuda_mma {
216216

217217
static __device__ __forceinline__ void mma(
218218
tile<16, 8, int> & D, const tile<16, 4, int> & A, const tile<8, 4, int> & B) {
219-
220219
#ifdef NEW_MMA_AVAILABLE
221220
#if __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE
222221
asm("mma.sync.aligned.m16n8k16.row.col.s32.s8.s8.s32 {%0, %1, %2, %3}, {%4, %5}, {%6}, {%0, %1, %2, %3};"
@@ -334,7 +333,6 @@ namespace ggml_cuda_mma {
334333

335334
static __device__ __forceinline__ void mma(
336335
tile<16, 8, float> & D, const tile<16, 8, half2> & A, const tile<8, 8, half2> & B) {
337-
338336
#ifdef NEW_MMA_AVAILABLE
339337
const int * Axi = (const int *) A.x;
340338
const int * Bxi = (const int *) B.x;

ggml/src/ggml-cuda/mmq.cu

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
#include "mmq.cuh"
2+
#include "quantize.cuh"
3+
4+
#include <vector>
25

36
void ggml_cuda_op_mul_mat_q(
47
ggml_backend_cuda_context & ctx,

ggml/src/ggml-cuda/template-instances/generate_cu_files.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@
2727
"GGML_TYPE_IQ2_XXS", "GGML_TYPE_IQ2_XS", "GGML_TYPE_IQ2_S", "GGML_TYPE_IQ3_XXS", "GGML_TYPE_IQ3_S",
2828
"GGML_TYPE_IQ1_S", "GGML_TYPE_IQ4_NL", "GGML_TYPE_IQ4_XS", "GGML_TYPE_Q6_0",
2929
"GGML_TYPE_IQ2_KS", "GGML_TYPE_IQ4_KS", "GGML_TYPE_IQ5_KS",
30-
"GGML_TYPE_IQ2_K", "GGML_TYPE_IQ3_K", "GGML_TYPE_IQ4_K", "GGML_TYPE_IQ5_K", "GGML_TYPE_IQ6_K"
30+
"GGML_TYPE_IQ2_K",
31+
"GGML_TYPE_IQ3_K", "GGML_TYPE_IQ4_K", "GGML_TYPE_IQ5_K", "GGML_TYPE_IQ6_K"
3132
]
3233

3334
SOURCE_MMQ = """// This file has been autogenerated by generate_cu_files.py, do not edit manually.

ggml/src/ggml-cuda/vendors/cuda.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
#include <cublas_v2.h>
66
#include <cuda_bf16.h>
77
#include <cuda_fp16.h>
8-
#include <cuda_bf16.h>
98

109
#if CUDART_VERSION < 11020
1110
#define CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED CU_DEVICE_ATTRIBUTE_VIRTUAL_ADDRESS_MANAGEMENT_SUPPORTED

0 commit comments

Comments
 (0)