Skip to content

Commit 9e3c534

Browse files
committed
sync : ggml vulkan (ggml/0)
ggml-ci
1 parent b6c05ce commit 9e3c534

38 files changed

+1089
-144995
lines changed

ggml/src/ggml-vulkan-shaders.hpp

Lines changed: 0 additions & 144957 deletions
This file was deleted.
Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,7 @@
1+
find_package (Threads REQUIRED)
2+
3+
set(TARGET vulkan-shaders-gen)
4+
add_executable(${TARGET} vulkan-shaders-gen.cpp)
5+
install(TARGETS ${TARGET} RUNTIME)
6+
target_compile_features(${TARGET} PRIVATE cxx_std_11)
7+
target_link_libraries(vulkan-shaders-gen PUBLIC Threads::Threads)

ggml/src/vulkan-shaders/add.comp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,11 @@
44
#include "generic_binary_head.comp"
55

66
void main() {
7-
if (gl_GlobalInvocationID.x >= p.ne) {
7+
const uint idx = get_idx();
8+
9+
if (idx >= p.ne) {
810
return;
911
}
1012

11-
data_d[p.d_offset + dst_idx(gl_GlobalInvocationID.x)] = D_TYPE(FLOAT_TYPE(data_a[src0_idx(gl_GlobalInvocationID.x)]) + FLOAT_TYPE(data_b[src1_idx(gl_GlobalInvocationID.x)]));
13+
data_d[p.d_offset + dst_idx(idx)] = D_TYPE(FLOAT_TYPE(data_a[src0_idx(idx)]) + FLOAT_TYPE(data_b[src1_idx(idx)]));
1214
}

ggml/src/vulkan-shaders/clamp.comp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,10 +4,12 @@
44
#include "generic_unary_head.comp"
55

66
void main() {
7-
if (gl_GlobalInvocationID.x >= p.ne) {
7+
const uint idx = get_idx();
8+
9+
if (idx >= p.ne) {
810
return;
911
}
1012

11-
const FLOAT_TYPE val = FLOAT_TYPE(data_a[src0_idx(gl_GlobalInvocationID.x)]);
12-
data_d[p.d_offset + dst_idx(gl_GlobalInvocationID.x)] = D_TYPE(val < p.param1 ? p.param1 : (val > p.param2 ? p.param2 : val));
13+
const FLOAT_TYPE val = FLOAT_TYPE(data_a[src0_idx(idx)]);
14+
data_d[p.d_offset + dst_idx(idx)] = D_TYPE(val < p.param1 ? p.param1 : (val > p.param2 ? p.param2 : val));
1315
}

ggml/src/vulkan-shaders/concat.comp

Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,35 @@
1+
#version 450
2+
3+
#include "types.comp"
4+
#include "generic_binary_head.comp"
5+
6+
void main() {
7+
const uint idx = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x;
8+
const int dim = p.param3;
9+
10+
if (idx >= p.ne) {
11+
return;
12+
}
13+
14+
const uint i3 = idx / (p.ne22*p.ne21*p.ne20);
15+
const uint i3_offset = i3 * p.ne22*p.ne21*p.ne20;
16+
const uint i2 = (idx - i3_offset) / (p.ne21*p.ne20);
17+
const uint i2_offset = i2*p.ne21*p.ne20;
18+
const uint i1 = (idx - i3_offset - i2_offset) / p.ne20;
19+
const uint i0 = idx - i3_offset - i2_offset - i1*p.ne20;
20+
21+
uint o[4] = {0, 0, 0, 0};
22+
o[dim] = dim == 0 ? p.ne00 : (dim == 1 ? p.ne01 : (dim == 2 ? p.ne02 : p.ne03));
23+
24+
const uint src0_idx = i3*p.nb03 + i2*p.nb02 + i1*p.nb01 + i0*p.nb00;
25+
const uint src1_idx = (i3 - o[3])*p.nb13 + (i2 - o[2])*p.nb12 + (i1 - o[1])*p.nb11 + (i0 - o[0])*p.nb10;
26+
const uint dst_idx = i3*p.nb23 + i2*p.nb22 + i1*p.nb21 + i0*p.nb20;
27+
28+
const bool is_src0 = i0 < p.ne00 && i1 < p.ne01 && i2 < p.ne02 && i3 < p.ne03;
29+
30+
#ifndef OPTIMIZATION_ERROR_WORKAROUND
31+
data_d[p.d_offset + dst_idx] = D_TYPE(is_src0 ? data_a[src0_idx] : data_b[src1_idx]);
32+
#else
33+
data_d[p.d_offset + dst_idx] = is_src0 ? data_a[src0_idx] : data_b[src1_idx];
34+
#endif
35+
}

ggml/src/vulkan-shaders/copy.comp

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,13 +4,15 @@
44
#include "generic_unary_head.comp"
55

66
void main() {
7-
if (gl_GlobalInvocationID.x >= p.ne) {
7+
const uint idx = get_idx();
8+
9+
if (idx >= p.ne) {
810
return;
911
}
1012

1113
#ifndef OPTIMIZATION_ERROR_WORKAROUND
12-
data_d[p.d_offset + dst_idx(gl_GlobalInvocationID.x)] = D_TYPE(data_a[src0_idx(gl_GlobalInvocationID.x)]);
14+
data_d[p.d_offset + dst_idx(idx)] = D_TYPE(data_a[src0_idx(idx)]);
1315
#else
14-
data_d[p.d_offset + dst_idx(gl_GlobalInvocationID.x)] = data_a[src0_idx(gl_GlobalInvocationID.x)];
16+
data_d[p.d_offset + dst_idx(idx)] = data_a[src0_idx(idx)];
1517
#endif
1618
}

ggml/src/vulkan-shaders/cos.comp

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,15 @@
1+
#version 450
2+
3+
#include "types.comp"
4+
#include "generic_unary_head.comp"
5+
6+
void main() {
7+
const uint idx = get_idx();
8+
9+
if (idx >= p.ne) {
10+
return;
11+
}
12+
13+
const FLOAT_TYPE val = FLOAT_TYPE(data_a[src0_idx(idx)]);
14+
data_d[p.d_offset + dst_idx(idx)] = D_TYPE(cos(val));
15+
}

ggml/src/vulkan-shaders/dequant_funcs.comp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,3 +58,11 @@ vec2 dequantize(uint ib, uint iqs, uint a_offset) {
5858
return vec2(int(data_a[a_offset + ib].qs[iqs]), int(data_a[a_offset + ib].qs[iqs + 1])) * d;
5959
}
6060
#endif
61+
62+
#if defined(DATA_A_IQ4_NL)
63+
vec2 dequantize(uint ib, uint iqs, uint a_offset) {
64+
const float d = float(data_a[a_offset + ib].d);
65+
const uint vui = uint(data_a[a_offset + ib].qs[iqs]);
66+
return vec2(kvalues_iq4nl[vui & 0xF], kvalues_iq4nl[vui >> 4]) * d;
67+
}
68+
#endif
Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,30 @@
1+
#version 450
2+
3+
#include "dequant_head.comp"
4+
5+
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
6+
7+
layout (binding = 0) readonly buffer A {block_iq4_nl data_a[];};
8+
layout (binding = 1) writeonly buffer D {D_TYPE data_b[];};
9+
10+
void main() {
11+
const uint i = gl_WorkGroupID.x * 4 + gl_LocalInvocationID.x / 64;
12+
13+
const uint tid = gl_LocalInvocationID.x % 64;
14+
const uint il = tid/32;
15+
const uint ir = tid%32;
16+
const uint ib = 32*i + ir;
17+
if (ib >= p.nel / 32) {
18+
return;
19+
}
20+
21+
const uint q_idx = 8*il;
22+
const uint b_idx = 1024*i + 32*ir + q_idx;
23+
24+
const float d = float(data_a[ib].d);
25+
26+
[[unroll]] for (uint l = 0; l < 8; ++l) {
27+
data_b[b_idx + l + 0] = D_TYPE(d * kvalues_iq4nl[data_a[ib].qs[q_idx + l] & 0xF]);
28+
data_b[b_idx + l + 16] = D_TYPE(d * kvalues_iq4nl[data_a[ib].qs[q_idx + l] >> 4]);
29+
}
30+
}

ggml/src/vulkan-shaders/dequant_q4_0.comp

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -18,15 +18,13 @@ void main() {
1818
return;
1919
}
2020

21-
const uint b_idx = 1024*i + 32*ir + 8*il;
21+
const uint q_idx = 8*il;
22+
const uint b_idx = 1024*i + 32*ir + q_idx;
2223

2324
const float d = float(data_a[ib].d);
24-
const float dm = -8.0f * d;
25-
26-
const uint q_idx = 8*il;
2725

2826
[[unroll]] for (uint l = 0; l < 8; ++l) {
29-
data_b[b_idx + l + 0] = D_TYPE(d * (data_a[ib].qs[q_idx + l] & 0xF) + dm);
30-
data_b[b_idx + l + 16] = D_TYPE(d * (data_a[ib].qs[q_idx + l] >> 4) + dm);
27+
data_b[b_idx + l + 0] = D_TYPE(d * ((data_a[ib].qs[q_idx + l] & 0xF) - 8.0f));
28+
data_b[b_idx + l + 16] = D_TYPE(d * ((data_a[ib].qs[q_idx + l] >> 4) - 8.0f));
3129
}
3230
}

0 commit comments

Comments
 (0)