Commit 81f4797

Merge branch 'master' into huydt/mb

2 parents 1004327 + d17a809

32 files changed: +1048 -451 lines

CMakeLists.txt

Lines changed: 5 additions & 0 deletions
@@ -159,6 +159,11 @@ if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
     # ... otherwise assume ggml is added by a parent CMakeLists.txt
 endif()
 
+if (MINGW)
+    # Target Windows 8 for PrefetchVirtualMemory
+    add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
+endif()
+
 #
 # build the library
 #
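
Note: this hunk (together with the ggml/CMakeLists.txt and ggml/src/CMakeLists.txt hunks further down) scopes the `_WIN32_WINNT` definition to MinGW builds of the consuming project. The Windows headers only declare `PrefetchVirtualMemory` when `_WIN32_WINNT` is at least 0x0602 (Windows 8). A minimal sketch of the kind of call this definition unlocks — illustrative only, not the project's actual code:

```cpp
// Sketch: why _WIN32_WINNT must be >= 0x0602 under MinGW.
// PrefetchVirtualMemory is declared by <windows.h> only for Windows 8+.
#define _WIN32_WINNT 0x0602
#include <windows.h>

static void prefetch_mapping(void * addr, size_t len) {
    WIN32_MEMORY_RANGE_ENTRY range;
    range.VirtualAddress = addr;
    range.NumberOfBytes  = (SIZE_T) len;
    // Hint the OS to page in the mapped region ahead of first access.
    PrefetchVirtualMemory(GetCurrentProcess(), 1, &range, 0);
}
```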

README.md

Lines changed: 1 addition & 0 deletions
@@ -3,6 +3,7 @@
 ![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png)
 
 [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
+[![Release](https://img.shields.io/github/v/release/ggml-org/llama.cpp)](https://github.com/ggml-org/llama.cpp/releases)
 [![Server](https://github.com/ggml-org/llama.cpp/actions/workflows/server.yml/badge.svg)](https://github.com/ggml-org/llama.cpp/actions/workflows/server.yml)
 
 [Roadmap](https://github.com/users/ggerganov/projects/7) / [Project status](https://github.com/ggml-org/llama.cpp/discussions/3471) / [Manifesto](https://github.com/ggml-org/llama.cpp/discussions/205) / [ggml](https://github.com/ggml-org/ggml)

ci/run.sh

Lines changed: 14 additions & 1 deletion
@@ -46,7 +46,20 @@ if [ ! -z ${GG_BUILD_METAL} ]; then
 fi
 
 if [ ! -z ${GG_BUILD_CUDA} ]; then
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=native"
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON"
+
+    if command -v nvidia-smi >/dev/null 2>&1; then
+        CUDA_ARCH=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader,nounits 2>/dev/null | head -1 | tr -d '.')
+        if [[ -n "$CUDA_ARCH" && "$CUDA_ARCH" =~ ^[0-9]+$ ]]; then
+            CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_CUDA_ARCHITECTURES=${CUDA_ARCH}"
+        else
+            echo "Warning: Using fallback CUDA architectures"
+            CMAKE_EXTRA="${CMAKE_EXTRA} -DCMAKE_CUDA_ARCHITECTURES=61;70;75;80;86;89"
+        fi
+    else
+        echo "Error: nvidia-smi not found, cannot build with CUDA"
+        exit 1
+    fi
 fi
 
 if [ ! -z ${GG_BUILD_SYCL} ]; then

convert_hf_to_gguf.py

Lines changed: 1 addition & 2 deletions
@@ -3712,8 +3712,7 @@ def set_gguf_parameters(self):
         self._try_set_pooling_type()
 
         if self.cls_out_labels:
-            key_name = gguf.Keys.Classifier.OUTPUT_LABELS.format(arch = gguf.MODEL_ARCH_NAMES[self.model_arch])
-            self.gguf_writer.add_array(key_name, [v for k, v in sorted(self.cls_out_labels.items())])
+            self.gguf_writer.add_classifier_output_labels([v for k, v in sorted(self.cls_out_labels.items())])
 
     def set_vocab(self):
         tokens, toktypes, tokpre = self.get_vocab_base()

examples/embedding/embedding.cpp

Lines changed: 17 additions & 2 deletions
@@ -236,9 +236,24 @@ int main(int argc, char ** argv) {
             LOG("\n");
         }
     } else if (pooling_type == LLAMA_POOLING_TYPE_RANK) {
+        const uint32_t n_cls_out = llama_model_n_cls_out(model);
+        std::vector<std::string> cls_out_labels;
+
+        for (uint32_t i = 0; i < n_cls_out; i++) {
+            const char * label = llama_model_cls_label(model, i);
+            const std::string label_i(label == nullptr ? "" : label);
+            cls_out_labels.emplace_back(label_i.empty() ? std::to_string(i) : label_i);
+        }
+
         for (int j = 0; j < n_embd_count; j++) {
-            // NOTE: if you change this log - update the tests in ci/run.sh
-            LOG("rerank score %d: %8.3f\n", j, emb[j * n_embd]);
+            for (uint32_t i = 0; i < n_cls_out; i++) {
+                // NOTE: if you change this log - update the tests in ci/run.sh
+                if (n_cls_out == 1) {
+                    LOG("rerank score %d: %8.3f\n", j, emb[j * n_embd]);
+                } else {
+                    LOG("rerank score %d: %8.3f [%s]\n", j, emb[j * n_embd + i], cls_out_labels[i].c_str());
+                }
+            }
         }
     } else {
         // print the first part of the embeddings or for a single prompt, the full embedding
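
The hunk above enumerates classifier heads through `llama_model_n_cls_out()` and `llama_model_cls_label()`, falling back to the numeric index when a head has no stored label. A minimal standalone sketch of the same pattern — assuming the llama.h from this commit; the model path and error handling are placeholders:

```cpp
// Sketch: query classifier output labels via the API used in embedding.cpp.
#include "llama.h"
#include <cstdio>

int main() {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_model_load_from_file("model.gguf", mparams); // placeholder path
    if (model == nullptr) {
        return 1;
    }

    const uint32_t n_cls_out = llama_model_n_cls_out(model);
    for (uint32_t i = 0; i < n_cls_out; i++) {
        const char * label = llama_model_cls_label(model, i); // may be null
        printf("class %u: %s\n", i, label ? label : "(unnamed)");
    }

    llama_model_free(model);
    llama_backend_free();
    return 0;
}
```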

ggml/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
@@ -137,7 +137,7 @@ set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM")
 set(GGML_CPU_POWERPC_CPUTYPE "" CACHE STRING "ggml: CPU type for PowerPC")
 
 
-if (WIN32)
+if (MINGW)
     set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows version")
 endif()
 
ggml/src/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,6 @@ if (NOT MSVC)
125125
endif()
126126

127127
if (MINGW)
128-
# Target Windows 8 for PrefetchVirtualMemory
129128
add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
130129
endif()
131130

ggml/src/ggml-vulkan/ggml-vulkan.cpp

Lines changed: 158 additions & 30 deletions
Large diffs are not rendered by default.
ggml/src/ggml-vulkan/vulkan-shaders/conv_transpose_1d.comp

Lines changed: 98 additions & 0 deletions
@@ -0,0 +1,98 @@
+#version 450
+
+#include "types.comp"
+
+layout (binding = 0) readonly buffer A {A_TYPE data_a[];}; // src0 - kernel: [K, Cout, Cin]
+layout (binding = 1) readonly buffer B {B_TYPE data_b[];}; // src1 - input: [L, Cin]
+layout (binding = 2) writeonly buffer D {D_TYPE data_d[];}; // dst - result [KL, Cout]
+
+layout(local_size_x = 128 , local_size_y = 1, local_size_z = 1) in;
+
+layout (push_constant) uniform parameter {
+    uint32_t Cout;
+    uint32_t Cin;
+    uint32_t K;
+    uint32_t L;
+    uint32_t KL;
+
+    uint32_t nb01;
+    uint32_t nb02;
+    uint32_t nb11;
+    uint32_t nb1;
+
+    int32_t s0;
+} p;
+
+
+uint32_t Cout_idx = gl_WorkGroupID.x;
+const uint32_t bs = gl_WorkGroupSize.x;
+uint32_t tid = gl_LocalInvocationID.x;
+// Code is more straightforward if we assume it is bs*s0+K instead of (bs-1)*s0+K.
+uint32_t tmp_len = bs*p.s0+p.K;
+shared D_TYPE tmp[4096];
+
+uint splitWork(uint workSize){
+    return (bs + workSize -1) / bs;
+}
+
+void main(){
+    for(uint32_t i = 0; i < splitWork(tmp_len); i++){
+        uint32_t idx = i*bs+tid;
+        if(idx < tmp_len){
+            tmp[idx] = 0.0;
+        }
+    }
+
+    uint32_t L_blocks = splitWork(p.L);
+    for(uint32_t L_block_id = 0; L_block_id < L_blocks; L_block_id++){
+        if(L_block_id > 0){
+            barrier();
+            // Shift values in tmp to the current processing window
+            for(int i = 0; i < splitWork(tmp_len); i++){
+                uint32_t idx = i*bs+tid;
+                if(idx >= bs*p.s0 && idx < tmp_len){
+                    tmp[idx-bs*p.s0] = tmp[idx];
+                    tmp[idx] = 0.0;
+                }else if(idx >= p.K && idx < bs*p.s0){
+                    tmp[idx] = 0.0;
+                }
+            }
+        }
+        barrier();
+
+        // Save contributions of the block to tmp
+        uint32_t L_idx = L_block_id*bs + tid;
+        for(uint32_t K_idx = 0; K_idx < p.K; K_idx++){
+            D_TYPE dp = 0.0;
+            for(uint32_t Cin_idx = 0; Cin_idx < p.Cin; Cin_idx++){
+                A_TYPE elemKrn = data_a[K_idx + Cout_idx * p.nb01 + Cin_idx * p.nb02];
+                if(L_idx < p.L){
+                    B_TYPE elemInp = data_b[L_idx + Cin_idx*p.nb11];
+                    dp = fma(elemKrn, elemInp, dp);
+                }
+            }
+            tmp[tid*p.s0 + K_idx] += dp;
+            barrier();
+        }
+
+        // Save the computed values except the last block that can have different size
+        uint32_t KLb_idx = L_block_id*bs*p.s0;
+        if(L_block_id < L_blocks-1){
+            for(uint32_t s0_idx = 0; s0_idx < p.s0; s0_idx++){
+                uint32_t sh_idx = p.s0*tid+s0_idx;
+                uint32_t KL_idx = KLb_idx+sh_idx;
+                if(KL_idx < p.KL){
+                    data_d[KL_idx + Cout_idx*p.nb1] = tmp[sh_idx];
+                }
+            }
+        }
+    }
+
+    for(uint32_t i = 0; i < splitWork(tmp_len); i++){
+        uint32_t idx = i*bs+tid;
+        uint32_t KL_idx = (L_blocks-1)*bs*p.s0+idx;
+        if(KL_idx < p.KL){
+            data_d[KL_idx + Cout_idx*p.nb1] = tmp[idx];
+        }
+    }
+}

ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp

Lines changed: 2 additions & 0 deletions
@@ -622,6 +622,8 @@ void process_shaders() {
 
     string_to_spv("timestep_embedding_f32", "timestep_embedding.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
 
+    string_to_spv("conv_transpose_1d_f32", "conv_transpose_1d.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}});
+
     string_to_spv("pool2d_f32", "pool2d.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
 
     string_to_spv("rwkv_wkv6_f32", "wkv6.comp", merge_maps(base_dict, {{"A_TYPE", "float"}}));
