Skip to content

Commit 1bcb13d

Browse files
committed
Merge branch 'pre-cpu-refactor' into croco_nex_0
2 parents 4ce13c8 + 822cf24 commit 1bcb13d

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

45 files changed

+2273
-2628
lines changed
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
name: Build on Linux using cross-compiler
2+
on:
3+
workflow_dispatch:
4+
workflow_call:
5+
6+
jobs:
7+
ubuntu-latest-riscv64-cpu-cross:
8+
runs-on: ubuntu-latest
9+
10+
steps:
11+
- uses: actions/checkout@v4
12+
- name: Setup Riscv
13+
run: |
14+
sudo dpkg --add-architecture riscv64
15+
sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
16+
/etc/apt/sources.list /etc/apt/apt-mirrors.txt
17+
sudo apt-get clean
18+
sudo apt-get update
19+
sudo apt-get install -y --no-install-recommends \
20+
build-essential \
21+
gcc-14-riscv64-linux-gnu \
22+
g++-14-riscv64-linux-gnu
23+
24+
- name: Build
25+
run: |
26+
cmake -B build -DCMAKE_BUILD_TYPE=Release \
27+
-DGGML_OPENMP=OFF \
28+
-DLLAMA_BUILD_EXAMPLES=ON \
29+
-DLLAMA_BUILD_TESTS=OFF \
30+
-DCMAKE_SYSTEM_NAME=Linux \
31+
-DCMAKE_SYSTEM_PROCESSOR=riscv64 \
32+
-DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
33+
-DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
34+
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
35+
-DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
36+
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
37+
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
38+
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
39+
40+
cmake --build build --config Release -j $(nproc)
41+
42+
ubuntu-latest-riscv64-vulkan-cross:
43+
runs-on: ubuntu-latest
44+
45+
steps:
46+
- uses: actions/checkout@v4
47+
with:
48+
fetch-depth: 0
49+
50+
- name: Setup Riscv
51+
run: |
52+
sudo dpkg --add-architecture riscv64
53+
sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
54+
/etc/apt/sources.list /etc/apt/apt-mirrors.txt
55+
sudo apt-get clean
56+
sudo apt-get update
57+
sudo apt-get install -y --no-install-recommends \
58+
build-essential \
59+
glslc \
60+
gcc-14-riscv64-linux-gnu \
61+
g++-14-riscv64-linux-gnu \
62+
libvulkan-dev:riscv64
63+
64+
- name: Build
65+
run: |
66+
cmake -B build -DCMAKE_BUILD_TYPE=Release \
67+
-DGGML_VULKAN=ON \
68+
-DGGML_OPENMP=OFF \
69+
-DLLAMA_BUILD_EXAMPLES=ON \
70+
-DLLAMA_BUILD_TESTS=OFF \
71+
-DCMAKE_SYSTEM_NAME=Linux \
72+
-DCMAKE_SYSTEM_PROCESSOR=riscv64 \
73+
-DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
74+
-DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
75+
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
76+
-DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
77+
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
78+
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
79+
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
80+
81+
cmake --build build --config Release -j $(nproc)
82+
83+
ubuntu-latest-arm64-vulkan-cross:
84+
runs-on: ubuntu-latest
85+
86+
steps:
87+
- uses: actions/checkout@v4
88+
with:
89+
fetch-depth: 0
90+
91+
- name: Setup Arm64
92+
run: |
93+
sudo dpkg --add-architecture arm64
94+
sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
95+
/etc/apt/sources.list /etc/apt/apt-mirrors.txt
96+
sudo apt-get clean
97+
sudo apt-get update
98+
sudo apt-get install -y --no-install-recommends \
99+
build-essential \
100+
glslc \
101+
crossbuild-essential-arm64 \
102+
libvulkan-dev:arm64
103+
104+
- name: Build
105+
run: |
106+
cmake -B build -DCMAKE_BUILD_TYPE=Release \
107+
-DGGML_VULKAN=ON \
108+
-DGGML_OPENMP=OFF \
109+
-DLLAMA_BUILD_EXAMPLES=ON \
110+
-DLLAMA_BUILD_TESTS=OFF \
111+
-DCMAKE_SYSTEM_NAME=Linux \
112+
-DCMAKE_SYSTEM_PROCESSOR=aarch64 \
113+
-DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc \
114+
-DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++ \
115+
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
116+
-DCMAKE_FIND_ROOT_PATH=/usr/lib/aarch64-linux-gnu \
117+
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
118+
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
119+
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
120+
121+
cmake --build build --config Release -j $(nproc)

.github/workflows/kcpp-build-release-linux-cuda12.yaml

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ env:
88
jobs:
99
linux:
1010
runs-on: ubuntu-22.04
11+
container:
12+
image: ubuntu:20.04
13+
options: --privileged
1114
steps:
1215
- name: Clone
1316
id: checkout
@@ -18,8 +21,18 @@ jobs:
1821
- name: Dependencies
1922
id: depends
2023
run: |
21-
sudo apt-get update
22-
sudo apt-get install git curl bzip2
24+
apt-get update
25+
apt-get install -y sudo
26+
export DEBIAN_FRONTEND=noninteractive
27+
sudo ln -fs /usr/share/zoneinfo/UTC /etc/localtime
28+
echo "tzdata tzdata/Areas select Etc" | sudo debconf-set-selections
29+
echo "tzdata tzdata/Zones/Etc select UTC" | sudo debconf-set-selections
30+
sudo apt-get -y install git curl bzip2 python3-tk tcl tk
31+
32+
- name: Set Tcl/Tk Paths
33+
run: |
34+
echo "TCL_LIBRARY=$(find /usr/lib/ -name 'tcl8*' | head -n 1)" >> $GITHUB_ENV
35+
echo "TK_LIBRARY=$(find /usr/lib/ -name 'tk8*' | head -n 1)" >> $GITHUB_ENV
2336
2437
- name: Build
2538
id: make_build

.github/workflows/kcpp-build-release-linux.yaml

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ env:
88
jobs:
99
linux:
1010
runs-on: ubuntu-22.04
11+
container:
12+
image: ubuntu:20.04
13+
options: --privileged
1114
steps:
1215
- name: Clone
1316
id: checkout
@@ -18,8 +21,18 @@ jobs:
1821
- name: Dependencies
1922
id: depends
2023
run: |
21-
sudo apt-get update
22-
sudo apt-get install git curl bzip2
24+
apt-get update
25+
apt-get install -y sudo
26+
export DEBIAN_FRONTEND=noninteractive
27+
sudo ln -fs /usr/share/zoneinfo/UTC /etc/localtime
28+
echo "tzdata tzdata/Areas select Etc" | sudo debconf-set-selections
29+
echo "tzdata tzdata/Zones/Etc select UTC" | sudo debconf-set-selections
30+
sudo apt-get -y install git curl bzip2 python3-tk tcl tk
31+
32+
- name: Set Tcl/Tk Paths
33+
run: |
34+
echo "TCL_LIBRARY=$(find /usr/lib/ -name 'tcl8*' | head -n 1)" >> $GITHUB_ENV
35+
echo "TK_LIBRARY=$(find /usr/lib/ -name 'tk8*' | head -n 1)" >> $GITHUB_ENV
2336
2437
- name: Build
2538
id: make_build

.github/workflows/kcpp-build-release-win-full-cu12.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,3 +84,19 @@ jobs:
8484
with:
8585
name: kcpp_windows_pyinstallers
8686
path: dist/
87+
88+
- name: Generate VK Instructions
89+
id: gen_vk_instructions
90+
run: |
91+
echo "If you cannot compile vulkan shaders yourself with glslc, you can manually patch in precompiled vulkan shader source files. Copy ggml-vulkan-shaders.cpp and ggml-vulkan-shaders.hpp to the ggml/src subdirectory in KoboldCpp source files before building." > vulkan-readme.txt
92+
93+
- name: Save Standalone Vulkan Shaders
94+
uses: actions/upload-artifact@v4
95+
with:
96+
name: vulkan_precompiled_shaders
97+
path: |
98+
ggml/src/ggml-vulkan-shaders.cpp
99+
ggml/src/ggml-vulkan-shaders.hpp
100+
vulkan-readme.txt
101+
102+

Makefile

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -768,9 +768,21 @@ ifeq ($(OS),Windows_NT)
768768
@echo 'Vulkan Shaders Rebuilt for Windows...'
769769
else
770770
@echo 'Now rebuilding vulkan shaders for Linux...'
771-
${shell} chmod +x vulkan-shaders-gen
772-
${shell} chmod +x glslc-linux
773-
$(shell) ./vulkan-shaders-gen --glslc ./glslc-linux --input-dir ggml/src/ggml-vulkan/vulkan-shaders --target-hpp ggml/src/ggml-vulkan-shaders.hpp --target-cpp ggml/src/ggml-vulkan-shaders.cpp
771+
@chmod +x vulkan-shaders-gen glslc-linux
772+
@echo 'Checking if bundled glslc-linux binary is usable...'
773+
@GLSLC_BIN=$$(if ./glslc-linux --version >/dev/null 2>&1; then \
774+
echo "./glslc-linux"; \
775+
elif command -v glslc >/dev/null 2>&1; then \
776+
echo "glslc"; \
777+
else \
778+
echo ""; \
779+
fi); \
780+
if [ -z "$$GLSLC_BIN" ]; then \
781+
echo "Error: No usable glslc found. Vulkan shaders cannot be compiled!"; \
782+
else \
783+
echo "Using GLSLC: $$GLSLC_BIN"; \
784+
./vulkan-shaders-gen --glslc "$$GLSLC_BIN" --input-dir ggml/src/ggml-vulkan/vulkan-shaders --target-hpp ggml/src/ggml-vulkan-shaders.hpp --target-cpp ggml/src/ggml-vulkan-shaders.cpp; \
785+
fi
774786
@echo 'Vulkan Shaders Rebuilt for Linux...'
775787
endif
776788

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ KoboldCpp is an easy-to-use AI text-generation software for GGML and GGUF models
248248
- Includes multiple modes (chat, adventure, instruct, storywriter) and UI Themes (aesthetic roleplay, classic writer, corporate assistant, messenger)
249249
- Supports loading Tavern Character Cards, importing many different data formats from various sites, reading or exporting JSON savefiles and persistent stories.
250250
- Many other features including new samplers, regex support, websearch, RAG via TextDB and more.
251-
- Ready-to-use binaries for Windows, MacOS, Linux, Android (via Termux), Colab, Docker, also supports other platforms if self-compiled (like Raspberry PI).
251+
- Ready-to-use binaries for Windows, MacOS, Linux. Runs directly with Colab, Docker, also supports other platforms if self-compiled (like Android (via Termux) and Raspberry PI).
252252
- [Need help finding a model? Read this!](https://github.com/LostRuins/koboldcpp/wiki#getting-an-ai-model-file)
253253

254254
## Windows Usage (Precompiled Binary, Recommended)
@@ -260,7 +260,7 @@ KoboldCpp is an easy-to-use AI text-generation software for GGML and GGUF models
260260
- You can also run it using the command line. For info, please check `koboldcpp.exe --help`
261261

262262
## Linux Usage (Precompiled Binary, Recommended)
263-
On modern Linux systems, you should download the `koboldcpp-linux-x64-cuda1150` prebuilt PyInstaller binary on the **[releases page](https://github.com/LostRuins/koboldcpp/releases/latest)**. Simply download and run the binary (You may have to `chmod +x` it first).
263+
On modern Linux systems, you should download the `koboldcpp-linux-x64-cuda1150` prebuilt PyInstaller binary for greatest compatibility on the **[releases page](https://github.com/LostRuins/koboldcpp/releases/latest)**. Simply download and run the binary (You may have to `chmod +x` it first). If you have a newer device, you can also try the `koboldcpp-linux-x64-cuda1210` instead for better speeds.
264264

265265
Alternatively, you can also install koboldcpp to the current directory by running the following terminal command:
266266
```

colab.ipynb

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@
7373
"WavTokModel = \"https://huggingface.co/koboldcpp/tts/resolve/main/WavTokenizer-Large-75-Q4_0.gguf\" #@param [\"https://huggingface.co/koboldcpp/tts/resolve/main/WavTokenizer-Large-75-Q4_0.gguf\"]{allow-input: true}\n",
7474
"TTSCommand = \"\"\n",
7575
"#@markdown <hr>\n",
76+
"LoadEmbeddingsModel = False #@param {type:\"boolean\"}\n",
77+
"EmbeddingsModel = \"https://huggingface.co/yixuan-chia/snowflake-arctic-embed-s-GGUF/resolve/main/snowflake-arctic-embed-s-Q4_0.gguf\" #@param [\"https://huggingface.co/yixuan-chia/snowflake-arctic-embed-s-GGUF/resolve/main/snowflake-arctic-embed-s-Q4_0.gguf\"]{allow-input: true}\n",
78+
"ECommand = \"\"\n",
79+
"#@markdown <hr>\n",
7680
"#@markdown This enables saving stories directly to your google drive. You will have to grant permissions, and then you can access the saves from the \"KoboldCpp Server Storage\" option.\n",
7781
"AllowSaveToGoogleDrive = False #@param {type:\"boolean\"}\n",
7882
"SavGdriveCommand = \"\"\n",
@@ -124,6 +128,10 @@
124128
" TTSCommand = \"--ttsmodel ttsmodel.bin --ttswavtokenizer ttswavtok.bin --ttsgpu\"\n",
125129
"else:\n",
126130
" TTSCommand = \"\"\n",
131+
"if EmbeddingsModel and LoadEmbeddingsModel:\n",
132+
" ECommand = \"--embeddingsmodel emodel.bin\"\n",
133+
"else:\n",
134+
" ECommand = \"\"\n",
127135
"if FlashAttention:\n",
128136
" FACommand = \"--flashattention\"\n",
129137
"else:\n",
@@ -152,6 +160,8 @@
152160
"if TTSCommand:\n",
153161
" !aria2c -x 10 -o ttsmodel.bin --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $TTSModel\n",
154162
" !aria2c -x 10 -o ttswavtok.bin --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $WavTokModel\n",
163+
"if ECommand:\n",
164+
" !aria2c -x 10 -o emodel.bin --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $EmbeddingsModel\n",
155165
"\n",
156166
"if MakeLocalTunnelFallback:\n",
157167
" import urllib\n",
@@ -165,7 +175,7 @@
165175
" print(f\"Please open the above link, and input the password '{ltpw}'\\nYour KoboldCpp will start shortly...\")\n",
166176
" print(\"=================\")\n",
167177
" !sleep 10\n",
168-
"!./koboldcpp_linux model.gguf --usecublas 0 mmq --chatcompletionsadapter AutoGuess --multiuser --gpulayers $Layers --contextsize $ContextSize --websearch --quiet --remotetunnel $FACommand $MPCommand $VCommand $SCommand $WCommand $TTSCommand $SavGdriveCommand\n"
178+
"!./koboldcpp_linux model.gguf --usecublas 0 mmq --chatcompletionsadapter AutoGuess --multiuser --gpulayers $Layers --contextsize $ContextSize --websearch --quiet --remotetunnel $FACommand $MPCommand $VCommand $SCommand $WCommand $TTSCommand $ECommand $SavGdriveCommand\n"
169179
]
170180
}
171181
],

common/arg.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <algorithm>
2020
#include <climits>
2121
#include <cstdarg>
22+
#include <filesystem>
2223
#include <fstream>
2324
#include <regex>
2425
#include <set>
@@ -657,9 +658,13 @@ static void common_params_handle_model(
657658
}
658659
}
659660

660-
// TODO: allow custom host
661-
model.url = "https://huggingface.co/" + model.hf_repo + "/resolve/main/" + model.hf_file;
662-
661+
std::string hf_endpoint = "https://huggingface.co/";
662+
const char * hf_endpoint_env = getenv("HF_ENDPOINT");
663+
if (hf_endpoint_env) {
664+
hf_endpoint = hf_endpoint_env;
665+
if (hf_endpoint.back() != '/') hf_endpoint += '/';
666+
}
667+
model.url = hf_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file;
663668
// make sure model path is present (for caching purposes)
664669
if (model.path.empty()) {
665670
// this is to avoid different repo having same file name, or same file name in different subdirs

common/minja/chat-template.hpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,19 @@
99
#pragma once
1010

1111
#include "minja.hpp"
12-
#include <json.hpp>
12+
13+
#include <chrono>
14+
#include <cstddef>
15+
#include <cstdio>
16+
#include <exception>
17+
#include <iomanip>
18+
#include <memory>
19+
#include <sstream>
1320
#include <string>
1421
#include <vector>
1522

23+
#include <json.hpp>
24+
1625
using json = nlohmann::ordered_json;
1726

1827
namespace minja {
@@ -425,7 +434,7 @@ class chat_template {
425434
auto obj = json {
426435
{"tool_calls", tool_calls},
427436
};
428-
if (!content.is_null() && content != "") {
437+
if (!content.is_null() && !content.empty()) {
429438
obj["content"] = content;
430439
}
431440
message["content"] = obj.dump(2);
@@ -435,13 +444,12 @@ class chat_template {
435444
if (polyfill_tool_responses && role == "tool") {
436445
message["role"] = "user";
437446
auto obj = json {
438-
{"tool_response", {
439-
{"content", message.at("content")},
440-
}},
447+
{"tool_response", json::object()},
441448
};
442449
if (message.contains("name")) {
443-
obj["tool_response"]["name"] = message.at("name");
450+
obj["tool_response"]["tool"] = message.at("name");
444451
}
452+
obj["tool_response"]["content"] = message.at("content");
445453
if (message.contains("tool_call_id")) {
446454
obj["tool_response"]["tool_call_id"] = message.at("tool_call_id");
447455
}
@@ -510,7 +518,7 @@ class chat_template {
510518
static nlohmann::ordered_json add_system(const nlohmann::ordered_json & messages, const std::string & system_prompt) {
511519
json messages_with_system = messages;
512520

513-
if (messages_with_system.size() > 0 && messages_with_system[0].at("role") == "system") {
521+
if (!messages_with_system.empty() && messages_with_system[0].at("role") == "system") {
514522
std::string existing_system = messages_with_system.at(0).at("content");
515523
messages_with_system[0] = json {
516524
{"role", "system"},

0 commit comments

Comments (0)