Skip to content

Commit 1bcb13d

Browse files
committed
Merge branch 'pre-cpu-refactor' into croco_nex_0
2 parents 4ce13c8 + 822cf24 commit 1bcb13d

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

45 files changed

+2273
-2628
lines changed
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
name: Build on Linux using cross-compiler
2+
on:
3+
workflow_dispatch:
4+
workflow_call:
5+
6+
jobs:
7+
ubuntu-latest-riscv64-cpu-cross:
8+
runs-on: ubuntu-latest
9+
10+
steps:
11+
- uses: actions/checkout@v4
12+
- name: Setup Riscv
13+
run: |
14+
sudo dpkg --add-architecture riscv64
15+
sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
16+
/etc/apt/sources.list /etc/apt/apt-mirrors.txt
17+
sudo apt-get clean
18+
sudo apt-get update
19+
sudo apt-get install -y --no-install-recommends \
20+
build-essential \
21+
gcc-14-riscv64-linux-gnu \
22+
g++-14-riscv64-linux-gnu
23+
24+
- name: Build
25+
run: |
26+
cmake -B build -DCMAKE_BUILD_TYPE=Release \
27+
-DGGML_OPENMP=OFF \
28+
-DLLAMA_BUILD_EXAMPLES=ON \
29+
-DLLAMA_BUILD_TESTS=OFF \
30+
-DCMAKE_SYSTEM_NAME=Linux \
31+
-DCMAKE_SYSTEM_PROCESSOR=riscv64 \
32+
-DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
33+
-DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
34+
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
35+
-DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
36+
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
37+
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
38+
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
39+
40+
cmake --build build --config Release -j $(nproc)
41+
42+
ubuntu-latest-riscv64-vulkan-cross:
43+
runs-on: ubuntu-latest
44+
45+
steps:
46+
- uses: actions/checkout@v4
47+
with:
48+
fetch-depth: 0
49+
50+
- name: Setup Riscv
51+
run: |
52+
sudo dpkg --add-architecture riscv64
53+
sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
54+
/etc/apt/sources.list /etc/apt/apt-mirrors.txt
55+
sudo apt-get clean
56+
sudo apt-get update
57+
sudo apt-get install -y --no-install-recommends \
58+
build-essential \
59+
glslc \
60+
gcc-14-riscv64-linux-gnu \
61+
g++-14-riscv64-linux-gnu \
62+
libvulkan-dev:riscv64
63+
64+
- name: Build
65+
run: |
66+
cmake -B build -DCMAKE_BUILD_TYPE=Release \
67+
-DGGML_VULKAN=ON \
68+
-DGGML_OPENMP=OFF \
69+
-DLLAMA_BUILD_EXAMPLES=ON \
70+
-DLLAMA_BUILD_TESTS=OFF \
71+
-DCMAKE_SYSTEM_NAME=Linux \
72+
-DCMAKE_SYSTEM_PROCESSOR=riscv64 \
73+
-DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \
74+
-DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \
75+
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
76+
-DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \
77+
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
78+
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
79+
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
80+
81+
cmake --build build --config Release -j $(nproc)
82+
83+
ubuntu-latest-arm64-vulkan-cross:
84+
runs-on: ubuntu-latest
85+
86+
steps:
87+
- uses: actions/checkout@v4
88+
with:
89+
fetch-depth: 0
90+
91+
- name: Setup Arm64
92+
run: |
93+
sudo dpkg --add-architecture arm64
94+
sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
95+
/etc/apt/sources.list /etc/apt/apt-mirrors.txt
96+
sudo apt-get clean
97+
sudo apt-get update
98+
sudo apt-get install -y --no-install-recommends \
99+
build-essential \
100+
glslc \
101+
crossbuild-essential-arm64 \
102+
libvulkan-dev:arm64
103+
104+
- name: Build
105+
run: |
106+
cmake -B build -DCMAKE_BUILD_TYPE=Release \
107+
-DGGML_VULKAN=ON \
108+
-DGGML_OPENMP=OFF \
109+
-DLLAMA_BUILD_EXAMPLES=ON \
110+
-DLLAMA_BUILD_TESTS=OFF \
111+
-DCMAKE_SYSTEM_NAME=Linux \
112+
-DCMAKE_SYSTEM_PROCESSOR=aarch64 \
113+
-DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc \
114+
-DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++ \
115+
-DCMAKE_POSITION_INDEPENDENT_CODE=ON \
116+
-DCMAKE_FIND_ROOT_PATH=/usr/lib/aarch64-linux-gnu \
117+
-DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \
118+
-DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \
119+
-DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH
120+
121+
cmake --build build --config Release -j $(nproc)

.github/workflows/kcpp-build-release-linux-cuda12.yaml

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ env:
88
jobs:
99
linux:
1010
runs-on: ubuntu-22.04
11+
container:
12+
image: ubuntu:20.04
13+
options: --privileged
1114
steps:
1215
- name: Clone
1316
id: checkout
@@ -18,8 +21,18 @@ jobs:
1821
- name: Dependencies
1922
id: depends
2023
run: |
21-
sudo apt-get update
22-
sudo apt-get install git curl bzip2
24+
apt-get update
25+
apt-get install -y sudo
26+
export DEBIAN_FRONTEND=noninteractive
27+
sudo ln -fs /usr/share/zoneinfo/UTC /etc/localtime
28+
echo "tzdata tzdata/Areas select Etc" | sudo debconf-set-selections
29+
echo "tzdata tzdata/Zones/Etc select UTC" | sudo debconf-set-selections
30+
sudo apt-get -y install git curl bzip2 python3-tk tcl tk
31+
32+
- name: Set Tcl/Tk Paths
33+
run: |
34+
echo "TCL_LIBRARY=$(find /usr/lib/ -name 'tcl8*' | head -n 1)" >> $GITHUB_ENV
35+
echo "TK_LIBRARY=$(find /usr/lib/ -name 'tk8*' | head -n 1)" >> $GITHUB_ENV
2336
2437
- name: Build
2538
id: make_build

.github/workflows/kcpp-build-release-linux.yaml

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ env:
88
jobs:
99
linux:
1010
runs-on: ubuntu-22.04
11+
container:
12+
image: ubuntu:20.04
13+
options: --privileged
1114
steps:
1215
- name: Clone
1316
id: checkout
@@ -18,8 +21,18 @@ jobs:
1821
- name: Dependencies
1922
id: depends
2023
run: |
21-
sudo apt-get update
22-
sudo apt-get install git curl bzip2
24+
apt-get update
25+
apt-get install -y sudo
26+
export DEBIAN_FRONTEND=noninteractive
27+
sudo ln -fs /usr/share/zoneinfo/UTC /etc/localtime
28+
echo "tzdata tzdata/Areas select Etc" | sudo debconf-set-selections
29+
echo "tzdata tzdata/Zones/Etc select UTC" | sudo debconf-set-selections
30+
sudo apt-get -y install git curl bzip2 python3-tk tcl tk
31+
32+
- name: Set Tcl/Tk Paths
33+
run: |
34+
echo "TCL_LIBRARY=$(find /usr/lib/ -name 'tcl8*' | head -n 1)" >> $GITHUB_ENV
35+
echo "TK_LIBRARY=$(find /usr/lib/ -name 'tk8*' | head -n 1)" >> $GITHUB_ENV
2336
2437
- name: Build
2538
id: make_build

.github/workflows/kcpp-build-release-win-full-cu12.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,3 +84,19 @@ jobs:
8484
with:
8585
name: kcpp_windows_pyinstallers
8686
path: dist/
87+
88+
- name: Generate VK Instructions
89+
id: gen_vk_instructions
90+
run: |
91+
echo "If you cannot compile vulkan shaders yourself with glslc, you can manually patch in precompiled vulkan shader source files. Copy ggml-vulkan-shaders.cpp and ggml-vulkan-shaders.hpp to the ggml/src subdirectory in KoboldCpp source files before building." > vulkan-readme.txt
92+
93+
- name: Save Standalone Vulkan Shaders
94+
uses: actions/upload-artifact@v4
95+
with:
96+
name: vulkan_precompiled_shaders
97+
path: |
98+
ggml/src/ggml-vulkan-shaders.cpp
99+
ggml/src/ggml-vulkan-shaders.hpp
100+
vulkan-readme.txt
101+
102+

Makefile

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -768,9 +768,21 @@ ifeq ($(OS),Windows_NT)
768768
@echo 'Vulkan Shaders Rebuilt for Windows...'
769769
else
770770
@echo 'Now rebuilding vulkan shaders for Linux...'
771-
${shell} chmod +x vulkan-shaders-gen
772-
${shell} chmod +x glslc-linux
773-
$(shell) ./vulkan-shaders-gen --glslc ./glslc-linux --input-dir ggml/src/ggml-vulkan/vulkan-shaders --target-hpp ggml/src/ggml-vulkan-shaders.hpp --target-cpp ggml/src/ggml-vulkan-shaders.cpp
771+
@chmod +x vulkan-shaders-gen glslc-linux
772+
@echo 'Checking if bundled glslc-linux binary is usable...'
773+
@GLSLC_BIN=$$(if ./glslc-linux --version >/dev/null 2>&1; then \
774+
echo "./glslc-linux"; \
775+
elif command -v glslc >/dev/null 2>&1; then \
776+
echo "glslc"; \
777+
else \
778+
echo ""; \
779+
fi); \
780+
if [ -z "$$GLSLC_BIN" ]; then \
781+
echo "Error: No usable glslc found. Vulkan shaders cannot be compiled!"; \
782+
else \
783+
echo "Using GLSLC: $$GLSLC_BIN"; \
784+
./vulkan-shaders-gen --glslc "$$GLSLC_BIN" --input-dir ggml/src/ggml-vulkan/vulkan-shaders --target-hpp ggml/src/ggml-vulkan-shaders.hpp --target-cpp ggml/src/ggml-vulkan-shaders.cpp; \
785+
fi
774786
@echo 'Vulkan Shaders Rebuilt for Linux...'
775787
endif
776788

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ KoboldCpp is an easy-to-use AI text-generation software for GGML and GGUF models
248248
- Includes multiple modes (chat, adventure, instruct, storywriter) and UI Themes (aesthetic roleplay, classic writer, corporate assistant, messenger)
249249
- Supports loading Tavern Character Cards, importing many different data formats from various sites, reading or exporting JSON savefiles and persistent stories.
250250
- Many other features including new samplers, regex support, websearch, RAG via TextDB and more.
251-
- Ready-to-use binaries for Windows, MacOS, Linux, Android (via Termux), Colab, Docker, also supports other platforms if self-compiled (like Raspberry PI).
251+
- Ready-to-use binaries for Windows, MacOS, Linux. Runs directly with Colab, Docker, also supports other platforms if self-compiled (like Android (via Termux) and Raspberry PI).
252252
- [Need help finding a model? Read this!](https://github.com/LostRuins/koboldcpp/wiki#getting-an-ai-model-file)
253253

254254
## Windows Usage (Precompiled Binary, Recommended)
@@ -260,7 +260,7 @@ KoboldCpp is an easy-to-use AI text-generation software for GGML and GGUF models
260260
- You can also run it using the command line. For info, please check `koboldcpp.exe --help`
261261

262262
## Linux Usage (Precompiled Binary, Recommended)
263-
On modern Linux systems, you should download the `koboldcpp-linux-x64-cuda1150` prebuilt PyInstaller binary on the **[releases page](https://github.com/LostRuins/koboldcpp/releases/latest)**. Simply download and run the binary (You may have to `chmod +x` it first).
263+
On modern Linux systems, you should download the `koboldcpp-linux-x64-cuda1150` prebuilt PyInstaller binary for greatest compatibility on the **[releases page](https://github.com/LostRuins/koboldcpp/releases/latest)**. Simply download and run the binary (You may have to `chmod +x` it first). If you have a newer device, you can also try the `koboldcpp-linux-x64-cuda1210` instead for better speeds.
264264

265265
Alternatively, you can also install koboldcpp to the current directory by running the following terminal command:
266266
```

colab.ipynb

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@
7373
"WavTokModel = \"https://huggingface.co/koboldcpp/tts/resolve/main/WavTokenizer-Large-75-Q4_0.gguf\" #@param [\"https://huggingface.co/koboldcpp/tts/resolve/main/WavTokenizer-Large-75-Q4_0.gguf\"]{allow-input: true}\n",
7474
"TTSCommand = \"\"\n",
7575
"#@markdown <hr>\n",
76+
"LoadEmbeddingsModel = False #@param {type:\"boolean\"}\n",
77+
"EmbeddingsModel = \"https://huggingface.co/yixuan-chia/snowflake-arctic-embed-s-GGUF/resolve/main/snowflake-arctic-embed-s-Q4_0.gguf\" #@param [\"https://huggingface.co/yixuan-chia/snowflake-arctic-embed-s-GGUF/resolve/main/snowflake-arctic-embed-s-Q4_0.gguf\"]{allow-input: true}\n",
78+
"ECommand = \"\"\n",
79+
"#@markdown <hr>\n",
7680
"#@markdown This enables saving stories directly to your google drive. You will have to grant permissions, and then you can access the saves from the \"KoboldCpp Server Storage\" option.\n",
7781
"AllowSaveToGoogleDrive = False #@param {type:\"boolean\"}\n",
7882
"SavGdriveCommand = \"\"\n",
@@ -124,6 +128,10 @@
124128
" TTSCommand = \"--ttsmodel ttsmodel.bin --ttswavtokenizer ttswavtok.bin --ttsgpu\"\n",
125129
"else:\n",
126130
" TTSCommand = \"\"\n",
131+
"if EmbeddingsModel and LoadEmbeddingsModel:\n",
132+
" ECommand = \"--embeddingsmodel emodel.bin\"\n",
133+
"else:\n",
134+
" ECommand = \"\"\n",
127135
"if FlashAttention:\n",
128136
" FACommand = \"--flashattention\"\n",
129137
"else:\n",
@@ -152,6 +160,8 @@
152160
"if TTSCommand:\n",
153161
" !aria2c -x 10 -o ttsmodel.bin --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $TTSModel\n",
154162
" !aria2c -x 10 -o ttswavtok.bin --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $WavTokModel\n",
163+
"if ECommand:\n",
164+
" !aria2c -x 10 -o emodel.bin --summary-interval=5 --download-result=default --allow-overwrite=true --file-allocation=none $EmbeddingsModel\n",
155165
"\n",
156166
"if MakeLocalTunnelFallback:\n",
157167
" import urllib\n",
@@ -165,7 +175,7 @@
165175
" print(f\"Please open the above link, and input the password '{ltpw}'\\nYour KoboldCpp will start shortly...\")\n",
166176
" print(\"=================\")\n",
167177
" !sleep 10\n",
168-
"!./koboldcpp_linux model.gguf --usecublas 0 mmq --chatcompletionsadapter AutoGuess --multiuser --gpulayers $Layers --contextsize $ContextSize --websearch --quiet --remotetunnel $FACommand $MPCommand $VCommand $SCommand $WCommand $TTSCommand $SavGdriveCommand\n"
178+
"!./koboldcpp_linux model.gguf --usecublas 0 mmq --chatcompletionsadapter AutoGuess --multiuser --gpulayers $Layers --contextsize $ContextSize --websearch --quiet --remotetunnel $FACommand $MPCommand $VCommand $SCommand $WCommand $TTSCommand $ECommand $SavGdriveCommand\n"
169179
]
170180
}
171181
],

common/arg.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <algorithm>
2020
#include <climits>
2121
#include <cstdarg>
22+
#include <filesystem>
2223
#include <fstream>
2324
#include <regex>
2425
#include <set>
@@ -657,9 +658,13 @@ static void common_params_handle_model(
657658
}
658659
}
659660

660-
// TODO: allow custom host
661-
model.url = "https://huggingface.co/" + model.hf_repo + "/resolve/main/" + model.hf_file;
662-
661+
std::string hf_endpoint = "https://huggingface.co/";
662+
const char * hf_endpoint_env = getenv("HF_ENDPOINT");
663+
if (hf_endpoint_env) {
664+
hf_endpoint = hf_endpoint_env;
665+
if (hf_endpoint.back() != '/') hf_endpoint += '/';
666+
}
667+
model.url = hf_endpoint + model.hf_repo + "/resolve/main/" + model.hf_file;
663668
// make sure model path is present (for caching purposes)
664669
if (model.path.empty()) {
665670
// this is to avoid different repo having same file name, or same file name in different subdirs

common/minja/chat-template.hpp

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,19 @@
99
#pragma once
1010

1111
#include "minja.hpp"
12-
#include <json.hpp>
12+
13+
#include <chrono>
14+
#include <cstddef>
15+
#include <cstdio>
16+
#include <exception>
17+
#include <iomanip>
18+
#include <memory>
19+
#include <sstream>
1320
#include <string>
1421
#include <vector>
1522

23+
#include <json.hpp>
24+
1625
using json = nlohmann::ordered_json;
1726

1827
namespace minja {
@@ -425,7 +434,7 @@ class chat_template {
425434
auto obj = json {
426435
{"tool_calls", tool_calls},
427436
};
428-
if (!content.is_null() && content != "") {
437+
if (!content.is_null() && !content.empty()) {
429438
obj["content"] = content;
430439
}
431440
message["content"] = obj.dump(2);
@@ -435,13 +444,12 @@ class chat_template {
435444
if (polyfill_tool_responses && role == "tool") {
436445
message["role"] = "user";
437446
auto obj = json {
438-
{"tool_response", {
439-
{"content", message.at("content")},
440-
}},
447+
{"tool_response", json::object()},
441448
};
442449
if (message.contains("name")) {
443-
obj["tool_response"]["name"] = message.at("name");
450+
obj["tool_response"]["tool"] = message.at("name");
444451
}
452+
obj["tool_response"]["content"] = message.at("content");
445453
if (message.contains("tool_call_id")) {
446454
obj["tool_response"]["tool_call_id"] = message.at("tool_call_id");
447455
}
@@ -510,7 +518,7 @@ class chat_template {
510518
static nlohmann::ordered_json add_system(const nlohmann::ordered_json & messages, const std::string & system_prompt) {
511519
json messages_with_system = messages;
512520

513-
if (messages_with_system.size() > 0 && messages_with_system[0].at("role") == "system") {
521+
if (!messages_with_system.empty() && messages_with_system[0].at("role") == "system") {
514522
std::string existing_system = messages_with_system.at(0).at("content");
515523
messages_with_system[0] = json {
516524
{"role", "system"},

0 commit comments

Comments (0)