Skip to content

Commit f5cf2e2

Browse files
committed
Merge branch 'remoteManagement' into remoteman_stripped
2 parents 18a3883 + f9f644d commit f5cf2e2

File tree

122 files changed

+6323
-2080
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

122 files changed

+6323
-2080
lines changed

.github/workflows/kcpp-build-release-arm64.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,9 @@ jobs:
7171
apt-get update && apt-get install -y build-essential && \
7272
apt-get update && apt-get install -y gcc-12 g++-12 && \
7373
export LD_LIBRARY_PATH=/usr/lib/gcc/x86_64-linux-gnu/12:$LD_LIBRARY_PATH && \
74-
pip install customtkinter pyinstaller tk pdfplumber multiprocess && \
74+
pip install customtkinter pyinstaller tk pdfplumber && \
7575
cd /src && \
76-
pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --collect-all pdfplumber --collect-all multiprocess \
76+
pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --collect-all pdfplumber \
7777
--add-data './koboldcpp_default.so:.' \
7878
--add-data './kcpp_adapters:./kcpp_adapters' \
7979
--add-data './koboldcpp.py:.' \

.github/workflows/kcpp-build-release-osx.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,15 +28,15 @@ jobs:
2828
- name: Dependencies
2929
id: depends
3030
run: |
31-
pip install customtkinter pyinstaller tk pdfplumber multiprocess
31+
pip install customtkinter pyinstaller tk pdfplumber
3232
3333
- name: Build
3434
id: make_build
3535
run: |
3636
make LLAMA_METAL=1 LLAMA_PORTABLE=1
3737
chmod +x './create_ver_file.sh'
3838
. create_ver_file.sh
39-
pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --collect-all pdfplumber --collect-all multiprocess --add-data './koboldcpp_default.so:.' --add-data './ggml-metal-merged.metal:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './json_to_gbnf.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './taesd_f.embd:.' --add-data './taesd_3.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-mac-arm64"
39+
pyinstaller --noconfirm --onefile --collect-all customtkinter --collect-all psutil --collect-all pdfplumber --add-data './koboldcpp_default.so:.' --add-data './ggml-metal-merged.metal:.' --add-data './kcpp_adapters:./kcpp_adapters' --add-data './koboldcpp.py:.' --add-data './json_to_gbnf.py:.' --add-data './klite.embd:.' --add-data './kcpp_docs.embd:.' --add-data './kcpp_sdui.embd:.' --add-data './taesd.embd:.' --add-data './taesd_xl.embd:.' --add-data './taesd_f.embd:.' --add-data './taesd_3.embd:.' --add-data './rwkv_vocab.embd:.' --add-data './rwkv_world_vocab.embd:.' --version-file './version.txt' --clean --console koboldcpp.py -n "koboldcpp-mac-arm64"
4040
4141
- name: Test
4242
id: test

.github/workflows/kcpp-build-release-win-full-cu12.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ jobs:
3333
- name: Install python dependencies
3434
run: |
3535
python -m pip install --upgrade pip
36-
pip install customtkinter==5.2.0 pyinstaller==5.11.0 psutil==5.9.5 pdfplumber multiprocess
36+
pip install customtkinter==5.2.0 pyinstaller==5.11.0 psutil==5.9.5 pdfplumber
3737
3838
- name: Download and install win64devkit
3939
run: |

.github/workflows/kcpp-build-release-win-full.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ jobs:
3333
- name: Install python dependencies
3434
run: |
3535
python -m pip install --upgrade pip
36-
pip install customtkinter==5.2.0 pyinstaller==5.11.0 psutil==5.9.5 pdfplumber multiprocess
36+
pip install customtkinter==5.2.0 pyinstaller==5.11.0 psutil==5.9.5 pdfplumber
3737
3838
- name: Download and install win64devkit
3939
run: |

.github/workflows/kcpp-build-release-win-oldcpu-full.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ jobs:
3333
- name: Install python dependencies
3434
run: |
3535
python -m pip install --upgrade pip
36-
pip install customtkinter==5.2.0 pyinstaller==5.11.0 psutil==5.9.5 pdfplumber multiprocess
36+
pip install customtkinter==5.2.0 pyinstaller==5.11.0 psutil==5.9.5 pdfplumber
3737
3838
- name: Download and install win64devkit
3939
run: |

.github/workflows/release.yml

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,6 @@ concurrency:
1616
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
1717
cancel-in-progress: true
1818

19-
# Fine-grant permission
20-
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
21-
permissions:
22-
contents: write # for creating release
23-
2419
env:
2520
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
2621
CMAKE_ARGS: "-DLLAMA_BUILD_EXAMPLES=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=ON -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON"
@@ -416,28 +411,27 @@ jobs:
416411
CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }}
417412
run: |
418413
cp $env:CURL_PATH\bin\libcurl-x64.dll .\build\bin\Release\libcurl-x64.dll
419-
7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\*
414+
7z a llama-${{ steps.tag.outputs.name }}-bin-win-cuda${{ matrix.cuda }}-x64.zip .\build\bin\Release\*
420415
421416
- name: Upload artifacts
422417
uses: actions/upload-artifact@v4
423418
with:
424-
path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
425-
name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip
419+
path: llama-${{ steps.tag.outputs.name }}-bin-win-cuda${{ matrix.cuda }}-x64.zip
420+
name: llama-bin-win-cuda${{ matrix.cuda }}-x64.zip
426421

427422
- name: Copy and pack Cuda runtime
428-
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
429423
run: |
430424
echo "Cuda install location: ${{ env.CUDA_PATH }}"
431425
$dst='.\build\bin\cudart\'
432426
robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
433427
robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
434-
7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\*
428+
7z a cudart-llama-bin-win-cuda${{ matrix.cuda }}-x64.zip $dst\*
435429
436430
- name: Upload Cuda runtime
437431
uses: actions/upload-artifact@v4
438432
with:
439-
path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
440-
name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
433+
path: cudart-llama-bin-win-cuda${{ matrix.cuda }}-x64.zip
434+
name: cudart-llama-bin-win-cuda${{ matrix.cuda }}-x64.zip
441435

442436
windows-sycl:
443437
runs-on: windows-latest
@@ -646,6 +640,11 @@ jobs:
646640
release:
647641
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
648642

643+
# Fine-grained permissions
644+
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
645+
permissions:
646+
contents: write # for creating release
647+
649648
runs-on: ubuntu-latest
650649

651650
needs:

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -438,6 +438,7 @@ target_include_directories(ggml PUBLIC . ./ggml/include ./ggml/src ./ggml/src/gg
438438
target_compile_features(ggml PUBLIC c_std_11) # don't bump
439439
target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
440440
set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
441+
target_compile_options(ggml PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-use_fast_math -extended-lambda>)
441442

442443
add_library(ggml_v1
443444
otherarch/ggml_v1.c

Makefile

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ ifdef LLAMA_CUBLAS
183183
CUBLAS_OBJS += $(patsubst %.cu,%.o,$(filter-out ggml/src/ggml-cuda/ggml-cuda.cu, $(wildcard ggml/src/ggml-cuda/*.cu)))
184184
CUBLAS_OBJS += $(OBJS_CUDA_TEMP_INST)
185185
NVCC = nvcc
186-
NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
186+
NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math -extended-lambda
187187

188188
ifdef LLAMA_ADD_CONDA_PATHS
189189
CUBLASLD_FLAGS += -Lconda/envs/linux/lib -Lconda/envs/linux/lib/stubs
@@ -671,7 +671,7 @@ gpttype_adapter_vulkan_noavx2.o: $(GPTTYPE_ADAPTER)
671671
$(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) $(VULKAN_FLAGS) -c $< -o $@
672672

673673
clean:
674-
rm -vf *.o main ttsmain sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt vulkan-shaders-gen vulkan-shaders-gen-noext gguf-split gguf-split.exe vulkan-shaders-gen.exe vulkan-shaders-gen-noext.exe main.exe ttsmain.exe sdmain.exe whispermain.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_clblast_failsafe.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_clblast_failsafe.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so ggml/src/ggml-vulkan-shaders.cpp ggml/src/ggml-vulkan-shaders.hpp ggml/src/ggml-vulkan-shaders-noext.cpp ggml/src/ggml-vulkan-shaders-noext.hpp
674+
rm -vf *.o main ttsmain sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt vulkan-shaders-gen vulkan-shaders-gen-noext gguf-split mtmd-cli mainvk mainvk.exe mtmd-cli.exe gguf-split.exe vulkan-shaders-gen.exe vulkan-shaders-gen-noext.exe main.exe ttsmain.exe sdmain.exe whispermain.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_clblast_failsafe.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_clblast_failsafe.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so ggml/src/ggml-vulkan-shaders.cpp ggml/src/ggml-vulkan-shaders.hpp ggml/src/ggml-vulkan-shaders-noext.cpp ggml/src/ggml-vulkan-shaders-noext.hpp
675675
rm -vrf ggml/src/ggml-cuda/*.o
676676
rm -vrf ggml/src/ggml-cuda/template-instances/*.o
677677

@@ -688,6 +688,8 @@ gguf-split: tools/gguf-split/gguf-split.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-ve
688688
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
689689
mtmd-cli: tools/mtmd/mtmd-cli.cpp tools/mtmd/mtmd.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
690690
$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
691+
mainvk: tools/main/main.cpp common/arg.cpp build-info.h ggml_v4_vulkan.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_vulkan.o llava.o ggml-backend_vulkan.o ggml-backend-reg_vulkan.o ggml-vulkan.o $(OBJS_FULL) $(OBJS) lib/vulkan-1.lib
692+
$(CXX) $(CXXFLAGS) -DGGML_USE_VULKAN -DSD_USE_VULKAN $(filter-out %.h,$^) -o $@ $(LDFLAGS)
691693

692694
ggml/src/ggml-vulkan-shaders.cpp:
693695
ifdef VULKAN_BUILD

README.md

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ It offers the following functionalities:
3333

3434
## Agent thinking mode (experimental)
3535

36-
An attempt to replicate tool usage / agent logic in Lite. Essentially, the AI is provided the user input and a list of tools that it can use. Should work on all UI modes for instruct.
36+
An attempt to replicate tool usage / agent logic in Lite. Essentially, the AI is provided with the user input and a list of tools that it can use. It should work in all UI modes for instruct, and it also supports chat names.
3737

3838
The currently supported options include:
3939
- Sending messages / Asking for additional user input (including AI suggested options like a text adventure)
@@ -59,6 +59,18 @@ The currently supported options include:
5959
- Supports system prompts, both using and setting it automatically
6060
- Supports setting a "state" parameter which is always inserted at the end of the text. It is also possible to define the format that the response must use (i.e. {health: 10, mana: 20...})
6161
- Support enforcing a specific action order (i.e. the agent can be set to always roll a dice, then send a response)
62+
- Support for manually preventing the agent from taking specific actions:
63+
64+
```
65+
[DOCUMENT BREAK][Forbidden agent commands]ask_user|roll_dice[DOCUMENT BREAK]
66+
```
67+
68+
- Support for randomly selecting elements from a list of items. The lists can be defined in the TextDB with:
69+
70+
```
71+
[DOCUMENT BREAK][Table:Genres]Action Fantasy
72+
Horror[DOCUMENT BREAK]
73+
```
6274

6375
Using this function requires the following conditions to be met:
6476
- Use an instruct model
@@ -75,6 +87,8 @@ Using this function requires the following conditions to be met:
7587

7688
![image](https://github.com/user-attachments/assets/41ec4f1c-5698-4ef3-ba7c-6998cbc1d8f3)
7789

90+
- Upload document support (including text documents, lorebooks, PDFs (parser written by SevenOf9), OCR using the currently loaded vision model, and transcription from audio)
91+
- Export / Import of WI groups from files
7892

7993
## Running the fork
8094

common/arg.cpp

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ using json = nlohmann::ordered_json;
4141

4242
std::initializer_list<enum llama_example> mmproj_examples = {
4343
LLAMA_EXAMPLE_LLAVA,
44-
// TODO: add LLAMA_EXAMPLE_SERVER when it's ready
44+
LLAMA_EXAMPLE_SERVER,
4545
};
4646

4747
static std::string read_file(const std::string & fname) {
@@ -2205,32 +2205,33 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
22052205
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING"));
22062206
add_opt(common_arg(
22072207
{"--mmproj"}, "FILE",
2208-
"path to a multimodal projector file. see tools/mtmd/README.md",
2208+
"path to a multimodal projector file. see tools/mtmd/README.md\n"
2209+
"note: if -hf is used, this argument can be omitted",
22092210
[](common_params & params, const std::string & value) {
22102211
params.mmproj.path = value;
22112212
}
2212-
).set_examples(mmproj_examples));
2213+
).set_examples(mmproj_examples).set_env("LLAMA_ARG_MMPROJ"));
22132214
add_opt(common_arg(
22142215
{"--mmproj-url"}, "URL",
22152216
"URL to a multimodal projector file. see tools/mtmd/README.md",
22162217
[](common_params & params, const std::string & value) {
22172218
params.mmproj.url = value;
22182219
}
2219-
).set_examples(mmproj_examples));
2220+
).set_examples(mmproj_examples).set_env("LLAMA_ARG_MMPROJ_URL"));
22202221
add_opt(common_arg(
22212222
{"--no-mmproj"},
22222223
"explicitly disable multimodal projector, useful when using -hf",
22232224
[](common_params & params) {
22242225
params.no_mmproj = true;
22252226
}
2226-
).set_examples(mmproj_examples));
2227+
).set_examples(mmproj_examples).set_env("LLAMA_ARG_NO_MMPROJ"));
22272228
add_opt(common_arg(
22282229
{"--no-mmproj-offload"},
22292230
"do not offload multimodal projector to GPU",
22302231
[](common_params & params) {
22312232
params.mmproj_use_gpu = false;
22322233
}
2233-
).set_examples(mmproj_examples));
2234+
).set_examples(mmproj_examples).set_env("LLAMA_ARG_NO_MMPROJ_OFFLOAD"));
22342235
add_opt(common_arg(
22352236
{"--image"}, "FILE",
22362237
"path to an image file. use with multimodal models. Specify multiple times for batching",
@@ -2437,6 +2438,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
24372438
}
24382439
}
24392440
));
2441+
add_opt(common_arg(
2442+
{"--no-op-offload"},
2443+
string_format("disable offloading host tensor operations to device (default: %s)", params.no_op_offload ? "true" : "false"),
2444+
[](common_params & params) {
2445+
params.no_op_offload = true;
2446+
}
2447+
));
24402448
add_opt(common_arg(
24412449
{"--lora"}, "FNAME",
24422450
"path to LoRA adapter (can be repeated to use multiple adapters)",
@@ -2628,6 +2636,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
26282636
params.i_chunk = value;
26292637
}
26302638
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
2639+
add_opt(common_arg(
2640+
{"--parse-special"},
2641+
string_format("parse special tokens (chat, tool, etc) (default: %s)", params.parse_special ? "true" : "false"),
2642+
[](common_params & params) {
2643+
params.parse_special = true;
2644+
}
2645+
).set_examples({LLAMA_EXAMPLE_IMATRIX}));
26312646
add_opt(common_arg(
26322647
{"-pps"},
26332648
string_format("is the prompt shared across parallel sequences (default: %s)", params.is_pp_shared ? "true" : "false"),

0 commit comments

Comments
 (0)