
Commit aec3483

Merge remote-tracking branch 'upstream/master' into webgpu
2 parents c09bfc5 + c465030 · commit aec3483


78 files changed: +2389 additions, −695 deletions

.devops/intel.Dockerfile

Lines changed: 17 additions & 13 deletions
@@ -49,19 +49,23 @@ COPY --from=build /app/full /app
 
 WORKDIR /app
 
-RUN apt-get update \
-    && apt-get install -y \
-    git \
-    python3 \
-    python3-pip \
-    && pip install --upgrade pip setuptools wheel \
-    && pip install -r requirements.txt \
-    && apt autoremove -y \
-    && apt clean -y \
-    && rm -rf /tmp/* /var/tmp/* \
-    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
-    && find /var/cache -type f -delete
+RUN apt-get update && \
+    apt-get install -y \
+    git \
+    python3 \
+    python3-pip \
+    python3-venv && \
+    python3 -m venv /opt/venv && \
+    . /opt/venv/bin/activate && \
+    pip install --upgrade pip setuptools wheel && \
+    pip install -r requirements.txt && \
+    apt autoremove -y && \
+    apt clean -y && \
+    rm -rf /tmp/* /var/tmp/* && \
+    find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete && \
+    find /var/cache -type f -delete
+
+ENV PATH="/opt/venv/bin:$PATH"
 
 ENTRYPOINT ["/app/tools.sh"]
.github/workflows/build.yml

Lines changed: 2 additions & 1 deletion
@@ -693,7 +693,7 @@ jobs:
           - build: 'openblas-x64'
             defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
           - build: 'vulkan-x64'
-            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON'
+            defines: '-DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON'
           - build: 'llvm-arm64'
             defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
           - build: 'llvm-arm64-opencl-adreno'
@@ -778,6 +778,7 @@ jobs:
         cmake -S . -B build ${{ matrix.defines }} `
           -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include"
         cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
+        cp $env:CURL_PATH/bin/libcurl-*.dll build/bin/Release
 
     - name: Add libopenblas.dll
       id: add_libopenblas_dll
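
The vulkan-x64 job now passes -DCMAKE_BUILD_TYPE=Release explicitly: with single-config generators an empty CMAKE_BUILD_TYPE yields an unoptimized build, while --config Release at build time only affects multi-config generators. A common CMake-side guard for the same hazard (a sketch, not code from this commit):

    # Fall back to Release when no build type was chosen and the generator
    # is single-config (CMAKE_CONFIGURATION_TYPES is empty for those).
    if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
        set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
    endif()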

CMakeLists.txt

Lines changed: 10 additions & 4 deletions
@@ -89,6 +89,14 @@ option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
 include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)
 
+if (NOT DEFINED LLAMA_BUILD_NUMBER)
+    set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
+endif()
+if (NOT DEFINED LLAMA_BUILD_COMMIT)
+    set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
+endif()
+set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
+
 # override ggml options
 set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS})
 set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
@@ -155,6 +163,8 @@ if (LLAMA_USE_SYSTEM_GGML)
 endif()
 
 if (NOT TARGET ggml AND NOT LLAMA_USE_SYSTEM_GGML)
+    set(GGML_BUILD_NUMBER ${LLAMA_BUILD_NUMBER})
+    set(GGML_BUILD_COMMIT ${LLAMA_BUILD_COMMIT})
     add_subdirectory(ggml)
     # ... otherwise assume ggml is added by a parent CMakeLists.txt
 endif()
@@ -204,10 +214,6 @@ endif()
 include(GNUInstallDirs)
 include(CMakePackageConfigHelpers)
 
-set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER})
-set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT})
-set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER})
-
 set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
 set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
 set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
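
The if (NOT DEFINED ...) guards make LLAMA_BUILD_NUMBER and LLAMA_BUILD_COMMIT overridable from outside, and the second hunk forwards them to ggml before add_subdirectory(ggml). A minimal superproject sketch, assuming llama.cpp is vendored in a llama.cpp/ subdirectory (hypothetical layout and values):

    cmake_minimum_required(VERSION 3.14)
    project(superbuild)

    # Pin the build metadata before llama.cpp's CMakeLists.txt runs; its
    # NOT DEFINED guards then skip the BUILD_NUMBER/BUILD_COMMIT defaults.
    set(LLAMA_BUILD_NUMBER 1234)        # hypothetical value
    set(LLAMA_BUILD_COMMIT "aec3483")   # hypothetical value

    # ggml receives the same values via GGML_BUILD_NUMBER/GGML_BUILD_COMMIT.
    add_subdirectory(llama.cpp)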

README.md

Lines changed: 1 addition & 2 deletions
@@ -6,7 +6,7 @@
 [![Release](https://img.shields.io/github/v/release/ggml-org/llama.cpp)](https://github.com/ggml-org/llama.cpp/releases)
 [![Server](https://github.com/ggml-org/llama.cpp/actions/workflows/server.yml/badge.svg)](https://github.com/ggml-org/llama.cpp/actions/workflows/server.yml)
 
-[Roadmap](https://github.com/users/ggerganov/projects/7) / [Project status](https://github.com/ggml-org/llama.cpp/discussions/3471) / [Manifesto](https://github.com/ggml-org/llama.cpp/discussions/205) / [ggml](https://github.com/ggml-org/ggml)
+[Roadmap](https://github.com/users/ggerganov/projects/7) / [Manifesto](https://github.com/ggml-org/llama.cpp/discussions/205) / [ggml](https://github.com/ggml-org/ggml)
 
 Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++
 
@@ -18,7 +18,6 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)
 ## Hot topics
 
 - 🔥 Multimodal support arrived in `llama-server`: [#12898](https://github.com/ggml-org/llama.cpp/pull/12898) | [documentation](./docs/multimodal.md)
-- **GGML developer experience survey (organized and reviewed by NVIDIA):** [link](https://forms.gle/Gasw3cRgyhNEnrwK9)
 - A new binary `llama-mtmd-cli` is introduced to replace `llava-cli`, `minicpmv-cli`, `gemma3-cli` ([#13012](https://github.com/ggml-org/llama.cpp/pull/13012)) and `qwen2vl-cli` ([#13141](https://github.com/ggml-org/llama.cpp/pull/13141)), `libllava` will be deprecated
 - VS Code extension for FIM completions: https://github.com/ggml-org/llama.vscode
 - Universal [tool call support](./docs/function-calling.md) in `llama-server` https://github.com/ggml-org/llama.cpp/pull/9639

ci/run.sh

Lines changed: 1 addition & 1 deletion
@@ -39,7 +39,7 @@ sd=`dirname $0`
 cd $sd/../
 SRC=`pwd`
 
-CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=OFF"
+CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON"
 
 if [ ! -z ${GG_BUILD_METAL} ]; then
     CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON -DGGML_METAL_USE_BF16=ON"
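
CI now configures with libcurl enabled. As a rough illustration of how a LLAMA_CURL-style option typically gates the dependency (a sketch with an assumed `common` target, not the repository's exact logic):

    option(LLAMA_CURL "llama: use libcurl to download models" ON)

    if(LLAMA_CURL)
        find_package(CURL REQUIRED)   # configure fails early when libcurl is missing
        target_compile_definitions(common PUBLIC LLAMA_USE_CURL)
        target_link_libraries(common PRIVATE CURL::libcurl)
    endif()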

common/CMakeLists.txt

Lines changed: 7 additions & 17 deletions
@@ -23,31 +23,21 @@ if(EXISTS "${PROJECT_SOURCE_DIR}/.git")
     endif()
 
     if(EXISTS "${GIT_DIR}/index")
-        set(GIT_INDEX "${GIT_DIR}/index")
+        # For build-info.cpp below
+        set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${GIT_DIR}/index")
     else()
         message(WARNING "Git index not found in git repository.")
-        set(GIT_INDEX "")
     endif()
 else()
     message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.")
-    set(GIT_INDEX "")
 endif()
 
-# Add a custom command to rebuild build-info.cpp when .git/index changes
-add_custom_command(
-    OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp"
-    COMMENT "Generating build details from Git"
-    COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION}
-            -DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME}
-            -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-            -DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME} -DCMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}
-            -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake"
-    WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
-    DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX}
-    VERBATIM
-)
+set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in")
+set(OUTPUT_FILE "${CMAKE_CURRENT_BINARY_DIR}/build-info.cpp")
+configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE})
+
 set(TARGET build_info)
-add_library(${TARGET} OBJECT build-info.cpp)
+add_library(${TARGET} OBJECT ${OUTPUT_FILE})
 if (BUILD_SHARED_LIBS)
     set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
 endif()
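
The build-time custom command is replaced by configure-time generation: appending ${GIT_DIR}/index to the directory's CMAKE_CONFIGURE_DEPENDS property makes CMake re-run the configure step whenever the git index changes, and configure_file() then rewrites build-info.cpp in the build tree instead of the source tree. A self-contained sketch of the same pattern, watching a hypothetical version.txt:

    cmake_minimum_required(VERSION 3.14)
    project(configure_depends_demo CXX)

    # Any change to the watched file invalidates the configure step, so the
    # next build re-runs CMake and regenerates the output below.
    set(WATCHED_FILE "${CMAKE_CURRENT_SOURCE_DIR}/version.txt")
    set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS "${WATCHED_FILE}")

    # Read a value at configure time; configure_file() substitutes it for the
    # @DEMO_COMMIT@ placeholder in the template.
    file(STRINGS "${WATCHED_FILE}" DEMO_COMMIT LIMIT_COUNT 1)
    configure_file("${CMAKE_CURRENT_SOURCE_DIR}/demo-info.cpp.in"
                   "${CMAKE_CURRENT_BINARY_DIR}/demo-info.cpp")

    add_library(demo_info OBJECT "${CMAKE_CURRENT_BINARY_DIR}/demo-info.cpp")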

common/arg.cpp

Lines changed: 3 additions & 6 deletions
@@ -988,10 +988,6 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
         params.tensor_buft_overrides.push_back({nullptr, nullptr});
     }
 
-    if (params.reranking && params.embedding) {
-        throw std::invalid_argument("error: either --embedding or --reranking can be specified, but not both");
-    }
-
     if (!params.chat_template.empty() && !common_chat_verify_template(params.chat_template, params.use_jinja)) {
         throw std::runtime_error(string_format(
             "error: the supplied chat template is not supported: %s%s\n",
@@ -2747,9 +2743,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_EMBEDDINGS"));
     add_opt(common_arg(
         {"--reranking", "--rerank"},
-        string_format("enable reranking endpoint on server (default: %s)", params.reranking ? "enabled" : "disabled"),
+        string_format("enable reranking endpoint on server (default: %s)", "disabled"),
         [](common_params & params) {
-            params.reranking = true;
+            params.embedding = true;
+            params.pooling_type = LLAMA_POOLING_TYPE_RANK;
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_RERANKING"));
     add_opt(common_arg(

common/build-info.cpp.in

Lines changed: 2 additions & 2 deletions
@@ -1,4 +1,4 @@
-int LLAMA_BUILD_NUMBER = @BUILD_NUMBER@;
-char const *LLAMA_COMMIT = "@BUILD_COMMIT@";
+int LLAMA_BUILD_NUMBER = @LLAMA_BUILD_NUMBER@;
+char const *LLAMA_COMMIT = "@LLAMA_BUILD_COMMIT@";
 char const *LLAMA_COMPILER = "@BUILD_COMPILER@";
 char const *LLAMA_BUILD_TARGET = "@BUILD_TARGET@";
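
The template now consumes the LLAMA_-prefixed variables defined at the top level. Roughly what configure_file() produces from it (hypothetical values):

    # Illustrative only: @LLAMA_BUILD_NUMBER@ and @LLAMA_BUILD_COMMIT@ are
    # replaced by the current values of the matching CMake variables.
    set(LLAMA_BUILD_NUMBER 5000)        # hypothetical
    set(LLAMA_BUILD_COMMIT "aec3483")   # hypothetical
    configure_file(build-info.cpp.in build-info.cpp)
    # The generated build-info.cpp then begins:
    #   int LLAMA_BUILD_NUMBER = 5000;
    #   char const *LLAMA_COMMIT = "aec3483";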

common/chat-parser.cpp

Lines changed: 5 additions & 0 deletions
@@ -49,6 +49,7 @@ bool common_chat_msg_parser::add_tool_call(const std::string & name, const std:
 
     // LOG_DBG("Tool call arguments:\n\traw: %s\n\tresult: %s\n", arguments.c_str(), tool_call.arguments.c_str());
     result_.tool_calls.emplace_back(tool_call);
+
     return true;
 }
 bool common_chat_msg_parser::add_tool_call(const json & tool_call) {
@@ -378,3 +379,7 @@ std::optional<common_chat_msg_parser::consume_json_result> common_chat_msg_parse
         /* .is_partial = */ found_healing_marker,
     };
 }
+
+void common_chat_msg_parser::clear_tools() {
+    result_.tool_calls.clear();
+}

common/chat-parser.h

Lines changed: 2 additions & 0 deletions
@@ -115,4 +115,6 @@ class common_chat_msg_parser {
         const std::vector<std::vector<std::string>> & args_paths = {},
         const std::vector<std::vector<std::string>> & content_paths = {}
     );
+
+    void clear_tools();
 };
