Commit 929fe85 (2 parents: 94c3d53 + c642bc0)

Merge branch 'master' into compilade/mamba2


41 files changed: +2451 -1196 lines

.github/workflows/build-linux-cross.yml

Lines changed: 39 additions & 24 deletions

@@ -4,18 +4,25 @@ on:
   workflow_call:
 
 jobs:
-  ubuntu-latest-riscv64-cpu-cross:
-    runs-on: ubuntu-latest
+  ubuntu-24-riscv64-cpu-cross:
+    runs-on: ubuntu-24.04
 
     steps:
       - uses: actions/checkout@v4
       - name: Setup Riscv
         run: |
           sudo dpkg --add-architecture riscv64
-          sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
-              /etc/apt/sources.list /etc/apt/apt-mirrors.txt
-          sudo apt-get clean
-          sudo apt-get update
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
           sudo apt-get install -y --no-install-recommends \
             build-essential \
             gcc-14-riscv64-linux-gnu \

@@ -40,21 +47,25 @@ jobs:
 
           cmake --build build --config Release -j $(nproc)
 
-  ubuntu-latest-riscv64-vulkan-cross:
-    runs-on: ubuntu-latest
+  ubuntu-24-riscv64-vulkan-cross:
+    runs-on: ubuntu-24.04
 
     steps:
       - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
       - name: Setup Riscv
         run: |
           sudo dpkg --add-architecture riscv64
-          sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
-              /etc/apt/sources.list /etc/apt/apt-mirrors.txt
-          sudo apt-get clean
-          sudo apt-get update
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
           sudo apt-get install -y --no-install-recommends \
             build-essential \
             glslc \

@@ -82,21 +93,25 @@ jobs:
 
           cmake --build build --config Release -j $(nproc)
 
-  ubuntu-latest-arm64-vulkan-cross:
-    runs-on: ubuntu-latest
+  ubuntu-24-arm64-vulkan-cross:
+    runs-on: ubuntu-24.04
 
     steps:
       - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
       - name: Setup Arm64
         run: |
           sudo dpkg --add-architecture arm64
-          sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
-              /etc/apt/sources.list /etc/apt/apt-mirrors.txt
-          sudo apt-get clean
-          sudo apt-get update
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/arm64-ports.list
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
           sudo apt-get install -y --no-install-recommends \
             build-essential \
             glslc \

.github/workflows/build.yml

Lines changed: 2 additions & 3 deletions

@@ -601,9 +601,8 @@ jobs:
             -DGGML_SYCL_F16=ON
           cmake --build build --config Release -j $(nproc)
 
-  # Disabled for now due to sporadic issue syncing.
-  # build-linux-cross:
-  #   uses: ./.github/workflows/build-linux-cross.yml
+  build-linux-cross:
+    uses: ./.github/workflows/build-linux-cross.yml
 
   macOS-latest-cmake-ios:
     runs-on: macos-latest

cmake/build-info.cmake

Lines changed: 8 additions & 2 deletions

@@ -41,14 +41,20 @@ endif()
 
 if(MSVC)
     set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
-    set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
+    if (CMAKE_VS_PLATFORM_NAME)
+        set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
+    else()
+        set(BUILD_TARGET "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}")
+    endif()
 else()
     execute_process(
-        COMMAND sh -c "\"$@\" --version | head -1" _ ${CMAKE_C_COMPILER}
+        COMMAND ${CMAKE_C_COMPILER} --version
        OUTPUT_VARIABLE OUT
        OUTPUT_STRIP_TRAILING_WHITESPACE
    )
+    string(REGEX REPLACE " *\n.*" "" OUT "${OUT}")
    set(BUILD_COMPILER ${OUT})
+
    execute_process(
        COMMAND ${CMAKE_C_COMPILER} -dumpmachine
        OUTPUT_VARIABLE OUT
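
Note on the change: running the compiler directly and trimming with `string(REGEX REPLACE " *\n.*" "")` keeps only the first line of the `--version` output, replacing the `sh -c "... | head -1"` pipeline that required a POSIX shell on the build host. A minimal C++ illustration of the same trimming rule; the helper name first_line() is hypothetical, not from the repository:

#include <iostream>
#include <string>

// Mirror of `string(REGEX REPLACE " *\n.*" "" OUT "${OUT}")`: keep only the
// first line and drop any trailing spaces before the line break.
static std::string first_line(const std::string & s) {
    const std::string::size_type nl = s.find('\n');
    std::string line = (nl == std::string::npos) ? s : s.substr(0, nl);
    while (!line.empty() && line.back() == ' ') {
        line.pop_back();
    }
    return line;
}

int main() {
    // Example compiler banner (illustrative text only).
    const std::string out = "gcc (Ubuntu 13.3.0) 13.3.0\nCopyright (C) 2023 Free Software Foundation, Inc.";
    std::cout << first_line(out) << '\n'; // prints just the first line
}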

common/CMakeLists.txt

Lines changed: 3 additions & 1 deletion

@@ -39,7 +39,9 @@ add_custom_command(
     COMMENT "Generating build details from Git"
     COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION}
             -DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME}
-            -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake"
+            -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+            -DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME} -DCMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}
+            -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake"
     WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.."
     DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX}
     VERBATIM

common/arg.cpp

Lines changed: 4 additions & 1 deletion

@@ -2783,7 +2783,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_THREADS_HTTP"));
     add_opt(common_arg(
         {"--cache-reuse"}, "N",
-        string_format("min chunk size to attempt reusing from the cache via KV shifting (default: %d)", params.n_cache_reuse),
+        string_format(
+            "min chunk size to attempt reusing from the cache via KV shifting (default: %d)\n"
+            "[(card)](https://ggml.ai/f0.png)", params.n_cache_reuse
+        ),
         [](common_params & params, int value) {
             params.n_cache_reuse = value;
         }
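
The reformatted call relies on adjacent string-literal concatenation, so the help text remains a single format string whose two lines are separated by an embedded '\n'. A self-contained sketch of that shape; this string_format() is a simplified stand-in for the variadic helper in common/common.h, not the real one:

#include <cstdio>
#include <string>

// Simplified stand-in for string_format() in common/common.h: formats a
// printf-style string with a single int argument.
static std::string string_format(const char * fmt, int value) {
    char buf[512];
    std::snprintf(buf, sizeof(buf), fmt, value);
    return std::string(buf);
}

int main() {
    // Adjacent string literals concatenate at compile time, producing one
    // two-line help string.
    const std::string help = string_format(
        "min chunk size to attempt reusing from the cache via KV shifting (default: %d)\n"
        "[(card)](https://ggml.ai/f0.png)", 0
    );
    std::printf("%s\n", help.c_str());
}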

convert_hf_to_gguf.py

Lines changed: 3 additions & 1 deletion

@@ -419,7 +419,9 @@ def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]
     @staticmethod
     def load_hparams(dir_model: Path):
         try:
-            return AutoConfig.from_pretrained(dir_model).to_dict()
+            # for security reason, we don't allow loading remote code by default
+            # if a model need remote code, we will fallback to config.json
+            return AutoConfig.from_pretrained(dir_model, trust_remote_code=False).to_dict()
         except Exception as e:
             logger.warning(f"Failed to load model config from {dir_model}: {e}")
             logger.warning("Trying to load config.json instead")

examples/llava/clip.cpp

Lines changed: 1 addition & 1 deletion

@@ -2561,7 +2561,7 @@ struct llava_uhd {
 
         // no pinpoints, dynamically calculate the grid size (e.g. minicpmv)
 
-        auto best_size = get_best_resize(original_size, slice_size, patch_size, has_slices);
+        auto best_size = get_best_resize(original_size, slice_size, patch_size, !has_slices);
         res.overview_size = best_size;
 
         if (!has_slices) {
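
The one-character fix inverts the last argument passed to get_best_resize(). Reading the surrounding code, that flag appears to govern whether the overview image may be upscaled; the name allow_upscale below is an assumption, since the hunk shows only the call site:

#include <cstdio>

// `allow_upscale` is a guessed name for get_best_resize()'s fourth
// parameter; only the call-site inversion is visible in the hunk.
static bool allow_upscale(bool has_slices) {
    // After the fix: the overview may be upscaled only when the image is
    // NOT also being cut into slices (the slices then carry the detail).
    // Before the fix, the flag followed has_slices directly.
    return !has_slices;
}

int main() {
    std::printf("has_slices=false -> allow_upscale=%d\n", allow_upscale(false)); // 1
    std::printf("has_slices=true  -> allow_upscale=%d\n", allow_upscale(true));  // 0
}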

examples/llava/mtmd-cli.cpp

Lines changed: 40 additions & 32 deletions

@@ -72,6 +72,8 @@ struct mtmd_cli_context {
     llama_batch batch;
     int n_batch;
 
+    std::vector<mtmd_bitmap> bitmaps;
+
     // note: we know that gemma3 template is "linear", meaning each turn is completely separated to another
     // so here we don't need to keep track of chat history
     common_chat_templates_ptr tmpls;

@@ -135,13 +137,22 @@ struct mtmd_cli_context {
             antiprompt_tokens.begin()
         );
     }
+
+    bool load_image(const std::string & fname) {
+        mtmd_bitmap bitmap;
+        if (mtmd_helper_bitmap_init_from_file(fname.c_str(), bitmap)) {
+            return false;
+        }
+        bitmaps.push_back(std::move(bitmap));
+        return true;
+    }
 };
 
 static int generate_response(mtmd_cli_context & ctx, common_sampler * smpl, int n_predict) {
     llama_tokens generated_tokens;
     for (int i = 0; i < n_predict; i++) {
         if (i > n_predict || !g_is_generating || g_is_interrupted) {
-            printf("\n");
+            LOG("\n");
             break;
         }
 
@@ -150,15 +161,15 @@ static int generate_response(mtmd_cli_context & ctx, common_sampler * smpl, int
         common_sampler_accept(smpl, token_id, true);
 
         if (llama_vocab_is_eog(ctx.vocab, token_id) || ctx.check_antiprompt(generated_tokens)) {
-            printf("\n");
+            LOG("\n");
             break; // end of generation
         }
 
-        printf("%s", common_token_to_piece(ctx.lctx, token_id).c_str());
+        LOG("%s", common_token_to_piece(ctx.lctx, token_id).c_str());
         fflush(stdout);
 
         if (g_is_interrupted) {
-            printf("\n");
+            LOG("\n");
             break;
         }
 
@@ -173,25 +184,14 @@ static int generate_response(mtmd_cli_context & ctx, common_sampler * smpl, int
     return 0;
 }
 
-static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg, std::vector<std::string> & images_fname, bool add_bos = false) {
-    std::vector<mtmd_bitmap> bitmaps;
-
+static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg, bool add_bos = false) {
     common_chat_templates_inputs tmpl_inputs;
     tmpl_inputs.messages = {msg};
     tmpl_inputs.add_generation_prompt = true;
     tmpl_inputs.use_jinja = false; // jinja is buggy here
     auto formatted_chat = common_chat_templates_apply(ctx.tmpls.get(), tmpl_inputs);
     LOG_DBG("formatted_chat.prompt: %s\n", formatted_chat.prompt.c_str());
 
-    for (auto & fname : images_fname) {
-        mtmd_bitmap bitmap;
-        if (mtmd_helper_bitmap_init_from_file(fname.c_str(), bitmap)) {
-            LOG_ERR("Unable to load image %s\n", fname.c_str());
-            return 2; // image not found
-        }
-        bitmaps.push_back(std::move(bitmap));
-    }
-
     mtmd_input_text text;
     text.text = formatted_chat.prompt;
     text.add_special = add_bos;

@@ -200,19 +200,23 @@ static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg, std::vect
 
     if (g_is_interrupted) return 0;
 
-    int32_t res = mtmd_tokenize(ctx.ctx_vision.get(), chunks, text, bitmaps);
+    int32_t res = mtmd_tokenize(ctx.ctx_vision.get(), chunks, text, ctx.bitmaps);
     if (res != 0) {
         LOG_ERR("Unable to tokenize prompt, res = %d\n", res);
         return 1;
     }
 
+    ctx.bitmaps.clear();
+
     if (mtmd_helper_eval(ctx.ctx_vision.get(), ctx.lctx, chunks, ctx.n_past, 0, ctx.n_batch)) {
         LOG_ERR("Unable to eval prompt\n");
         return 1;
     }
 
     ctx.n_past += mtmd_helper_get_n_pos(chunks);
 
+    LOG("\n");
+
     return 0;
 }

@@ -235,7 +239,7 @@ int main(int argc, char ** argv) {
     }
 
     mtmd_cli_context ctx(params);
-    printf("%s: %s\n", __func__, params.model.path.c_str());
+    LOG("%s: loading model: %s\n", __func__, params.model.path.c_str());
 
     bool is_single_turn = !params.prompt.empty() && !params.image.empty();
 
@@ -268,7 +272,12 @@ int main(int argc, char ** argv) {
         common_chat_msg msg;
         msg.role = "user";
         msg.content = params.prompt;
-        if (eval_message(ctx, msg, params.image, true)) {
+        for (const auto & image : params.image) {
+            if (!ctx.load_image(image)) {
+                return 1; // error is already printed by libmtmd
+            }
+        }
+        if (eval_message(ctx, msg, true)) {
             return 1;
         }
         if (!g_is_interrupted && generate_response(ctx, smpl, n_predict)) {

@@ -283,7 +292,6 @@ int main(int argc, char ** argv) {
         LOG("\n");
 
         bool is_first_msg = true;
-        std::vector<std::string> images_fname;
         std::string content;
 
         while (!g_is_interrupted) {

@@ -308,32 +316,32 @@ int main(int argc, char ** argv) {
                 continue;
             }
             g_is_generating = true;
-            if (line.find("/image") == 0) {
+            if (line == "/image" || line.find("/image ") == 0) {
+                if (line.size() < 8) {
+                    LOG_ERR("ERR: Missing image filename\n");
+                    continue;
+                }
                 std::string image = line.substr(7);
-                images_fname.push_back(string_strip(image));
-                content += "<__image__>";
+                if (ctx.load_image(image)) {
+                    LOG("Image %s loaded\n", image.c_str());
+                    content += "<__image__>";
+                }
+                // else, error is already printed by libmtmd
                 continue;
             } else {
                 content += line;
             }
             common_chat_msg msg;
             msg.role = "user";
             msg.content = content;
-            int ret = eval_message(ctx, msg, images_fname, is_first_msg);
-            if (g_is_interrupted) break;
-            if (ret == 2) {
-                // non-fatal error
-                images_fname.clear();
-                content.clear();
-                continue;
-            }
+            int ret = eval_message(ctx, msg, is_first_msg);
             if (ret) {
                 return 1;
             }
+            if (g_is_interrupted) break;
             if (generate_response(ctx, smpl, n_predict)) {
                 return 1;
             }
-            images_fname.clear();
             content.clear();
             is_first_msg = false;
         }
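
With images now loaded into the context up front, the interactive `/image` command must reject a bare `/image` before calling `substr(7)`. A standalone check of that parsing rule; parse_image_command() is local to this sketch, not a function in mtmd-cli.cpp:

#include <cstdio>
#include <string>

// Restatement of the new /image parsing: the command must be exactly
// "/image" or start with "/image ", and must carry a non-empty filename.
static bool parse_image_command(const std::string & line, std::string & path) {
    if (line != "/image" && line.find("/image ") != 0) {
        return false; // not an /image command at all
    }
    if (line.size() < 8) {
        return false; // "/image" with no filename -> error path in the CLI
    }
    path = line.substr(7); // everything after "/image "
    return true;
}

int main() {
    std::string p;
    std::printf("%d\n", parse_image_command("/image cat.png", p)); // 1, p == "cat.png"
    std::printf("%d\n", parse_image_command("/image", p));         // 0 (missing filename)
    std::printf("%d\n", parse_image_command("/imagefoo", p));      // 0 (not the command)
}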

examples/llava/mtmd.cpp

Lines changed: 1 addition & 1 deletion

@@ -590,7 +590,7 @@ int32_t mtmd_helper_eval(mtmd_context * ctx,
             }
 
         } else if (chunk.type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
-            GGML_ASSERT(!is_last && "logits for last image chunk is not yet support");
+            GGML_ASSERT(!is_last && "logits for last image chunk is not yet supported");
             GGML_ASSERT(chunk.tokens_image != nullptr);
             int64_t t0 = ggml_time_ms();
             if (ctx->print_timings) {

examples/server/README.md

Lines changed: 1 addition & 1 deletion

@@ -154,7 +154,7 @@ The project is under active development, and we are [looking for feedback and co
 | `--ssl-cert-file FNAME` | path to file a PEM-encoded SSL certificate<br/>(env: LLAMA_ARG_SSL_CERT_FILE) |
 | `-to, --timeout N` | server read/write timeout in seconds (default: 600)<br/>(env: LLAMA_ARG_TIMEOUT) |
 | `--threads-http N` | number of threads used to process HTTP requests (default: -1)<br/>(env: LLAMA_ARG_THREADS_HTTP) |
-| `--cache-reuse N` | min chunk size to attempt reusing from the cache via KV shifting (default: 0)<br/>(env: LLAMA_ARG_CACHE_REUSE) |
+| `--cache-reuse N` | min chunk size to attempt reusing from the cache via KV shifting (default: 0)<br/>[(card)](https://ggml.ai/f0.png)<br/>(env: LLAMA_ARG_CACHE_REUSE) |
 | `--metrics` | enable prometheus compatible metrics endpoint (default: disabled)<br/>(env: LLAMA_ARG_ENDPOINT_METRICS) |
 | `--slots` | enable slots monitoring endpoint (default: disabled)<br/>(env: LLAMA_ARG_ENDPOINT_SLOTS) |
 | `--props` | enable changing global properties via POST /props (default: disabled)<br/>(env: LLAMA_ARG_ENDPOINT_PROPS) |
