Commit 929fe85 (2 parents: 94c3d53 + c642bc0)

Merge branch 'master' into compilade/mamba2


41 files changed: +2451 -1196 lines

.github/workflows/build-linux-cross.yml

Lines changed: 39 additions & 24 deletions

@@ -4,18 +4,25 @@ on:
   workflow_call:
 
 jobs:
-  ubuntu-latest-riscv64-cpu-cross:
-    runs-on: ubuntu-latest
+  ubuntu-24-riscv64-cpu-cross:
+    runs-on: ubuntu-24.04
 
     steps:
       - uses: actions/checkout@v4
       - name: Setup Riscv
         run: |
           sudo dpkg --add-architecture riscv64
-          sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
-              /etc/apt/sources.list /etc/apt/apt-mirrors.txt
-          sudo apt-get clean
-          sudo apt-get update
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
           sudo apt-get install -y --no-install-recommends \
             build-essential \
             gcc-14-riscv64-linux-gnu \

@@ -40,21 +47,25 @@ jobs:
 
           cmake --build build --config Release -j $(nproc)
 
-  ubuntu-latest-riscv64-vulkan-cross:
-    runs-on: ubuntu-latest
+  ubuntu-24-riscv64-vulkan-cross:
+    runs-on: ubuntu-24.04
 
     steps:
       - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
       - name: Setup Riscv
         run: |
           sudo dpkg --add-architecture riscv64
-          sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
-              /etc/apt/sources.list /etc/apt/apt-mirrors.txt
-          sudo apt-get clean
-          sudo apt-get update
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
           sudo apt-get install -y --no-install-recommends \
             build-essential \
             glslc \

@@ -82,21 +93,25 @@ jobs:
 
           cmake --build build --config Release -j $(nproc)
 
-  ubuntu-latest-arm64-vulkan-cross:
-    runs-on: ubuntu-latest
+  ubuntu-24-arm64-vulkan-cross:
+    runs-on: ubuntu-24.04
 
     steps:
       - uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-
       - name: Setup Arm64
         run: |
           sudo dpkg --add-architecture arm64
-          sudo sed -i 's|http://azure.archive.ubuntu.com/ubuntu|http://ports.ubuntu.com/ubuntu-ports|g' \
-              /etc/apt/sources.list /etc/apt/apt-mirrors.txt
-          sudo apt-get clean
-          sudo apt-get update
+
+          # Add arch-specific repositories for non-amd64 architectures
+          cat << EOF | sudo tee /etc/apt/sources.list.d/arm64-ports.list
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe
+          deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe
+          EOF
+
+          sudo apt-get update || true ;# Prevent failure due to missing URLs.
+
           sudo apt-get install -y --no-install-recommends \
             build-essential \
             glslc \

.github/workflows/build.yml

Lines changed: 2 additions & 3 deletions

@@ -601,9 +601,8 @@ jobs:
             -DGGML_SYCL_F16=ON
           cmake --build build --config Release -j $(nproc)
 
-  # Disabled for now due to sporadic issue syncing.
-  # build-linux-cross:
-  #   uses: ./.github/workflows/build-linux-cross.yml
+  build-linux-cross:
+    uses: ./.github/workflows/build-linux-cross.yml
 
   macOS-latest-cmake-ios:
     runs-on: macos-latest

cmake/build-info.cmake

Lines changed: 8 additions & 2 deletions

@@ -41,14 +41,20 @@ endif()
 
 if(MSVC)
     set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
-    set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
+    if (CMAKE_VS_PLATFORM_NAME)
+        set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
+    else()
+        set(BUILD_TARGET "${CMAKE_SYSTEM_NAME} ${CMAKE_SYSTEM_PROCESSOR}")
+    endif()
 else()
     execute_process(
-        COMMAND sh -c "\"$@\" --version | head -1" _ ${CMAKE_C_COMPILER}
+        COMMAND ${CMAKE_C_COMPILER} --version
        OUTPUT_VARIABLE OUT
        OUTPUT_STRIP_TRAILING_WHITESPACE
    )
+    string(REGEX REPLACE " *\n.*" "" OUT "${OUT}")
    set(BUILD_COMPILER ${OUT})
+
    execute_process(
        COMMAND ${CMAKE_C_COMPILER} -dumpmachine
        OUTPUT_VARIABLE OUT
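
Note on the change: running the compiler directly and trimming with `string(REGEX REPLACE " *\n.*" "")` keeps only the first line of the `--version` output, replacing the `sh -c "... | head -1"` pipeline that required a POSIX shell on the build host. A minimal C++ illustration of the same trimming rule; the helper name first_line() is hypothetical, not from the repository:

#include <iostream>
#include <string>

// Mirror of `string(REGEX REPLACE " *\n.*" "" OUT "${OUT}")`: keep only the
// first line and drop any trailing spaces before the line break.
static std::string first_line(const std::string & s) {
    const std::string::size_type nl = s.find('\n');
    std::string line = (nl == std::string::npos) ? s : s.substr(0, nl);
    while (!line.empty() && line.back() == ' ') {
        line.pop_back();
    }
    return line;
}

int main() {
    // Example compiler banner (illustrative text only).
    const std::string out = "gcc (Ubuntu 13.3.0) 13.3.0\nCopyright (C) 2023 Free Software Foundation, Inc.";
    std::cout << first_line(out) << '\n'; // prints just the first line
}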

common/CMakeLists.txt

Lines changed: 3 additions & 1 deletion

@@ -39,7 +39,9 @@ add_custom_command(
     COMMENT "Generating build details from Git"
     COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION}
             -DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME}
-            -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake"
+            -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+            -DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME} -DCMAKE_SYSTEM_PROCESSOR=${CMAKE_SYSTEM_PROCESSOR}
+            -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake"
     WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.."
     DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX}
     VERBATIM

common/arg.cpp

Lines changed: 4 additions & 1 deletion

@@ -2783,7 +2783,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_THREADS_HTTP"));
     add_opt(common_arg(
         {"--cache-reuse"}, "N",
-        string_format("min chunk size to attempt reusing from the cache via KV shifting (default: %d)", params.n_cache_reuse),
+        string_format(
+            "min chunk size to attempt reusing from the cache via KV shifting (default: %d)\n"
+            "[(card)](https://ggml.ai/f0.png)", params.n_cache_reuse
+        ),
         [](common_params & params, int value) {
             params.n_cache_reuse = value;
         }
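
The reformatted call relies on adjacent string-literal concatenation, so the help text remains a single format string whose two lines are separated by an embedded '\n'. A self-contained sketch of that shape; this string_format() is a simplified stand-in for the variadic helper in common/common.h, not the real one:

#include <cstdio>
#include <string>

// Simplified stand-in for string_format() in common/common.h: formats a
// printf-style string with a single int argument.
static std::string string_format(const char * fmt, int value) {
    char buf[512];
    std::snprintf(buf, sizeof(buf), fmt, value);
    return std::string(buf);
}

int main() {
    // Adjacent string literals concatenate at compile time, producing one
    // two-line help string.
    const std::string help = string_format(
        "min chunk size to attempt reusing from the cache via KV shifting (default: %d)\n"
        "[(card)](https://ggml.ai/f0.png)", 0
    );
    std::printf("%s\n", help.c_str());
}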

convert_hf_to_gguf.py

Lines changed: 3 additions & 1 deletion

@@ -419,7 +419,9 @@ def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]
     @staticmethod
     def load_hparams(dir_model: Path):
         try:
-            return AutoConfig.from_pretrained(dir_model).to_dict()
+            # for security reason, we don't allow loading remote code by default
+            # if a model need remote code, we will fallback to config.json
+            return AutoConfig.from_pretrained(dir_model, trust_remote_code=False).to_dict()
         except Exception as e:
             logger.warning(f"Failed to load model config from {dir_model}: {e}")
             logger.warning("Trying to load config.json instead")

examples/llava/clip.cpp

Lines changed: 1 addition & 1 deletion

@@ -2561,7 +2561,7 @@ struct llava_uhd {
 
         // no pinpoints, dynamically calculate the grid size (e.g. minicpmv)
 
-        auto best_size = get_best_resize(original_size, slice_size, patch_size, has_slices);
+        auto best_size = get_best_resize(original_size, slice_size, patch_size, !has_slices);
         res.overview_size = best_size;
 
         if (!has_slices) {
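
The one-character fix inverts the last argument passed to get_best_resize(). Reading the surrounding code, that flag appears to govern whether the overview image may be upscaled; the name allow_upscale below is an assumption, since the hunk shows only the call site:

#include <cstdio>

// `allow_upscale` is a guessed name for get_best_resize()'s fourth
// parameter; only the call-site inversion is visible in the hunk.
static bool allow_upscale(bool has_slices) {
    // After the fix: the overview may be upscaled only when the image is
    // NOT also being cut into slices (the slices then carry the detail).
    // Before the fix, the flag followed has_slices directly.
    return !has_slices;
}

int main() {
    std::printf("has_slices=false -> allow_upscale=%d\n", allow_upscale(false)); // 1
    std::printf("has_slices=true  -> allow_upscale=%d\n", allow_upscale(true));  // 0
}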

examples/llava/mtmd-cli.cpp

Lines changed: 40 additions & 32 deletions

@@ -72,6 +72,8 @@ struct mtmd_cli_context {
     llama_batch batch;
     int n_batch;
 
+    std::vector<mtmd_bitmap> bitmaps;
+
     // note: we know that gemma3 template is "linear", meaning each turn is completely separated to another
     // so here we don't need to keep track of chat history
     common_chat_templates_ptr tmpls;

@@ -135,13 +137,22 @@ struct mtmd_cli_context {
             antiprompt_tokens.begin()
         );
     }
+
+    bool load_image(const std::string & fname) {
+        mtmd_bitmap bitmap;
+        if (mtmd_helper_bitmap_init_from_file(fname.c_str(), bitmap)) {
+            return false;
+        }
+        bitmaps.push_back(std::move(bitmap));
+        return true;
+    }
 };
 
 static int generate_response(mtmd_cli_context & ctx, common_sampler * smpl, int n_predict) {
     llama_tokens generated_tokens;
     for (int i = 0; i < n_predict; i++) {
         if (i > n_predict || !g_is_generating || g_is_interrupted) {
-            printf("\n");
+            LOG("\n");
             break;
         }
 
@@ -150,15 +161,15 @@ static int generate_response(mtmd_cli_context & ctx, common_sampler * smpl, int
         common_sampler_accept(smpl, token_id, true);
 
         if (llama_vocab_is_eog(ctx.vocab, token_id) || ctx.check_antiprompt(generated_tokens)) {
-            printf("\n");
+            LOG("\n");
             break; // end of generation
         }
 
-        printf("%s", common_token_to_piece(ctx.lctx, token_id).c_str());
+        LOG("%s", common_token_to_piece(ctx.lctx, token_id).c_str());
         fflush(stdout);
 
         if (g_is_interrupted) {
-            printf("\n");
+            LOG("\n");
             break;
         }
 
@@ -173,25 +184,14 @@ static int generate_response(mtmd_cli_context & ctx, common_sampler * smpl, int
     return 0;
 }
 
-static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg, std::vector<std::string> & images_fname, bool add_bos = false) {
-    std::vector<mtmd_bitmap> bitmaps;
-
+static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg, bool add_bos = false) {
     common_chat_templates_inputs tmpl_inputs;
     tmpl_inputs.messages = {msg};
     tmpl_inputs.add_generation_prompt = true;
     tmpl_inputs.use_jinja = false; // jinja is buggy here
     auto formatted_chat = common_chat_templates_apply(ctx.tmpls.get(), tmpl_inputs);
     LOG_DBG("formatted_chat.prompt: %s\n", formatted_chat.prompt.c_str());
 
-    for (auto & fname : images_fname) {
-        mtmd_bitmap bitmap;
-        if (mtmd_helper_bitmap_init_from_file(fname.c_str(), bitmap)) {
-            LOG_ERR("Unable to load image %s\n", fname.c_str());
-            return 2; // image not found
-        }
-        bitmaps.push_back(std::move(bitmap));
-    }
-
     mtmd_input_text text;
     text.text = formatted_chat.prompt;
     text.add_special = add_bos;

@@ -200,19 +200,23 @@ static int eval_message(mtmd_cli_context & ctx, common_chat_msg & msg, std::vect
 
     if (g_is_interrupted) return 0;
 
-    int32_t res = mtmd_tokenize(ctx.ctx_vision.get(), chunks, text, bitmaps);
+    int32_t res = mtmd_tokenize(ctx.ctx_vision.get(), chunks, text, ctx.bitmaps);
     if (res != 0) {
         LOG_ERR("Unable to tokenize prompt, res = %d\n", res);
         return 1;
     }
 
+    ctx.bitmaps.clear();
+
     if (mtmd_helper_eval(ctx.ctx_vision.get(), ctx.lctx, chunks, ctx.n_past, 0, ctx.n_batch)) {
         LOG_ERR("Unable to eval prompt\n");
         return 1;
     }
 
     ctx.n_past += mtmd_helper_get_n_pos(chunks);
 
+    LOG("\n");
+
     return 0;
 }

@@ -235,7 +239,7 @@ int main(int argc, char ** argv) {
     }
 
     mtmd_cli_context ctx(params);
-    printf("%s: %s\n", __func__, params.model.path.c_str());
+    LOG("%s: loading model: %s\n", __func__, params.model.path.c_str());
 
     bool is_single_turn = !params.prompt.empty() && !params.image.empty();
 
@@ -268,7 +272,12 @@ int main(int argc, char ** argv) {
         common_chat_msg msg;
         msg.role = "user";
         msg.content = params.prompt;
-        if (eval_message(ctx, msg, params.image, true)) {
+        for (const auto & image : params.image) {
+            if (!ctx.load_image(image)) {
+                return 1; // error is already printed by libmtmd
+            }
+        }
+        if (eval_message(ctx, msg, true)) {
             return 1;
         }
         if (!g_is_interrupted && generate_response(ctx, smpl, n_predict)) {

@@ -283,7 +292,6 @@ int main(int argc, char ** argv) {
         LOG("\n");
 
         bool is_first_msg = true;
-        std::vector<std::string> images_fname;
         std::string content;
 
         while (!g_is_interrupted) {

@@ -308,32 +316,32 @@ int main(int argc, char ** argv) {
                 continue;
             }
             g_is_generating = true;
-            if (line.find("/image") == 0) {
+            if (line == "/image" || line.find("/image ") == 0) {
+                if (line.size() < 8) {
+                    LOG_ERR("ERR: Missing image filename\n");
+                    continue;
+                }
                 std::string image = line.substr(7);
-                images_fname.push_back(string_strip(image));
-                content += "<__image__>";
+                if (ctx.load_image(image)) {
+                    LOG("Image %s loaded\n", image.c_str());
+                    content += "<__image__>";
+                }
+                // else, error is already printed by libmtmd
                 continue;
             } else {
                 content += line;
             }
             common_chat_msg msg;
             msg.role = "user";
             msg.content = content;
-            int ret = eval_message(ctx, msg, images_fname, is_first_msg);
-            if (g_is_interrupted) break;
-            if (ret == 2) {
-                // non-fatal error
-                images_fname.clear();
-                content.clear();
-                continue;
-            }
+            int ret = eval_message(ctx, msg, is_first_msg);
             if (ret) {
                 return 1;
             }
+            if (g_is_interrupted) break;
             if (generate_response(ctx, smpl, n_predict)) {
                 return 1;
             }
-            images_fname.clear();
             content.clear();
             is_first_msg = false;
         }
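
With images now loaded into the context up front, the interactive `/image` command must reject a bare `/image` before calling `substr(7)`. A standalone check of that parsing rule; parse_image_command() is local to this sketch, not a function in mtmd-cli.cpp:

#include <cstdio>
#include <string>

// Restatement of the new /image parsing: the command must be exactly
// "/image" or start with "/image ", and must carry a non-empty filename.
static bool parse_image_command(const std::string & line, std::string & path) {
    if (line != "/image" && line.find("/image ") != 0) {
        return false; // not an /image command at all
    }
    if (line.size() < 8) {
        return false; // "/image" with no filename -> error path in the CLI
    }
    path = line.substr(7); // everything after "/image "
    return true;
}

int main() {
    std::string p;
    std::printf("%d\n", parse_image_command("/image cat.png", p)); // 1, p == "cat.png"
    std::printf("%d\n", parse_image_command("/image", p));         // 0 (missing filename)
    std::printf("%d\n", parse_image_command("/imagefoo", p));      // 0 (not the command)
}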

examples/llava/mtmd.cpp

Lines changed: 1 addition & 1 deletion

@@ -590,7 +590,7 @@ int32_t mtmd_helper_eval(mtmd_context * ctx,
             }
 
         } else if (chunk.type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
-            GGML_ASSERT(!is_last && "logits for last image chunk is not yet support");
+            GGML_ASSERT(!is_last && "logits for last image chunk is not yet supported");
             GGML_ASSERT(chunk.tokens_image != nullptr);
             int64_t t0 = ggml_time_ms();
             if (ctx->print_timings) {

examples/server/README.md

Lines changed: 1 addition & 1 deletion

@@ -154,7 +154,7 @@ The project is under active development, and we are [looking for feedback and co
 | `--ssl-cert-file FNAME` | path to file a PEM-encoded SSL certificate<br/>(env: LLAMA_ARG_SSL_CERT_FILE) |
 | `-to, --timeout N` | server read/write timeout in seconds (default: 600)<br/>(env: LLAMA_ARG_TIMEOUT) |
 | `--threads-http N` | number of threads used to process HTTP requests (default: -1)<br/>(env: LLAMA_ARG_THREADS_HTTP) |
-| `--cache-reuse N` | min chunk size to attempt reusing from the cache via KV shifting (default: 0)<br/>(env: LLAMA_ARG_CACHE_REUSE) |
+| `--cache-reuse N` | min chunk size to attempt reusing from the cache via KV shifting (default: 0)<br/>[(card)](https://ggml.ai/f0.png)<br/>(env: LLAMA_ARG_CACHE_REUSE) |
 | `--metrics` | enable prometheus compatible metrics endpoint (default: disabled)<br/>(env: LLAMA_ARG_ENDPOINT_METRICS) |
 | `--slots` | enable slots monitoring endpoint (default: disabled)<br/>(env: LLAMA_ARG_ENDPOINT_SLOTS) |
 | `--props` | enable changing global properties via POST /props (default: disabled)<br/>(env: LLAMA_ARG_ENDPOINT_PROPS) |
