From 9d755023acd09bcf2aef10d65c3a2673802bd7c1 Mon Sep 17 00:00:00 2001 From: K_log Televised - youtube Date: Mon, 14 Jul 2025 00:58:18 +0200 Subject: [PATCH 1/8] Add the examples directory for function calling --- examples/simple-function-call/README.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 examples/simple-function-call/README.md diff --git a/examples/simple-function-call/README.md b/examples/simple-function-call/README.md new file mode 100644 index 0000000000000..e69de29bb2d1d From 52767e4c3e90f1a8fdad656dca491e75a021be99 Mon Sep 17 00:00:00 2001 From: K_log Televised - youtube Date: Mon, 14 Jul 2025 03:09:20 +0200 Subject: [PATCH 2/8] Added the llama-simple-function-call.cpp, README.md AND edited the CMakeLists.txts --- examples/CMakeLists.txt | 1 + examples/simple-function-call/CMakeLists.txt | 14 + .../llama-simple-function-call.cpp | 448 ++++++++++++++++++ 3 files changed, 463 insertions(+) create mode 100644 examples/simple-function-call/CMakeLists.txt create mode 100644 examples/simple-function-call/llama-simple-function-call.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 49e4d2cf8c198..c969103ce704c 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -29,6 +29,7 @@ else() add_subdirectory(save-load-state) add_subdirectory(simple) add_subdirectory(simple-chat) + add_subdirectory(simple-function-call) add_subdirectory(speculative) add_subdirectory(speculative-simple) add_subdirectory(gen-docs) diff --git a/examples/simple-function-call/CMakeLists.txt b/examples/simple-function-call/CMakeLists.txt new file mode 100644 index 0000000000000..15aa09c2a6f32 --- /dev/null +++ b/examples/simple-function-call/CMakeLists.txt @@ -0,0 +1,14 @@ +set(TARGET simple-function-call) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) + +add_executable(${TARGET} llama-simple-function-call.cpp) +install(TARGETS ${TARGET} RUNTIME) +target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_17) + +# Add include directories +target_include_directories(${TARGET} PRIVATE ../llava) +target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}) +target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}/vendor) +target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}/vendor/nlohmann) \ No newline at end of file diff --git a/examples/simple-function-call/llama-simple-function-call.cpp b/examples/simple-function-call/llama-simple-function-call.cpp new file mode 100644 index 0000000000000..d9eecdc39fa44 --- /dev/null +++ b/examples/simple-function-call/llama-simple-function-call.cpp @@ -0,0 +1,448 @@ +#include "llama.h" +#include "chat.h" +#include "common.h" +#include "sampling.h" +#include "json.hpp" +#include +#include +#include +#include +#include +#include +#include +#include + +using json = nlohmann::json; + +// Forward declaration +std::string execute_shell_command(const std::string& command); + +static void print_usage(int argc, char ** argv) { + (void)argc; // Suppress unused parameter warning + (void)argv; // Suppress unused parameter warning + printf("\nSimple Function Call Example - Real Shell Command Execution\n"); + printf("\n"); +} + +// Real function to execute shell commands +std::string execute_shell_command(const std::string& command) { + std::array buffer; + std::string result; + + // Use popen to execute the command + std::unique_ptr pipe(popen(command.c_str(), "r"), pclose); + if (!pipe) { + return 
"Error: Failed to execute command"; + } + + // Read the output + while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { + result += buffer.data(); + } + + return result; +} + +int main(int argc, char ** argv) { + // path to the model gguf file + std::string model_path; + // prompt to generate text from + std::string prompt; + // number of layers to offload to the GPU + int ngl = 99; + // number of tokens to predict + int n_predict = 256; + // chat template file + std::string chat_template_file; + // grammar constraint + std::string grammar; + // confirmation flag + bool confirm_commands = false; + + // parse command line arguments + { + int i = 1; + for (; i < argc; i++) { + if (strcmp(argv[i], "-m") == 0) { + if (i + 1 < argc) { + model_path = argv[++i]; + } else { + print_usage(argc, argv); + return 1; + } + } else if (strcmp(argv[i], "-p") == 0) { + if (i + 1 < argc) { + prompt = argv[++i]; + } else { + print_usage(argc, argv); + return 1; + } + } else if (strcmp(argv[i], "-n") == 0) { + if (i + 1 < argc) { + try { + n_predict = std::stoi(argv[++i]); + } catch (...) { + print_usage(argc, argv); + return 1; + } + } else { + print_usage(argc, argv); + return 1; + } + } else if (strcmp(argv[i], "-ngl") == 0) { + if (i + 1 < argc) { + try { + ngl = std::stoi(argv[++i]); + } catch (...) { + print_usage(argc, argv); + return 1; + } + } else { + print_usage(argc, argv); + return 1; + } + } else if (strcmp(argv[i], "--chat-template-file") == 0) { + if (i + 1 < argc) { + chat_template_file = argv[++i]; + } else { + print_usage(argc, argv); + return 1; + } + } else if (strcmp(argv[i], "--grammar") == 0) { + if (i + 1 < argc) { + grammar = argv[++i]; + } else { + print_usage(argc, argv); + return 1; + } + } else if (strcmp(argv[i], "--confirm") == 0) { + confirm_commands = true; + } else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { + print_usage(argc, argv); + return 0; + } else { + fprintf(stderr, "Unknown argument: %s\n", argv[i]); + print_usage(argc, argv); + return 1; + } + } + + if (model_path.empty()) { + fprintf(stderr, "Error: Model file (-m) is required\n"); + print_usage(argc, argv); + return 1; + } + + if (prompt.empty()) { + fprintf(stderr, "Error: Prompt (-p) is required\n"); + print_usage(argc, argv); + return 1; + } + } + + printf("Simple Function Call Example\n"); + printf("Model: %s\n", model_path.c_str()); + printf("Prompt: %s\n", prompt.c_str()); + printf("GPU layers: %d\n", ngl); + printf("Max tokens: %d\n", n_predict); + if (!chat_template_file.empty()) { + printf("Chat template: %s\n", chat_template_file.c_str()); + } + if (!grammar.empty()) { + printf("Grammar: %s\n", grammar.c_str()); + } + if (confirm_commands) { + printf("Command confirmation: enabled\n"); + } + printf("\n"); + + // load dynamic backends + ggml_backend_load_all(); + + // initialize the model + llama_model_params model_params = llama_model_default_params(); + model_params.n_gpu_layers = ngl; + + llama_model * model = llama_model_load_from_file(model_path.c_str(), model_params); + + if (model == NULL) { + fprintf(stderr, "%s: error: unable to load model\n", __func__); + return 1; + } + + const llama_vocab * vocab = llama_model_get_vocab(model); + + // initialize the context + llama_context_params ctx_params = llama_context_default_params(); + // n_ctx is the context size + ctx_params.n_ctx = 2048; + // n_batch is the maximum number of tokens that can be processed in a single call to llama_decode + ctx_params.n_batch = 512; + // enable performance counters + 
ctx_params.no_perf = false; + + llama_context * ctx = llama_init_from_model(model, ctx_params); + + if (ctx == NULL) { + fprintf(stderr, "%s: error: failed to create the llama_context\n", __func__); + return 1; + } + + // Initialize chat templates for function calling + common_chat_templates_ptr chat_templates = common_chat_templates_init(model, chat_template_file); + + // Define available functions/tools - single shell command tool + std::vector tools = { + { + "shell_command", + "Execute a shell command and return the output", + R"({ + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The shell command to execute" + } + }, + "required": ["command"] + })" + } + }; + + // Create chat messages + std::vector messages = { + { + "system", + "You are a helpful assistant that can execute shell commands. When the user asks for something that requires a command, generate and execute the appropriate shell command. Be careful and only execute safe commands.", + {}, // content_parts + {}, // tool_calls + "", // reasoning_content + "", // tool_name + "" // tool_call_id + }, + { + "user", + prompt, + {}, // content_parts + {}, // tool_calls + "", // reasoning_content + "", // tool_name + "" // tool_call_id + } + }; + + // Set up chat template inputs with tools + common_chat_templates_inputs inputs; + inputs.messages = messages; + inputs.tools = tools; + inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO; + inputs.add_generation_prompt = true; + inputs.use_jinja = true; + + // Apply chat template + auto chat_params = common_chat_templates_apply(chat_templates.get(), inputs); + + // Tokenize the prompt + const int n_prompt = -llama_tokenize(vocab, chat_params.prompt.c_str(), chat_params.prompt.size(), NULL, 0, true, true); + + // allocate space for the tokens and tokenize the prompt + std::vector prompt_tokens(n_prompt); + if (llama_tokenize(vocab, chat_params.prompt.c_str(), chat_params.prompt.size(), prompt_tokens.data(), prompt_tokens.size(), true, true) < 0) { + fprintf(stderr, "%s: error: failed to tokenize the prompt\n", __func__); + return 1; + } + + // prepare a batch for the prompt + llama_batch batch = llama_batch_get_one(prompt_tokens.data(), prompt_tokens.size()); + + // initialize the sampler + auto sparams = llama_sampler_chain_default_params(); + sparams.no_perf = false; + llama_sampler * smpl = llama_sampler_chain_init(sparams); + + llama_sampler_chain_add(smpl, llama_sampler_init_greedy()); + + // main loop + const auto t_main_start = ggml_time_us(); + int n_decode = 0; + llama_token new_token_id; + std::string response_text; + + for (int n_pos = 0; n_pos + batch.n_tokens < n_prompt + n_predict; ) { + // evaluate the current batch with the transformer model + if (llama_decode(ctx, batch)) { + fprintf(stderr, "%s : failed to eval, return code %d\n", __func__, 1); + return 1; + } + + n_pos += batch.n_tokens; + + // sample the next token + { + new_token_id = llama_sampler_sample(smpl, ctx, -1); + + // is it an end of generation? 
+ if (llama_vocab_is_eog(vocab, new_token_id)) { + break; + } + + char buf[128]; + int n = llama_token_to_piece(vocab, new_token_id, buf, sizeof(buf), 0, true); + if (n < 0) { + fprintf(stderr, "%s: error: failed to convert token to piece\n", __func__); + return 1; + } + std::string s(buf, n); + response_text += s; + printf("%s", s.c_str()); + fflush(stdout); + + // prepare the next batch with the sampled token + batch = llama_batch_get_one(&new_token_id, 1); + + n_decode += 1; + } + } + + printf("\n\n"); + + // Parse the response to check for function calls + common_chat_syntax syntax; + syntax.format = chat_params.format; + syntax.parse_tool_calls = true; + + common_chat_msg parsed_response = common_chat_parse(response_text, false, syntax); + + // Handle function calls if any + if (!parsed_response.tool_calls.empty()) { + printf("Function calls detected:\n"); + for (const auto& tool_call : parsed_response.tool_calls) { + printf(" Function: %s\n", tool_call.name.c_str()); + printf(" Arguments: %s\n", tool_call.arguments.c_str()); + + // Execute the function + if (tool_call.name == "shell_command") { + try { + // Parse JSON arguments + json args = json::parse(tool_call.arguments); + std::string command = args["command"]; + + printf(" Command: %s\n", command.c_str()); + + // Ask for confirmation if enabled + if (confirm_commands) { + printf(" Execute this command? (y/N): "); + std::string response; + std::getline(std::cin, response); + if (response != "y" && response != "Y") { + printf(" Command execution cancelled.\n"); + continue; + } + } + + // Execute the command + std::string result = execute_shell_command(command); + printf(" Result:\n%s", result.c_str()); + + // Add the result to the conversation and continue + messages.push_back({ + "assistant", + response_text, + {}, // content_parts + {}, // tool_calls + "", // reasoning_content + "", // tool_name + "" // tool_call_id + }); + messages.push_back({ + "tool", + result, + {}, // content_parts + {}, // tool_calls + "", // reasoning_content + "", // tool_name + tool_call.id + }); + + // Continue the conversation with the result + printf("\nContinuing conversation with command result...\n"); + + // Set up new chat template inputs + common_chat_templates_inputs new_inputs; + new_inputs.messages = messages; + new_inputs.tools = tools; + new_inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO; + new_inputs.add_generation_prompt = true; + new_inputs.use_jinja = true; + + // Apply chat template for continuation + auto new_chat_params = common_chat_templates_apply(chat_templates.get(), new_inputs); + + // Tokenize the new prompt + const int n_new_prompt = -llama_tokenize(vocab, new_chat_params.prompt.c_str(), new_chat_params.prompt.size(), NULL, 0, true, true); + std::vector new_prompt_tokens(n_new_prompt); + if (llama_tokenize(vocab, new_chat_params.prompt.c_str(), new_chat_params.prompt.size(), new_prompt_tokens.data(), new_prompt_tokens.size(), true, true) < 0) { + fprintf(stderr, "%s: error: failed to tokenize the continuation prompt\n", __func__); + return 1; + } + + // Continue generation + batch = llama_batch_get_one(new_prompt_tokens.data(), new_prompt_tokens.size()); + std::string continuation_text; + + for (int n_pos = 0; n_pos + batch.n_tokens < n_new_prompt + n_predict; ) { + if (llama_decode(ctx, batch)) { + fprintf(stderr, "%s : failed to eval continuation, return code %d\n", __func__, 1); + return 1; + } + + n_pos += batch.n_tokens; + + new_token_id = llama_sampler_sample(smpl, ctx, -1); + + if (llama_vocab_is_eog(vocab, 
new_token_id)) { + break; + } + + char buf[128]; + int n = llama_token_to_piece(vocab, new_token_id, buf, sizeof(buf), 0, true); + if (n < 0) { + fprintf(stderr, "%s: error: failed to convert token to piece\n", __func__); + return 1; + } + std::string s(buf, n); + continuation_text += s; + printf("%s", s.c_str()); + fflush(stdout); + + batch = llama_batch_get_one(&new_token_id, 1); + n_decode += 1; + } + + printf("\n"); + + } catch (const std::exception& e) { + printf(" Error parsing arguments: %s\n", e.what()); + } + } + } + } else if (!parsed_response.content.empty()) { + printf("Response: %s\n", parsed_response.content.c_str()); + } + + const auto t_main_end = ggml_time_us(); + + fprintf(stderr, "%s: decoded %d tokens in %.2f s, speed: %.2f t/s\n", + __func__, n_decode, (t_main_end - t_main_start) / 1000000.0f, n_decode / ((t_main_end - t_main_start) / 1000000.0f)); + + fprintf(stderr, "\n"); + llama_perf_sampler_print(smpl); + llama_perf_context_print(ctx); + fprintf(stderr, "\n"); + + llama_sampler_free(smpl); + llama_free(ctx); + llama_model_free(model); + + return 0; +} \ No newline at end of file From 65f3cd4987921ebf06058d44fe33a281cc3187ad Mon Sep 17 00:00:00 2001 From: K_log Televised - youtube Date: Mon, 14 Jul 2025 13:00:52 +0200 Subject: [PATCH 3/8] Update CMakeLists.txt --- examples/simple-function-call/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/simple-function-call/CMakeLists.txt b/examples/simple-function-call/CMakeLists.txt index 15aa09c2a6f32..4f6057fed2801 100644 --- a/examples/simple-function-call/CMakeLists.txt +++ b/examples/simple-function-call/CMakeLists.txt @@ -1,8 +1,8 @@ -set(TARGET simple-function-call) +set(TARGET llama-simple-function-call) include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) -add_executable(${TARGET} llama-simple-function-call.cpp) +add_executable(${TARGET} simple-function-call.cpp) install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) @@ -11,4 +11,4 @@ target_compile_features(${TARGET} PRIVATE cxx_std_17) target_include_directories(${TARGET} PRIVATE ../llava) target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}) target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}/vendor) -target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}/vendor/nlohmann) \ No newline at end of file +target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}/vendor/nlohmann) From 0f3e60bdd583adf6eee6b132e85087293a72ef29 Mon Sep 17 00:00:00 2001 From: K_log Televised - youtube Date: Mon, 14 Jul 2025 13:01:39 +0200 Subject: [PATCH 4/8] Rename llama-simple-function-call.cpp to simple-function-call.cpp --- ...{llama-simple-function-call.cpp => simple-function-call.cpp} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename examples/simple-function-call/{llama-simple-function-call.cpp => simple-function-call.cpp} (99%) diff --git a/examples/simple-function-call/llama-simple-function-call.cpp b/examples/simple-function-call/simple-function-call.cpp similarity index 99% rename from examples/simple-function-call/llama-simple-function-call.cpp rename to examples/simple-function-call/simple-function-call.cpp index d9eecdc39fa44..575be6741248c 100644 --- a/examples/simple-function-call/llama-simple-function-call.cpp +++ b/examples/simple-function-call/simple-function-call.cpp @@ -445,4 +445,4 @@ int main(int argc, char ** argv) { 
llama_model_free(model); return 0; -} \ No newline at end of file +} From d8bd37855752710c447ee99308f981e9409697a0 Mon Sep 17 00:00:00 2001 From: K_log Televised - youtube Date: Mon, 14 Jul 2025 13:23:45 +0200 Subject: [PATCH 5/8] Update README.md --- examples/simple-function-call/README.md | 203 ++++++++++++++++++++++++ 1 file changed, 203 insertions(+) diff --git a/examples/simple-function-call/README.md b/examples/simple-function-call/README.md index e69de29bb2d1d..7bf6151af636e 100644 --- a/examples/simple-function-call/README.md +++ b/examples/simple-function-call/README.md @@ -0,0 +1,203 @@ +# Simple Function Call Example + +A standalone executable that demonstrates function calling ***from scratch*** with llama.cpp, allowing natural language to shell command execution. + +## What This Is + +- Users input natural language requests +- The LLM generates appropriate shell commands using function calling +- Commands are executed and results returned +- The conversation continues with real command output + +## How It Works + +### Architecture +- **Standalone executable** - no server architecture needed +- **Direct function calling** within the same process +- **Real shell command execution** via `popen()` +- **JSON parsing** of LLM tool calls + +### Tool Schema +The LLM has access to a single tool: +```json +{ + "name": "shell_command", + "description": "Execute a shell command and return the output", + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The shell command to execute" + } + }, + "required": ["command"] + } +} +``` + +### Basic Usage +```bash +./simple-function-call -m model.gguf -p "your request here" +``` + +### Command Line Arguments +- `--jinja` Is enabled by default +- `-m model.gguf` - Model file path (REQUIRED) +- `-p "prompt"` - User's request/command (REQUIRED) +- `--chat-template-file template.jinja` - Optional chat template override +- `--grammar "grammar"` - Optional grammar constraint +- `-ngl N` - Number of GPU layers +- `-n N` - Maximum number of tokens to generate +- `--confirm` - Ask for confirmation before executing commands + +### Examples + +```bash +# List files in current directory +./simple-function-call -m llama-3.2-1b.gguf -p "list all files in this directory" # I've had success even with this small model but I think it has trouble solving complex tasks it can still generate proper JSON for execution + +{"name": "shell_command", "parameters": {"command": "ls -l"}} + +Function calls detected: + Function: shell_command + Arguments: {"command":"ls -l"} + Command: ls -l + Result: +total 812 +-rw-r--r-- 1 user user 47860 Jul 13 00:09 AUTHORS +drwxr-xr-x 12 user user 4096 Jul 13 00:09 build +-rwxr-xr-x 1 user user 21760 Jul 13 00:09 build-xcframework.sh +drwxr-xr-x 2 user user 4096 Jul 13 00:09 ci +drwxr-xr-x 2 user user 4096 Jul 13 00:09 cmake +-rw-r--r-- 1 user user 7973 Jul 13 00:09 CMakeLists.txt +-rw-r--r-- 1 user user 4008 Jul 13 00:09 CMakePresets.json +-rw-r--r-- 1 user user 434 Jul 13 00:09 CODEOWNERS +drwxr-xr-x 2 user user 4096 Jul 13 00:09 common +-rw-r--r-- 1 user user 6510 Jul 13 00:09 CONTRIBUTING.md +-rwxr-xr-x 1 user user 317736 Jul 13 00:09 convert_hf_to_gguf.py +-rwxr-xr-x 1 user user 21163 Jul 13 00:09 convert_hf_to_gguf_update.py +-rwxr-xr-x 1 user user 19106 Jul 13 00:09 convert_llama_ggml_to_gguf.py +-rwxr-xr-x 1 user user 18624 Jul 13 00:09 convert_lora_to_gguf.py +drwxr-xr-x 5 user user 4096 Jul 13 00:09 docs +drwxr-xr-x 29 user user 4096 Jul 13 00:09 examples +-rw-r--r-- 1 
user user 1556 Jul 13 00:09 flake.lock +-rw-r--r-- 1 user user 7465 Jul 13 00:09 flake.nix +drwxr-xr-x 5 user user 4096 Jul 13 00:09 ggml +drwxr-xr-x 5 user user 4096 Jul 13 00:09 gguf-py +drwxr-xr-x 2 user user 4096 Jul 13 00:09 grammars +drwxr-xr-x 2 user user 4096 Jul 13 00:09 include +-rw-r--r-- 1 user user 1078 Jul 13 00:09 LICENSE +drwxr-xr-x 2 user user 4096 Jul 13 00:09 licenses +drwxr-xr-x 2 user user 4096 Jul 13 00:09 llamacppos +-rw-r--r-- 1 user user 50442 Jul 13 00:09 Makefile +drwxr-xr-x 2 user user 4096 Jul 13 00:09 media +drwxr-xr-x 3 user user 4096 Jul 13 00:09 models +-rw-r--r-- 1 user user 163 Jul 13 00:09 mypy.ini +drwxr-xr-x 3 user user 4096 Jul 13 00:09 pocs +-rw-r--r-- 1 user user 124786 Jul 13 00:09 poetry.lock +drwxr-xr-x 2 user user 4096 Jul 13 00:09 prompts +-rw-r--r-- 1 user user 1336 Jul 13 00:09 pyproject.toml +-rw-r--r-- 1 user user 616 Jul 13 00:09 pyrightconfig.json +-rw-r--r-- 1 user user 29793 Jul 13 00:09 README.md +drwxr-xr-x 2 user user 4096 Jul 13 00:09 requirements +-rw-r--r-- 1 user user 551 Jul 13 00:09 requirements.txt +drwxr-xr-x 3 user user 4096 Jul 13 00:09 scripts +-rw-r--r-- 1 user user 5347 Jul 13 00:09 SECURITY.md +drwxr-xr-x 2 user user 4096 Jul 13 00:09 src +drwxr-xr-x 2 user user 4096 Jul 13 00:09 tests +drwxr-xr-x 18 user user 4096 Jul 13 00:09 tools +drwxr-xr-x 8 user user 4096 Jul 13 00:09 vendor +-rw-r--r-- 1 user user 1165 Jul 13 00:09 windows-compat-loop.cpp + +Continuing conversation with command result... +/home/user/llamacomp/llama.cpp/src/llama-context.cpp:919: GGML_ASSERT(n_tokens_all <= cparams.n_batch) failed +/home/user/llamacomp/llama.cpp/build/bin/libggml-base.so(+0x12ff6) [0x7fdfb47bdff6] +/home/user/llamacomp/llama.cpp/build/bin/libggml-base.so(ggml_print_backtrace+0x204) [0x7fdfb47be434] +/home/user/llamacomp/llama.cpp/build/bin/libggml-base.so(ggml_abort+0x130) [0x7fdfb47be5d0] +/home/user/llamacomp/llama.cpp/build/bin/libllama.so(_ZN13llama_context6decodeERK11llama_batch+0x14c3) [0x7fdfb4a16223] +/home/user/llamacomp/llama.cpp/build/bin/libllama.so(llama_decode+0xe) [0x7fdfb4a1632e] +./build/bin/simple-function-call(+0x35ba6) [0x557f2b789ba6] +/usr/lib/libc.so.6(+0x276b5) [0x7fdfb41126b5] +/usr/lib/libc.so.6(__libc_start_main+0x89) [0x7fdfb4112769] +./build/bin/simple-function-call(+0x37615) [0x557f2b78b615] +Aborted (core dumped) + + +# Check system information +./simple-function-call -m llama-2-7b.gguf -p "check the current time" # might produce some JSON +{ + "type": "function", + "function": { + "name": "shell_command", + "description": "Execute a shell command and return the output", + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The shell command to execute" + } + }, + "required": [ + "command" + ] + } + }, + "parameters": { + "command": "date && echo Current date and time: $(date +'%Y-%m-%d %H:%M:%S')" + } +} + +Response: +{ + "type": "function", + "function": { + "name": "shell_command", + "description": "Execute a shell command and return the output", + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The shell command to execute" + } + }, + "required": [ + "command" + ] + } + }, + "parameters": { # Pay attention here + "command": "date && echo Current date and time: $(date +'%Y-%m-%d %H:%M:%S')" + } # The command in tried to use was probably too complex for the actual tool +} +main: decoded 134 tokens in 5.85 s, speed: 22.89 t/s # A better parser bigger model should produce better results 
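
The schema echo above is a good illustration of what "a better parser" would have to cope with: the 1B model returns the whole tool definition plus a stray top-level `parameters` object instead of a proper call. Not part of this PR, but here is a minimal sketch of a more forgiving extractor, assuming the vendored nlohmann/json is available; `extract_shell_command` is a hypothetical helper and the include path may differ from the example's own `json.hpp`:

```cpp
// Illustrative sketch only (not part of simple-function-call.cpp):
// accept either {"name": ..., "arguments"/"parameters": {...}} or the
// schema-echo style {"type": "function", "function": {...}, "parameters": {...}}.
#include <optional>
#include <string>
#include <nlohmann/json.hpp> // vendored copy; the example itself includes "json.hpp"

using json = nlohmann::json;

// Hypothetical helper: pull a shell command out of whatever JSON the model produced.
static std::optional<std::string> extract_shell_command(const std::string & text) {
    const size_t start = text.find('{');
    const size_t end   = text.rfind('}');
    if (start == std::string::npos || end == std::string::npos || end <= start) {
        return std::nullopt;
    }

    json j;
    try {
        j = json::parse(text.substr(start, end - start + 1));
    } catch (const json::exception &) {
        return std::nullopt;
    }

    // the arguments may be called "arguments" or "parameters", at the top level ...
    for (const char * key : { "arguments", "parameters" }) {
        if (j.contains(key) && j[key].is_object() && j[key].contains("command")) {
            return j[key]["command"].get<std::string>();
        }
    }
    // ... or nested inside a "function" object
    if (j.contains("function") && j["function"].is_object()) {
        const json & f = j["function"];
        for (const char * key : { "arguments", "parameters" }) {
            if (f.contains(key) && f[key].is_object() && f[key].contains("command")) {
                return f[key]["command"].get<std::string>();
            }
        }
    }
    return std::nullopt;
}
```

With something along these lines, the `parameters.command` in the output above would still be recovered and executed even though the call is malformed.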
+ +# You'll have less of these types of problems but you can't still nudge the smaller ones, if you have some knowledge you can always tell it to run commands directly but it defeats the purpose of it a bit, it would be good with some Speech to Text system where you wouldn't have to type but just say what you want the computer to do +./simple-function-call -m llama-3.2-1b-instruct.gguf -p "Check the current date" # And it will run the date command + +{"type": "function", "name": "shell_command", "parameters": {"command": "date"}} + +Function calls detected: + Function: shell_command + Arguments: {"command":"date"} + Command: date + Result: +Sun Jul 13 11:24:16 PM CEST 2025 + +Continuing conversation with command result... +<|python_tag|>{"type": "function", "name": "shell_command", "parameters": {"command": "date"}} +main: decoded 45 tokens in 6.26 s, speed: 7.19 t/s + +# With confirmation ( For safety ) +./simple-function-call -m llama-3.2-1b-instruct.gguf -p "delete all .tmp files" --confirm + +# Using a specific chat template for better function calling +./simple-function-call -m qwen2.5-7b-instruct.gguf -p "list files in current directory" \ + --chat-template-file models/templates/Qwen-Qwen2.5-7B-Instruct.jinja + +# Using a model with native function calling support +./simple-function-call -m llama-3.1-8b-instruct.gguf -p "check disk usage" \ + --chat-template-file models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja +``` From a495196ebe9c399fe01755c2f3131e3bf75b6c58 Mon Sep 17 00:00:00 2001 From: K_log Televised - youtube Date: Mon, 14 Jul 2025 13:27:00 +0200 Subject: [PATCH 6/8] Update README.md --- examples/simple-function-call/README.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/examples/simple-function-call/README.md b/examples/simple-function-call/README.md index 7bf6151af636e..e1bc66b4fbe73 100644 --- a/examples/simple-function-call/README.md +++ b/examples/simple-function-call/README.md @@ -37,10 +37,9 @@ The LLM has access to a single tool: ``` ### Basic Usage -```bash +``` ./simple-function-call -m model.gguf -p "your request here" ``` - ### Command Line Arguments - `--jinja` Is enabled by default - `-m model.gguf` - Model file path (REQUIRED) @@ -53,8 +52,9 @@ The LLM has access to a single tool: ### Examples -```bash + # List files in current directory +``` ./simple-function-call -m llama-3.2-1b.gguf -p "list all files in this directory" # I've had success even with this small model but I think it has trouble solving complex tasks it can still generate proper JSON for execution {"name": "shell_command", "parameters": {"command": "ls -l"}} @@ -122,9 +122,11 @@ Continuing conversation with command result... 
/usr/lib/libc.so.6(__libc_start_main+0x89) [0x7fdfb4112769] ./build/bin/simple-function-call(+0x37615) [0x557f2b78b615] Aborted (core dumped) - +``` # Check system information + +``` ./simple-function-call -m llama-2-7b.gguf -p "check the current time" # might produce some JSON { "type": "function", @@ -173,8 +175,9 @@ Response: } # The command in tried to use was probably too complex for the actual tool } main: decoded 134 tokens in 5.85 s, speed: 22.89 t/s # A better parser bigger model should produce better results - -# You'll have less of these types of problems but you can't still nudge the smaller ones, if you have some knowledge you can always tell it to run commands directly but it defeats the purpose of it a bit, it would be good with some Speech to Text system where you wouldn't have to type but just say what you want the computer to do +``` +# You'll have less of these types of problems with bigger models but you can't still nudge the smaller ones, if you have some knowledge you can always tell it to run commands directly but it defeats the purpose of it a bit, it would be good with some Speech to Text system where you wouldn't have to type but just say what you want the computer to do +``` ./simple-function-call -m llama-3.2-1b-instruct.gguf -p "Check the current date" # And it will run the date command {"type": "function", "name": "shell_command", "parameters": {"command": "date"}} From 8291573bad0f397a110df20a65a820a5716e75f8 Mon Sep 17 00:00:00 2001 From: K_log Televised - youtube Date: Mon, 14 Jul 2025 13:33:16 +0200 Subject: [PATCH 7/8] Update README.md --- examples/simple-function-call/README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/examples/simple-function-call/README.md b/examples/simple-function-call/README.md index e1bc66b4fbe73..2fbbfba9c4f34 100644 --- a/examples/simple-function-call/README.md +++ b/examples/simple-function-call/README.md @@ -52,8 +52,6 @@ The LLM has access to a single tool: ### Examples - -# List files in current directory ``` ./simple-function-call -m llama-3.2-1b.gguf -p "list all files in this directory" # I've had success even with this small model but I think it has trouble solving complex tasks it can still generate proper JSON for execution @@ -127,7 +125,7 @@ Aborted (core dumped) # Check system information ``` -./simple-function-call -m llama-2-7b.gguf -p "check the current time" # might produce some JSON +./simple-function-call -m llama-3.2-1b.gguf -p "check the current time" # might produce some JSON { "type": "function", "function": { @@ -172,11 +170,12 @@ Response: }, "parameters": { # Pay attention here "command": "date && echo Current date and time: $(date +'%Y-%m-%d %H:%M:%S')" - } # The command in tried to use was probably too complex for the actual tool -} + } # The command it tried to use was probably too complex for the actual tool +} # A better parser or an LLM would've done a better job + main: decoded 134 tokens in 5.85 s, speed: 22.89 t/s # A better parser bigger model should produce better results ``` -# You'll have less of these types of problems with bigger models but you can't still nudge the smaller ones, if you have some knowledge you can always tell it to run commands directly but it defeats the purpose of it a bit, it would be good with some Speech to Text system where you wouldn't have to type but just say what you want the computer to do +### You'll have less of these types of problems with bigger models but you can't still nudge the smaller ones, if you have some knowledge 
you can always tell it to run commands directly but it defeats the purpose of it a bit, it would be good with some Speech to Text system where you wouldn't have to type but just say what you want the computer to do ``` ./simple-function-call -m llama-3.2-1b-instruct.gguf -p "Check the current date" # And it will run the date command From 102534160b565ddfb29a85ca8ba0c384a38b9617 Mon Sep 17 00:00:00 2001 From: K_log Televised - youtube Date: Mon, 14 Jul 2025 19:35:48 +0200 Subject: [PATCH 8/8] Cleaned up / improved / simplified the README and the implementation, added some comments --- examples/simple-function-call/README.md | 465 ++++++++++-------- .../simple-function-call.cpp | 218 ++++---- 2 files changed, 372 insertions(+), 311 deletions(-) diff --git a/examples/simple-function-call/README.md b/examples/simple-function-call/README.md index 2fbbfba9c4f34..46972492b915d 100644 --- a/examples/simple-function-call/README.md +++ b/examples/simple-function-call/README.md @@ -1,205 +1,260 @@ -# Simple Function Call Example - -A standalone executable that demonstrates function calling ***from scratch*** with llama.cpp, allowing natural language to shell command execution. - -## What This Is - -- Users input natural language requests -- The LLM generates appropriate shell commands using function calling -- Commands are executed and results returned -- The conversation continues with real command output - -## How It Works - -### Architecture -- **Standalone executable** - no server architecture needed -- **Direct function calling** within the same process -- **Real shell command execution** via `popen()` -- **JSON parsing** of LLM tool calls - -### Tool Schema -The LLM has access to a single tool: -```json -{ - "name": "shell_command", - "description": "Execute a shell command and return the output", - "parameters": { - "type": "object", - "properties": { - "command": { - "type": "string", - "description": "The shell command to execute" - } - }, - "required": ["command"] - } -} -``` - -### Basic Usage -``` -./simple-function-call -m model.gguf -p "your request here" -``` -### Command Line Arguments -- `--jinja` Is enabled by default -- `-m model.gguf` - Model file path (REQUIRED) -- `-p "prompt"` - User's request/command (REQUIRED) -- `--chat-template-file template.jinja` - Optional chat template override -- `--grammar "grammar"` - Optional grammar constraint -- `-ngl N` - Number of GPU layers -- `-n N` - Maximum number of tokens to generate -- `--confirm` - Ask for confirmation before executing commands - -### Examples - -``` -./simple-function-call -m llama-3.2-1b.gguf -p "list all files in this directory" # I've had success even with this small model but I think it has trouble solving complex tasks it can still generate proper JSON for execution - -{"name": "shell_command", "parameters": {"command": "ls -l"}} - -Function calls detected: - Function: shell_command - Arguments: {"command":"ls -l"} - Command: ls -l - Result: -total 812 --rw-r--r-- 1 user user 47860 Jul 13 00:09 AUTHORS -drwxr-xr-x 12 user user 4096 Jul 13 00:09 build --rwxr-xr-x 1 user user 21760 Jul 13 00:09 build-xcframework.sh -drwxr-xr-x 2 user user 4096 Jul 13 00:09 ci -drwxr-xr-x 2 user user 4096 Jul 13 00:09 cmake --rw-r--r-- 1 user user 7973 Jul 13 00:09 CMakeLists.txt --rw-r--r-- 1 user user 4008 Jul 13 00:09 CMakePresets.json --rw-r--r-- 1 user user 434 Jul 13 00:09 CODEOWNERS -drwxr-xr-x 2 user user 4096 Jul 13 00:09 common --rw-r--r-- 1 user user 6510 Jul 13 00:09 CONTRIBUTING.md --rwxr-xr-x 1 user user 317736 Jul 13 
00:09 convert_hf_to_gguf.py --rwxr-xr-x 1 user user 21163 Jul 13 00:09 convert_hf_to_gguf_update.py --rwxr-xr-x 1 user user 19106 Jul 13 00:09 convert_llama_ggml_to_gguf.py --rwxr-xr-x 1 user user 18624 Jul 13 00:09 convert_lora_to_gguf.py -drwxr-xr-x 5 user user 4096 Jul 13 00:09 docs -drwxr-xr-x 29 user user 4096 Jul 13 00:09 examples --rw-r--r-- 1 user user 1556 Jul 13 00:09 flake.lock --rw-r--r-- 1 user user 7465 Jul 13 00:09 flake.nix -drwxr-xr-x 5 user user 4096 Jul 13 00:09 ggml -drwxr-xr-x 5 user user 4096 Jul 13 00:09 gguf-py -drwxr-xr-x 2 user user 4096 Jul 13 00:09 grammars -drwxr-xr-x 2 user user 4096 Jul 13 00:09 include --rw-r--r-- 1 user user 1078 Jul 13 00:09 LICENSE -drwxr-xr-x 2 user user 4096 Jul 13 00:09 licenses -drwxr-xr-x 2 user user 4096 Jul 13 00:09 llamacppos --rw-r--r-- 1 user user 50442 Jul 13 00:09 Makefile -drwxr-xr-x 2 user user 4096 Jul 13 00:09 media -drwxr-xr-x 3 user user 4096 Jul 13 00:09 models --rw-r--r-- 1 user user 163 Jul 13 00:09 mypy.ini -drwxr-xr-x 3 user user 4096 Jul 13 00:09 pocs --rw-r--r-- 1 user user 124786 Jul 13 00:09 poetry.lock -drwxr-xr-x 2 user user 4096 Jul 13 00:09 prompts --rw-r--r-- 1 user user 1336 Jul 13 00:09 pyproject.toml --rw-r--r-- 1 user user 616 Jul 13 00:09 pyrightconfig.json --rw-r--r-- 1 user user 29793 Jul 13 00:09 README.md -drwxr-xr-x 2 user user 4096 Jul 13 00:09 requirements --rw-r--r-- 1 user user 551 Jul 13 00:09 requirements.txt -drwxr-xr-x 3 user user 4096 Jul 13 00:09 scripts --rw-r--r-- 1 user user 5347 Jul 13 00:09 SECURITY.md -drwxr-xr-x 2 user user 4096 Jul 13 00:09 src -drwxr-xr-x 2 user user 4096 Jul 13 00:09 tests -drwxr-xr-x 18 user user 4096 Jul 13 00:09 tools -drwxr-xr-x 8 user user 4096 Jul 13 00:09 vendor --rw-r--r-- 1 user user 1165 Jul 13 00:09 windows-compat-loop.cpp - -Continuing conversation with command result... 
-/home/user/llamacomp/llama.cpp/src/llama-context.cpp:919: GGML_ASSERT(n_tokens_all <= cparams.n_batch) failed -/home/user/llamacomp/llama.cpp/build/bin/libggml-base.so(+0x12ff6) [0x7fdfb47bdff6] -/home/user/llamacomp/llama.cpp/build/bin/libggml-base.so(ggml_print_backtrace+0x204) [0x7fdfb47be434] -/home/user/llamacomp/llama.cpp/build/bin/libggml-base.so(ggml_abort+0x130) [0x7fdfb47be5d0] -/home/user/llamacomp/llama.cpp/build/bin/libllama.so(_ZN13llama_context6decodeERK11llama_batch+0x14c3) [0x7fdfb4a16223] -/home/user/llamacomp/llama.cpp/build/bin/libllama.so(llama_decode+0xe) [0x7fdfb4a1632e] -./build/bin/simple-function-call(+0x35ba6) [0x557f2b789ba6] -/usr/lib/libc.so.6(+0x276b5) [0x7fdfb41126b5] -/usr/lib/libc.so.6(__libc_start_main+0x89) [0x7fdfb4112769] -./build/bin/simple-function-call(+0x37615) [0x557f2b78b615] -Aborted (core dumped) -``` - -# Check system information - -``` -./simple-function-call -m llama-3.2-1b.gguf -p "check the current time" # might produce some JSON -{ - "type": "function", - "function": { - "name": "shell_command", - "description": "Execute a shell command and return the output", - "parameters": { - "type": "object", - "properties": { - "command": { - "type": "string", - "description": "The shell command to execute" - } - }, - "required": [ - "command" - ] - } - }, - "parameters": { - "command": "date && echo Current date and time: $(date +'%Y-%m-%d %H:%M:%S')" - } -} - -Response: -{ - "type": "function", - "function": { - "name": "shell_command", - "description": "Execute a shell command and return the output", - "parameters": { - "type": "object", - "properties": { - "command": { - "type": "string", - "description": "The shell command to execute" - } - }, - "required": [ - "command" - ] - } - }, - "parameters": { # Pay attention here - "command": "date && echo Current date and time: $(date +'%Y-%m-%d %H:%M:%S')" - } # The command it tried to use was probably too complex for the actual tool -} # A better parser or an LLM would've done a better job - -main: decoded 134 tokens in 5.85 s, speed: 22.89 t/s # A better parser bigger model should produce better results -``` -### You'll have less of these types of problems with bigger models but you can't still nudge the smaller ones, if you have some knowledge you can always tell it to run commands directly but it defeats the purpose of it a bit, it would be good with some Speech to Text system where you wouldn't have to type but just say what you want the computer to do -``` -./simple-function-call -m llama-3.2-1b-instruct.gguf -p "Check the current date" # And it will run the date command - -{"type": "function", "name": "shell_command", "parameters": {"command": "date"}} - -Function calls detected: - Function: shell_command - Arguments: {"command":"date"} - Command: date - Result: -Sun Jul 13 11:24:16 PM CEST 2025 - -Continuing conversation with command result... 
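
The `GGML_ASSERT(n_tokens_all <= cparams.n_batch)` abort reproduced above is the likely reason the follow-up turn is being dropped in this patch: "Continuing conversation with command result..." re-renders the entire conversation (system prompt, user prompt, the model's reply and the full `ls -l` output) and hands it to a single `llama_decode()` call, which trips the assert as soon as that prompt exceeds `ctx_params.n_batch = 512` tokens. One way around it is to prefill long prompts in `n_batch`-sized chunks, sketched below; `decode_in_chunks` is a hypothetical helper, not code from this PR:

```cpp
// Illustrative sketch only: feed a (possibly long) prompt to the model in
// chunks of at most n_batch tokens instead of one oversized llama_decode() call.
#include <algorithm>
#include <cstdio>
#include <vector>

#include "llama.h"

// Hypothetical helper, not part of this PR.
static bool decode_in_chunks(llama_context * ctx, std::vector<llama_token> & tokens, int n_batch) {
    for (size_t i = 0; i < tokens.size(); i += (size_t) n_batch) {
        const int n_eval = (int) std::min<size_t>((size_t) n_batch, tokens.size() - i);
        llama_batch batch = llama_batch_get_one(tokens.data() + i, n_eval);
        if (llama_decode(ctx, batch) != 0) {
            fprintf(stderr, "llama_decode failed on chunk starting at token %zu\n", i);
            return false;
        }
    }
    return true;
}
```

Chunked or not, the re-rendered prompt still has to fit inside `ctx_params.n_ctx = 2048` total tokens, so very long command output would need truncation as well.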
-<|python_tag|>{"type": "function", "name": "shell_command", "parameters": {"command": "date"}} -main: decoded 45 tokens in 6.26 s, speed: 7.19 t/s - -# With confirmation ( For safety ) -./simple-function-call -m llama-3.2-1b-instruct.gguf -p "delete all .tmp files" --confirm - -# Using a specific chat template for better function calling -./simple-function-call -m qwen2.5-7b-instruct.gguf -p "list files in current directory" \ - --chat-template-file models/templates/Qwen-Qwen2.5-7B-Instruct.jinja - -# Using a model with native function calling support -./simple-function-call -m llama-3.1-8b-instruct.gguf -p "check disk usage" \ - --chat-template-file models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja -``` +# Simple Function Call Example + +A standalone executable that demonstrates function calling ***from scratch*** with llama.cpp, allowing natural language to shell command execution. + +## What This Is + +- Users input natural language requests +- The LLM generates appropriate shell commands using function calling +- Commands are executed and results returned +- The conversation continues with real command output + +## How It Works + +### Architecture +- **Standalone executable** - no server architecture needed +- **Direct function calling** within the same process +- **Real shell command execution** via `popen()` +- **JSON parsing** of LLM tool calls + +##### Tool Schema +The LLM has access to a single tool: +```json +{ + "name": "shell_command", + "description": "Execute a shell command and return the output", + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The shell command to execute" + } + }, + "required": ["command"] + } +} +``` + +### Basic Usage +``` +./simple-function-call -m model.gguf -p "your request here" +``` +### Command Line Arguments +- `--jinja` Is enabled by default +- `-m model.gguf` - Model file path (REQUIRED) +- `-p "prompt"` - User's request/command (REQUIRED) +- `--chat-template-file template.jinja` - Optional chat template override +- `--grammar "grammar"` - Optional grammar constraint +- `-ngl N` - Number of GPU layers +- `-n N` - Maximum number of tokens to generate +- `--confirm` - Ask for confirmation before executing commands + +### Examples + +``` +./build/bin/llama-simple-function-call -m ~/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf -p "Make a directory llamacppchaos" +Simple Function Call Example +Model: /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf +Prompt: Make a directory llamacppchaos +GPU layers: 99 +Max tokens: 256 + +llama_model_loader: loaded meta data with 36 key-value pairs and 147 tensors from /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf (version GGUF V3 (latest)) +... +... +... 
+llama_context: CPU compute buffer size = 254.50 MiB +llama_context: graph nodes = 582 +llama_context: graph splits = 1 +{"type": "function", "name": "shell_command", "parameters": {"command": "mkdir llamacppchaos"}} + +Function calls detected: + Function: shell_command + Arguments: {"command":"mkdir llamacppchaos"} + Command: mkdir llamacppchaos + Result: + +``` + +``` +./build/bin/llama-simple-function-call -m ~/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf -p "List all the files in the current directory" +Simple Function Call Example +Model: /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf +Prompt: List all the files in the current directory +GPU layers: 99 +Max tokens: 256 + +llama_model_loader: loaded meta data with 36 key-value pairs and 147 tensors from /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf (version GGUF V3 (latest)) +... +... +... +llama_context: CPU compute buffer size = 254.50 MiB +llama_context: graph nodes = 582 +llama_context: graph splits = 1 + +{"name": "shell_command", "parameters": {"command": "ls -l"}} + +Function calls detected: + Function: shell_command + Arguments: {"command":"ls -l"} + Command: ls -l + Result: +total 840 +-rw-r--r-- 1 user user 47860 Jul 14 19:12 AUTHORS +drwxr-xr-x 12 user user 4096 Jul 14 19:13 build +-rwxr-xr-x 1 user user 21760 Jul 14 19:12 build-xcframework.sh +drwxr-xr-x 2 user user 4096 Jul 14 19:12 ci +drwxr-xr-x 2 user user 4096 Jul 14 19:12 cmake +-rw-r--r-- 1 user user 7973 Jul 14 19:12 CMakeLists.txt +-rw-r--r-- 1 user user 4570 Jul 14 19:12 CMakePresets.json +-rw-r--r-- 1 user user 434 Jul 14 19:12 CODEOWNERS +drwxr-xr-x 2 user user 4096 Jul 14 19:12 common +-rw-r--r-- 1 user user 6510 Jul 14 19:12 CONTRIBUTING.md +-rwxr-xr-x 1 user user 344837 Jul 14 19:12 convert_hf_to_gguf.py +-rwxr-xr-x 1 user user 22622 Jul 14 19:12 convert_hf_to_gguf_update.py +-rwxr-xr-x 1 user user 19106 Jul 14 19:12 convert_llama_ggml_to_gguf.py +-rwxr-xr-x 1 user user 18624 Jul 14 19:12 convert_lora_to_gguf.py +drwxr-xr-x 6 user user 4096 Jul 14 19:12 docs +drwxr-xr-x 29 user user 4096 Jul 14 19:12 examples +-rw-r--r-- 1 user user 1556 Jul 14 19:12 flake.lock +-rw-r--r-- 1 user user 7465 Jul 14 19:12 flake.nix +drwxr-xr-x 5 user user 4096 Jul 14 19:12 ggml +drwxr-xr-x 5 user user 4096 Jul 14 19:12 gguf-py +drwxr-xr-x 2 user user 4096 Jul 14 19:12 grammars +drwxr-xr-x 2 user user 4096 Jul 14 19:12 include +-rw-r--r-- 1 user user 1078 Jul 14 19:12 LICENSE +drwxr-xr-x 2 user user 4096 Jul 14 19:12 licenses +drwxr-xr-x 2 user user 4096 Jul 14 19:15 llamacppchaos +-rw-r--r-- 1 user user 50442 Jul 14 19:12 Makefile +drwxr-xr-x 2 user user 4096 Jul 14 19:12 media +drwxr-xr-x 3 user user 4096 Jul 14 19:12 models +-rw-r--r-- 1 user user 163 Jul 14 19:12 mypy.ini +drwxr-xr-x 3 user user 4096 Jul 14 19:12 pocs +-rw-r--r-- 1 user user 124786 Jul 14 19:12 poetry.lock +drwxr-xr-x 2 user user 4096 Jul 14 19:12 prompts +-rw-r--r-- 1 user user 1336 Jul 14 19:12 pyproject.toml +-rw-r--r-- 1 user user 616 Jul 14 19:12 pyrightconfig.json +-rw-r--r-- 1 user user 29598 Jul 14 19:12 README.md +drwxr-xr-x 2 user user 4096 Jul 14 19:12 requirements +-rw-r--r-- 1 user user 551 Jul 14 19:12 requirements.txt +drwxr-xr-x 3 user user 4096 Jul 14 19:12 scripts +-rw-r--r-- 1 user user 5347 Jul 14 19:12 SECURITY.md +drwxr-xr-x 2 user user 4096 Jul 14 19:12 src +drwxr-xr-x 2 user user 4096 Jul 14 19:12 tests +drwxr-xr-x 17 user user 4096 Jul 14 19:12 tools +drwxr-xr-x 7 user user 4096 Jul 14 19:12 vendor +``` + +### Where it might fail + +``` + 
./build/bin/llama-simple-function-call -m ~/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf -p "Check the time" +Simple Function Call Example +Model: /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf +Prompt: Check the time +GPU layers: 99 +Max tokens: 256 + +llama_model_loader: loaded meta data with 36 key-value pairs and 147 tensors from /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf (version GGUF V3 (latest)) + +... +... +... +llama_context: CPU compute buffer size = 254.50 MiB +llama_context: graph nodes = 582 +llama_context: graph splits = 1 +{ + "type": "function", + "function": { + "name": "shell_command", + "description": "Check the time", + "parameters": { + "type": "string", + "description": "The shell command to execute" + } + }, + "result": { + "type": "object", + "value": { + "time": "Current time" + } + } +} + +Response: { + "type": "function", + "function": { + "name": "shell_command", + "description": "Check the time", + "parameters": { + "type": "string", + "description": "The shell command to execute" + } + }, + "result": { + "type": "object", + "value": { + "time": "Current time" + } + } +} +``` +### You'll have less of these types of problems with bigger models but you can't still nudge the smaller ones, if you have some knowledge you can always tell it to run commands directly but it defeats the purpose of it a bit, it would be good with some Speech to Text system where you wouldn't have to type but just say what you want the computer to do +``` +./build/bin/llama-simple-function-call -m ~/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf -p "Check the current date" +Simple Function Call Example +Model: /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf +Prompt: Check the current date +GPU layers: 99 +Max tokens: 256 + +llama_model_loader: loaded meta data with 36 key-value pairs and 147 tensors from /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf (version GGUF V3 (latest)) +... +... +... +llama_context: CPU compute buffer size = 254.50 MiB +llama_context: graph nodes = 582 +llama_context: graph splits = 1 +{"type": "function", "name": "shell_command", "parameters": {"command": "date"}} + +Function calls detected: + Function: shell_command + Arguments: {"command":"date"} + Command: date + Result: +Mon Jul 14 07:26:14 PM CEST 2025 +``` +# With confirmation ( For safety ) +``` +./build/bin/llama-simple-function-call -m ~/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf -p "Remove directory llamacppchaos" --confirm +Simple Function Call Example +Model: /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf +Prompt: Remove directory llamacppchaos +GPU layers: 99 +Max tokens: 256 +Command confirmation: enabled + +llama_model_loader: loaded meta data with 36 key-value pairs and 147 tensors from /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf (version GGUF V3 (latest)) +... +... +... +llama_context: CPU compute buffer size = 254.50 MiB +llama_context: graph nodes = 582 +llama_context: graph splits = 1 +{"type": "function", "name": "shell_command", "parameters": {"command": "rm -rf llamacppchaos"}} + +Function calls detected: + Function: shell_command + Arguments: {"command":"rm -rf llamacppchaos"} + Command: rm -rf llamacppchaos + Execute this command? 
(y/N): y + Result: + +``` + +### Using a specific chat template for better function calling +``` +./simple-function-call -m qwen2.5-7b-instruct.gguf -p "list files in current directory" \ + --chat-template-file models/templates/Qwen-Qwen2.5-7B-Instruct.jinja +``` diff --git a/examples/simple-function-call/simple-function-call.cpp b/examples/simple-function-call/simple-function-call.cpp index 575be6741248c..090abf41579bf 100644 --- a/examples/simple-function-call/simple-function-call.cpp +++ b/examples/simple-function-call/simple-function-call.cpp @@ -17,6 +17,10 @@ using json = nlohmann::json; // Forward declaration std::string execute_shell_command(const std::string& command); +//============================================================================= +// HELP/USAGE SECTION +//============================================================================= + static void print_usage(int argc, char ** argv) { (void)argc; // Suppress unused parameter warning (void)argv; // Suppress unused parameter warning @@ -24,26 +28,40 @@ static void print_usage(int argc, char ** argv) { printf("\n"); } +//============================================================================= +// SHELL COMMAND EXECUTION SECTION +//============================================================================= + // Real function to execute shell commands +// Uses popen() to run system commands and capture their output +// Returns the command output as a string std::string execute_shell_command(const std::string& command) { std::array buffer; std::string result; - + // Use popen to execute the command std::unique_ptr pipe(popen(command.c_str(), "r"), pclose); if (!pipe) { return "Error: Failed to execute command"; } - + // Read the output while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { result += buffer.data(); } - + return result; } +//============================================================================= +// MAIN PROGRAM ENTRY POINT +//============================================================================= + int main(int argc, char ** argv) { + //========================================================================= + // CONFIGURATION VARIABLES SECTION + //========================================================================= + // path to the model gguf file std::string model_path; // prompt to generate text from @@ -59,10 +77,15 @@ int main(int argc, char ** argv) { // confirmation flag bool confirm_commands = false; + //========================================================================= + // COMMAND LINE ARGUMENT PARSING SECTION + //========================================================================= + // parse command line arguments { int i = 1; for (; i < argc; i++) { + // Model file path argument if (strcmp(argv[i], "-m") == 0) { if (i + 1 < argc) { model_path = argv[++i]; @@ -70,14 +93,18 @@ int main(int argc, char ** argv) { print_usage(argc, argv); return 1; } - } else if (strcmp(argv[i], "-p") == 0) { + } + // Prompt argument + else if (strcmp(argv[i], "-p") == 0) { if (i + 1 < argc) { prompt = argv[++i]; } else { print_usage(argc, argv); return 1; } - } else if (strcmp(argv[i], "-n") == 0) { + } + // Number of tokens to predict argument + else if (strcmp(argv[i], "-n") == 0) { if (i + 1 < argc) { try { n_predict = std::stoi(argv[++i]); @@ -89,7 +116,9 @@ int main(int argc, char ** argv) { print_usage(argc, argv); return 1; } - } else if (strcmp(argv[i], "-ngl") == 0) { + } + // GPU layers argument + else if (strcmp(argv[i], "-ngl") == 0) { if (i + 1 < 
argc) { try { ngl = std::stoi(argv[++i]); @@ -101,38 +130,49 @@ int main(int argc, char ** argv) { print_usage(argc, argv); return 1; } - } else if (strcmp(argv[i], "--chat-template-file") == 0) { + } + // Chat template file argument + else if (strcmp(argv[i], "--chat-template-file") == 0) { if (i + 1 < argc) { chat_template_file = argv[++i]; } else { print_usage(argc, argv); return 1; } - } else if (strcmp(argv[i], "--grammar") == 0) { + } + // Grammar constraint argument + else if (strcmp(argv[i], "--grammar") == 0) { if (i + 1 < argc) { grammar = argv[++i]; } else { print_usage(argc, argv); return 1; } - } else if (strcmp(argv[i], "--confirm") == 0) { + } + // Command confirmation flag + else if (strcmp(argv[i], "--confirm") == 0) { confirm_commands = true; - } else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { + } + // Help argument + else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { print_usage(argc, argv); return 0; - } else { + } + // Unknown argument + else { fprintf(stderr, "Unknown argument: %s\n", argv[i]); print_usage(argc, argv); return 1; } } - + + // Validate required arguments if (model_path.empty()) { fprintf(stderr, "Error: Model file (-m) is required\n"); print_usage(argc, argv); return 1; } - + if (prompt.empty()) { fprintf(stderr, "Error: Prompt (-p) is required\n"); print_usage(argc, argv); @@ -140,6 +180,10 @@ int main(int argc, char ** argv) { } } + //========================================================================= + // CONFIGURATION DISPLAY SECTION + //========================================================================= + printf("Simple Function Call Example\n"); printf("Model: %s\n", model_path.c_str()); printf("Prompt: %s\n", prompt.c_str()); @@ -156,6 +200,10 @@ int main(int argc, char ** argv) { } printf("\n"); + //========================================================================= + // LLAMA.CPP MODEL INITIALIZATION SECTION + //========================================================================= + // load dynamic backends ggml_backend_load_all(); @@ -170,8 +218,13 @@ int main(int argc, char ** argv) { return 1; } + // Get vocabulary from the model const llama_vocab * vocab = llama_model_get_vocab(model); + //========================================================================= + // LLAMA.CPP CONTEXT INITIALIZATION SECTION + //========================================================================= + // initialize the context llama_context_params ctx_params = llama_context_default_params(); // n_ctx is the context size @@ -188,10 +241,15 @@ int main(int argc, char ** argv) { return 1; } + //========================================================================= + // FUNCTION CALLING SYSTEM SETUP SECTION + //========================================================================= + // Initialize chat templates for function calling common_chat_templates_ptr chat_templates = common_chat_templates_init(model, chat_template_file); // Define available functions/tools - single shell command tool + // This defines what functions the LLM can call and their parameters std::vector tools = { { "shell_command", @@ -209,7 +267,12 @@ int main(int argc, char ** argv) { } }; + //========================================================================= + // CHAT MESSAGE INITIALIZATION SECTION + //========================================================================= + // Create chat messages + // This sets up the initial conversation context std::vector messages = { { "system", @@ -231,6 +294,10 @@ 
int main(int argc, char ** argv) { } }; + //========================================================================= + // CHAT TEMPLATE APPLICATION SECTION + //========================================================================= + // Set up chat template inputs with tools common_chat_templates_inputs inputs; inputs.messages = messages; @@ -242,6 +309,10 @@ int main(int argc, char ** argv) { // Apply chat template auto chat_params = common_chat_templates_apply(chat_templates.get(), inputs); + //========================================================================= + // PROMPT TOKENIZATION SECTION + //========================================================================= + // Tokenize the prompt const int n_prompt = -llama_tokenize(vocab, chat_params.prompt.c_str(), chat_params.prompt.size(), NULL, 0, true, true); @@ -255,6 +326,10 @@ int main(int argc, char ** argv) { // prepare a batch for the prompt llama_batch batch = llama_batch_get_one(prompt_tokens.data(), prompt_tokens.size()); + //========================================================================= + // SAMPLER INITIALIZATION SECTION + //========================================================================= + // initialize the sampler auto sparams = llama_sampler_chain_default_params(); sparams.no_perf = false; @@ -262,12 +337,16 @@ int main(int argc, char ** argv) { llama_sampler_chain_add(smpl, llama_sampler_init_greedy()); + //========================================================================= + // MAIN GENERATION LOOP SECTION + //========================================================================= + // main loop - const auto t_main_start = ggml_time_us(); int n_decode = 0; llama_token new_token_id; std::string response_text; + // Main text generation loop - processes tokens one by one for (int n_pos = 0; n_pos + batch.n_tokens < n_prompt + n_predict; ) { // evaluate the current batch with the transformer model if (llama_decode(ctx, batch)) { @@ -286,6 +365,7 @@ int main(int argc, char ** argv) { break; } + // Convert token to text and display it char buf[128]; int n = llama_token_to_piece(vocab, new_token_id, buf, sizeof(buf), 0, true); if (n < 0) { @@ -306,29 +386,37 @@ int main(int argc, char ** argv) { printf("\n\n"); + //========================================================================= + // FUNCTION CALL PARSING SECTION + //========================================================================= + // Parse the response to check for function calls common_chat_syntax syntax; syntax.format = chat_params.format; syntax.parse_tool_calls = true; - + common_chat_msg parsed_response = common_chat_parse(response_text, false, syntax); + //========================================================================= + // FUNCTION CALL EXECUTION SECTION + //========================================================================= + // Handle function calls if any if (!parsed_response.tool_calls.empty()) { printf("Function calls detected:\n"); for (const auto& tool_call : parsed_response.tool_calls) { printf(" Function: %s\n", tool_call.name.c_str()); printf(" Arguments: %s\n", tool_call.arguments.c_str()); - + // Execute the function if (tool_call.name == "shell_command") { try { // Parse JSON arguments json args = json::parse(tool_call.arguments); std::string command = args["command"]; - + printf(" Command: %s\n", command.c_str()); - + // Ask for confirmation if enabled if (confirm_commands) { printf(" Execute this command? 
(y/N): "); @@ -339,88 +427,11 @@ int main(int argc, char ** argv) { continue; } } - + // Execute the command std::string result = execute_shell_command(command); printf(" Result:\n%s", result.c_str()); - - // Add the result to the conversation and continue - messages.push_back({ - "assistant", - response_text, - {}, // content_parts - {}, // tool_calls - "", // reasoning_content - "", // tool_name - "" // tool_call_id - }); - messages.push_back({ - "tool", - result, - {}, // content_parts - {}, // tool_calls - "", // reasoning_content - "", // tool_name - tool_call.id - }); - - // Continue the conversation with the result - printf("\nContinuing conversation with command result...\n"); - - // Set up new chat template inputs - common_chat_templates_inputs new_inputs; - new_inputs.messages = messages; - new_inputs.tools = tools; - new_inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO; - new_inputs.add_generation_prompt = true; - new_inputs.use_jinja = true; - - // Apply chat template for continuation - auto new_chat_params = common_chat_templates_apply(chat_templates.get(), new_inputs); - - // Tokenize the new prompt - const int n_new_prompt = -llama_tokenize(vocab, new_chat_params.prompt.c_str(), new_chat_params.prompt.size(), NULL, 0, true, true); - std::vector new_prompt_tokens(n_new_prompt); - if (llama_tokenize(vocab, new_chat_params.prompt.c_str(), new_chat_params.prompt.size(), new_prompt_tokens.data(), new_prompt_tokens.size(), true, true) < 0) { - fprintf(stderr, "%s: error: failed to tokenize the continuation prompt\n", __func__); - return 1; - } - - // Continue generation - batch = llama_batch_get_one(new_prompt_tokens.data(), new_prompt_tokens.size()); - std::string continuation_text; - - for (int n_pos = 0; n_pos + batch.n_tokens < n_new_prompt + n_predict; ) { - if (llama_decode(ctx, batch)) { - fprintf(stderr, "%s : failed to eval continuation, return code %d\n", __func__, 1); - return 1; - } - - n_pos += batch.n_tokens; - - new_token_id = llama_sampler_sample(smpl, ctx, -1); - - if (llama_vocab_is_eog(vocab, new_token_id)) { - break; - } - - char buf[128]; - int n = llama_token_to_piece(vocab, new_token_id, buf, sizeof(buf), 0, true); - if (n < 0) { - fprintf(stderr, "%s: error: failed to convert token to piece\n", __func__); - return 1; - } - std::string s(buf, n); - continuation_text += s; - printf("%s", s.c_str()); - fflush(stdout); - - batch = llama_batch_get_one(&new_token_id, 1); - n_decode += 1; - } - - printf("\n"); - + } catch (const std::exception& e) { printf(" Error parsing arguments: %s\n", e.what()); } @@ -430,16 +441,11 @@ int main(int argc, char ** argv) { printf("Response: %s\n", parsed_response.content.c_str()); } - const auto t_main_end = ggml_time_us(); - - fprintf(stderr, "%s: decoded %d tokens in %.2f s, speed: %.2f t/s\n", - __func__, n_decode, (t_main_end - t_main_start) / 1000000.0f, n_decode / ((t_main_end - t_main_start) / 1000000.0f)); - - fprintf(stderr, "\n"); - llama_perf_sampler_print(smpl); - llama_perf_context_print(ctx); - fprintf(stderr, "\n"); + //========================================================================= + // CLEANUP SECTION + //========================================================================= + // Clean up resources llama_sampler_free(smpl); llama_free(ctx); llama_model_free(model);
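
In the code listings of this patch series the contents of angle brackets (header names and template arguments) have been stripped, e.g. `#include` with no header name, `std::array buffer;`, `std::unique_ptr pipe(...)` and `std::vector tools;`. The element types can be inferred from usage (`std::vector<common_chat_tool>`, `std::vector<common_chat_msg>`, `std::vector<llama_token>`), and `execute_shell_command()` most plausibly reads as follows; the exact headers and the buffer size are assumptions, not the PR's verbatim code:

```cpp
// Reconstruction sketch of execute_shell_command() with the stripped
// angle-bracket contents filled back in (headers and buffer size assumed).
#include <array>
#include <cstdio>
#include <memory>
#include <string>

std::string execute_shell_command(const std::string & command) {
    std::array<char, 256> buffer{};
    std::string result;

    // popen() runs the command through the shell and exposes its stdout as a FILE*;
    // the unique_ptr closes the pipe with pclose() even on early return.
    std::unique_ptr<FILE, decltype(&pclose)> pipe(popen(command.c_str(), "r"), pclose);
    if (!pipe) {
        return "Error: Failed to execute command";
    }

    // Read the command output line by line into the result string.
    while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) {
        result += buffer.data();
    }

    return result;
}
```

Since `popen()` hands the command string to the shell, anything the model emits runs with the user's privileges, which is exactly what the `--confirm` flag is meant to guard against.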