From 9d755023acd09bcf2aef10d65c3a2673802bd7c1 Mon Sep 17 00:00:00 2001 From: K_log Televised - youtube Date: Mon, 14 Jul 2025 00:58:18 +0200 Subject: [PATCH 1/8] Add the examples directory for function calling --- examples/simple-function-call/README.md | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 examples/simple-function-call/README.md diff --git a/examples/simple-function-call/README.md b/examples/simple-function-call/README.md new file mode 100644 index 0000000000000..e69de29bb2d1d From 52767e4c3e90f1a8fdad656dca491e75a021be99 Mon Sep 17 00:00:00 2001 From: K_log Televised - youtube Date: Mon, 14 Jul 2025 03:09:20 +0200 Subject: [PATCH 2/8] Added the llama-simple-function-call.cpp, README.md AND edited the CMakeLists.txts --- examples/CMakeLists.txt | 1 + examples/simple-function-call/CMakeLists.txt | 14 + .../llama-simple-function-call.cpp | 448 ++++++++++++++++++ 3 files changed, 463 insertions(+) create mode 100644 examples/simple-function-call/CMakeLists.txt create mode 100644 examples/simple-function-call/llama-simple-function-call.cpp diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 49e4d2cf8c198..c969103ce704c 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -29,6 +29,7 @@ else() add_subdirectory(save-load-state) add_subdirectory(simple) add_subdirectory(simple-chat) + add_subdirectory(simple-function-call) add_subdirectory(speculative) add_subdirectory(speculative-simple) add_subdirectory(gen-docs) diff --git a/examples/simple-function-call/CMakeLists.txt b/examples/simple-function-call/CMakeLists.txt new file mode 100644 index 0000000000000..15aa09c2a6f32 --- /dev/null +++ b/examples/simple-function-call/CMakeLists.txt @@ -0,0 +1,14 @@ +set(TARGET simple-function-call) + +include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) + +add_executable(${TARGET} llama-simple-function-call.cpp) +install(TARGETS ${TARGET} RUNTIME) +target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_17) + +# Add include directories +target_include_directories(${TARGET} PRIVATE ../llava) +target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}) +target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}/vendor) +target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}/vendor/nlohmann) \ No newline at end of file diff --git a/examples/simple-function-call/llama-simple-function-call.cpp b/examples/simple-function-call/llama-simple-function-call.cpp new file mode 100644 index 0000000000000..d9eecdc39fa44 --- /dev/null +++ b/examples/simple-function-call/llama-simple-function-call.cpp @@ -0,0 +1,448 @@ +#include "llama.h" +#include "chat.h" +#include "common.h" +#include "sampling.h" +#include "json.hpp" +#include +#include +#include +#include +#include +#include +#include +#include + +using json = nlohmann::json; + +// Forward declaration +std::string execute_shell_command(const std::string& command); + +static void print_usage(int argc, char ** argv) { + (void)argc; // Suppress unused parameter warning + (void)argv; // Suppress unused parameter warning + printf("\nSimple Function Call Example - Real Shell Command Execution\n"); + printf("\n"); +} + +// Real function to execute shell commands +std::string execute_shell_command(const std::string& command) { + std::array buffer; + std::string result; + + // Use popen to execute the command + std::unique_ptr pipe(popen(command.c_str(), "r"), pclose); + if (!pipe) { + return 
"Error: Failed to execute command"; + } + + // Read the output + while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { + result += buffer.data(); + } + + return result; +} + +int main(int argc, char ** argv) { + // path to the model gguf file + std::string model_path; + // prompt to generate text from + std::string prompt; + // number of layers to offload to the GPU + int ngl = 99; + // number of tokens to predict + int n_predict = 256; + // chat template file + std::string chat_template_file; + // grammar constraint + std::string grammar; + // confirmation flag + bool confirm_commands = false; + + // parse command line arguments + { + int i = 1; + for (; i < argc; i++) { + if (strcmp(argv[i], "-m") == 0) { + if (i + 1 < argc) { + model_path = argv[++i]; + } else { + print_usage(argc, argv); + return 1; + } + } else if (strcmp(argv[i], "-p") == 0) { + if (i + 1 < argc) { + prompt = argv[++i]; + } else { + print_usage(argc, argv); + return 1; + } + } else if (strcmp(argv[i], "-n") == 0) { + if (i + 1 < argc) { + try { + n_predict = std::stoi(argv[++i]); + } catch (...) { + print_usage(argc, argv); + return 1; + } + } else { + print_usage(argc, argv); + return 1; + } + } else if (strcmp(argv[i], "-ngl") == 0) { + if (i + 1 < argc) { + try { + ngl = std::stoi(argv[++i]); + } catch (...) { + print_usage(argc, argv); + return 1; + } + } else { + print_usage(argc, argv); + return 1; + } + } else if (strcmp(argv[i], "--chat-template-file") == 0) { + if (i + 1 < argc) { + chat_template_file = argv[++i]; + } else { + print_usage(argc, argv); + return 1; + } + } else if (strcmp(argv[i], "--grammar") == 0) { + if (i + 1 < argc) { + grammar = argv[++i]; + } else { + print_usage(argc, argv); + return 1; + } + } else if (strcmp(argv[i], "--confirm") == 0) { + confirm_commands = true; + } else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { + print_usage(argc, argv); + return 0; + } else { + fprintf(stderr, "Unknown argument: %s\n", argv[i]); + print_usage(argc, argv); + return 1; + } + } + + if (model_path.empty()) { + fprintf(stderr, "Error: Model file (-m) is required\n"); + print_usage(argc, argv); + return 1; + } + + if (prompt.empty()) { + fprintf(stderr, "Error: Prompt (-p) is required\n"); + print_usage(argc, argv); + return 1; + } + } + + printf("Simple Function Call Example\n"); + printf("Model: %s\n", model_path.c_str()); + printf("Prompt: %s\n", prompt.c_str()); + printf("GPU layers: %d\n", ngl); + printf("Max tokens: %d\n", n_predict); + if (!chat_template_file.empty()) { + printf("Chat template: %s\n", chat_template_file.c_str()); + } + if (!grammar.empty()) { + printf("Grammar: %s\n", grammar.c_str()); + } + if (confirm_commands) { + printf("Command confirmation: enabled\n"); + } + printf("\n"); + + // load dynamic backends + ggml_backend_load_all(); + + // initialize the model + llama_model_params model_params = llama_model_default_params(); + model_params.n_gpu_layers = ngl; + + llama_model * model = llama_model_load_from_file(model_path.c_str(), model_params); + + if (model == NULL) { + fprintf(stderr, "%s: error: unable to load model\n", __func__); + return 1; + } + + const llama_vocab * vocab = llama_model_get_vocab(model); + + // initialize the context + llama_context_params ctx_params = llama_context_default_params(); + // n_ctx is the context size + ctx_params.n_ctx = 2048; + // n_batch is the maximum number of tokens that can be processed in a single call to llama_decode + ctx_params.n_batch = 512; + // enable performance counters + 
ctx_params.no_perf = false; + + llama_context * ctx = llama_init_from_model(model, ctx_params); + + if (ctx == NULL) { + fprintf(stderr, "%s: error: failed to create the llama_context\n", __func__); + return 1; + } + + // Initialize chat templates for function calling + common_chat_templates_ptr chat_templates = common_chat_templates_init(model, chat_template_file); + + // Define available functions/tools - single shell command tool + std::vector tools = { + { + "shell_command", + "Execute a shell command and return the output", + R"({ + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The shell command to execute" + } + }, + "required": ["command"] + })" + } + }; + + // Create chat messages + std::vector messages = { + { + "system", + "You are a helpful assistant that can execute shell commands. When the user asks for something that requires a command, generate and execute the appropriate shell command. Be careful and only execute safe commands.", + {}, // content_parts + {}, // tool_calls + "", // reasoning_content + "", // tool_name + "" // tool_call_id + }, + { + "user", + prompt, + {}, // content_parts + {}, // tool_calls + "", // reasoning_content + "", // tool_name + "" // tool_call_id + } + }; + + // Set up chat template inputs with tools + common_chat_templates_inputs inputs; + inputs.messages = messages; + inputs.tools = tools; + inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO; + inputs.add_generation_prompt = true; + inputs.use_jinja = true; + + // Apply chat template + auto chat_params = common_chat_templates_apply(chat_templates.get(), inputs); + + // Tokenize the prompt + const int n_prompt = -llama_tokenize(vocab, chat_params.prompt.c_str(), chat_params.prompt.size(), NULL, 0, true, true); + + // allocate space for the tokens and tokenize the prompt + std::vector prompt_tokens(n_prompt); + if (llama_tokenize(vocab, chat_params.prompt.c_str(), chat_params.prompt.size(), prompt_tokens.data(), prompt_tokens.size(), true, true) < 0) { + fprintf(stderr, "%s: error: failed to tokenize the prompt\n", __func__); + return 1; + } + + // prepare a batch for the prompt + llama_batch batch = llama_batch_get_one(prompt_tokens.data(), prompt_tokens.size()); + + // initialize the sampler + auto sparams = llama_sampler_chain_default_params(); + sparams.no_perf = false; + llama_sampler * smpl = llama_sampler_chain_init(sparams); + + llama_sampler_chain_add(smpl, llama_sampler_init_greedy()); + + // main loop + const auto t_main_start = ggml_time_us(); + int n_decode = 0; + llama_token new_token_id; + std::string response_text; + + for (int n_pos = 0; n_pos + batch.n_tokens < n_prompt + n_predict; ) { + // evaluate the current batch with the transformer model + if (llama_decode(ctx, batch)) { + fprintf(stderr, "%s : failed to eval, return code %d\n", __func__, 1); + return 1; + } + + n_pos += batch.n_tokens; + + // sample the next token + { + new_token_id = llama_sampler_sample(smpl, ctx, -1); + + // is it an end of generation? 
+ if (llama_vocab_is_eog(vocab, new_token_id)) { + break; + } + + char buf[128]; + int n = llama_token_to_piece(vocab, new_token_id, buf, sizeof(buf), 0, true); + if (n < 0) { + fprintf(stderr, "%s: error: failed to convert token to piece\n", __func__); + return 1; + } + std::string s(buf, n); + response_text += s; + printf("%s", s.c_str()); + fflush(stdout); + + // prepare the next batch with the sampled token + batch = llama_batch_get_one(&new_token_id, 1); + + n_decode += 1; + } + } + + printf("\n\n"); + + // Parse the response to check for function calls + common_chat_syntax syntax; + syntax.format = chat_params.format; + syntax.parse_tool_calls = true; + + common_chat_msg parsed_response = common_chat_parse(response_text, false, syntax); + + // Handle function calls if any + if (!parsed_response.tool_calls.empty()) { + printf("Function calls detected:\n"); + for (const auto& tool_call : parsed_response.tool_calls) { + printf(" Function: %s\n", tool_call.name.c_str()); + printf(" Arguments: %s\n", tool_call.arguments.c_str()); + + // Execute the function + if (tool_call.name == "shell_command") { + try { + // Parse JSON arguments + json args = json::parse(tool_call.arguments); + std::string command = args["command"]; + + printf(" Command: %s\n", command.c_str()); + + // Ask for confirmation if enabled + if (confirm_commands) { + printf(" Execute this command? (y/N): "); + std::string response; + std::getline(std::cin, response); + if (response != "y" && response != "Y") { + printf(" Command execution cancelled.\n"); + continue; + } + } + + // Execute the command + std::string result = execute_shell_command(command); + printf(" Result:\n%s", result.c_str()); + + // Add the result to the conversation and continue + messages.push_back({ + "assistant", + response_text, + {}, // content_parts + {}, // tool_calls + "", // reasoning_content + "", // tool_name + "" // tool_call_id + }); + messages.push_back({ + "tool", + result, + {}, // content_parts + {}, // tool_calls + "", // reasoning_content + "", // tool_name + tool_call.id + }); + + // Continue the conversation with the result + printf("\nContinuing conversation with command result...\n"); + + // Set up new chat template inputs + common_chat_templates_inputs new_inputs; + new_inputs.messages = messages; + new_inputs.tools = tools; + new_inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO; + new_inputs.add_generation_prompt = true; + new_inputs.use_jinja = true; + + // Apply chat template for continuation + auto new_chat_params = common_chat_templates_apply(chat_templates.get(), new_inputs); + + // Tokenize the new prompt + const int n_new_prompt = -llama_tokenize(vocab, new_chat_params.prompt.c_str(), new_chat_params.prompt.size(), NULL, 0, true, true); + std::vector new_prompt_tokens(n_new_prompt); + if (llama_tokenize(vocab, new_chat_params.prompt.c_str(), new_chat_params.prompt.size(), new_prompt_tokens.data(), new_prompt_tokens.size(), true, true) < 0) { + fprintf(stderr, "%s: error: failed to tokenize the continuation prompt\n", __func__); + return 1; + } + + // Continue generation + batch = llama_batch_get_one(new_prompt_tokens.data(), new_prompt_tokens.size()); + std::string continuation_text; + + for (int n_pos = 0; n_pos + batch.n_tokens < n_new_prompt + n_predict; ) { + if (llama_decode(ctx, batch)) { + fprintf(stderr, "%s : failed to eval continuation, return code %d\n", __func__, 1); + return 1; + } + + n_pos += batch.n_tokens; + + new_token_id = llama_sampler_sample(smpl, ctx, -1); + + if (llama_vocab_is_eog(vocab, 
new_token_id)) { + break; + } + + char buf[128]; + int n = llama_token_to_piece(vocab, new_token_id, buf, sizeof(buf), 0, true); + if (n < 0) { + fprintf(stderr, "%s: error: failed to convert token to piece\n", __func__); + return 1; + } + std::string s(buf, n); + continuation_text += s; + printf("%s", s.c_str()); + fflush(stdout); + + batch = llama_batch_get_one(&new_token_id, 1); + n_decode += 1; + } + + printf("\n"); + + } catch (const std::exception& e) { + printf(" Error parsing arguments: %s\n", e.what()); + } + } + } + } else if (!parsed_response.content.empty()) { + printf("Response: %s\n", parsed_response.content.c_str()); + } + + const auto t_main_end = ggml_time_us(); + + fprintf(stderr, "%s: decoded %d tokens in %.2f s, speed: %.2f t/s\n", + __func__, n_decode, (t_main_end - t_main_start) / 1000000.0f, n_decode / ((t_main_end - t_main_start) / 1000000.0f)); + + fprintf(stderr, "\n"); + llama_perf_sampler_print(smpl); + llama_perf_context_print(ctx); + fprintf(stderr, "\n"); + + llama_sampler_free(smpl); + llama_free(ctx); + llama_model_free(model); + + return 0; +} \ No newline at end of file From 65f3cd4987921ebf06058d44fe33a281cc3187ad Mon Sep 17 00:00:00 2001 From: K_log Televised - youtube Date: Mon, 14 Jul 2025 13:00:52 +0200 Subject: [PATCH 3/8] Update CMakeLists.txt --- examples/simple-function-call/CMakeLists.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/simple-function-call/CMakeLists.txt b/examples/simple-function-call/CMakeLists.txt index 15aa09c2a6f32..4f6057fed2801 100644 --- a/examples/simple-function-call/CMakeLists.txt +++ b/examples/simple-function-call/CMakeLists.txt @@ -1,8 +1,8 @@ -set(TARGET simple-function-call) +set(TARGET llama-simple-function-call) include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR}) -add_executable(${TARGET} llama-simple-function-call.cpp) +add_executable(${TARGET} simple-function-call.cpp) install(TARGETS ${TARGET} RUNTIME) target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_17) @@ -11,4 +11,4 @@ target_compile_features(${TARGET} PRIVATE cxx_std_17) target_include_directories(${TARGET} PRIVATE ../llava) target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}) target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}/vendor) -target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}/vendor/nlohmann) \ No newline at end of file +target_include_directories(${TARGET} PRIVATE ${CMAKE_SOURCE_DIR}/vendor/nlohmann) From 0f3e60bdd583adf6eee6b132e85087293a72ef29 Mon Sep 17 00:00:00 2001 From: K_log Televised - youtube Date: Mon, 14 Jul 2025 13:01:39 +0200 Subject: [PATCH 4/8] Rename llama-simple-function-call.cpp to simple-function-call.cpp --- ...{llama-simple-function-call.cpp => simple-function-call.cpp} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename examples/simple-function-call/{llama-simple-function-call.cpp => simple-function-call.cpp} (99%) diff --git a/examples/simple-function-call/llama-simple-function-call.cpp b/examples/simple-function-call/simple-function-call.cpp similarity index 99% rename from examples/simple-function-call/llama-simple-function-call.cpp rename to examples/simple-function-call/simple-function-call.cpp index d9eecdc39fa44..575be6741248c 100644 --- a/examples/simple-function-call/llama-simple-function-call.cpp +++ b/examples/simple-function-call/simple-function-call.cpp @@ -445,4 +445,4 @@ int main(int argc, char ** argv) { 
llama_model_free(model); return 0; -} \ No newline at end of file +} From d8bd37855752710c447ee99308f981e9409697a0 Mon Sep 17 00:00:00 2001 From: K_log Televised - youtube Date: Mon, 14 Jul 2025 13:23:45 +0200 Subject: [PATCH 5/8] Update README.md --- examples/simple-function-call/README.md | 203 ++++++++++++++++++++++++ 1 file changed, 203 insertions(+) diff --git a/examples/simple-function-call/README.md b/examples/simple-function-call/README.md index e69de29bb2d1d..7bf6151af636e 100644 --- a/examples/simple-function-call/README.md +++ b/examples/simple-function-call/README.md @@ -0,0 +1,203 @@ +# Simple Function Call Example + +A standalone executable that demonstrates function calling ***from scratch*** with llama.cpp, allowing natural language to shell command execution. + +## What This Is + +- Users input natural language requests +- The LLM generates appropriate shell commands using function calling +- Commands are executed and results returned +- The conversation continues with real command output + +## How It Works + +### Architecture +- **Standalone executable** - no server architecture needed +- **Direct function calling** within the same process +- **Real shell command execution** via `popen()` +- **JSON parsing** of LLM tool calls + +### Tool Schema +The LLM has access to a single tool: +```json +{ + "name": "shell_command", + "description": "Execute a shell command and return the output", + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The shell command to execute" + } + }, + "required": ["command"] + } +} +``` + +### Basic Usage +```bash +./simple-function-call -m model.gguf -p "your request here" +``` + +### Command Line Arguments +- `--jinja` Is enabled by default +- `-m model.gguf` - Model file path (REQUIRED) +- `-p "prompt"` - User's request/command (REQUIRED) +- `--chat-template-file template.jinja` - Optional chat template override +- `--grammar "grammar"` - Optional grammar constraint +- `-ngl N` - Number of GPU layers +- `-n N` - Maximum number of tokens to generate +- `--confirm` - Ask for confirmation before executing commands + +### Examples + +```bash +# List files in current directory +./simple-function-call -m llama-3.2-1b.gguf -p "list all files in this directory" # I've had success even with this small model but I think it has trouble solving complex tasks it can still generate proper JSON for execution + +{"name": "shell_command", "parameters": {"command": "ls -l"}} + +Function calls detected: + Function: shell_command + Arguments: {"command":"ls -l"} + Command: ls -l + Result: +total 812 +-rw-r--r-- 1 user user 47860 Jul 13 00:09 AUTHORS +drwxr-xr-x 12 user user 4096 Jul 13 00:09 build +-rwxr-xr-x 1 user user 21760 Jul 13 00:09 build-xcframework.sh +drwxr-xr-x 2 user user 4096 Jul 13 00:09 ci +drwxr-xr-x 2 user user 4096 Jul 13 00:09 cmake +-rw-r--r-- 1 user user 7973 Jul 13 00:09 CMakeLists.txt +-rw-r--r-- 1 user user 4008 Jul 13 00:09 CMakePresets.json +-rw-r--r-- 1 user user 434 Jul 13 00:09 CODEOWNERS +drwxr-xr-x 2 user user 4096 Jul 13 00:09 common +-rw-r--r-- 1 user user 6510 Jul 13 00:09 CONTRIBUTING.md +-rwxr-xr-x 1 user user 317736 Jul 13 00:09 convert_hf_to_gguf.py +-rwxr-xr-x 1 user user 21163 Jul 13 00:09 convert_hf_to_gguf_update.py +-rwxr-xr-x 1 user user 19106 Jul 13 00:09 convert_llama_ggml_to_gguf.py +-rwxr-xr-x 1 user user 18624 Jul 13 00:09 convert_lora_to_gguf.py +drwxr-xr-x 5 user user 4096 Jul 13 00:09 docs +drwxr-xr-x 29 user user 4096 Jul 13 00:09 examples +-rw-r--r-- 1 
user user 1556 Jul 13 00:09 flake.lock +-rw-r--r-- 1 user user 7465 Jul 13 00:09 flake.nix +drwxr-xr-x 5 user user 4096 Jul 13 00:09 ggml +drwxr-xr-x 5 user user 4096 Jul 13 00:09 gguf-py +drwxr-xr-x 2 user user 4096 Jul 13 00:09 grammars +drwxr-xr-x 2 user user 4096 Jul 13 00:09 include +-rw-r--r-- 1 user user 1078 Jul 13 00:09 LICENSE +drwxr-xr-x 2 user user 4096 Jul 13 00:09 licenses +drwxr-xr-x 2 user user 4096 Jul 13 00:09 llamacppos +-rw-r--r-- 1 user user 50442 Jul 13 00:09 Makefile +drwxr-xr-x 2 user user 4096 Jul 13 00:09 media +drwxr-xr-x 3 user user 4096 Jul 13 00:09 models +-rw-r--r-- 1 user user 163 Jul 13 00:09 mypy.ini +drwxr-xr-x 3 user user 4096 Jul 13 00:09 pocs +-rw-r--r-- 1 user user 124786 Jul 13 00:09 poetry.lock +drwxr-xr-x 2 user user 4096 Jul 13 00:09 prompts +-rw-r--r-- 1 user user 1336 Jul 13 00:09 pyproject.toml +-rw-r--r-- 1 user user 616 Jul 13 00:09 pyrightconfig.json +-rw-r--r-- 1 user user 29793 Jul 13 00:09 README.md +drwxr-xr-x 2 user user 4096 Jul 13 00:09 requirements +-rw-r--r-- 1 user user 551 Jul 13 00:09 requirements.txt +drwxr-xr-x 3 user user 4096 Jul 13 00:09 scripts +-rw-r--r-- 1 user user 5347 Jul 13 00:09 SECURITY.md +drwxr-xr-x 2 user user 4096 Jul 13 00:09 src +drwxr-xr-x 2 user user 4096 Jul 13 00:09 tests +drwxr-xr-x 18 user user 4096 Jul 13 00:09 tools +drwxr-xr-x 8 user user 4096 Jul 13 00:09 vendor +-rw-r--r-- 1 user user 1165 Jul 13 00:09 windows-compat-loop.cpp + +Continuing conversation with command result... +/home/user/llamacomp/llama.cpp/src/llama-context.cpp:919: GGML_ASSERT(n_tokens_all <= cparams.n_batch) failed +/home/user/llamacomp/llama.cpp/build/bin/libggml-base.so(+0x12ff6) [0x7fdfb47bdff6] +/home/user/llamacomp/llama.cpp/build/bin/libggml-base.so(ggml_print_backtrace+0x204) [0x7fdfb47be434] +/home/user/llamacomp/llama.cpp/build/bin/libggml-base.so(ggml_abort+0x130) [0x7fdfb47be5d0] +/home/user/llamacomp/llama.cpp/build/bin/libllama.so(_ZN13llama_context6decodeERK11llama_batch+0x14c3) [0x7fdfb4a16223] +/home/user/llamacomp/llama.cpp/build/bin/libllama.so(llama_decode+0xe) [0x7fdfb4a1632e] +./build/bin/simple-function-call(+0x35ba6) [0x557f2b789ba6] +/usr/lib/libc.so.6(+0x276b5) [0x7fdfb41126b5] +/usr/lib/libc.so.6(__libc_start_main+0x89) [0x7fdfb4112769] +./build/bin/simple-function-call(+0x37615) [0x557f2b78b615] +Aborted (core dumped) + + +# Check system information +./simple-function-call -m llama-2-7b.gguf -p "check the current time" # might produce some JSON +{ + "type": "function", + "function": { + "name": "shell_command", + "description": "Execute a shell command and return the output", + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The shell command to execute" + } + }, + "required": [ + "command" + ] + } + }, + "parameters": { + "command": "date && echo Current date and time: $(date +'%Y-%m-%d %H:%M:%S')" + } +} + +Response: +{ + "type": "function", + "function": { + "name": "shell_command", + "description": "Execute a shell command and return the output", + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The shell command to execute" + } + }, + "required": [ + "command" + ] + } + }, + "parameters": { # Pay attention here + "command": "date && echo Current date and time: $(date +'%Y-%m-%d %H:%M:%S')" + } # The command in tried to use was probably too complex for the actual tool +} +main: decoded 134 tokens in 5.85 s, speed: 22.89 t/s # A better parser bigger model should produce better results 
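
The schema echo above is a good illustration of what "a better parser" would have to cope with: the 1B model returns the whole tool definition plus a stray top-level `parameters` object instead of a proper call. Not part of this PR, but here is a minimal sketch of a more forgiving extractor, assuming the vendored nlohmann/json is available; `extract_shell_command` is a hypothetical helper and the include path may differ from the example's own `json.hpp`:

```cpp
// Illustrative sketch only (not part of simple-function-call.cpp):
// accept either {"name": ..., "arguments"/"parameters": {...}} or the
// schema-echo style {"type": "function", "function": {...}, "parameters": {...}}.
#include <optional>
#include <string>
#include <nlohmann/json.hpp> // vendored copy; the example itself includes "json.hpp"

using json = nlohmann::json;

// Hypothetical helper: pull a shell command out of whatever JSON the model produced.
static std::optional<std::string> extract_shell_command(const std::string & text) {
    const size_t start = text.find('{');
    const size_t end   = text.rfind('}');
    if (start == std::string::npos || end == std::string::npos || end <= start) {
        return std::nullopt;
    }

    json j;
    try {
        j = json::parse(text.substr(start, end - start + 1));
    } catch (const json::exception &) {
        return std::nullopt;
    }

    // the arguments may be called "arguments" or "parameters", at the top level ...
    for (const char * key : { "arguments", "parameters" }) {
        if (j.contains(key) && j[key].is_object() && j[key].contains("command")) {
            return j[key]["command"].get<std::string>();
        }
    }
    // ... or nested inside a "function" object
    if (j.contains("function") && j["function"].is_object()) {
        const json & f = j["function"];
        for (const char * key : { "arguments", "parameters" }) {
            if (f.contains(key) && f[key].is_object() && f[key].contains("command")) {
                return f[key]["command"].get<std::string>();
            }
        }
    }
    return std::nullopt;
}
```

With something along these lines, the `parameters.command` in the output above would still be recovered and executed even though the call is malformed.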
+ +# You'll have less of these types of problems but you can't still nudge the smaller ones, if you have some knowledge you can always tell it to run commands directly but it defeats the purpose of it a bit, it would be good with some Speech to Text system where you wouldn't have to type but just say what you want the computer to do +./simple-function-call -m llama-3.2-1b-instruct.gguf -p "Check the current date" # And it will run the date command + +{"type": "function", "name": "shell_command", "parameters": {"command": "date"}} + +Function calls detected: + Function: shell_command + Arguments: {"command":"date"} + Command: date + Result: +Sun Jul 13 11:24:16 PM CEST 2025 + +Continuing conversation with command result... +<|python_tag|>{"type": "function", "name": "shell_command", "parameters": {"command": "date"}} +main: decoded 45 tokens in 6.26 s, speed: 7.19 t/s + +# With confirmation ( For safety ) +./simple-function-call -m llama-3.2-1b-instruct.gguf -p "delete all .tmp files" --confirm + +# Using a specific chat template for better function calling +./simple-function-call -m qwen2.5-7b-instruct.gguf -p "list files in current directory" \ + --chat-template-file models/templates/Qwen-Qwen2.5-7B-Instruct.jinja + +# Using a model with native function calling support +./simple-function-call -m llama-3.1-8b-instruct.gguf -p "check disk usage" \ + --chat-template-file models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja +``` From a495196ebe9c399fe01755c2f3131e3bf75b6c58 Mon Sep 17 00:00:00 2001 From: K_log Televised - youtube Date: Mon, 14 Jul 2025 13:27:00 +0200 Subject: [PATCH 6/8] Update README.md --- examples/simple-function-call/README.md | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/examples/simple-function-call/README.md b/examples/simple-function-call/README.md index 7bf6151af636e..e1bc66b4fbe73 100644 --- a/examples/simple-function-call/README.md +++ b/examples/simple-function-call/README.md @@ -37,10 +37,9 @@ The LLM has access to a single tool: ``` ### Basic Usage -```bash +``` ./simple-function-call -m model.gguf -p "your request here" ``` - ### Command Line Arguments - `--jinja` Is enabled by default - `-m model.gguf` - Model file path (REQUIRED) @@ -53,8 +52,9 @@ The LLM has access to a single tool: ### Examples -```bash + # List files in current directory +``` ./simple-function-call -m llama-3.2-1b.gguf -p "list all files in this directory" # I've had success even with this small model but I think it has trouble solving complex tasks it can still generate proper JSON for execution {"name": "shell_command", "parameters": {"command": "ls -l"}} @@ -122,9 +122,11 @@ Continuing conversation with command result... 
/usr/lib/libc.so.6(__libc_start_main+0x89) [0x7fdfb4112769] ./build/bin/simple-function-call(+0x37615) [0x557f2b78b615] Aborted (core dumped) - +``` # Check system information + +``` ./simple-function-call -m llama-2-7b.gguf -p "check the current time" # might produce some JSON { "type": "function", @@ -173,8 +175,9 @@ Response: } # The command in tried to use was probably too complex for the actual tool } main: decoded 134 tokens in 5.85 s, speed: 22.89 t/s # A better parser bigger model should produce better results - -# You'll have less of these types of problems but you can't still nudge the smaller ones, if you have some knowledge you can always tell it to run commands directly but it defeats the purpose of it a bit, it would be good with some Speech to Text system where you wouldn't have to type but just say what you want the computer to do +``` +# You'll have less of these types of problems with bigger models but you can't still nudge the smaller ones, if you have some knowledge you can always tell it to run commands directly but it defeats the purpose of it a bit, it would be good with some Speech to Text system where you wouldn't have to type but just say what you want the computer to do +``` ./simple-function-call -m llama-3.2-1b-instruct.gguf -p "Check the current date" # And it will run the date command {"type": "function", "name": "shell_command", "parameters": {"command": "date"}} From 8291573bad0f397a110df20a65a820a5716e75f8 Mon Sep 17 00:00:00 2001 From: K_log Televised - youtube Date: Mon, 14 Jul 2025 13:33:16 +0200 Subject: [PATCH 7/8] Update README.md --- examples/simple-function-call/README.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/examples/simple-function-call/README.md b/examples/simple-function-call/README.md index e1bc66b4fbe73..2fbbfba9c4f34 100644 --- a/examples/simple-function-call/README.md +++ b/examples/simple-function-call/README.md @@ -52,8 +52,6 @@ The LLM has access to a single tool: ### Examples - -# List files in current directory ``` ./simple-function-call -m llama-3.2-1b.gguf -p "list all files in this directory" # I've had success even with this small model but I think it has trouble solving complex tasks it can still generate proper JSON for execution @@ -127,7 +125,7 @@ Aborted (core dumped) # Check system information ``` -./simple-function-call -m llama-2-7b.gguf -p "check the current time" # might produce some JSON +./simple-function-call -m llama-3.2-1b.gguf -p "check the current time" # might produce some JSON { "type": "function", "function": { @@ -172,11 +170,12 @@ Response: }, "parameters": { # Pay attention here "command": "date && echo Current date and time: $(date +'%Y-%m-%d %H:%M:%S')" - } # The command in tried to use was probably too complex for the actual tool -} + } # The command it tried to use was probably too complex for the actual tool +} # A better parser or an LLM would've done a better job + main: decoded 134 tokens in 5.85 s, speed: 22.89 t/s # A better parser bigger model should produce better results ``` -# You'll have less of these types of problems with bigger models but you can't still nudge the smaller ones, if you have some knowledge you can always tell it to run commands directly but it defeats the purpose of it a bit, it would be good with some Speech to Text system where you wouldn't have to type but just say what you want the computer to do +### You'll have less of these types of problems with bigger models but you can't still nudge the smaller ones, if you have some knowledge 
you can always tell it to run commands directly but it defeats the purpose of it a bit, it would be good with some Speech to Text system where you wouldn't have to type but just say what you want the computer to do ``` ./simple-function-call -m llama-3.2-1b-instruct.gguf -p "Check the current date" # And it will run the date command From 102534160b565ddfb29a85ca8ba0c384a38b9617 Mon Sep 17 00:00:00 2001 From: K_log Televised - youtube Date: Mon, 14 Jul 2025 19:35:48 +0200 Subject: [PATCH 8/8] Cleaned up / improved / simplified the README and the implementation, added some comments --- examples/simple-function-call/README.md | 465 ++++++++++-------- .../simple-function-call.cpp | 218 ++++---- 2 files changed, 372 insertions(+), 311 deletions(-) diff --git a/examples/simple-function-call/README.md b/examples/simple-function-call/README.md index 2fbbfba9c4f34..46972492b915d 100644 --- a/examples/simple-function-call/README.md +++ b/examples/simple-function-call/README.md @@ -1,205 +1,260 @@ -# Simple Function Call Example - -A standalone executable that demonstrates function calling ***from scratch*** with llama.cpp, allowing natural language to shell command execution. - -## What This Is - -- Users input natural language requests -- The LLM generates appropriate shell commands using function calling -- Commands are executed and results returned -- The conversation continues with real command output - -## How It Works - -### Architecture -- **Standalone executable** - no server architecture needed -- **Direct function calling** within the same process -- **Real shell command execution** via `popen()` -- **JSON parsing** of LLM tool calls - -### Tool Schema -The LLM has access to a single tool: -```json -{ - "name": "shell_command", - "description": "Execute a shell command and return the output", - "parameters": { - "type": "object", - "properties": { - "command": { - "type": "string", - "description": "The shell command to execute" - } - }, - "required": ["command"] - } -} -``` - -### Basic Usage -``` -./simple-function-call -m model.gguf -p "your request here" -``` -### Command Line Arguments -- `--jinja` Is enabled by default -- `-m model.gguf` - Model file path (REQUIRED) -- `-p "prompt"` - User's request/command (REQUIRED) -- `--chat-template-file template.jinja` - Optional chat template override -- `--grammar "grammar"` - Optional grammar constraint -- `-ngl N` - Number of GPU layers -- `-n N` - Maximum number of tokens to generate -- `--confirm` - Ask for confirmation before executing commands - -### Examples - -``` -./simple-function-call -m llama-3.2-1b.gguf -p "list all files in this directory" # I've had success even with this small model but I think it has trouble solving complex tasks it can still generate proper JSON for execution - -{"name": "shell_command", "parameters": {"command": "ls -l"}} - -Function calls detected: - Function: shell_command - Arguments: {"command":"ls -l"} - Command: ls -l - Result: -total 812 --rw-r--r-- 1 user user 47860 Jul 13 00:09 AUTHORS -drwxr-xr-x 12 user user 4096 Jul 13 00:09 build --rwxr-xr-x 1 user user 21760 Jul 13 00:09 build-xcframework.sh -drwxr-xr-x 2 user user 4096 Jul 13 00:09 ci -drwxr-xr-x 2 user user 4096 Jul 13 00:09 cmake --rw-r--r-- 1 user user 7973 Jul 13 00:09 CMakeLists.txt --rw-r--r-- 1 user user 4008 Jul 13 00:09 CMakePresets.json --rw-r--r-- 1 user user 434 Jul 13 00:09 CODEOWNERS -drwxr-xr-x 2 user user 4096 Jul 13 00:09 common --rw-r--r-- 1 user user 6510 Jul 13 00:09 CONTRIBUTING.md --rwxr-xr-x 1 user user 317736 Jul 13 
00:09 convert_hf_to_gguf.py --rwxr-xr-x 1 user user 21163 Jul 13 00:09 convert_hf_to_gguf_update.py --rwxr-xr-x 1 user user 19106 Jul 13 00:09 convert_llama_ggml_to_gguf.py --rwxr-xr-x 1 user user 18624 Jul 13 00:09 convert_lora_to_gguf.py -drwxr-xr-x 5 user user 4096 Jul 13 00:09 docs -drwxr-xr-x 29 user user 4096 Jul 13 00:09 examples --rw-r--r-- 1 user user 1556 Jul 13 00:09 flake.lock --rw-r--r-- 1 user user 7465 Jul 13 00:09 flake.nix -drwxr-xr-x 5 user user 4096 Jul 13 00:09 ggml -drwxr-xr-x 5 user user 4096 Jul 13 00:09 gguf-py -drwxr-xr-x 2 user user 4096 Jul 13 00:09 grammars -drwxr-xr-x 2 user user 4096 Jul 13 00:09 include --rw-r--r-- 1 user user 1078 Jul 13 00:09 LICENSE -drwxr-xr-x 2 user user 4096 Jul 13 00:09 licenses -drwxr-xr-x 2 user user 4096 Jul 13 00:09 llamacppos --rw-r--r-- 1 user user 50442 Jul 13 00:09 Makefile -drwxr-xr-x 2 user user 4096 Jul 13 00:09 media -drwxr-xr-x 3 user user 4096 Jul 13 00:09 models --rw-r--r-- 1 user user 163 Jul 13 00:09 mypy.ini -drwxr-xr-x 3 user user 4096 Jul 13 00:09 pocs --rw-r--r-- 1 user user 124786 Jul 13 00:09 poetry.lock -drwxr-xr-x 2 user user 4096 Jul 13 00:09 prompts --rw-r--r-- 1 user user 1336 Jul 13 00:09 pyproject.toml --rw-r--r-- 1 user user 616 Jul 13 00:09 pyrightconfig.json --rw-r--r-- 1 user user 29793 Jul 13 00:09 README.md -drwxr-xr-x 2 user user 4096 Jul 13 00:09 requirements --rw-r--r-- 1 user user 551 Jul 13 00:09 requirements.txt -drwxr-xr-x 3 user user 4096 Jul 13 00:09 scripts --rw-r--r-- 1 user user 5347 Jul 13 00:09 SECURITY.md -drwxr-xr-x 2 user user 4096 Jul 13 00:09 src -drwxr-xr-x 2 user user 4096 Jul 13 00:09 tests -drwxr-xr-x 18 user user 4096 Jul 13 00:09 tools -drwxr-xr-x 8 user user 4096 Jul 13 00:09 vendor --rw-r--r-- 1 user user 1165 Jul 13 00:09 windows-compat-loop.cpp - -Continuing conversation with command result... 
-/home/user/llamacomp/llama.cpp/src/llama-context.cpp:919: GGML_ASSERT(n_tokens_all <= cparams.n_batch) failed -/home/user/llamacomp/llama.cpp/build/bin/libggml-base.so(+0x12ff6) [0x7fdfb47bdff6] -/home/user/llamacomp/llama.cpp/build/bin/libggml-base.so(ggml_print_backtrace+0x204) [0x7fdfb47be434] -/home/user/llamacomp/llama.cpp/build/bin/libggml-base.so(ggml_abort+0x130) [0x7fdfb47be5d0] -/home/user/llamacomp/llama.cpp/build/bin/libllama.so(_ZN13llama_context6decodeERK11llama_batch+0x14c3) [0x7fdfb4a16223] -/home/user/llamacomp/llama.cpp/build/bin/libllama.so(llama_decode+0xe) [0x7fdfb4a1632e] -./build/bin/simple-function-call(+0x35ba6) [0x557f2b789ba6] -/usr/lib/libc.so.6(+0x276b5) [0x7fdfb41126b5] -/usr/lib/libc.so.6(__libc_start_main+0x89) [0x7fdfb4112769] -./build/bin/simple-function-call(+0x37615) [0x557f2b78b615] -Aborted (core dumped) -``` - -# Check system information - -``` -./simple-function-call -m llama-3.2-1b.gguf -p "check the current time" # might produce some JSON -{ - "type": "function", - "function": { - "name": "shell_command", - "description": "Execute a shell command and return the output", - "parameters": { - "type": "object", - "properties": { - "command": { - "type": "string", - "description": "The shell command to execute" - } - }, - "required": [ - "command" - ] - } - }, - "parameters": { - "command": "date && echo Current date and time: $(date +'%Y-%m-%d %H:%M:%S')" - } -} - -Response: -{ - "type": "function", - "function": { - "name": "shell_command", - "description": "Execute a shell command and return the output", - "parameters": { - "type": "object", - "properties": { - "command": { - "type": "string", - "description": "The shell command to execute" - } - }, - "required": [ - "command" - ] - } - }, - "parameters": { # Pay attention here - "command": "date && echo Current date and time: $(date +'%Y-%m-%d %H:%M:%S')" - } # The command it tried to use was probably too complex for the actual tool -} # A better parser or an LLM would've done a better job - -main: decoded 134 tokens in 5.85 s, speed: 22.89 t/s # A better parser bigger model should produce better results -``` -### You'll have less of these types of problems with bigger models but you can't still nudge the smaller ones, if you have some knowledge you can always tell it to run commands directly but it defeats the purpose of it a bit, it would be good with some Speech to Text system where you wouldn't have to type but just say what you want the computer to do -``` -./simple-function-call -m llama-3.2-1b-instruct.gguf -p "Check the current date" # And it will run the date command - -{"type": "function", "name": "shell_command", "parameters": {"command": "date"}} - -Function calls detected: - Function: shell_command - Arguments: {"command":"date"} - Command: date - Result: -Sun Jul 13 11:24:16 PM CEST 2025 - -Continuing conversation with command result... 
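
The `GGML_ASSERT(n_tokens_all <= cparams.n_batch)` abort reproduced above is the likely reason the follow-up turn is being dropped in this patch: "Continuing conversation with command result..." re-renders the entire conversation (system prompt, user prompt, the model's reply and the full `ls -l` output) and hands it to a single `llama_decode()` call, which trips the assert as soon as that prompt exceeds `ctx_params.n_batch = 512` tokens. One way around it is to prefill long prompts in `n_batch`-sized chunks, sketched below; `decode_in_chunks` is a hypothetical helper, not code from this PR:

```cpp
// Illustrative sketch only: feed a (possibly long) prompt to the model in
// chunks of at most n_batch tokens instead of one oversized llama_decode() call.
#include <algorithm>
#include <cstdio>
#include <vector>

#include "llama.h"

// Hypothetical helper, not part of this PR.
static bool decode_in_chunks(llama_context * ctx, std::vector<llama_token> & tokens, int n_batch) {
    for (size_t i = 0; i < tokens.size(); i += (size_t) n_batch) {
        const int n_eval = (int) std::min<size_t>((size_t) n_batch, tokens.size() - i);
        llama_batch batch = llama_batch_get_one(tokens.data() + i, n_eval);
        if (llama_decode(ctx, batch) != 0) {
            fprintf(stderr, "llama_decode failed on chunk starting at token %zu\n", i);
            return false;
        }
    }
    return true;
}
```

Chunked or not, the re-rendered prompt still has to fit inside `ctx_params.n_ctx = 2048` total tokens, so very long command output would need truncation as well.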
-<|python_tag|>{"type": "function", "name": "shell_command", "parameters": {"command": "date"}} -main: decoded 45 tokens in 6.26 s, speed: 7.19 t/s - -# With confirmation ( For safety ) -./simple-function-call -m llama-3.2-1b-instruct.gguf -p "delete all .tmp files" --confirm - -# Using a specific chat template for better function calling -./simple-function-call -m qwen2.5-7b-instruct.gguf -p "list files in current directory" \ - --chat-template-file models/templates/Qwen-Qwen2.5-7B-Instruct.jinja - -# Using a model with native function calling support -./simple-function-call -m llama-3.1-8b-instruct.gguf -p "check disk usage" \ - --chat-template-file models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja -``` +# Simple Function Call Example + +A standalone executable that demonstrates function calling ***from scratch*** with llama.cpp, allowing natural language to shell command execution. + +## What This Is + +- Users input natural language requests +- The LLM generates appropriate shell commands using function calling +- Commands are executed and results returned +- The conversation continues with real command output + +## How It Works + +### Architecture +- **Standalone executable** - no server architecture needed +- **Direct function calling** within the same process +- **Real shell command execution** via `popen()` +- **JSON parsing** of LLM tool calls + +##### Tool Schema +The LLM has access to a single tool: +```json +{ + "name": "shell_command", + "description": "Execute a shell command and return the output", + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The shell command to execute" + } + }, + "required": ["command"] + } +} +``` + +### Basic Usage +``` +./simple-function-call -m model.gguf -p "your request here" +``` +### Command Line Arguments +- `--jinja` Is enabled by default +- `-m model.gguf` - Model file path (REQUIRED) +- `-p "prompt"` - User's request/command (REQUIRED) +- `--chat-template-file template.jinja` - Optional chat template override +- `--grammar "grammar"` - Optional grammar constraint +- `-ngl N` - Number of GPU layers +- `-n N` - Maximum number of tokens to generate +- `--confirm` - Ask for confirmation before executing commands + +### Examples + +``` +./build/bin/llama-simple-function-call -m ~/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf -p "Make a directory llamacppchaos" +Simple Function Call Example +Model: /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf +Prompt: Make a directory llamacppchaos +GPU layers: 99 +Max tokens: 256 + +llama_model_loader: loaded meta data with 36 key-value pairs and 147 tensors from /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf (version GGUF V3 (latest)) +... +... +... 
+llama_context: CPU compute buffer size = 254.50 MiB +llama_context: graph nodes = 582 +llama_context: graph splits = 1 +{"type": "function", "name": "shell_command", "parameters": {"command": "mkdir llamacppchaos"}} + +Function calls detected: + Function: shell_command + Arguments: {"command":"mkdir llamacppchaos"} + Command: mkdir llamacppchaos + Result: + +``` + +``` +./build/bin/llama-simple-function-call -m ~/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf -p "List all the files in the current directory" +Simple Function Call Example +Model: /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf +Prompt: List all the files in the current directory +GPU layers: 99 +Max tokens: 256 + +llama_model_loader: loaded meta data with 36 key-value pairs and 147 tensors from /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf (version GGUF V3 (latest)) +... +... +... +llama_context: CPU compute buffer size = 254.50 MiB +llama_context: graph nodes = 582 +llama_context: graph splits = 1 + +{"name": "shell_command", "parameters": {"command": "ls -l"}} + +Function calls detected: + Function: shell_command + Arguments: {"command":"ls -l"} + Command: ls -l + Result: +total 840 +-rw-r--r-- 1 user user 47860 Jul 14 19:12 AUTHORS +drwxr-xr-x 12 user user 4096 Jul 14 19:13 build +-rwxr-xr-x 1 user user 21760 Jul 14 19:12 build-xcframework.sh +drwxr-xr-x 2 user user 4096 Jul 14 19:12 ci +drwxr-xr-x 2 user user 4096 Jul 14 19:12 cmake +-rw-r--r-- 1 user user 7973 Jul 14 19:12 CMakeLists.txt +-rw-r--r-- 1 user user 4570 Jul 14 19:12 CMakePresets.json +-rw-r--r-- 1 user user 434 Jul 14 19:12 CODEOWNERS +drwxr-xr-x 2 user user 4096 Jul 14 19:12 common +-rw-r--r-- 1 user user 6510 Jul 14 19:12 CONTRIBUTING.md +-rwxr-xr-x 1 user user 344837 Jul 14 19:12 convert_hf_to_gguf.py +-rwxr-xr-x 1 user user 22622 Jul 14 19:12 convert_hf_to_gguf_update.py +-rwxr-xr-x 1 user user 19106 Jul 14 19:12 convert_llama_ggml_to_gguf.py +-rwxr-xr-x 1 user user 18624 Jul 14 19:12 convert_lora_to_gguf.py +drwxr-xr-x 6 user user 4096 Jul 14 19:12 docs +drwxr-xr-x 29 user user 4096 Jul 14 19:12 examples +-rw-r--r-- 1 user user 1556 Jul 14 19:12 flake.lock +-rw-r--r-- 1 user user 7465 Jul 14 19:12 flake.nix +drwxr-xr-x 5 user user 4096 Jul 14 19:12 ggml +drwxr-xr-x 5 user user 4096 Jul 14 19:12 gguf-py +drwxr-xr-x 2 user user 4096 Jul 14 19:12 grammars +drwxr-xr-x 2 user user 4096 Jul 14 19:12 include +-rw-r--r-- 1 user user 1078 Jul 14 19:12 LICENSE +drwxr-xr-x 2 user user 4096 Jul 14 19:12 licenses +drwxr-xr-x 2 user user 4096 Jul 14 19:15 llamacppchaos +-rw-r--r-- 1 user user 50442 Jul 14 19:12 Makefile +drwxr-xr-x 2 user user 4096 Jul 14 19:12 media +drwxr-xr-x 3 user user 4096 Jul 14 19:12 models +-rw-r--r-- 1 user user 163 Jul 14 19:12 mypy.ini +drwxr-xr-x 3 user user 4096 Jul 14 19:12 pocs +-rw-r--r-- 1 user user 124786 Jul 14 19:12 poetry.lock +drwxr-xr-x 2 user user 4096 Jul 14 19:12 prompts +-rw-r--r-- 1 user user 1336 Jul 14 19:12 pyproject.toml +-rw-r--r-- 1 user user 616 Jul 14 19:12 pyrightconfig.json +-rw-r--r-- 1 user user 29598 Jul 14 19:12 README.md +drwxr-xr-x 2 user user 4096 Jul 14 19:12 requirements +-rw-r--r-- 1 user user 551 Jul 14 19:12 requirements.txt +drwxr-xr-x 3 user user 4096 Jul 14 19:12 scripts +-rw-r--r-- 1 user user 5347 Jul 14 19:12 SECURITY.md +drwxr-xr-x 2 user user 4096 Jul 14 19:12 src +drwxr-xr-x 2 user user 4096 Jul 14 19:12 tests +drwxr-xr-x 17 user user 4096 Jul 14 19:12 tools +drwxr-xr-x 7 user user 4096 Jul 14 19:12 vendor +``` + +### Where it might fail + +``` + 
./build/bin/llama-simple-function-call -m ~/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf -p "Check the time" +Simple Function Call Example +Model: /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf +Prompt: Check the time +GPU layers: 99 +Max tokens: 256 + +llama_model_loader: loaded meta data with 36 key-value pairs and 147 tensors from /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf (version GGUF V3 (latest)) + +... +... +... +llama_context: CPU compute buffer size = 254.50 MiB +llama_context: graph nodes = 582 +llama_context: graph splits = 1 +{ + "type": "function", + "function": { + "name": "shell_command", + "description": "Check the time", + "parameters": { + "type": "string", + "description": "The shell command to execute" + } + }, + "result": { + "type": "object", + "value": { + "time": "Current time" + } + } +} + +Response: { + "type": "function", + "function": { + "name": "shell_command", + "description": "Check the time", + "parameters": { + "type": "string", + "description": "The shell command to execute" + } + }, + "result": { + "type": "object", + "value": { + "time": "Current time" + } + } +} +``` +### You'll have less of these types of problems with bigger models but you can't still nudge the smaller ones, if you have some knowledge you can always tell it to run commands directly but it defeats the purpose of it a bit, it would be good with some Speech to Text system where you wouldn't have to type but just say what you want the computer to do +``` +./build/bin/llama-simple-function-call -m ~/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf -p "Check the current date" +Simple Function Call Example +Model: /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf +Prompt: Check the current date +GPU layers: 99 +Max tokens: 256 + +llama_model_loader: loaded meta data with 36 key-value pairs and 147 tensors from /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf (version GGUF V3 (latest)) +... +... +... +llama_context: CPU compute buffer size = 254.50 MiB +llama_context: graph nodes = 582 +llama_context: graph splits = 1 +{"type": "function", "name": "shell_command", "parameters": {"command": "date"}} + +Function calls detected: + Function: shell_command + Arguments: {"command":"date"} + Command: date + Result: +Mon Jul 14 07:26:14 PM CEST 2025 +``` +# With confirmation ( For safety ) +``` +./build/bin/llama-simple-function-call -m ~/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf -p "Remove directory llamacppchaos" --confirm +Simple Function Call Example +Model: /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf +Prompt: Remove directory llamacppchaos +GPU layers: 99 +Max tokens: 256 +Command confirmation: enabled + +llama_model_loader: loaded meta data with 36 key-value pairs and 147 tensors from /home/user/Downloads/Llama-3.2-1B-Instruct-Q6_K.gguf (version GGUF V3 (latest)) +... +... +... +llama_context: CPU compute buffer size = 254.50 MiB +llama_context: graph nodes = 582 +llama_context: graph splits = 1 +{"type": "function", "name": "shell_command", "parameters": {"command": "rm -rf llamacppchaos"}} + +Function calls detected: + Function: shell_command + Arguments: {"command":"rm -rf llamacppchaos"} + Command: rm -rf llamacppchaos + Execute this command? 
(y/N): y + Result: + +``` + +### Using a specific chat template for better function calling +``` +./simple-function-call -m qwen2.5-7b-instruct.gguf -p "list files in current directory" \ + --chat-template-file models/templates/Qwen-Qwen2.5-7B-Instruct.jinja +``` diff --git a/examples/simple-function-call/simple-function-call.cpp b/examples/simple-function-call/simple-function-call.cpp index 575be6741248c..090abf41579bf 100644 --- a/examples/simple-function-call/simple-function-call.cpp +++ b/examples/simple-function-call/simple-function-call.cpp @@ -17,6 +17,10 @@ using json = nlohmann::json; // Forward declaration std::string execute_shell_command(const std::string& command); +//============================================================================= +// HELP/USAGE SECTION +//============================================================================= + static void print_usage(int argc, char ** argv) { (void)argc; // Suppress unused parameter warning (void)argv; // Suppress unused parameter warning @@ -24,26 +28,40 @@ static void print_usage(int argc, char ** argv) { printf("\n"); } +//============================================================================= +// SHELL COMMAND EXECUTION SECTION +//============================================================================= + // Real function to execute shell commands +// Uses popen() to run system commands and capture their output +// Returns the command output as a string std::string execute_shell_command(const std::string& command) { std::array buffer; std::string result; - + // Use popen to execute the command std::unique_ptr pipe(popen(command.c_str(), "r"), pclose); if (!pipe) { return "Error: Failed to execute command"; } - + // Read the output while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) { result += buffer.data(); } - + return result; } +//============================================================================= +// MAIN PROGRAM ENTRY POINT +//============================================================================= + int main(int argc, char ** argv) { + //========================================================================= + // CONFIGURATION VARIABLES SECTION + //========================================================================= + // path to the model gguf file std::string model_path; // prompt to generate text from @@ -59,10 +77,15 @@ int main(int argc, char ** argv) { // confirmation flag bool confirm_commands = false; + //========================================================================= + // COMMAND LINE ARGUMENT PARSING SECTION + //========================================================================= + // parse command line arguments { int i = 1; for (; i < argc; i++) { + // Model file path argument if (strcmp(argv[i], "-m") == 0) { if (i + 1 < argc) { model_path = argv[++i]; @@ -70,14 +93,18 @@ int main(int argc, char ** argv) { print_usage(argc, argv); return 1; } - } else if (strcmp(argv[i], "-p") == 0) { + } + // Prompt argument + else if (strcmp(argv[i], "-p") == 0) { if (i + 1 < argc) { prompt = argv[++i]; } else { print_usage(argc, argv); return 1; } - } else if (strcmp(argv[i], "-n") == 0) { + } + // Number of tokens to predict argument + else if (strcmp(argv[i], "-n") == 0) { if (i + 1 < argc) { try { n_predict = std::stoi(argv[++i]); @@ -89,7 +116,9 @@ int main(int argc, char ** argv) { print_usage(argc, argv); return 1; } - } else if (strcmp(argv[i], "-ngl") == 0) { + } + // GPU layers argument + else if (strcmp(argv[i], "-ngl") == 0) { if (i + 1 < 
argc) { try { ngl = std::stoi(argv[++i]); @@ -101,38 +130,49 @@ int main(int argc, char ** argv) { print_usage(argc, argv); return 1; } - } else if (strcmp(argv[i], "--chat-template-file") == 0) { + } + // Chat template file argument + else if (strcmp(argv[i], "--chat-template-file") == 0) { if (i + 1 < argc) { chat_template_file = argv[++i]; } else { print_usage(argc, argv); return 1; } - } else if (strcmp(argv[i], "--grammar") == 0) { + } + // Grammar constraint argument + else if (strcmp(argv[i], "--grammar") == 0) { if (i + 1 < argc) { grammar = argv[++i]; } else { print_usage(argc, argv); return 1; } - } else if (strcmp(argv[i], "--confirm") == 0) { + } + // Command confirmation flag + else if (strcmp(argv[i], "--confirm") == 0) { confirm_commands = true; - } else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { + } + // Help argument + else if (strcmp(argv[i], "-h") == 0 || strcmp(argv[i], "--help") == 0) { print_usage(argc, argv); return 0; - } else { + } + // Unknown argument + else { fprintf(stderr, "Unknown argument: %s\n", argv[i]); print_usage(argc, argv); return 1; } } - + + // Validate required arguments if (model_path.empty()) { fprintf(stderr, "Error: Model file (-m) is required\n"); print_usage(argc, argv); return 1; } - + if (prompt.empty()) { fprintf(stderr, "Error: Prompt (-p) is required\n"); print_usage(argc, argv); @@ -140,6 +180,10 @@ int main(int argc, char ** argv) { } } + //========================================================================= + // CONFIGURATION DISPLAY SECTION + //========================================================================= + printf("Simple Function Call Example\n"); printf("Model: %s\n", model_path.c_str()); printf("Prompt: %s\n", prompt.c_str()); @@ -156,6 +200,10 @@ int main(int argc, char ** argv) { } printf("\n"); + //========================================================================= + // LLAMA.CPP MODEL INITIALIZATION SECTION + //========================================================================= + // load dynamic backends ggml_backend_load_all(); @@ -170,8 +218,13 @@ int main(int argc, char ** argv) { return 1; } + // Get vocabulary from the model const llama_vocab * vocab = llama_model_get_vocab(model); + //========================================================================= + // LLAMA.CPP CONTEXT INITIALIZATION SECTION + //========================================================================= + // initialize the context llama_context_params ctx_params = llama_context_default_params(); // n_ctx is the context size @@ -188,10 +241,15 @@ int main(int argc, char ** argv) { return 1; } + //========================================================================= + // FUNCTION CALLING SYSTEM SETUP SECTION + //========================================================================= + // Initialize chat templates for function calling common_chat_templates_ptr chat_templates = common_chat_templates_init(model, chat_template_file); // Define available functions/tools - single shell command tool + // This defines what functions the LLM can call and their parameters std::vector tools = { { "shell_command", @@ -209,7 +267,12 @@ int main(int argc, char ** argv) { } }; + //========================================================================= + // CHAT MESSAGE INITIALIZATION SECTION + //========================================================================= + // Create chat messages + // This sets up the initial conversation context std::vector messages = { { "system", @@ -231,6 +294,10 @@ 
int main(int argc, char ** argv) { } }; + //========================================================================= + // CHAT TEMPLATE APPLICATION SECTION + //========================================================================= + // Set up chat template inputs with tools common_chat_templates_inputs inputs; inputs.messages = messages; @@ -242,6 +309,10 @@ int main(int argc, char ** argv) { // Apply chat template auto chat_params = common_chat_templates_apply(chat_templates.get(), inputs); + //========================================================================= + // PROMPT TOKENIZATION SECTION + //========================================================================= + // Tokenize the prompt const int n_prompt = -llama_tokenize(vocab, chat_params.prompt.c_str(), chat_params.prompt.size(), NULL, 0, true, true); @@ -255,6 +326,10 @@ int main(int argc, char ** argv) { // prepare a batch for the prompt llama_batch batch = llama_batch_get_one(prompt_tokens.data(), prompt_tokens.size()); + //========================================================================= + // SAMPLER INITIALIZATION SECTION + //========================================================================= + // initialize the sampler auto sparams = llama_sampler_chain_default_params(); sparams.no_perf = false; @@ -262,12 +337,16 @@ int main(int argc, char ** argv) { llama_sampler_chain_add(smpl, llama_sampler_init_greedy()); + //========================================================================= + // MAIN GENERATION LOOP SECTION + //========================================================================= + // main loop - const auto t_main_start = ggml_time_us(); int n_decode = 0; llama_token new_token_id; std::string response_text; + // Main text generation loop - processes tokens one by one for (int n_pos = 0; n_pos + batch.n_tokens < n_prompt + n_predict; ) { // evaluate the current batch with the transformer model if (llama_decode(ctx, batch)) { @@ -286,6 +365,7 @@ int main(int argc, char ** argv) { break; } + // Convert token to text and display it char buf[128]; int n = llama_token_to_piece(vocab, new_token_id, buf, sizeof(buf), 0, true); if (n < 0) { @@ -306,29 +386,37 @@ int main(int argc, char ** argv) { printf("\n\n"); + //========================================================================= + // FUNCTION CALL PARSING SECTION + //========================================================================= + // Parse the response to check for function calls common_chat_syntax syntax; syntax.format = chat_params.format; syntax.parse_tool_calls = true; - + common_chat_msg parsed_response = common_chat_parse(response_text, false, syntax); + //========================================================================= + // FUNCTION CALL EXECUTION SECTION + //========================================================================= + // Handle function calls if any if (!parsed_response.tool_calls.empty()) { printf("Function calls detected:\n"); for (const auto& tool_call : parsed_response.tool_calls) { printf(" Function: %s\n", tool_call.name.c_str()); printf(" Arguments: %s\n", tool_call.arguments.c_str()); - + // Execute the function if (tool_call.name == "shell_command") { try { // Parse JSON arguments json args = json::parse(tool_call.arguments); std::string command = args["command"]; - + printf(" Command: %s\n", command.c_str()); - + // Ask for confirmation if enabled if (confirm_commands) { printf(" Execute this command? 
(y/N): "); @@ -339,88 +427,11 @@ int main(int argc, char ** argv) { continue; } } - + // Execute the command std::string result = execute_shell_command(command); printf(" Result:\n%s", result.c_str()); - - // Add the result to the conversation and continue - messages.push_back({ - "assistant", - response_text, - {}, // content_parts - {}, // tool_calls - "", // reasoning_content - "", // tool_name - "" // tool_call_id - }); - messages.push_back({ - "tool", - result, - {}, // content_parts - {}, // tool_calls - "", // reasoning_content - "", // tool_name - tool_call.id - }); - - // Continue the conversation with the result - printf("\nContinuing conversation with command result...\n"); - - // Set up new chat template inputs - common_chat_templates_inputs new_inputs; - new_inputs.messages = messages; - new_inputs.tools = tools; - new_inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO; - new_inputs.add_generation_prompt = true; - new_inputs.use_jinja = true; - - // Apply chat template for continuation - auto new_chat_params = common_chat_templates_apply(chat_templates.get(), new_inputs); - - // Tokenize the new prompt - const int n_new_prompt = -llama_tokenize(vocab, new_chat_params.prompt.c_str(), new_chat_params.prompt.size(), NULL, 0, true, true); - std::vector new_prompt_tokens(n_new_prompt); - if (llama_tokenize(vocab, new_chat_params.prompt.c_str(), new_chat_params.prompt.size(), new_prompt_tokens.data(), new_prompt_tokens.size(), true, true) < 0) { - fprintf(stderr, "%s: error: failed to tokenize the continuation prompt\n", __func__); - return 1; - } - - // Continue generation - batch = llama_batch_get_one(new_prompt_tokens.data(), new_prompt_tokens.size()); - std::string continuation_text; - - for (int n_pos = 0; n_pos + batch.n_tokens < n_new_prompt + n_predict; ) { - if (llama_decode(ctx, batch)) { - fprintf(stderr, "%s : failed to eval continuation, return code %d\n", __func__, 1); - return 1; - } - - n_pos += batch.n_tokens; - - new_token_id = llama_sampler_sample(smpl, ctx, -1); - - if (llama_vocab_is_eog(vocab, new_token_id)) { - break; - } - - char buf[128]; - int n = llama_token_to_piece(vocab, new_token_id, buf, sizeof(buf), 0, true); - if (n < 0) { - fprintf(stderr, "%s: error: failed to convert token to piece\n", __func__); - return 1; - } - std::string s(buf, n); - continuation_text += s; - printf("%s", s.c_str()); - fflush(stdout); - - batch = llama_batch_get_one(&new_token_id, 1); - n_decode += 1; - } - - printf("\n"); - + } catch (const std::exception& e) { printf(" Error parsing arguments: %s\n", e.what()); } @@ -430,16 +441,11 @@ int main(int argc, char ** argv) { printf("Response: %s\n", parsed_response.content.c_str()); } - const auto t_main_end = ggml_time_us(); - - fprintf(stderr, "%s: decoded %d tokens in %.2f s, speed: %.2f t/s\n", - __func__, n_decode, (t_main_end - t_main_start) / 1000000.0f, n_decode / ((t_main_end - t_main_start) / 1000000.0f)); - - fprintf(stderr, "\n"); - llama_perf_sampler_print(smpl); - llama_perf_context_print(ctx); - fprintf(stderr, "\n"); + //========================================================================= + // CLEANUP SECTION + //========================================================================= + // Clean up resources llama_sampler_free(smpl); llama_free(ctx); llama_model_free(model);
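
In the code listings of this patch series the contents of angle brackets (header names and template arguments) have been stripped, e.g. `#include` with no header name, `std::array buffer;`, `std::unique_ptr pipe(...)` and `std::vector tools;`. The element types can be inferred from usage (`std::vector<common_chat_tool>`, `std::vector<common_chat_msg>`, `std::vector<llama_token>`), and `execute_shell_command()` most plausibly reads as follows; the exact headers and the buffer size are assumptions, not the PR's verbatim code:

```cpp
// Reconstruction sketch of execute_shell_command() with the stripped
// angle-bracket contents filled back in (headers and buffer size assumed).
#include <array>
#include <cstdio>
#include <memory>
#include <string>

std::string execute_shell_command(const std::string & command) {
    std::array<char, 256> buffer{};
    std::string result;

    // popen() runs the command through the shell and exposes its stdout as a FILE*;
    // the unique_ptr closes the pipe with pclose() even on early return.
    std::unique_ptr<FILE, decltype(&pclose)> pipe(popen(command.c_str(), "r"), pclose);
    if (!pipe) {
        return "Error: Failed to execute command";
    }

    // Read the command output line by line into the result string.
    while (fgets(buffer.data(), buffer.size(), pipe.get()) != nullptr) {
        result += buffer.data();
    }

    return result;
}
```

Since `popen()` hands the command string to the shell, anything the model emits runs with the user's privileges, which is exactly what the `--confirm` flag is meant to guard against.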