common: add config presets for falcon #14638

Status: Open (wants to merge 3 commits into master; showing changes from 1 commit)
16 changes: 16 additions & 0 deletions common/arg.cpp
@@ -3423,5 +3423,21 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
}
).set_examples({LLAMA_EXAMPLE_SERVER}));

add_opt(common_arg(
{"--fim-falcon-7b-spec"},
Member:

This identifier is not correct. It indicates:

  • A Fill-in-the-Middle (FIM) model
  • Speculative decoding enabled (i.e. a target + draft model pair)

You can call it --chat-falcon-7b instead.
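
(For reference, a minimal sketch of what a genuine "-spec" preset would configure, in contrast to a plain chat preset. This is an illustration only: the speculative draft-model fields are an assumption based on how common_params is used elsewhere in this file, and every flag, repo, and file value below is a placeholder, not part of this PR.)

    // Illustrative sketch only -- not part of this PR. Shows why the "-spec"
    // suffix implies a draft model for speculative decoding. The speculative
    // field names are assumed; all repo/file values are placeholders.
    add_opt(common_arg(
        {"--fim-somemodel-7b-spec"},             // hypothetical flag name
        string_format("use SomeModel 7B + 0.5B draft for speculative FIM (note: can download weights from the internet)"),
        [](common_params & params) {
            // target model
            params.model.hf_repo = "ggml-org/SomeModel-7B-Q8_0-GGUF";   // placeholder
            params.model.hf_file = "somemodel-7b-q8_0.gguf";            // placeholder
            // draft model -- this is what the "-spec" suffix refers to
            params.speculative.model.hf_repo = "ggml-org/SomeModel-0.5B-Q8_0-GGUF"; // placeholder
            params.speculative.model.hf_file = "somemodel-0.5b-q8_0.gguf";          // placeholder
        }
    ).set_examples({LLAMA_EXAMPLE_SERVER}));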

string_format("use quantized Falcon 7B model (note: can download weights from the internet)"),
[](common_params & params) {
params.model.hf_repo = "maddes8cht/tiiuae-falcon-7b-gguf";
Member:

Presets should only use models hosted on ggml-org.

Author:

Ahh ok, got it. I see that Falcon is not in ggml-org; I will add another model then.

params.model.hf_file = "tiiuae-falcon-7b-Q6_K.gguf";
Member:

We use Q8_0 format in the presets.

params.n_gpu_layers = 40;
params.flash_attn = true;
params.n_ubatch = 1024;
params.n_batch = 1024;
params.n_ctx = 2048;
params.n_cache_reuse = 256;
}
).set_examples({LLAMA_EXAMPLE_SERVER}));


return ctx_arg;
}
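
Taken together, the review comments suggest a revised preset along the lines of the sketch below: the flag renamed to --chat-falcon-7b as suggested, the quantization switched to Q8_0, and the repo/file names left as placeholders, since per the discussion no Falcon GGUF is currently hosted under ggml-org and the author intends to pick a different model.

    // Sketch only -- incorporates the three review comments above, not the
    // final diff. Repo and file names are placeholders.
    add_opt(common_arg(
        {"--chat-falcon-7b"},                    // renamed: no FIM, no draft model
        string_format("use quantized Falcon 7B model (note: can download weights from the internet)"),
        [](common_params & params) {
            params.model.hf_repo = "ggml-org/placeholder-7b-GGUF"; // placeholder repo
            params.model.hf_file = "placeholder-7b-q8_0.gguf";     // Q8_0, per review
            params.n_gpu_layers  = 40;
            params.flash_attn    = true;
            params.n_ubatch      = 1024;
            params.n_batch       = 1024;
            params.n_ctx         = 2048;
            params.n_cache_reuse = 256;
        }
    ).set_examples({LLAMA_EXAMPLE_SERVER}));

With such a preset in place, the server would be launched as, e.g., llama-server --chat-falcon-7b.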