
Commit c2454e5

Merge pull request #13 from esolithe/concedo_experimental
Concedo experimental
2 parents 9c3bce1 + dec3cd9 commit c2454e5


96 files changed: 4532 additions, 1404 deletions


CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -438,6 +438,7 @@ target_include_directories(ggml PUBLIC . ./ggml/include ./ggml/src ./ggml/src/gg
 target_compile_features(ggml PUBLIC c_std_11) # don't bump
 target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
 set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
+target_compile_options(ggml PRIVATE $<$<COMPILE_LANGUAGE:CUDA>:-use_fast_math -extended-lambda>)

 add_library(ggml_v1
 otherarch/ggml_v1.c

Makefile

Lines changed: 4 additions & 2 deletions
@@ -183,7 +183,7 @@ ifdef LLAMA_CUBLAS
 CUBLAS_OBJS += $(patsubst %.cu,%.o,$(filter-out ggml/src/ggml-cuda/ggml-cuda.cu, $(wildcard ggml/src/ggml-cuda/*.cu)))
 CUBLAS_OBJS += $(OBJS_CUDA_TEMP_INST)
 NVCC = nvcc
-NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math
+NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math -extended-lambda

 ifdef LLAMA_ADD_CONDA_PATHS
 CUBLASLD_FLAGS += -Lconda/envs/linux/lib -Lconda/envs/linux/lib/stubs
@@ -671,7 +671,7 @@ gpttype_adapter_vulkan_noavx2.o: $(GPTTYPE_ADAPTER)
 	$(CXX) $(CXXFLAGS) $(FAILSAFE_FLAGS) $(VULKAN_FLAGS) -c $< -o $@

 clean:
-	rm -vf *.o main ttsmain sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt vulkan-shaders-gen vulkan-shaders-gen-noext gguf-split gguf-split.exe vulkan-shaders-gen.exe vulkan-shaders-gen-noext.exe main.exe ttsmain.exe sdmain.exe whispermain.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_clblast_failsafe.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_clblast_failsafe.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so ggml/src/ggml-vulkan-shaders.cpp ggml/src/ggml-vulkan-shaders.hpp ggml/src/ggml-vulkan-shaders-noext.cpp ggml/src/ggml-vulkan-shaders-noext.hpp
+	rm -vf *.o main ttsmain sdmain whispermain quantize_gguf quantize_clip quantize_gpt2 quantize_gptj quantize_neox quantize_mpt vulkan-shaders-gen vulkan-shaders-gen-noext gguf-split mtmd-cli mainvk mainvk.exe mtmd-cli.exe gguf-split.exe vulkan-shaders-gen.exe vulkan-shaders-gen-noext.exe main.exe ttsmain.exe sdmain.exe whispermain.exe quantize_clip.exe quantize_gguf.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_clblast_failsafe.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_vulkan_noavx2.dll koboldcpp_default.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_clblast_failsafe.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so koboldcpp_vulkan_noavx2.so ggml/src/ggml-vulkan-shaders.cpp ggml/src/ggml-vulkan-shaders.hpp ggml/src/ggml-vulkan-shaders-noext.cpp ggml/src/ggml-vulkan-shaders-noext.hpp
 	rm -vrf ggml/src/ggml-cuda/*.o
 	rm -vrf ggml/src/ggml-cuda/template-instances/*.o

@@ -688,6 +688,8 @@ gguf-split: tools/gguf-split/gguf-split.cpp ggml.o ggml-cpu.o ggml-ops.o ggml-ve
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
 mtmd-cli: tools/mtmd/mtmd-cli.cpp tools/mtmd/mtmd.cpp common/arg.cpp build-info.h ggml.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_default.o llava.o ggml-backend_default.o ggml-backend-reg_default.o $(OBJS_FULL) $(OBJS)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)
+mainvk: tools/main/main.cpp common/arg.cpp build-info.h ggml_v4_vulkan.o ggml-cpu.o ggml-ops.o ggml-vec.o ggml-binops.o ggml-unops.o llama.o console.o llavaclip_vulkan.o llava.o ggml-backend_vulkan.o ggml-backend-reg_vulkan.o ggml-vulkan.o $(OBJS_FULL) $(OBJS) lib/vulkan-1.lib
+	$(CXX) $(CXXFLAGS) -DGGML_USE_VULKAN -DSD_USE_VULKAN $(filter-out %.h,$^) -o $@ $(LDFLAGS)

 ggml/src/ggml-vulkan-shaders.cpp:
 ifdef VULKAN_BUILD

common/arg.cpp

Lines changed: 14 additions & 6 deletions
@@ -41,7 +41,7 @@ using json = nlohmann::ordered_json;

 std::initializer_list<enum llama_example> mmproj_examples = {
     LLAMA_EXAMPLE_LLAVA,
-    // TODO: add LLAMA_EXAMPLE_SERVER when it's ready
+    LLAMA_EXAMPLE_SERVER,
 };

 static std::string read_file(const std::string & fname) {
@@ -2205,32 +2205,33 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING"));
     add_opt(common_arg(
         {"--mmproj"}, "FILE",
-        "path to a multimodal projector file. see tools/mtmd/README.md",
+        "path to a multimodal projector file. see tools/mtmd/README.md\n"
+        "note: if -hf is used, this argument can be omitted",
         [](common_params & params, const std::string & value) {
             params.mmproj.path = value;
         }
-    ).set_examples(mmproj_examples));
+    ).set_examples(mmproj_examples).set_env("LLAMA_ARG_MMPROJ"));
     add_opt(common_arg(
         {"--mmproj-url"}, "URL",
         "URL to a multimodal projector file. see tools/mtmd/README.md",
         [](common_params & params, const std::string & value) {
             params.mmproj.url = value;
         }
-    ).set_examples(mmproj_examples));
+    ).set_examples(mmproj_examples).set_env("LLAMA_ARG_MMPROJ_URL"));
     add_opt(common_arg(
         {"--no-mmproj"},
         "explicitly disable multimodal projector, useful when using -hf",
         [](common_params & params) {
             params.no_mmproj = true;
         }
-    ).set_examples(mmproj_examples));
+    ).set_examples(mmproj_examples).set_env("LLAMA_ARG_NO_MMPROJ"));
     add_opt(common_arg(
         {"--no-mmproj-offload"},
         "do not offload multimodal projector to GPU",
         [](common_params & params) {
             params.mmproj_use_gpu = false;
         }
-    ).set_examples(mmproj_examples));
+    ).set_examples(mmproj_examples).set_env("LLAMA_ARG_NO_MMPROJ_OFFLOAD"));
     add_opt(common_arg(
         {"--image"}, "FILE",
         "path to an image file. use with multimodal models. Specify multiple times for batching",
@@ -2437,6 +2438,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             }
         }
     ));
+    add_opt(common_arg(
+        {"--no-op-offload"},
+        string_format("disable offloading host tensor operations to device (default: %s)", params.no_op_offload ? "true" : "false"),
+        [](common_params & params) {
+            params.no_op_offload = true;
+        }
+    ));
     add_opt(common_arg(
         {"--lora"}, "FNAME",
         "path to LoRA adapter (can be repeated to use multiple adapters)",

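The `.set_env(...)` calls above give the mmproj options environment-variable fallbacks, so `llama-server` can pick up `LLAMA_ARG_MMPROJ` and related variables when the flags are not passed on the command line. Conceptually, the resolution order looks like this standalone sketch (a simplified illustration with a made-up helper name, not the actual parser code):

```cpp
#include <cstdlib>
#include <string>

// Simplified sketch of an env-var fallback as enabled by set_env("LLAMA_ARG_MMPROJ"):
// an explicit --mmproj value wins, otherwise the environment variable is consulted.
static std::string resolve_mmproj(const std::string & cli_value) {
    if (!cli_value.empty()) {
        return cli_value;                                    // value given via --mmproj
    }
    if (const char * env = std::getenv("LLAMA_ARG_MMPROJ")) {
        return env;                                          // fallback from the environment
    }
    return "";                                               // no projector configured
}
```
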
common/common.cpp

Lines changed: 19 additions & 0 deletions
@@ -15,6 +15,7 @@
 #include "json-schema-to-grammar.cpp"
 #include "llama.h"
 #include "chat.cpp"
+#include "ggml/src/ggml-opt.cpp" //dear god pls

 #include <algorithm>
 #include <cinttypes>
@@ -1120,6 +1121,7 @@ struct llama_context_params common_context_params_to_llama(const common_params &
     cparams.offload_kqv = !params.no_kv_offload;
     cparams.flash_attn = params.flash_attn;
     cparams.no_perf = params.no_perf;
+    cparams.op_offload = !params.no_op_offload;

     if (params.reranking) {
         cparams.embeddings = true;
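For code that constructs a context directly rather than going through `common_params`, the same switch lives on `llama_context_params`. A minimal sketch, assuming a model is already loaded and that this build exposes the `op_offload` field wired up in the hunk above (the helper name is made up):

```cpp
#include "llama.h"

// Sketch: create a context with host tensor-op offloading disabled,
// the programmatic equivalent of passing --no-op-offload.
static llama_context * make_ctx_without_op_offload(llama_model * model) {
    llama_context_params cparams = llama_context_default_params();
    cparams.n_ctx      = 4096;   // illustrative context size
    cparams.op_offload = false;  // keep host tensor ops on the CPU backend
    return llama_init_from_model(model, cparams);
}
```
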
@@ -1571,3 +1573,20 @@ common_control_vector_data common_control_vector_load(const std::vector<common_c

     return result;
 }
+
+ggml_opt_dataset_t common_opt_dataset_init(struct llama_context * ctx, const std::vector<llama_token> & tokens, int64_t stride) {
+    const int64_t ne_datapoint = llama_n_ctx(ctx);
+    const int64_t ndata = (tokens.size() - ne_datapoint - 1) / stride;
+    ggml_opt_dataset_t result = ggml_opt_dataset_init(
+        GGML_TYPE_I32, GGML_TYPE_I32, ne_datapoint, ne_datapoint, ndata, /*ndata_shard =*/ 1);
+
+    llama_token * data = (llama_token *) ggml_opt_dataset_data(result)->data;
+    llama_token * labels = (llama_token *) ggml_opt_dataset_labels(result)->data;
+
+    for (int64_t idata = 0; idata < ndata; ++idata) {
+        memcpy(data + idata*ne_datapoint, tokens.data() + idata*stride + 0, ne_datapoint*sizeof(llama_token));
+        memcpy(labels + idata*ne_datapoint, tokens.data() + idata*stride + 1, ne_datapoint*sizeof(llama_token));
+    }
+
+    return result;
+}
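As a usage sketch for the new helper: tokenize a training corpus and pack it into an optimizer dataset of (input, target) windows, where the targets are the inputs shifted by one token. The function and corpus names here are illustrative; only `common_tokenize`, `llama_n_ctx`, and `common_opt_dataset_init` come from the tree above, and the stride choice is an assumption.

```cpp
#include "common.h"
#include "llama.h"

#include <string>
#include <vector>

// Hypothetical usage of common_opt_dataset_init: a stride of half the context
// window makes adjacent datapoints overlap by 50%.
static ggml_opt_dataset_t build_dataset(llama_context * ctx, const std::string & corpus) {
    std::vector<llama_token> tokens = common_tokenize(ctx, corpus, /*add_special =*/ true);
    const int64_t stride = llama_n_ctx(ctx) / 2;
    return common_opt_dataset_init(ctx, tokens, stride);
}
```
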

common/common.h

Lines changed: 7 additions & 0 deletions
@@ -328,6 +328,7 @@ struct common_params {
     bool no_kv_offload = false; // disable KV offloading
     bool warmup = true; // warmup run
     bool check_tensors = false; // validate tensor data
+    bool no_op_offload = false; // globally disable offload host tensor operations to device

     bool single_turn = false; // single turn chat conversation

@@ -661,3 +662,9 @@ const char * const LLM_KV_SPLIT_COUNT = "split.count";
 const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";

 }
+
+//
+// training utils
+//
+
+ggml_opt_dataset_t common_opt_dataset_init(struct llama_context * ctx, const std::vector<llama_token> & tokens, int64_t stride);

common/llguidance.cpp

Lines changed: 1 addition & 0 deletions
@@ -189,6 +189,7 @@ static LlgTokenizer * llama_sampler_llg_new_tokenizer(const llama_vocab * vocab)
         /* .tokenize_fn = */ llama_sampler_llg_tokenize_fn,
         /* .use_approximate_greedy_tokenize_fn = */ false,
         /* .tokenize_user_data = */ vocab,
+        /* .slices = */ nullptr,
     };

     char error_buffer[1024];

convert_hf_to_gguf.py

Lines changed: 78 additions & 1 deletion
@@ -426,7 +426,11 @@ def load_hparams(dir_model: Path):
             logger.warning(f"Failed to load model config from {dir_model}: {e}")
             logger.warning("Trying to load config.json instead")
             with open(dir_model / "config.json", "r", encoding="utf-8") as f:
-                return json.load(f)
+                config = json.load(f)
+                if "llm_config" in config:
+                    # rename for InternVL
+                    config["text_config"] = config["llm_config"]
+                return config

     @classmethod
     def register(cls, *names: str) -> Callable[[AnyModel], AnyModel]:
@@ -794,6 +798,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "0e9433cbbb161f89e264eb32e8e64bfe69e834973ffca5d41d3948a604a3e2a3":
             # ref: https://huggingface.co/mistral-community/pixtral-12b
             res = "pixtral"
+        if chkhsh == "d5f1dd6f980fec569fb218a81a7658ac45fc56b38c5a0adeb1c232fbe04ef5ec":
+            # ref: https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base
+            res = "seed-coder"

         if res is None:
             logger.warning("\n")
@@ -2606,6 +2613,11 @@ def set_gguf_parameters(self):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         if self.hf_arch == "Qwen2Model":
             name = f"model.{name}" # map to Qwen2ForCausalLM tensors
+        if "language_model." in name:
+            name = name.replace("language_model.", "") # for InternVL
+        if name.startswith("mlp") or name.startswith("vision_model"):
+            # skip visual tensors
+            return []
         yield from super().modify_tensors(data_torch, name, bid)


@@ -2709,6 +2721,62 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         return [] # skip other tensors


+@ModelBase.register("InternVisionModel")
+class InternVisionModel(VisionModel):
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        hparams = self.hparams
+        self.gguf_writer.add_vision_projector_type(gguf.VisionProjectorType.INTERNVL)
+        self.gguf_writer.add_vision_attention_layernorm_eps(hparams["layer_norm_eps"])
+        # hidden_act
+        if hparams["hidden_act"] == "silu":
+            self.gguf_writer.add_vision_use_silu(True)
+        elif hparams["hidden_act"] == "gelu":
+            self.gguf_writer.add_vision_use_gelu(True)
+        else:
+            raise ValueError(f"Unsupported hidden_act: {hparams['hidden_act']}")
+        # downsample_ratio
+        downsample_ratio = self.global_config.get("downsample_ratio")
+        assert downsample_ratio is not None
+        self.gguf_writer.add_vision_projector_scale_factor(int(1.0 / downsample_ratio))
+
+    def tensor_force_quant(self, name, new_name, bid, n_dims):
+        del bid, name, n_dims # unused
+        if ".patch_embd." in new_name:
+            return gguf.GGMLQuantizationType.F16
+        if ".position_embd." in new_name:
+            return gguf.GGMLQuantizationType.F32
+        return False
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid # unused
+        if name.startswith("vision_model") or name.startswith("mlp"):
+            # process visual tensors
+            # correct name
+            if name.startswith("vision_model"):
+                name = "vision_tower." + name
+            if (".ls" in name or "position_embedding" in name) and not name.endswith(".weight"):
+                name += ".weight"
+            # split QKV tensors if needed
+            if ".qkv." in name:
+                if data_torch.ndim == 2: # weight
+                    c3, _ = data_torch.shape
+                else: # bias
+                    c3 = data_torch.shape[0]
+                assert c3 % 3 == 0
+                c = c3 // 3
+                wq = data_torch[:c]
+                wk = data_torch[c: c * 2]
+                wv = data_torch[c * 2:]
+                return [
+                    (self.map_tensor_name(name.replace("attn.qkv", "self_attn.q_proj")), wq),
+                    (self.map_tensor_name(name.replace("attn.qkv", "self_attn.k_proj")), wk),
+                    (self.map_tensor_name(name.replace("attn.qkv", "self_attn.v_proj")), wv),
+                ]
+            return [(self.map_tensor_name(name), data_torch)]
+        return [] # skip other tensors
+
+
 @ModelBase.register("WavTokenizerDec")
 class WavTokenizerDecModel(TextModel):
     model_arch = gguf.MODEL_ARCH.WAVTOKENIZER_DEC
@@ -3360,6 +3428,11 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
         head_dim = n_embd // num_heads
         num_groups = num_heads // q_per_kv

+        name = name.replace("language_model.", "") # InternVL
+        if name.startswith("mlp") or name.startswith("vision_model"):
+            # skip visual tensors
+            return []
+
         if bid is not None and f"model.layers.{bid}.attention.wqkv" in name:
             qkv = data_torch

@@ -3433,6 +3506,10 @@ def set_gguf_parameters(self):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         n_head = self.hparams["num_attention_heads"]
         n_kv_head = self.hparams.get("num_key_value_heads")
+        name = name.replace("language_model.", "") # InternVL
+        if name.startswith("mlp") or name.startswith("vision_model"):
+            # skip visual tensors
+            return []
         if name.endswith(("q_proj.weight", "q_proj.bias")):
             data_torch = LlamaModel.permute(data_torch, n_head, n_head)
         if name.endswith(("k_proj.weight", "k_proj.bias")):

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
@@ -116,6 +116,7 @@ class TOKENIZER_TYPE(IntEnum):
     {"name": "llama4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", },
     {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", },
     {"name": "pixtral", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", },
+    {"name": "seed-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", },
 ]

docs/multimodal.md

Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
+# Multimodal
+
+llama.cpp supports multimodal input via `libmtmd`. Currently, two tools support this feature:
+- [llama-mtmd-cli](../tools/mtmd/README.md)
+- [llama-server](../tools/server/README.md) via the OpenAI-compatible `/chat/completions` API
+
+To enable it, use one of the two methods below:
+
+- Use the `-hf` option with a supported model (see the list of pre-quantized models below)
+    - To load a model using `-hf` while disabling multimodal, use `--no-mmproj`
+    - To load a model using `-hf` while using a custom mmproj file, use `--mmproj local_file.gguf`
+- Use `-m model.gguf` together with `--mmproj file.gguf` to specify the text model and the multimodal projector, respectively
+
+By default, the multimodal projector is offloaded to the GPU. To disable this, add `--no-mmproj-offload`.
+
+For example:
+
+```sh
+# simple usage with CLI
+llama-mtmd-cli -hf ggml-org/gemma-3-4b-it-GGUF
+
+# simple usage with server
+llama-server -hf ggml-org/gemma-3-4b-it-GGUF
+
+# using local file
+llama-server -m gemma-3-4b-it-Q4_K_M.gguf --mmproj mmproj-gemma-3-4b-it-Q4_K_M.gguf
+
+# no GPU offload
+llama-server -hf ggml-org/gemma-3-4b-it-GGUF --no-mmproj-offload
+```
+
+## Pre-quantized models
+
+These are ready-to-use models; most of them come with `Q4_K_M` quantization by default.
+
+Replace `(tool_name)` with the name of the binary you want to use, for example `llama-mtmd-cli` or `llama-server`.
+
+NOTE: some models may require a large context window, for example `-c 8192`.
+
+```sh
+# Gemma 3
+(tool_name) -hf ggml-org/gemma-3-4b-it-GGUF
+(tool_name) -hf ggml-org/gemma-3-12b-it-GGUF
+(tool_name) -hf ggml-org/gemma-3-27b-it-GGUF
+
+# SmolVLM
+(tool_name) -hf ggml-org/SmolVLM-Instruct-GGUF
+(tool_name) -hf ggml-org/SmolVLM-256M-Instruct-GGUF
+(tool_name) -hf ggml-org/SmolVLM-500M-Instruct-GGUF
+(tool_name) -hf ggml-org/SmolVLM2-2.2B-Instruct-GGUF
+(tool_name) -hf ggml-org/SmolVLM2-256M-Video-Instruct-GGUF
+(tool_name) -hf ggml-org/SmolVLM2-500M-Video-Instruct-GGUF
+
+# Pixtral 12B
+(tool_name) -hf ggml-org/pixtral-12b-GGUF
+
+# Qwen 2 VL
+(tool_name) -hf ggml-org/Qwen2-VL-2B-Instruct-GGUF
+(tool_name) -hf ggml-org/Qwen2-VL-7B-Instruct-GGUF
+
+# Qwen 2.5 VL
+(tool_name) -hf ggml-org/Qwen2.5-VL-3B-Instruct-GGUF
+(tool_name) -hf ggml-org/Qwen2.5-VL-7B-Instruct-GGUF
+(tool_name) -hf ggml-org/Qwen2.5-VL-32B-Instruct-GGUF
+(tool_name) -hf ggml-org/Qwen2.5-VL-72B-Instruct-GGUF
+
+# Mistral Small 3.1 24B (IQ2_M quantization)
+(tool_name) -hf ggml-org/Mistral-Small-3.1-24B-Instruct-2503-GGUF
+
+# InternVL 2.5 and 3
+(tool_name) -hf ggml-org/InternVL2_5-1B-GGUF
+(tool_name) -hf ggml-org/InternVL2_5-4B-GGUF
+(tool_name) -hf ggml-org/InternVL3-1B-Instruct-GGUF
+(tool_name) -hf ggml-org/InternVL3-2B-Instruct-GGUF
+(tool_name) -hf ggml-org/InternVL3-8B-Instruct-GGUF
+(tool_name) -hf ggml-org/InternVL3-14B-Instruct-GGUF
+```

examples/training/CMakeLists.txt

Lines changed: 5 additions & 0 deletions
@@ -0,0 +1,5 @@
+set(TARGET llama-finetune)
+add_executable(${TARGET} finetune.cpp)
+install(TARGETS ${TARGET} RUNTIME)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_11)
