Skip to content

Commit a934ab7

Browse files
committed
Merge branch 'post-cpu-merge' into croco_nex_0
2 parents 1bcb13d + 2cb3b87 commit a934ab7

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

67 files changed

+3288
-2736
lines changed
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
name: 'Windows - Setup CURL'
2+
description: 'Composite action, to be reused in other workflow'
3+
inputs:
4+
curl_version:
5+
description: 'CURL version'
6+
required: false
7+
default: '8.6.0_6'
8+
outputs:
9+
curl_path:
10+
description: "Path to the downloaded libcurl"
11+
value: ${{ steps.get_libcurl.outputs.curl_path }}
12+
13+
runs:
14+
using: "composite"
15+
steps:
16+
- name: libCURL
17+
id: get_libcurl
18+
shell: powershell
19+
env:
20+
CURL_VERSION: ${{ inputs.curl_version }}
21+
run: |
22+
curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
23+
mkdir $env:RUNNER_TEMP/libcurl
24+
tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
25+
echo "curl_path=$env:RUNNER_TEMP/libcurl" >> $env:GITHUB_OUTPUT

.github/workflows/build-linux-cross.yml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ jobs:
1919
sudo apt-get install -y --no-install-recommends \
2020
build-essential \
2121
gcc-14-riscv64-linux-gnu \
22-
g++-14-riscv64-linux-gnu
22+
g++-14-riscv64-linux-gnu \
23+
libcurl4-openssl-dev:riscv64
2324
2425
- name: Build
2526
run: |
@@ -59,7 +60,8 @@ jobs:
5960
glslc \
6061
gcc-14-riscv64-linux-gnu \
6162
g++-14-riscv64-linux-gnu \
62-
libvulkan-dev:riscv64
63+
libvulkan-dev:riscv64 \
64+
libcurl4-openssl-dev:riscv64
6365
6466
- name: Build
6567
run: |
@@ -99,7 +101,8 @@ jobs:
99101
build-essential \
100102
glslc \
101103
crossbuild-essential-arm64 \
102-
libvulkan-dev:arm64
104+
libvulkan-dev:arm64 \
105+
libcurl4-openssl-dev:arm64
103106
104107
- name: Build
105108
run: |

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@ find_package(Threads REQUIRED)
108108
add_compile_definitions(LOG_DISABLE_LOGS)
109109
add_compile_definitions(GGML_USE_CPU)
110110
add_compile_definitions(GGML_USE_CPU_AARCH64)
111+
add_compile_definitions(NOMINMAX)
111112

112113
if (MSVC)
113114
add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>")

common/arg.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ struct common_hf_file_res {
164164
# if !defined(PATH_MAX)
165165
# define PATH_MAX MAX_PATH
166166
# endif
167+
#elif defined(_AIX)
168+
#include <sys/limits.h>
167169
#else
168170
#include <sys/syslimits.h>
169171
#endif

convert_hf_to_gguf.py

Lines changed: 64 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -714,6 +714,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
714714
if chkhsh == "96a5f08be6259352137b512d4157e333e21df7edd3fcd152990608735a65b224":
715715
# ref: https://huggingface.co/inclusionAI/Ling-lite
716716
res = "bailingmoe"
717+
if chkhsh == "d353350c764d8c3b39c763113960e4fb4919bea5fbf208a0e3b22e8469dc7406":
718+
# ref: https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct
719+
res = "llama4"
717720

718721
if res is None:
719722
logger.warning("\n")
@@ -1608,6 +1611,7 @@ def prepare_tensors(self):
16081611
@Model.register("LLaMAForCausalLM", "LlamaForCausalLM", "MistralForCausalLM", "MixtralForCausalLM")
16091612
class LlamaModel(Model):
16101613
model_arch = gguf.MODEL_ARCH.LLAMA
1614+
undo_permute = True
16111615

16121616
def set_vocab(self):
16131617
try:
@@ -1672,10 +1676,11 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
16721676
n_head = self.hparams["num_attention_heads"]
16731677
n_kv_head = self.hparams.get("num_key_value_heads")
16741678

1675-
if name.endswith(("q_proj.weight", "q_proj.bias")):
1676-
data_torch = LlamaModel.permute(data_torch, n_head, n_head)
1677-
if name.endswith(("k_proj.weight", "k_proj.bias")):
1678-
data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
1679+
if self.undo_permute:
1680+
if name.endswith(("q_proj.weight", "q_proj.bias")):
1681+
data_torch = LlamaModel.permute(data_torch, n_head, n_head)
1682+
if name.endswith(("k_proj.weight", "k_proj.bias")):
1683+
data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
16791684

16801685
# process the experts separately
16811686
if name.find("block_sparse_moe.experts") != -1:
@@ -1752,6 +1757,61 @@ def prepare_tensors(self):
17521757
raise ValueError(f"Unprocessed experts: {experts}")
17531758

17541759

1760+
@Model.register("Llama4ForConditionalGeneration")
1761+
class Llama4Model(LlamaModel):
1762+
model_arch = gguf.MODEL_ARCH.LLAMA4
1763+
has_vision: bool = False
1764+
undo_permute = False
1765+
1766+
# TODO @ngxson : avoid duplicate this code everywhere by at least support "text_config"
1767+
# same with llama, but we need to merge the text_config into the root level of hparams
1768+
def __init__(self, *args, **kwargs):
1769+
hparams = kwargs["hparams"] if "hparams" in kwargs else Model.load_hparams(args[0])
1770+
if "text_config" in hparams:
1771+
hparams = {**hparams, **hparams["text_config"]}
1772+
kwargs["hparams"] = hparams
1773+
super().__init__(*args, **kwargs)
1774+
if "vision_config" in hparams:
1775+
logger.info("Has vision encoder, but it will be ignored")
1776+
self.has_vision = True
1777+
# IMPORTANT: the normal "intermediate_size" is renamed to "intermediate_size_mlp", we need to undo this
1778+
self.hparams["intermediate_size_moe"] = self.hparams["intermediate_size"]
1779+
self.hparams["intermediate_size"] = self.hparams["intermediate_size_mlp"]
1780+
1781+
def set_vocab(self):
1782+
self._set_vocab_gpt2()
1783+
self.gguf_writer.add_add_bos_token(True)
1784+
1785+
def set_gguf_parameters(self):
1786+
super().set_gguf_parameters()
1787+
self.gguf_writer.add_interleave_moe_layer_step(self.hparams["interleave_moe_layer_step"])
1788+
self.gguf_writer.add_expert_feed_forward_length(self.hparams["intermediate_size_moe"])
1789+
1790+
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
1791+
name = name.replace("language_model.", "")
1792+
name = name.replace("feed_forward.", "mlp.") # a bit hacky for now
1793+
name = name.replace(".router.weight", ".gate.weight") # a bit hacky for now
1794+
1795+
# split the gate_up into gate and up
1796+
if "gate_up_proj" in name:
1797+
name_up = name.replace("gate_up_proj", "up_proj.weight")
1798+
name_gate = name.replace("gate_up_proj", "gate_proj.weight")
1799+
dim_half = data_torch.shape[-1] // 2
1800+
gate_proj_weight, up_proj_weight = data_torch.transpose(-1, -2).split(dim_half, dim=-2)
1801+
return [
1802+
(self.map_tensor_name(name_gate), gate_proj_weight),
1803+
(self.map_tensor_name(name_up), up_proj_weight)
1804+
]
1805+
1806+
if name.endswith("down_proj"):
1807+
name += ".weight"
1808+
data_torch = data_torch.transpose(-1, -2)
1809+
1810+
if "multi_modal_projector" in name or "vision_model" in name:
1811+
return []
1812+
return super().modify_tensors(data_torch, name, bid)
1813+
1814+
17551815
@Model.register("Mistral3ForConditionalGeneration")
17561816
class Mistral3Model(LlamaModel):
17571817
model_arch = gguf.MODEL_ARCH.LLAMA

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ class TOKENIZER_TYPE(IntEnum):
113113
{"name": "superbpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/UW/OLMo2-8B-SuperBPE-t180k", },
114114
{"name": "trillion", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/trillionlabs/Trillion-7B-preview", },
115115
{"name": "bailingmoe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/inclusionAI/Ling-lite", },
116+
{"name": "llama4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", },
116117
]
117118

118119

examples/llava/clip-impl.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222
#define KEY_HAS_QWEN2VL_MERGER "clip.has_qwen2vl_merger"
2323
#define KEY_USE_GELU "clip.use_gelu"
2424
#define KEY_USE_SILU "clip.use_silu"
25+
#define KEY_USE_GLU_MLP "clip.use_glu_mlp"
26+
#define KEY_USE_RMS_NORM "clip.use_rms_norm"
2527
#define KEY_N_EMBD "clip.%s.embedding_length"
2628
#define KEY_N_FF "clip.%s.feed_forward_length"
2729
#define KEY_N_BLOCK "clip.%s.block_count"
@@ -40,6 +42,8 @@
4042
#define KEY_MM_PATCH_MERGE_TYPE "clip.vision.mm_patch_merge_type"
4143
#define KEY_IMAGE_GRID_PINPOINTS "clip.vision.image_grid_pinpoints"
4244
#define KEY_IMAGE_CROP_RESOLUTION "clip.vision.image_crop_resolution"
45+
#define KEY_FULLATTN_BLK_IDX "clip.vision.fullatt_block_indexes"
46+
#define KEY_ATTN_WINDOW_SIZE "clip.vision.window_size"
4347

4448

4549
//
@@ -58,6 +62,7 @@
5862
#define TN_ATTN_OUTPUT "%s.blk.%d.attn_out.%s"
5963
#define TN_FFN_DOWN "%s.blk.%d.ffn_down.%s"
6064
#define TN_FFN_UP "%s.blk.%d.ffn_up.%s"
65+
#define TN_FFN_GATE "%s.blk.%d.ffn_gate.%s"
6166
#define TN_LN_1 "%s.blk.%d.ln1.%s"
6267
#define TN_LN_2 "%s.blk.%d.ln2.%s"
6368
#define TN_LN_PRE "%s.pre_ln.%s"

0 commit comments

Comments (0)