Skip to content

Commit 8edd2cf

Browse files
committed
add embedding model tokenizer chkhsh
1 parent 0777cd3 commit 8edd2cf

File tree

2 files changed

+7 -0 lines changed

2 files changed

+7 -0 lines changed

convert_hf_to_gguf.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -809,6 +809,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
809 809
if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35":
810 810
# ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0
811 811
res = "minerva-7b"
812+
if chkhsh == "d4540891389ea895b53b399da6ac824becc30f2fba0e9ddbb98f92e55ca0e97c":
813+
# ref: https://huggingface.co/Qwen/Qwen3-Embedding-0.6B
814+
res = "qwen2"
812 815

813 816
if res is None:
814 817
logger.warning("\n")
@@ -3107,6 +3110,9 @@ def _get_cls_out_tensor(self, data_torch: Tensor) -> Tensor:
3107 3110
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
3108 3111
is_rerank = self.token_false_id is not None and self.token_true_id is not None
3109 3112

3113+
if not name.startswith("model."):
3114+
name = "model." + name
3115+
3110 3116
if is_rerank:
3111 3117
if self.is_tied_embeddings and "embed_tokens" in name:
3112 3118
return [

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -137,6 +137,7 @@ class TOKENIZER_TYPE(IntEnum):
137 137
{"name": "chatglm-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-chat", "chkhsh": "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516"},
138 138
{"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", "chkhsh": "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2"},
139 139
{"name": "minerva-7b", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", "chkhsh": "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35"},
140+
{"name": "qwen2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/Qwen/Qwen3-Embedding-0.6B", "chkhsh": "d4540891389ea895b53b399da6ac824becc30f2fba0e9ddbb98f92e55ca0e97c"},
140 141
]
141 142

142 143

0 commit comments

Comments
 (0)