From 5c0785d2b0ef6bfd9aedc5f8a60e061d9a44b3f5 Mon Sep 17 00:00:00 2001 From: stephantul Date: Tue, 20 May 2025 08:58:12 +0200 Subject: [PATCH] feat: smaller tokenizers --- model2vec/hf_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/model2vec/hf_utils.py b/model2vec/hf_utils.py index 1d12f43..03c3223 100644 --- a/model2vec/hf_utils.py +++ b/model2vec/hf_utils.py @@ -40,7 +40,7 @@ def save_pretrained( folder_path = folder_path / subfolder if subfolder else folder_path folder_path.mkdir(exist_ok=True, parents=True) save_file({"embeddings": embeddings}, folder_path / "model.safetensors") - tokenizer.save(str(folder_path / "tokenizer.json")) + tokenizer.save(str(folder_path / "tokenizer.json"), pretty=False) json.dump(config, open(folder_path / "config.json", "w"), indent=4) # Create modules.json