Commit 19e5943

convert : make hf token optional (#14717)
* make hf token optional
* fail if we can't get necessary tokenizer config
1 parent 496957e commit 19e5943

1 file changed (+7 -11 lines)
convert_hf_to_gguf_update.py

Lines changed: 7 additions & 11 deletions
@@ -7,7 +7,6 @@
 import re
 
 import requests
-import sys
 import json
 import shutil
 import argparse
@@ -69,8 +68,7 @@ class TOKENIZER_TYPE(IntEnum):
 hf_token = args.hf_token if args.hf_token is not None else hf_token
 
 if hf_token is None:
-    logger.error("HF token is required. Please provide it as an argument or set it in ~/.cache/huggingface/token")
-    sys.exit(1)
+    logger.warning("HF token not found. You can provide it as an argument or set it in ~/.cache/huggingface/token")
 
 # TODO: this string has to exercise as much pre-tokenizer functionality as possible
 # will be updated with time - contributions welcome
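For readers of the diff, here is a minimal sketch of the token-resolution flow after this change. The helper name resolve_hf_token is hypothetical, and reading the cached token from ~/.cache/huggingface/token is inferred from the warning message rather than shown in the hunk above.

import logging
import os

logger = logging.getLogger("convert_hf_to_gguf_update")


def resolve_hf_token(cli_token):
    """Return a Hugging Face token if one is available, else None (sketch)."""
    # Prefer an explicitly passed token, then the cached Hugging Face token file.
    hf_token = cli_token
    cached_path = os.path.expanduser("~/.cache/huggingface/token")
    if hf_token is None and os.path.isfile(cached_path):
        with open(cached_path, encoding="utf-8") as f:
            hf_token = f.read().strip() or None
    if hf_token is None:
        # The script now warns instead of exiting, so public models still convert.
        logger.warning("HF token not found. You can provide it as an argument "
                       "or set it in ~/.cache/huggingface/token")
    return hf_token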
@@ -151,7 +149,7 @@ class TOKENIZER_TYPE(IntEnum):
 
 
 def download_file_with_auth(url, token, save_path):
-    headers = {"Authorization": f"Bearer {token}"}
+    headers = {"Authorization": f"Bearer {token}"} if token else None
     response = sess.get(url, headers=headers)
     response.raise_for_status()
     os.makedirs(os.path.dirname(save_path), exist_ok=True)
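A rough usage sketch of the changed helper: the body below is a simplified re-implementation of the function in the diff (the file-writing tail is assumed), and the model and URL in the call are illustrative, not taken from the commit. With no token, requests is simply given headers=None, so public repositories can still be fetched anonymously.

import os

import requests

sess = requests.Session()


def download_file_with_auth(url, token, save_path):
    # Only attach an Authorization header when a token is actually available;
    # requests treats headers=None as "no extra headers".
    headers = {"Authorization": f"Bearer {token}"} if token else None
    response = sess.get(url, headers=headers)
    response.raise_for_status()
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    with open(save_path, "wb") as f:
        f.write(response.content)


# Anonymous fetch of a public tokenizer config (illustrative model and URL):
download_file_with_auth(
    "https://huggingface.co/gpt2/resolve/main/tokenizer_config.json",
    token=None,
    save_path="models/tokenizers/gpt2/tokenizer_config.json",
)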
@@ -250,20 +248,18 @@ def get_existing_models(convert_py):
     else:
         # otherwise, compute the hash of the tokenizer
 
-        # Skip if the tokenizer folder does not exist or there are other download issues previously
-        if not os.path.exists(f"models/tokenizers/{name}"):
-            logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
-            continue
+        # Fail if the tokenizer folder with config does not exist or there are other download issues previously
+        if not os.path.isfile(f"models/tokenizers/{name}/tokenizer_config.json"):
+            raise OSError(f"Config for tokenizer {name} not found. The model may not exist or is not accessible with the provided token.")
 
         try:
             logger.info(f"Loading tokenizer from {f'models/tokenizers/{name}'}...")
             if name == "t5":
                 tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
             else:
                 tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
-        except OSError as e:
-            logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
-            continue  # Skip to the next model if the tokenizer can't be loaded
+        except Exception as e:
+            raise OSError(f"Error loading tokenizer for model {name}.") from e
 
         chktok = tokenizer.encode(CHK_TXT)
         chkhsh = sha256(str(chktok).encode()).hexdigest()
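A short sketch of the new fail-fast behaviour, assuming the transformers AutoTokenizer API; the model name below is hypothetical and the surrounding loop is omitted. Re-raising with "raise ... from e" keeps the original download or parsing error in the traceback, whereas the old code logged it and continued, which could let a missing or inaccessible tokenizer go unnoticed.

import os

from transformers import AutoTokenizer

name = "llama-bpe"  # hypothetical model entry
tok_dir = f"models/tokenizers/{name}"

# Fail fast if the download step did not leave a usable tokenizer config behind.
if not os.path.isfile(f"{tok_dir}/tokenizer_config.json"):
    raise OSError(f"Config for tokenizer {name} not found. The model may not exist "
                  "or is not accessible with the provided token.")

try:
    tokenizer = AutoTokenizer.from_pretrained(tok_dir)
except Exception as e:
    # Re-raise with context instead of logging and skipping to the next model.
    raise OSError(f"Error loading tokenizer for model {name}.") from e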
