 import os
 
+
 def call(*args, **kwargs):
     import subprocess
+
     out = subprocess.call(*args, **kwargs)
     if out != 0:
         raise ValueError(f"Output: {out}")
 
-def convert(NAME="opus-mt-en-fr", ORG="Helsinki-NLP"):
+
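+# README usage snippets, one per model family; the "{ORG}/{NAME}" placeholder
+# is filled in by convert() below before the snippet is written to the model card.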
+model_description_generator = """
+from hf_hub_ctranslate2 import GeneratorCT2fromHfHub
+model = GeneratorCT2fromHfHub(
+    # load in int8 on CUDA
+    model_name_or_path=model_name,
+    device="cuda",
+    compute_type="int8_float16",
+    # tokenizer=AutoTokenizer.from_pretrained("{ORG}/{NAME}")
+)
+outputs = model.generate(
+    text=["def fibonacci(", "User: How are you doing? Bot:"],
+    max_length=64,
+    include_prompt_in_result=False
+)
+print(outputs)"""
+
+model_description_translator = """
+from hf_hub_ctranslate2 import TranslatorCT2fromHfHub
+model = TranslatorCT2fromHfHub(
+    # load in int8 on CUDA
+    model_name_or_path=model_name,
+    device="cuda",
+    compute_type="int8_float16",
+    # tokenizer=AutoTokenizer.from_pretrained("{ORG}/{NAME}")
+)
+outputs = model.generate(
+    text=["def fibonacci(", "User: How are you doing? Bot:"],
+    max_length=64,
+)
+print(outputs)"""
+
+model_description_encoder = """
+from hf_hub_ctranslate2 import EncoderCT2fromHfHub
+model = EncoderCT2fromHfHub(
+    # load in int8 on CUDA
+    model_name_or_path=model_name,
+    device="cuda",
+    compute_type="float16",
+    # tokenizer=AutoTokenizer.from_pretrained("{ORG}/{NAME}")
+)
+embeddings = model.encode(
+    ["I like soccer", "I like tennis", "The Eiffel Tower is in Paris"],
+    batch_size=32,
+    convert_to_numpy=True,
+    normalize_embeddings=True,
+)
+print(embeddings.shape, embeddings)
+scores = (embeddings @ embeddings.T) * 100
+"""
+
+
+def convert(NAME="opus-mt-en-fr", ORG="Helsinki-NLP", description="generator"):
+    print(f"converting {ORG}/{NAME}")
     import re
     import datetime
     from huggingface_hub import HfApi, snapshot_download
+
     api = HfApi()
-
-    HUB_NAME=f"ct2fast-{NAME}"
+
+    HUB_NAME = f"ct2fast-{NAME}"
     repo_id = f"michaelfeil/{HUB_NAME}"
     api.create_repo(repo_id=repo_id, exist_ok=True, repo_type="model")
     tmp_dir = os.path.join(os.path.expanduser("~"), f"tmp-{HUB_NAME}")
     os.chdir(os.path.expanduser("~"))
-
+
     path = snapshot_download(
-        f'{ORG}/{NAME}',
+        f"{ORG}/{NAME}",
+    )
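+    # select the snapshot's auxiliary files (tokenizer, vocab, generation
+    # configs) to copy next to the converted weights; the weights, config.json
+    # and custom .py code are excluded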
+    files = [f for f in os.listdir(path) if "." in f]
+    filtered_f = [
+        f
+        for f in files
+        if not ("model" in f or "config.json" == f or f.endswith(".py"))
+    ]
+
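+    # build the ct2-transformers-converter CLI invocation; encoders keep
+    # float16 weights, decoders/translators are quantized to int8_float16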
+    conv_arg = (
+        [
+            "ct2-transformers-converter",
+            "--model",
+            f"{ORG}/{NAME}",
+            "--output_dir",
+            str(tmp_dir),
+            "--force",
+            "--copy_files",
+        ]
+        + filtered_f
+        + [
+            "--quantization",
+            "float16" if description == "encoder" else "int8_float16",
+            "--trust_remote_code",
+        ]
     )
-    files = os.listdir(path)
-    filtered_f = [f for f in files if not ("model" in f or "config.json" == f)]
-
-    conv_arg = [
-        'ct2-transformers-converter',
-        '--model',
-        f'{ORG}/{NAME}',
-        '--output_dir',
-        str(tmp_dir),
-        '--force',
-        '--copy_files',
-    ]+ filtered_f + [
-        '--quantization',
-        'float16']
     call(conv_arg)
-
-    with open(os.path.join(tmp_dir,'README.md'),'r') as f:
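+    # some checkpoints ship vocab.txt but no vocabulary.txt; mirror it under
+    # the name the converted model's loader expects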
+    if "vocabulary.txt" not in os.listdir(tmp_dir) and "vocab.txt" in os.listdir(
+        tmp_dir
+    ):
+        import shutil
+
+        shutil.copyfile(
+            os.path.join(tmp_dir, "vocab.txt"),
+            os.path.join(tmp_dir, "vocabulary.txt"),
+        )
+
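+    # rewrite the model card: tag the YAML header with ctranslate2 markers and
+    # splice the usage section in right after it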
+    with open(os.path.join(tmp_dir, "README.md"), "r") as f:
         content = f.read()
     if "tags:" in content:
-        content = content.replace("tags:","tags:\n- ctranslate2\n- int8\n- float16")
+        content = content.replace("tags:", "tags:\n- ctranslate2\n- int8\n- float16", 1)
     else:
-        content = content.replace("---","---\ntags:\n- ctranslate2\n- int8\n- float16\n")
+        content = content.replace(
+            "---", "---\ntags:\n- ctranslate2\n- int8\n- float16\n", 1
+        )
 
-    end_header = [m.start() for m in re.finditer(r"---",content)]
+    end_header = [m.start() for m in re.finditer(r"---", content)]
     if len(end_header) > 1:
         end_header = end_header[1] + 3
     else:
         end_header = 0
     conv_arg_nice = " ".join(conv_arg)
+    conv_arg_nice = conv_arg_nice.replace(os.path.expanduser("~"), "~")
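+    # choose the README usage snippet matching this model family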
+    if description == "generator":
+        model_description = model_description_generator
+    elif description == "encoder":
+        model_description = model_description_encoder
+    elif description == "translator":
+        model_description = model_description_translator
+    else:
+        raise ValueError(f"unknown description: {description}")
+    # substitute the {ORG}/{NAME} placeholders (the snippets contain no other braces)
+    model_description = model_description.format(ORG=ORG, NAME=NAME)
     add_string = f"""
 # # Fast-Inference with Ctranslate2
 Speed up inference while reducing memory by 2x-4x using int8 inference in C++ on CPU or GPU.
 
 quantized version of [{ORG}/{NAME}](https://huggingface.co/{ORG}/{NAME})
 ```bash
-pip install hf-hub-ctranslate2>=2.0.6
-```
-Converted on {str(datetime.datetime.now())[:10]} using
+pip install hf-hub-ctranslate2>=2.10.0 ctranslate2>=3.16.0
 ```
-{conv_arg_nice}
+
+```python
+# from transformers import AutoTokenizer
+model_name = "{repo_id}"
+{model_description}
 ```
 
-Checkpoint compatible to [ctranslate2>=3.13.0](https://github.com/OpenNMT/CTranslate2) and [hf-hub-ctranslate2>=2.0.6](https://github.com/michaelfeil/hf-hub-ctranslate2)
-- `compute_type=int8_float16` for `device="cuda"`
+Checkpoint compatible to [ctranslate2>=3.16.0](https://github.com/OpenNMT/CTranslate2)
+and [hf-hub-ctranslate2>=2.10.0](https://github.com/michaelfeil/hf-hub-ctranslate2)
+- `compute_type=int8_float16` for `device="cuda"`
 - `compute_type=int8` for `device="cpu"`
 
-```python
-from hf_hub_ctranslate2 import TranslatorCT2fromHfHub, GeneratorCT2fromHfHub
-from transformers import AutoTokenizer
-
-model_name = "{repo_id}"
-# use either TranslatorCT2fromHfHub or GeneratorCT2fromHfHub here, depending on model.
-model = GeneratorCT2fromHfHub(
-    # load in int8 on CUDA
-    model_name_or_path=model_name,
-    device="cuda",
-    compute_type="int8_float16",
-    tokenizer=AutoTokenizer.from_pretrained("{ORG}/{NAME}")
-)
-outputs = model.generate(
-    text=["How do you call a fast Flan-ingo?", "User: How are you doing? Bot:"],
-)
-print(outputs)
+Converted on {str(datetime.datetime.now())[:10]} using
+```
+{conv_arg_nice}
 ```
 
 # Licence and other remarks:
 This is just a quantized version. Licence conditions are intended to be identical to the original huggingface repo.
 
 # Original description
 """
-
-    with open(os.path.join(tmp_dir,'README.md'),'w') as f:
+
+    with open(os.path.join(tmp_dir, "README.md"), "w") as f:
         f.write(content[:end_header] + add_string + content[end_header:])
-
 
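+    # push the converted folder to the Hub, then drop the local working copy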
     api.upload_folder(
         folder_path=tmp_dir,
-        repo_id=repo_id, repo_type="model",
-        commit_message=f"Upload {ORG}/{NAME} ctranslate fp16 weights"
+        repo_id=repo_id,
+        repo_type="model",
+        commit_message=f"Upload {ORG}/{NAME} ctranslate fp16 weights",
     )
-    call(["rm","-rf", tmp_dir])
-
+    call(["rm", "-rf", tmp_dir])
+
+
 if __name__ == "__main__":
     generators = [
-        ("togethercomputer/RedPajama-INCITE-Instruct-3B-v1"),
-        ("togethercomputer/GPT-JT-6B-v0"),
-        "togethercomputer/RedPajama-INCITE-Chat-7B-v0.1",
-        "togethercomputer/RedPajama-INCITE-Instruct-7B-v0.1",
-        "EleutherAI/pythia-160m",
-        "EleutherAI/pythia-2.8b",
-        "EleutherAI/pythia-6.9b",
-        "EleutherAI/pythia-12b",
-        "togethercomputer/Pythia-Chat-Base-7B",
-        "stabilityai/stablelm-base-alpha-7b",
-        "stabilityai/stablelm-tuned-alpha-7b",
-        "stabilityai/stablelm-base-alpha-3b",
-        "stabilityai/stablelm-tuned-alpha-3b",
-        "OpenAssistant/stablelm-7b-sft-v7-epoch-3",
-        "EleutherAI/gpt-j-6b",
-        "EleutherAI/gpt-neox-20b",
-        "OpenAssistant/pythia-12b-sft-v8-7k-steps"
+        # "togethercomputer/RedPajama-INCITE-Instruct-3B-v1",
+        # "togethercomputer/GPT-JT-6B-v0",
+        # "togethercomputer/RedPajama-INCITE-7B-Instruct",
+        # "togethercomputer/RedPajama-INCITE-7B-Chat",
+        # "EleutherAI/pythia-160m",
+        # "EleutherAI/pythia-2.8b",
+        # "EleutherAI/pythia-6.9b",
+        # "EleutherAI/pythia-12b",
+        # "togethercomputer/Pythia-Chat-Base-7B",
+        # "stabilityai/stablelm-base-alpha-7b",
+        # "stabilityai/stablelm-tuned-alpha-7b",
+        # "stabilityai/stablelm-base-alpha-3b",
+        # "stabilityai/stablelm-tuned-alpha-3b",
+        # "OpenAssistant/stablelm-7b-sft-v7-epoch-3",
+        # "EleutherAI/gpt-j-6b",
+        # "EleutherAI/gpt-neox-20b",
+        # "OpenAssistant/pythia-12b-sft-v8-7k-steps",
+        # "Salesforce/codegen-350M-mono",
+        # "Salesforce/codegen-350M-multi",
+        # "Salesforce/codegen-2B-mono",
+        # "Salesforce/codegen-2B-multi",
+        # "Salesforce/codegen-6B-multi",
+        # "Salesforce/codegen-6B-mono",
+        # "Salesforce/codegen-16B-mono",
+        # "Salesforce/codegen-16B-multi",
+        # "Salesforce/codegen2-1B",
+        # "Salesforce/codegen2-3_7B",
+        # "Salesforce/codegen2-7B",
+        # "Salesforce/codegen2-16B",
+        # "bigcode/gpt_bigcode-santacoder",
+        # 'bigcode/starcoder',
+        # "mosaicml/mpt-7b",
+        # "mosaicml/mpt-7b-instruct",
+        # "mosaicml/mpt-7b-chat"
+        "VMware/open-llama-7b-open-instruct",
+        # "tiiuae/falcon-7b-instruct",
+        # 'tiiuae/falcon-7b',
+        "tiiuae/falcon-40b-instruct",
+        "tiiuae/falcon-40b",
+        "OpenAssistant/falcon-7b-sft-top1-696",
+        "OpenAssistant/falcon-7b-sft-mix-2000",
+        "OpenAssistant/falcon-40b-sft-mix-1226",
+        # "HuggingFaceH4/starchat-beta",
+        "WizardLM/WizardCoder-15B-V1.0",
+    ]
+    translators = [
+        # 'Salesforce/codet5p-770m-py', 'Salesforce/codet5p-770m'
     ]
+    encoders = [
+        "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
+        "intfloat/e5-small-v2",
+        "intfloat/e5-large-v2",
+        "intfloat/e5-large",
+        "sentence-transformers/all-MiniLM-L6-v2",
+        "setu4993/LaBSE",
+    ]
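+    # convert every listed model, passing the README snippet for its family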
+    for m in encoders:
+        ORG, NAME = m.split("/")
+        convert(NAME=NAME, ORG=ORG, description="encoder")
+
+    for m in translators:
+        ORG, NAME = m.split("/")
+        convert(NAME=NAME, ORG=ORG, description="translator")
+
     for m in generators:
-        ORG , NAME = m.split("/")
-        convert(NAME=NAME, ORG=ORG)
+        ORG, NAME = m.split("/")
+        # import huggingface_hub
+        # huggingface_hub.snapshot_download(
+        #     m
+        # )
+        convert(NAME=NAME, ORG=ORG, description="generator")
+
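+        # smoke-test the freshly converted generator before moving on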
+        from hf_hub_ctranslate2 import GeneratorCT2fromHfHub
+        from transformers import AutoTokenizer
+
+        model_name = f"michaelfeil/ct2fast-{NAME}"
+        # use either TranslatorCT2fromHfHub or GeneratorCT2fromHfHub here, depending on model.
+        model = GeneratorCT2fromHfHub(
+            # load in int8 on CUDA
+            model_name_or_path=model_name,
+            device="cuda",
+            compute_type="int8",
+            tokenizer=AutoTokenizer.from_pretrained(m),
+        )
+        outputs = model.generate(
+            text=["def print_hello_world():", "def hello_name(name:"], max_length=64
+        )
+        print(outputs)