
Commit 69a3dfd

New release (#8)
* new new_release
* format with black
* update converter
* update converter
* update test

Co-authored-by: michaelfeil <me@michaelfeil.eu>
1 parent 49730fe commit 69a3dfd

File tree

6 files changed: +511, -122 lines changed

conversion_utils/convert.py

Lines changed: 214 additions & 73 deletions
@@ -1,128 +1,269 @@
 import os
 
+
 def call(*args, **kwargs):
     import subprocess
+
     out = subprocess.call(*args, **kwargs)
     if out != 0:
         raise ValueError(f"Output: {out}")
 
-def convert(NAME="opus-mt-en-fr", ORG="Helsinki-NLP"):
+
+model_description_generator = """
+from hf_hub_ctranslate2 import GeneratorCT2fromHfHub
+model = GeneratorCT2fromHfHub(
+        # load in int8 on CUDA
+        model_name_or_path=model_name,
+        device="cuda",
+        compute_type="int8_float16",
+        # tokenizer=AutoTokenizer.from_pretrained("{ORG}/{NAME}")
+)
+outputs = model.generate(
+    text=["def fibonnaci(", "User: How are you doing? Bot:"],
+    max_length=64,
+    include_prompt_in_result=False
+)
+print(outputs)"""
+
+model_description_translator = """
+from hf_hub_ctranslate2 import TranslatorCT2fromHfHub
+model = TranslatorCT2fromHfHub(
+        # load in int8 on CUDA
+        model_name_or_path=model_name,
+        device="cuda",
+        compute_type="int8_float16",
+        # tokenizer=AutoTokenizer.from_pretrained("{ORG}/{NAME}")
+)
+outputs = model.generate(
+    text=["def fibonnaci(", "User: How are you doing? Bot:"],
+    max_length=64,
+)
+print(outputs)"""
+
+model_description_encoder = """
+from hf_hub_ctranslate2 import EncoderCT2fromHfHub
+model = EncoderCT2fromHfHub(
+        # load in int8 on CUDA
+        model_name_or_path=model_name,
+        device="cuda",
+        compute_type="float16",
+        # tokenizer=AutoTokenizer.from_pretrained("{ORG}/{NAME}")
+)
+embeddings = model.encode(
+    ["I like soccer", "I like tennis", "The eiffel tower is in Paris"],
+    batch_size=32,
+    convert_to_numpy=True,
+    normalize_embeddings=True,
+)
+print(embeddings.shape, embeddings)
+scores = (embeddings @ embeddings.T) * 100
+"""
+
+
+def convert(NAME="opus-mt-en-fr", ORG="Helsinki-NLP", description="generator"):
+    print(f"converting {ORG}/{NAME} ")
     import re
     import datetime
     from huggingface_hub import HfApi, snapshot_download
+
     api = HfApi()
-
-    HUB_NAME=f"ct2fast-{NAME}"
+
+    HUB_NAME = f"ct2fast-{NAME}"
     repo_id = f"michaelfeil/{HUB_NAME}"
     api.create_repo(repo_id=repo_id, exist_ok=True, repo_type="model")
     tmp_dir = os.path.join(os.path.expanduser("~"), f"tmp-{HUB_NAME}")
     os.chdir(os.path.expanduser("~"))
-
+
     path = snapshot_download(
-        f'{ORG}/{NAME}',
+        f"{ORG}/{NAME}",
+    )
+    files = [f for f in os.listdir(path) if "." in f]
+    filtered_f = [
+        f
+        for f in files
+        if not ("model" in f or "config.json" == f or f.endswith(".py"))
+    ]
+
+    conv_arg = (
+        [
+            "ct2-transformers-converter",
+            "--model",
+            f"{ORG}/{NAME}",
+            "--output_dir",
+            str(tmp_dir),
+            "--force",
+            "--copy_files",
+        ]
+        + filtered_f
+        + [
+            "--quantization",
+            "float16" if description == "encoder" else "int8_float16",
+            "--trust_remote_code",
+        ]
     )
-    files = os.listdir(path)
-    filtered_f = [f for f in files if not ("model" in f or "config.json" == f)]
-
-    conv_arg = [
-        'ct2-transformers-converter',
-        '--model',
-        f'{ORG}/{NAME}',
-        '--output_dir',
-        str(tmp_dir),
-        '--force',
-        '--copy_files',
-    ]+ filtered_f + [
-        '--quantization',
-        'float16']
     call(conv_arg)
-
-    with open(os.path.join(tmp_dir,'README.md'),'r') as f:
+    if not "vocabulary.txt" in os.listdir(tmp_dir) and "vocab.txt" in os.listdir(
+        tmp_dir
+    ):
+        import shutil
+
+        shutil.copyfile(
+            os.path.join(tmp_dir, "vocab.txt"),
+            os.path.join(tmp_dir, "vocabulary.txt"),
+        )
+
+    with open(os.path.join(tmp_dir, "README.md"), "r") as f:
         content = f.read()
     if "tags:" in content:
-        content = content.replace("tags:","tags:\n- ctranslate2\n- int8\n- float16")
+        content = content.replace("tags:", "tags:\n- ctranslate2\n- int8\n- float16", 1)
     else:
-        content = content.replace("---","---\ntags:\n- ctranslate2\n- int8\n- float16\n")
+        content = content.replace(
+            "---", "---\ntags:\n- ctranslate2\n- int8\n- float16\n", 1
+        )
 
-    end_header = [m.start() for m in re.finditer(r"---",content)]
+    end_header = [m.start() for m in re.finditer(r"---", content)]
     if len(end_header) > 1:
         end_header = end_header[1] + 3
     else:
         end_header = 0
     conv_arg_nice = " ".join(conv_arg)
+    conv_arg_nice = conv_arg_nice.replace(os.path.expanduser("~"), "~")
+    if description == "generator":
+        model_description = model_description_generator
+    elif description == "encoder":
+        model_description = model_description_encoder
+    elif description == "translator":
+        model_description = model_description_translator
     add_string = f"""
 # # Fast-Inference with Ctranslate2
 Speedup inference while reducing memory by 2x-4x using int8 inference in C++ on CPU or GPU.
 
 quantized version of [{ORG}/{NAME}](https://huggingface.co/{ORG}/{NAME})
 ```bash
-pip install hf-hub-ctranslate2>=2.0.6
-```
-Converted on {str(datetime.datetime.now())[:10]} using
+pip install hf-hub-ctranslate2>=2.10.0 ctranslate2>=3.16.0
 ```
-{conv_arg_nice}
+
+```python
+# from transformers import AutoTokenizer
+model_name = "{repo_id}"
+{model_description}
 ```
 
-Checkpoint compatible to [ctranslate2>=3.13.0](https://github.com/OpenNMT/CTranslate2) and [hf-hub-ctranslate2>=2.0.6](https://github.com/michaelfeil/hf-hub-ctranslate2)
-- `compute_type=int8_float16` for `device="cuda"`
+Checkpoint compatible to [ctranslate2>=3.16.0](https://github.com/OpenNMT/CTranslate2)
+and [hf-hub-ctranslate2>=2.10.0](https://github.com/michaelfeil/hf-hub-ctranslate2)
+- `compute_type=int8_float16` for `device="cuda"`
 - `compute_type=int8` for `device="cpu"`
 
-```python
-from hf_hub_ctranslate2 import TranslatorCT2fromHfHub, GeneratorCT2fromHfHub
-from transformers import AutoTokenizer
-
-model_name = "{repo_id}"
-# use either TranslatorCT2fromHfHub or GeneratorCT2fromHfHub here, depending on model.
-model = GeneratorCT2fromHfHub(
-        # load in int8 on CUDA
-        model_name_or_path=model_name,
-        device="cuda",
-        compute_type="int8_float16",
-        tokenizer=AutoTokenizer.from_pretrained("{ORG}/{NAME}")
-)
-outputs = model.generate(
-    text=["How do you call a fast Flan-ingo?", "User: How are you doing? Bot:"],
-)
-print(outputs)
+Converted on {str(datetime.datetime.now())[:10]} using
+```
+{conv_arg_nice}
 ```
 
 # Licence and other remarks:
 This is just a quantized version. Licence conditions are intended to be idential to original huggingface repo.
 
 # Original description
 """
-
-    with open(os.path.join(tmp_dir,'README.md'),'w') as f:
+
+    with open(os.path.join(tmp_dir, "README.md"), "w") as f:
         f.write(content[:end_header] + add_string + content[end_header:])
-
 
     api.upload_folder(
         folder_path=tmp_dir,
-        repo_id=repo_id, repo_type="model",
-        commit_message=f"Upload {ORG}/{NAME} ctranslate fp16 weights"
+        repo_id=repo_id,
+        repo_type="model",
+        commit_message=f"Upload {ORG}/{NAME} ctranslate fp16 weights",
     )
-    call(["rm","-rf", tmp_dir])
-
+    call(["rm", "-rf", tmp_dir])
+
+
 if __name__ == "__main__":
     generators = [
-        ("togethercomputer/RedPajama-INCITE-Instruct-3B-v1"),
-        ("togethercomputer/GPT-JT-6B-v0"),
-        "togethercomputer/RedPajama-INCITE-Chat-7B-v0.1",
-        "togethercomputer/RedPajama-INCITE-Instruct-7B-v0.1",
-        "EleutherAI/pythia-160m",
-        "EleutherAI/pythia-2.8b",
-        "EleutherAI/pythia-6.9b",
-        "EleutherAI/pythia-12b",
-        "togethercomputer/Pythia-Chat-Base-7B",
-        "stabilityai/stablelm-base-alpha-7b",
-        "stabilityai/stablelm-tuned-alpha-7b",
-        "stabilityai/stablelm-base-alpha-3b",
-        "stabilityai/stablelm-tuned-alpha-3b",
-        "OpenAssistant/stablelm-7b-sft-v7-epoch-3",
-        "EleutherAI/gpt-j-6b",
-        "EleutherAI/gpt-neox-20b",
-        "OpenAssistant/pythia-12b-sft-v8-7k-steps"
+        # "togethercomputer/RedPajama-INCITE-Instruct-3B-v1",
+        # "togethercomputer/GPT-JT-6B-v0",
+        # "togethercomputer/RedPajama-INCITE-7B-Instruct",
+        # "togethercomputer/RedPajama-INCITE-7B-Chat",
+        # "EleutherAI/pythia-160m",
+        # "EleutherAI/pythia-2.8b",
+        # "EleutherAI/pythia-6.9b",
+        # "EleutherAI/pythia-12b",
+        # "togethercomputer/Pythia-Chat-Base-7B",
+        # "stabilityai/stablelm-base-alpha-7b",
+        # "stabilityai/stablelm-tuned-alpha-7b",
+        # "stabilityai/stablelm-base-alpha-3b",
+        # "stabilityai/stablelm-tuned-alpha-3b",
+        # "OpenAssistant/stablelm-7b-sft-v7-epoch-3",
+        # "EleutherAI/gpt-j-6b",
+        # "EleutherAI/gpt-neox-20b",
+        # "OpenAssistant/pythia-12b-sft-v8-7k-steps",
+        # "Salesforce/codegen-350M-mono",
+        # "Salesforce/codegen-350M-multi",
+        # "Salesforce/codegen-2B-mono",
+        # "Salesforce/codegen-2B-multi",
+        # "Salesforce/codegen-6B-multi",
+        # "Salesforce/codegen-6B-mono",
+        # "Salesforce/codegen-16B-mono",
+        # "Salesforce/codegen-16B-multi",
+        # "Salesforce/codegen2-1B",
+        # "Salesforce/codegen2-3_7B",
+        # "Salesforce/codegen2-7B",
+        # "Salesforce/codegen2-16B",
+        # "bigcode/gpt_bigcode-santacoder",
+        # 'bigcode/starcoder',
+        # "mosaicml/mpt-7b",
+        # "mosaicml/mpt-7b-instruct",
+        # "mosaicml/mpt-7b-chat"
+        "VMware/open-llama-7b-open-instruct",
+        # "tiiuae/falcon-7b-instruct",
+        # 'tiiuae/falcon-7b',
+        "tiiuae/falcon-40b-instruct",
+        "tiiuae/falcon-40b",
+        "OpenAssistant/falcon-7b-sft-top1-696",
+        "OpenAssistant/falcon-7b-sft-mix-2000",
+        "OpenAssistant/falcon-40b-sft-mix-1226",
+        # "HuggingFaceH4/starchat-beta",
+        "WizardLM/WizardCoder-15B-V1.0",
+    ]
+    translators = [
+        # 'Salesforce/codet5p-770m-py', 'Salesforce/codet5p-770m'
     ]
+    encoders = [
+        "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
+        "intfloat/e5-small-v2",
+        "intfloat/e5-large-v2",
+        "intfloat/e5-large",
+        "sentence-transformers/all-MiniLM-L6-v2",
+        "setu4993/LaBSE",
+    ]
+    for m in encoders:
+        ORG, NAME = m.split("/")
+        convert(NAME=NAME, ORG=ORG, description="encoder")
+
+    for m in translators:
+        ORG, NAME = m.split("/")
+        convert(NAME=NAME, ORG=ORG, description="translator")
+
     for m in generators:
-        ORG , NAME = m.split("/")
-        convert(NAME=NAME, ORG=ORG)
+        ORG, NAME = m.split("/")
+        # import huggingface_hub
+        # huggingface_hub.snapshot_download(
+        #     m
+        # )
+        convert(NAME=NAME, ORG=ORG, description="generator")
+
+        from hf_hub_ctranslate2 import GeneratorCT2fromHfHub
+        from transformers import AutoTokenizer
+
+        model_name = f"michaelfeil/ct2fast-{NAME}"
+        # use either TranslatorCT2fromHfHub or GeneratorCT2fromHfHub here, depending on model.
+        model = GeneratorCT2fromHfHub(
+            # load in int8 on CUDA
+            model_name_or_path=model_name,
+            device="cuda",
+            compute_type="int8",
+            tokenizer=AutoTokenizer.from_pretrained(m),
+        )
+        outputs = model.generate(
+            text=["def print_hello_world():", "def hello_name(name:"], max_length=64
+        )
+        print(outputs)
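The __main__ block above simply walks the three model lists and calls convert() with the matching description, which selects both the quantization flag ("float16" for encoders, "int8_float16" otherwise) and the usage snippet embedded in the generated model card. Below is a minimal sketch of driving the converter for one model outside the script; the import path conversion_utils.convert is an assumption not shown in this commit, and the call still needs ct2-transformers-converter on PATH plus Hugging Face Hub write access.

# Hypothetical one-off run of the updated converter (assumed import path).
from conversion_utils.convert import convert

ORG, NAME = "intfloat/e5-small-v2".split("/")        # one of the encoders listed above
convert(NAME=NAME, ORG=ORG, description="encoder")   # encoders are converted with --quantization float16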

hf_hub_ctranslate2/__init__.py

Lines changed: 8 additions & 2 deletions
@@ -1,5 +1,11 @@
 # -*- coding: utf-8 -*-
 """Compatability between Huggingface and Ctranslate2."""
 # __all__ = ["__version__", "TranslatorCT2fromHfHub", "GeneratorCT2fromHfHub", "MultiLingualTranslatorCT2fromHfHub"]
-from hf_hub_ctranslate2.translate import TranslatorCT2fromHfHub, GeneratorCT2fromHfHub, MultiLingualTranslatorCT2fromHfHub
-__version__ = "2.0.9"
+from hf_hub_ctranslate2.translate import (
+    TranslatorCT2fromHfHub,
+    GeneratorCT2fromHfHub,
+    MultiLingualTranslatorCT2fromHfHub,
+    EncoderCT2fromHfHub,
+)
+
+__version__ = "2.0.10"
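The newly re-exported EncoderCT2fromHfHub is what the encoder model cards generated by convert.py rely on. A minimal usage sketch mirroring the model_description_encoder template above: the repo id follows the script's michaelfeil/ct2fast-{NAME} naming, and running on CPU with int8 is an assumption (the generated card shows device="cuda" with compute_type="float16").

from hf_hub_ctranslate2 import EncoderCT2fromHfHub

# Embedding model converted by the script above (intfloat/e5-small-v2).
model = EncoderCT2fromHfHub(
    model_name_or_path="michaelfeil/ct2fast-e5-small-v2",
    device="cpu",          # assumption: the generated card uses device="cuda"
    compute_type="int8",   # assumption: the card uses "float16" on GPU
)
embeddings = model.encode(
    ["I like soccer", "The eiffel tower is in Paris"],
    batch_size=32,
    convert_to_numpy=True,
    normalize_embeddings=True,
)
print(embeddings.shape)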
