 import torch
 from compressed_tensors.registry.registry import RegistryMixin
 from compressed_tensors.utils.converters.transformations import (
+    remove_unused_tensors,
     transform_autogptq_weights_and_reshape_tensors,
     transform_exllama_names,
 )
+from compressed_tensors.utils.safetensors_load import validate_safetensors_file_path
 from safetensors import safe_open
 from safetensors.torch import save_file
 from tqdm import tqdm


 class ConverterNames(str, Enum):
-    EXLLAMA_TO_COMPRESSED_TENSOR = "exllama_to_compressed_tensor"
+    AutoGPTQConverter: str = "exllama_to_compressed_tensor"


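Since `BaseConverter` mixes in `RegistryMixin`, the enum value doubles as the registry key used by `@BaseConverter.register(...)` further down. A minimal usage sketch, assuming the usual `compressed_tensors` registry API (`load_from_registry` is not shown in this diff, and the paths are illustrative):

    # Hypothetical sketch: resolve a converter by its registered name and run it.
    converter = BaseConverter.load_from_registry(ConverterNames.AutoGPTQConverter)
    save_dir = converter.convert_from_safetensors(
        "path/to/autogptq_model", save_dir="path/to/output"
    )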
 class BaseConverter(ABC, RegistryMixin):
@@ -71,7 +73,7 @@ def convert_from_safetensors(
         :param save_dir: The directory to save the converted state_dict to
         :return: The directory where the converted state_dict was saved
         """
-        _validate_safetensors_file_path(filepath)
+        validate_safetensors_file_path(filepath)

         filepath_: Path = Path(filepath)
         if not save_dir:
@@ -84,30 +86,42 @@ def convert_from_safetensors(
         # transform and save the state_dict
         if filepath_.is_dir():
             tqdm.write(f"Converting directory: {filepath}")
-            tqdm.write(f"Found: {len(list(filepath_.glob('*.safetensors')))} .safetensors files")
+            tqdm.write(
+                f"Found: {len(list(filepath_.glob('*.safetensors')))} "
+                ".safetensors files"
+            )
             for file in filepath_.glob("*.safetensors"):
                 tqdm.write(f"Converting file: {file.name}")
                 new_state_dict = {}
                 state_dict: Iterable[StateDictType] = load_safetensors_state_dict(
                     file, by_layers=True
                 )
-                layer_progress_bar = tqdm(state_dict, total=layer_count(file), desc="Converting layers")
+                layer_progress_bar = tqdm(
+                    state_dict, total=layer_count(file), desc="Converting layers"
+                )
                 for layer_state_dict in layer_progress_bar:
-                    layer_name = list(layer_state_dict.keys())[0][:len("model.layers.0")]
+                    layer_name = list(layer_state_dict.keys())[0][
+                        : len("model.layers.0")
+                    ]
                     layer_progress_bar.set_description(f"Converting layer {layer_name}")
                     layer_progress_bar.update()
                     new_state_dict.update(
                         cls.translate(state_dict=layer_state_dict, **kwargs)
                     )

                 if new_state_dict:
+                    # compress before saving
+                    # compressor = Compressor.load_from_registry(
+                    #     name=CompressionFormat.pack_quantized.value
+                    # )
+                    # new_state_dict = compressor.compress(new_state_dict)
                     save_file(
                         new_state_dict,
                         filename=save_dir_ / file.name,
                         metadata=metadata,
                     )
             _copy_non_safetensor_files_(filepath_, save_dir_)
-            _update_quantization_config(filepath_, save_dir_)
+            # _update_quantization_config(filepath_, save_dir_)

         elif filepath_.is_file():
             new_state_dict = {}
@@ -134,39 +148,28 @@ def transformations(cls) -> Iterable[TransformationType]:
         raise NotImplementedError()


-@BaseConverter.register(name=ConverterNames.EXLLAMA_TO_COMPRESSED_TENSOR)
-class ExllamaToCompressedTensorConverter(BaseConverter):
+@BaseConverter.register(name=ConverterNames.AutoGPTQConverter)
+class AutoGPTQConverter(BaseConverter):
     """
     A converter that applies transformations to the state_dict of an autogptq
-    quantized model to convert it to a compressed tensor model, which can be
-    loaded by the SparseAutoModel classes
-    """
-
-    @classmethod
-    def transformations(cls):
-        return (transform_autogptq_weights_and_reshape_tensors, transform_exllama_names)
-
-
-def _validate_safetensors_file_path(filepath: str):
-    """
-    Given a file path, it is valid if:
-        - The file exists
-        - The file is either a single .safetensors file or a
-          directory containing .safetensors files
-
-    :param filepath: A string file path to validate
-    """
-
-    filepath_: Path = Path(filepath)
-
-    if not filepath_.exists():
-        raise FileNotFoundError(f"File not found: {filepath}")
-
-    if filepath_.is_dir() and not any(filepath_.glob("*.safetensors")):
-        raise FileNotFoundError(f"No .safetensors files found in directory: {filepath}")
-
-    if filepath_.is_file() and not filepath_.suffix == ".safetensors":
-        raise ValueError(f"File must be a .safetensors file: {filepath}")
+    quantized model to convert it to a compressed tensor model
+
+    Transformations made:
+
+    -> Unpack autogptq 4 bit weight packing
+    -> Translate exllama names to compressed tensor names
+    -> Pack 4 bit weights with compressed tensor format
+    -> Remove unused tensors
+    -> Update quantization config in config.json file
+    """
+
+    @classmethod
+    def transformations(cls):
+        return (
+            transform_autogptq_weights_and_reshape_tensors,
+            transform_exllama_names,
+            remove_unused_tensors,
+        )
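For context, `cls.translate` (defined on `BaseConverter`, outside this diff) presumably folds each of these transformations over the state_dict in order; a hypothetical sketch of that pipeline:

    # Hypothetical sketch of how the transformations tuple is consumed;
    # the real translate() implementation is not part of this diff.
    def translate_sketch(state_dict, **kwargs):
        for transformation in AutoGPTQConverter.transformations():
            state_dict = transformation(state_dict)  # each returns an updated dict
        return state_dict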


 def _copy_non_safetensor_files_(source_dir: Path, dest_dir: Path):
@@ -178,7 +181,7 @@ def _copy_non_safetensor_files_(source_dir: Path, dest_dir: Path):
     :param dest_dir: The directory to copy files to
     """
     for file in source_dir.glob("*"):
-        if file.suffix != ".safetensors":
+        if file.suffix != ".safetensors" and file.name != "config.json":
             _LOGGER.info(f"Copying file: {file} to {dest_dir}")
             shutil.copy(file, dest_dir / file.name)

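Note the coupling here: config.json is now skipped during the copy, presumably because `_update_quantization_config` is meant to write an updated copy via `save_pretrained`; since that call is commented out above, no config.json reaches `save_dir` in the interim.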
@@ -198,7 +201,9 @@ def _update_quantization_config(source_dir: Path, dest_dir: Path):
     if hasattr(config, "quantization_config"):
         _LOGGER.info("Updating quantization config...")
         quantization_config = config.quantization_config
-        config.quantization_config = _convert_to_compressed_tensors_config(quantization_config)
+        config.quantization_config = _convert_to_compressed_tensors_config(
+            quantization_config
+        )
         config.save_pretrained(dest_dir)

@@ -207,12 +212,14 @@ def _convert_to_compressed_tensors_config(quantization_config):
     Converts the quantization_config attribute from a config.json file
     to a dictionary

-    :param quantization_config: The quantization_config attribute from a config.json file
+    :param quantization_config: The quantization_config
+        attribute from a config.json file
     :return: The quantization_config as a dictionary
     """
     compressed_tensor_config = ...
     return compressed_tensor_config

+
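`_convert_to_compressed_tensors_config` remains a stub (its body is literally `...`). One plausible shape, purely illustrative since the real mapping is unwritten, assuming the incoming HF-style config object exposes `to_dict()`:

    # Illustrative only: the actual conversion logic is not implemented yet.
    def _convert_to_dict_sketch(quantization_config) -> dict:
        if hasattr(quantization_config, "to_dict"):
            return quantization_config.to_dict()
        return dict(vars(quantization_config))  # fallback for plain objects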
 def layer_count(file_path: str) -> int:
     """
     Count the number of layers in a safetensors file
@@ -222,16 +229,15 @@ def layer_count(file_path: str) -> int:
     """
     with safe_open(file_path, framework="pt", device="cpu") as f:
         keys = sorted(f.keys())
-
+
         last_layer_name = None
         layer_count = 0
         for key in keys:
-            layer_name = key[:len("model.layers.0")]
+            layer_name = key[: len("model.layers.0")]
             if layer_name != last_layer_name:
                 last_layer_name = layer_name
                 layer_count += 1
         return layer_count
-
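A note on the `key[: len("model.layers.0")]` idiom used here and in `load_safetensors_state_dict` below: it takes a fixed 14-character prefix, which distinguishes layers 0-9 but collapses double-digit layers into their single-digit neighbors. A quick sketch of the behavior and a more robust alternative:

    key = "model.layers.0.self_attn.q_proj.weight"
    key[: len("model.layers.0")]        # -> "model.layers.0"

    # Caveat: "model.layers.10.mlp.down_proj.weight"[:14] is also
    # "model.layers.1", so in sorted order layers 1 and 10-19 share a
    # prefix and are counted as one layer. Splitting on dots avoids it:
    ".".join(key.split(".")[:3])        # -> "model.layers.0"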


 def load_safetensors_state_dict(
@@ -251,7 +257,7 @@ def load_safetensors_state_dict(
         current_layer = None
         layer_data = {}
         for key in sorted(f.keys()):
-            layer_name = key[:len("model.layers.0")]
+            layer_name = key[: len("model.layers.0")]
             if current_layer is None:
                 current_layer = layer_name
             elif layer_name != current_layer:
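Given the call site above (`load_safetensors_state_dict(file, by_layers=True)`), the function evidently yields one per-layer dict of tensors at a time; a hypothetical consumption sketch (the filename is illustrative):

    # Hypothetical usage sketch: stream one layer's tensors at a time so peak
    # memory tracks the largest layer rather than the whole checkpoint.
    for layer_state_dict in load_safetensors_state_dict(
        "model.safetensors", by_layers=True
    ):
        for name, tensor in layer_state_dict.items():
            print(name, tuple(tensor.shape))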