 #print("add path", str(Path(__file__).parent.parent))
 sys.path.insert(0, str(Path(__file__).parent.parent))

-from gguf import GGUFReader, GGUFWriter, ReaderField, GGUFEndian, GGUFValueType, Keys, NamedObject  # noqa: E402
+from gguf import GGUFReader, GGUFWriter, ReaderField, GGUFEndian, GGUFValueType, Keys  # noqa: E402

 logger = logging.getLogger("gguf-addfile")

@@ -49,10 +49,10 @@ def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
         if len(field.types) == 1:
             curr_type = field.types[0]
             if curr_type == GGUFValueType.STRING:
-                print(' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60])), end='')
-            elif curr_type == GGUFValueType.NAMEDOBJECT:
-                print(' = {0}'.format(repr(str(bytes(field.parts[4]), encoding='utf8')[:60])), end='')
-                print(', {0}'.format(int(field.parts[5]))[:20], end='')
+                if not field.name[0] == Keys.General.FILE_MARK:
+                    print(' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60])), end='')
+                else:
+                    print(' = binary data', end='')
             elif field.types[0] in reader.gguf_scalar_to_np:
                 print(' = {0}'.format(field.parts[-1][0]), end='')
         print()
@@ -88,16 +88,17 @@ def dump_metadata_json(reader: GGUFReader, args: argparse.Namespace) -> None:
                 continue
             itype = field.types[-1]
             if itype == GGUFValueType.STRING:
-                curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
-            elif itype == GGUFValueType.NAMEDOBJECT:
-                curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
+                if not field.name[0] == Keys.General.FILE_MARK:
+                    curr["value"] = [str(bytes(field.parts[idx]), encoding="utf-8") for idx in field.data]
+                else:
+                    curr["value"] = [bytes(field.parts[idx]) for idx in field.data]
             else:
                 curr["value"] = [pv for idx in field.data for pv in field.parts[idx].tolist()]
         elif field.types[0] == GGUFValueType.STRING:
-            curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
-        elif field.types[0] == GGUFValueType.NAMEDOBJECT:
-            curr["value"] = str(bytes(field.parts[4]), encoding="utf-8")
-            curr["value"] = int(field.parts[5])
+            if not field.name[0] == Keys.General.FILE_MARK:
+                curr["value"] = str(bytes(field.parts[-1]), encoding="utf-8")
+            else:
+                curr["value"] = bytes(field.parts[-1])
         else:
             curr["value"] = field.parts[-1].tolist()[0]
     if not args.no_tensors:
@@ -135,15 +136,17 @@ def decode_field(field: ReaderField) -> Any:
             sub_type = field.types[-1]

             if sub_type == GGUFValueType.STRING:
-                return [str(bytes(field.parts[idx]), encoding='utf8') for idx in field.data]
-            elif sub_type == GGUFValueType.NAMEDOBJECT:
-                return [str(bytes(field.parts[idx]), encoding='utf8') for idx in field.data]
+                if not field.name[0] == Keys.General.FILE_MARK:
+                    return [str(bytes(field.parts[idx]), encoding='utf8') for idx in field.data]
+                else:
+                    return [bytes(field.parts[idx]) for idx in field.data]
             else:
                 return [pv for idx in field.data for pv in field.parts[idx].tolist()]
         if main_type == GGUFValueType.STRING:
-            return str(bytes(field.parts[-1]), encoding='utf8')
-        elif main_type == GGUFValueType.NAMEDOBJECT:
-            return str(bytes(field.parts[4]), encoding='utf8')
+            if not field.name[0] == Keys.General.FILE_MARK:
+                return str(bytes(field.parts[-1]), encoding='utf8')
+            else:
+                return bytes(field.parts[-1])
         else:
             return field.parts[-1][0]

@@ -156,7 +159,7 @@ def get_field_data(reader: GGUFReader, key: str) -> Any:
     return decode_field(field)


-def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: Mapping[str, str], array: NamedObject[Any] | None = None) -> None:
+def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: Mapping[str, str]) -> None:
     for field in reader.fields.values():
         # Suppress virtual fields and fields written by GGUFWriter
         if field.name == Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
@@ -186,18 +189,11 @@ def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new
         writer.add_chat_template(new_metadata[Keys.Tokenizer.CHAT_TEMPLATE])
         del new_metadata[Keys.Tokenizer.CHAT_TEMPLATE]

-    if array is None:
-        for key, name in new_metadata.items():
-            logger.debug(f'Adding {key}: {name}')
-            # named object
-            with open(name, "rb") as f:
-                val = f.read()
-            writer.add_namedobject(key, val, name)
-    else:
-        for key, name in new_metadata.items():
-            logger.debug(f'Adding array {key}: {name}')
-            # named object
-            writer.add_namedobject(key, 'val', name, array=array)
+    for key, name in new_metadata.items():
+        logger.debug(f'Adding {key}: {name}')
+        with open(name, "rb") as f:
+            val = f.read()
+        writer.add_object(key, val)

     for tensor in reader.tensors:
         # Dimensions are written in reverse order, so flip them first
@@ -219,7 +215,6 @@ def main() -> None:
     parser.add_argument("input", type=str, help="GGUF format model input filename")
     parser.add_argument("output", type=str, help="GGUF format model output filename")
     parser.add_argument("addfiles", type=str, nargs='+', help="add filenames ...")
-    parser.add_argument("--array", action="store_true", help="add files to namedobject array")
     parser.add_argument("--no-tensors", action="store_true", help="Don't dump tensor metadata")
     parser.add_argument("--json", action="store_true", help="Produce JSON output")
     parser.add_argument("--json-array", action="store_true", help="Include full array values in JSON output (long)")
@@ -242,27 +237,12 @@ def main() -> None:

     logger.info(f'* Adding: {args.addfiles}')
     new_metadata = {}
-    count = 0
-    if args.array is False:
-        for path in args.addfiles:
-            count += 1
-            key = Keys.General.NAMEDOBJECT + Keys.General.CONNECT + str(count)
-            new_metadata[key] = path
-            logger.info(f'* Adding: {key} = {path}')
-        copy_with_new_metadata(reader, writer, new_metadata)
-    else:
-        key = Keys.General.NAMEDOBJECT
-        # array is dummy
-        new_metadata[key] = 'array'
-        files = []
-        for path in args.addfiles:
-            with open(path, "rb") as f:
-                val = f.read()
-            #print(f'files[{count}] = {path}')
-            files.append(NamedObject(path, val))
-            logger.info(f'* Adding: {key}[{count}] = {path}')
-            count += 1
-        copy_with_new_metadata(reader, writer, new_metadata, array=files)
+    for path in args.addfiles:
+        # add FILE_MARK to key
+        key = Keys.General.FILE_MARK + path
+        new_metadata[key] = path
+        logger.info(f'* Adding: {key} = {path}')
+    copy_with_new_metadata(reader, writer, new_metadata)

     if args.json:
         dump_metadata_json(reader, args)
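With this change, a run like `python gguf-addfile.py in.gguf out.gguf tokenizer.json README.md` stores each file's raw bytes under a string-typed key built as FILE_MARK plus the file's path. A hedged sketch of reading such a file back out, assuming the single-character mark implied by the field.name[0] checks above; GGUFReader and Keys are the gguf package's API, decode_field() is the helper defined in this script, and the filenames are illustrative:

    from gguf import GGUFReader, Keys

    reader = GGUFReader('out.gguf', 'r')
    for name, field in reader.fields.items():
        # keys written by this script start with the FILE_MARK character
        if name and name[0] == Keys.General.FILE_MARK:
            data = decode_field(field)       # returns bytes for FILE_MARK-ed fields
            with open(name[1:], 'wb') as f:  # strip the mark to recover the original path
                f.write(data)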