@@ -86,6 +86,11 @@ void GGUFHandler::OpenFile(const std::string& file_path) {
86
86
#endif
87
87
}
88
88
89
+ void GGUFHandler::CheckOffset (int offset) const {
90
+ if (offset > file_size_)
91
+ throw std::runtime_error (" Unexpected EOF" );
92
+ }
93
+
89
94
void GGUFHandler::CloseFile () {
90
95
#ifdef _WIN32
91
96
if (data_ != nullptr ) {
@@ -101,14 +106,11 @@ void GGUFHandler::CloseFile() {
101
106
102
107
std::pair<std::size_t , std::string> GGUFHandler::ReadString (
103
108
std::size_t offset) const {
109
+ CheckOffset (offset + 8 );
104
110
uint64_t length;
105
111
std::memcpy (&length, data_ + offset, sizeof (uint64_t ));
106
112
107
- if (offset + 8 + length > file_size_) {
108
- throw std::runtime_error (
109
- " GGUF metadata string length exceeds file size.\n " );
110
- }
111
-
113
+ CheckOffset (offset + 8 + length);
112
114
std::string value (reinterpret_cast <const char *>(data_ + offset + 8 ), length);
113
115
return {8 + static_cast <std::size_t >(length), value};
114
116
}
@@ -117,29 +119,37 @@ size_t GGUFHandler::ReadMetadataValue(int type, std::size_t offset,
117
119
const std::string& key) {
118
120
switch (type) {
119
121
case 0 : // UINT8
122
+ CheckOffset (offset + 1 );
120
123
metadata_uint8_[key] = data_[offset];
121
124
return 1 ;
122
125
case 1 : // INT8
126
+ CheckOffset (offset + 1 );
123
127
metadata_int8_[key] = static_cast <int8_t >(data_[offset]);
124
128
return 1 ;
125
129
case 2 : // UINT16
130
+ CheckOffset (offset + 2 );
126
131
metadata_uint16_[key] =
127
132
*reinterpret_cast <const uint16_t *>(data_ + offset);
128
133
return 2 ;
129
134
case 3 : // INT16
135
+ CheckOffset (offset + 2 );
130
136
metadata_int16_[key] = *reinterpret_cast <const int16_t *>(data_ + offset);
131
137
return 2 ;
132
138
case 4 : // UINT32
139
+ CheckOffset (offset + 4 );
133
140
metadata_uint32_[key] =
134
141
*reinterpret_cast <const uint32_t *>(data_ + offset);
135
142
return 4 ;
136
143
case 5 : // INT32
144
+ CheckOffset (offset + 4 );
137
145
metadata_int32_[key] = *reinterpret_cast <const int32_t *>(data_ + offset);
138
146
return 4 ;
139
147
case 6 : // FLOAT32
148
+ CheckOffset (offset + 4 );
140
149
metadata_float_[key] = *reinterpret_cast <const float *>(data_ + offset);
141
150
return 4 ;
142
151
case 7 : // BOOL
152
+ CheckOffset (offset + 1 );
143
153
metadata_bool_[key] = data_[offset] != 0 ;
144
154
return 1 ;
145
155
case 8 : // STRING
@@ -152,13 +162,16 @@ size_t GGUFHandler::ReadMetadataValue(int type, std::size_t offset,
152
162
153
163
return ReadArray (offset, key);
154
164
case 10 : // UINT64
165
+ CheckOffset (offset + 8 );
155
166
metadata_uint64_[key] =
156
167
*reinterpret_cast <const uint64_t *>(data_ + offset);
157
168
return 8 ;
158
169
case 11 : // INT64
170
+ CheckOffset (offset + 8 );
159
171
metadata_int64_[key] = *reinterpret_cast <const int64_t *>(data_ + offset);
160
172
return 8 ;
161
173
case 12 : // FLOAT64
174
+ CheckOffset (offset + 8 );
162
175
metadata_double_[key] = *reinterpret_cast <const double *>(data_ + offset);
163
176
return 8 ;
164
177
default :
@@ -168,9 +181,11 @@ size_t GGUFHandler::ReadMetadataValue(int type, std::size_t offset,
168
181
}
169
182
170
183
size_t GGUFHandler::ReadArray (std::size_t offset, const std::string& key) {
184
+ CheckOffset (offset + 4 );
171
185
uint32_t array_type = *reinterpret_cast <const uint32_t *>(data_ + offset);
172
186
// std::memcpy(&array_type, data_ + offset, sizeof(uint32_t));
173
187
188
+ CheckOffset (offset + 4 + 8 );
174
189
uint64_t array_length =
175
190
*reinterpret_cast <const uint64_t *>(data_ + offset + 4 );
176
191
// std::memcpy(&array_length, data_ + offset + 4, sizeof(uint64_t));
@@ -199,53 +214,62 @@ size_t GGUFHandler::ReadArray(std::size_t offset, const std::string& key) {
199
214
// assume that an array only has 2 types: string and int
200
215
switch (array_type) {
201
216
case 0 :
217
+ CheckOffset (offset + array_offset + 1 );
202
218
uint8_value = data_[offset + array_offset];
203
219
length = 1 ;
204
220
array_values_float.push_back (static_cast <float >(uint8_value));
205
221
break ;
206
222
case 1 : {
223
+ CheckOffset (offset + array_offset + 1 );
207
224
int8_value = static_cast <int8_t >(data_[offset + array_offset]);
208
225
length = 1 ;
209
226
array_values_float.push_back (static_cast <float >(int8_value));
210
227
}
211
228
212
229
break ;
213
230
case 2 :
231
+ CheckOffset (offset + array_offset + 2 );
214
232
uint16_value =
215
233
*reinterpret_cast <const uint16_t *>(data_ + offset + array_offset);
216
234
length = 2 ;
217
235
array_values_float.push_back (static_cast <float >(uint16_value));
218
236
break ;
219
237
case 3 :
238
+ CheckOffset (offset + array_offset + 2 );
220
239
int16_value =
221
240
*reinterpret_cast <const int16_t *>(data_ + offset + array_offset);
222
241
length = 2 ;
223
242
array_values_float.push_back (static_cast <float >(int16_value));
224
243
break ;
225
244
case 4 :
245
+ CheckOffset (offset + array_offset + 4 );
226
246
uint32_value =
227
247
*reinterpret_cast <const uint32_t *>(data_ + offset + array_offset);
228
248
length = 4 ;
229
249
array_values_float.push_back (static_cast <float >(uint32_value));
230
250
break ;
231
251
case 5 :
252
+ CheckOffset (offset + array_offset + 4 );
232
253
int32_value =
233
254
*reinterpret_cast <const int32_t *>(data_ + offset + array_offset);
234
255
length = 4 ;
235
256
array_values_float.push_back (static_cast <float >(int32_value));
236
257
break ;
237
258
case 6 :
259
+ CheckOffset (offset + array_offset + 4 );
238
260
float_value =
239
261
*reinterpret_cast <const float *>(data_ + offset + array_offset);
240
262
length = 4 ;
241
263
array_values_float.push_back (static_cast <float >(float_value));
242
264
break ;
243
265
case 7 :
266
+ CheckOffset (offset + array_offset + 1 );
244
267
bool_value = data_[offset + array_offset] != 0 ;
245
268
length = 1 ;
246
269
array_values_float.push_back (static_cast <float >(bool_value));
247
270
break ;
248
271
case 8 : {
272
+ CheckOffset (offset + array_offset + 8 );
249
273
uint64_t length_ =
250
274
*reinterpret_cast <const uint64_t *>(data_ + offset + array_offset);
251
275
std::string value (
@@ -255,18 +279,21 @@ size_t GGUFHandler::ReadArray(std::size_t offset, const std::string& key) {
255
279
array_values_string.push_back (value);
256
280
} break ;
257
281
case 10 :
282
+ CheckOffset (offset + array_offset + 8 );
258
283
uint64_value =
259
284
*reinterpret_cast <const uint64_t *>(data_ + offset + array_offset);
260
285
length = 8 ;
261
286
array_values_float.push_back (static_cast <float >(uint64_value));
262
287
break ;
263
288
case 11 :
289
+ CheckOffset (offset + array_offset + 8 );
264
290
int64_value =
265
291
*reinterpret_cast <const int64_t *>(data_ + offset + array_offset);
266
292
length = 8 ;
267
293
array_values_float.push_back (static_cast <float >(int64_value));
268
294
break ;
269
295
case 12 :
296
+ CheckOffset (offset + array_offset + 8 );
270
297
double_value =
271
298
*reinterpret_cast <const double *>(data_ + offset + array_offset);
272
299
length = 8 ;
@@ -279,9 +306,6 @@ size_t GGUFHandler::ReadArray(std::size_t offset, const std::string& key) {
279
306
}
280
307
281
308
array_offset += length;
282
- if (offset + array_offset > file_size_) {
283
- throw std::runtime_error (" GGUF Parser Array exceeded file size.\n " );
284
- }
285
309
}
286
310
if (array_values_string.size () > 0 )
287
311
metadata_array_string_[key] = array_values_string;
@@ -290,8 +314,11 @@ size_t GGUFHandler::ReadArray(std::size_t offset, const std::string& key) {
290
314
return array_offset;
291
315
}
292
316
317
+ // https://github.com/ggml-org/ggml/blob/master/docs/gguf.md
293
318
void GGUFHandler::Parse (const std::string& file_path) {
294
319
OpenFile (file_path);
320
+ CheckOffset (4 + 4 + 8 + 8 );
321
+
295
322
LOG_INFO << " GGUF magic number: " << *reinterpret_cast <const uint32_t *>(data_)
296
323
<< " \n " ;
297
324
if (*reinterpret_cast <const uint32_t *>(data_) != GGUF_MAGIC_NUMBER) {
@@ -311,6 +338,7 @@ void GGUFHandler::Parse(const std::string& file_path) {
311
338
auto [key_byte_length, key] = ReadString (offset);
312
339
offset += key_byte_length;
313
340
LOG_INFO << " key: " << key << " \n " ;
341
+ CheckOffset (offset + 4 );
314
342
uint32_t value_type = *reinterpret_cast <const uint32_t *>(data_ + offset);
315
343
offset += 4 ;
316
344
LOG_INFO << " value type number: " << value_type << " \n " ;
0 commit comments