@@ -10,6 +10,12 @@
 
 static bool g_verbose = false;
 
+struct tensor_transformation {
+    struct ggml_tensor * in;
+    struct ggml_tensor * out;
+    bool is_copy;
+};
+
 static std::string get_kv_str(struct gguf_context * ctx_gguf, const std::string & key){
     int id = gguf_find_key(ctx_gguf, key.c_str());
     return id < 0 ? "" : std::string(gguf_get_val_str(ctx_gguf, id));
@@ -198,8 +204,7 @@ struct lora_merge_ctx {
         }
 
         // mapping base tensor to out tensor (same shape with base, but different type)
-        // if out_tensor == nullptr, we only copy it
-        std::vector<std::pair<struct ggml_tensor *, struct ggml_tensor *>> base_to_out_tensors;
+        std::vector<tensor_transformation> trans;
         for (auto & it : base_model.tensors) {
             bool t_a = true;
             bool t_b = true;
@@ -212,14 +217,22 @@ struct lora_merge_ctx {
                 // only copy
                 struct ggml_tensor * cpy_tensor = ggml_dup_tensor(ctx_out_ggml, base_tensor);
                 ggml_set_name(cpy_tensor, base_tensor->name);
-                base_to_out_tensors.push_back(std::make_pair(cpy_tensor, nullptr));
+                trans.push_back({
+                    cpy_tensor,
+                    cpy_tensor,
+                    true,
+                });
                 gguf_add_tensor(ctx_out, cpy_tensor);
             } else if (t_a && t_b) {
                 // need merging
                 struct ggml_tensor * out_tensor = ggml_new_tensor(
                     ctx_out_ggml, get_out_tensor_type(base_tensor), GGML_MAX_DIMS, base_tensor->ne);
                 ggml_set_name(out_tensor, base_tensor->name);
-                base_to_out_tensors.push_back(std::make_pair(base_tensor, out_tensor));
+                trans.push_back({
+                    base_tensor,
+                    out_tensor,
+                    false,
+                });
                 gguf_add_tensor(ctx_out, out_tensor);
             } else {
                 throw std::runtime_error("tensor " + it.first + " missing either lora_a or lora_b");
@@ -234,12 +247,12 @@ struct lora_merge_ctx {
 
         // process base model tensors
         size_t n_merged = 0;
-        for (auto & it : base_to_out_tensors) {
-            if (it.second != nullptr) {
-                merge_tensor(it.first, it.second);
+        for (auto & it : trans) {
+            if (!it.is_copy) {
+                merge_tensor(it.in, it.out);
                 n_merged++;
             } else {
-                copy_tensor(it.first);
+                copy_tensor(it.in);
             }
         }
 
@@ -252,7 +265,7 @@ struct lora_merge_ctx {
         }
 
         printf("%s : merged %ld tensors with lora adapters\n", __func__, n_merged);
-        printf("%s : wrote %ld tensors to output file\n", __func__, base_to_out_tensors.size());
+        printf("%s : wrote %ld tensors to output file\n", __func__, trans.size());
     }
 
     void copy_tensor(struct ggml_tensor * base) {
@@ -285,6 +298,10 @@ struct lora_merge_ctx {
         for (size_t i = 0; i < adapters.size(); ++i) {
             auto t_a = adapters[i]->get_tensor(name_lora_a);
             auto t_b = adapters[i]->get_tensor(name_lora_b);
+            // TODO: add support for quantized lora
+            if (ggml_is_quantized(t_a->type) || ggml_is_quantized(t_b->type)) {
+                throw std::runtime_error("quantized LoRA adapters is not supported, please retry with f16 or f32");
+            }
             inp_a[i] = ggml_dup_tensor(ctx, t_a);
             inp_b[i] = ggml_dup_tensor(ctx, t_b);
         }
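The core of the patch is the switch from a std::pair with a nullptr sentinel ("no out tensor means copy only") to a small named struct with an explicit is_copy flag. Below is a minimal, self-contained sketch of that pattern, not part of the patch itself: fake_tensor stands in for struct ggml_tensor, and the tensor names are made-up placeholders, so the snippet compiles and runs on its own.

// Standalone sketch: explicit tensor_transformation entries instead of a
// std::pair with a nullptr sentinel. fake_tensor is a stand-in for ggml_tensor.
#include <cstdio>
#include <vector>

struct fake_tensor { const char * name; };

struct tensor_transformation {
    fake_tensor * in;
    fake_tensor * out;
    bool          is_copy;
};

int main() {
    fake_tensor norm {"blk.0.attn_norm.weight"};   // hypothetical tensor with no adapter data
    fake_tensor q_in {"blk.0.attn_q.weight"};      // hypothetical tensor covered by lora_a/lora_b
    fake_tensor q_out{"blk.0.attn_q.weight out"};  // output tensor for the merged result

    std::vector<tensor_transformation> trans;
    trans.push_back({ &norm, &norm,  true  });  // copy branch: same tensor in both fields
    trans.push_back({ &q_in, &q_out, false });  // merge branch: distinct in/out tensors

    size_t n_merged = 0;
    for (auto & it : trans) {
        if (!it.is_copy) {
            // the real tool calls merge_tensor(it.in, it.out) here
            n_merged++;
        } else {
            // the real tool calls copy_tensor(it.in) here
        }
    }
    printf("merged %zu tensors, wrote %zu tensors total\n", n_merged, trans.size());
    return 0;
}

The explicit flag avoids overloading a null pointer with meaning, and it lets the copy case carry the same tensor in both fields, which is exactly how the patch fills the struct in the copy branch.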