@@ -1696,8 +1696,12 @@ int llama_model_quantize(
 
 int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lora, int n_threads) {
     // TODO: refactor all of this after PR #801
+    fprintf(stderr, "%s: applying lora adapter from '%s' - please wait ...\n", __func__, path_lora);
+
     auto & model = ctx->model;
 
+    const int64_t t_start_lora_us = ggml_time_us();
+
     auto fin = std::ifstream(path_lora, std::ios::binary);
     if (!fin) {
         fprintf(stderr, "%s: failed to open '%s'\n", __func__, path_lora);
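The additions in this hunk are a progress message and the start timestamp for the elapsed-time report emitted at the end of the function. ggml_time_us() returns a wall-clock timestamp in microseconds; below is a minimal self-contained sketch of how that timer is typically paired with a final elapsed-milliseconds printout. do_work() is a placeholder, not llama.cpp code, and the sketch assumes ggml.h is on the include path:

    #include <cstdint>
    #include <cstdio>

    #include "ggml.h"

    // placeholder for the actual work being timed (not llama.cpp code)
    static void do_work(void) { }

    int main(void) {
        ggml_time_init(); // set up ggml's timer state (needed once per process)

        const int64_t t_start_us = ggml_time_us(); // microsecond wall clock
        do_work();
        const int64_t t_us = ggml_time_us() - t_start_us;

        fprintf(stderr, "done (%.2f ms)\n", t_us / 1000.0); // report in milliseconds
        return 0;
    }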
@@ -1810,7 +1814,7 @@ int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lor
             lora_tensors.find(base_name + ".loraB") != lora_tensors.end()) {
 
             ggml_tensor * tensor = model.tensors[base_name];
-            ggml_tensor * loraA = ggml_transpose(lora_ctx, lora_tensors[base_name + ".loraA"]);
+            ggml_tensor * loraA = lora_tensors[base_name + ".loraA"];
             ggml_tensor * loraB = lora_tensors[base_name + ".loraB"];
 
             if (tensor->ne[0] != loraA->ne[1]) {
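The one-line change in this hunk drops the runtime ggml_transpose of loraA and uses the tensor in its stored layout; presumably the adapter file now ships loraA pre-transposed, though that change is outside this diff. The surviving shape check compares tensor->ne[0] (ggml's innermost, fastest-varying dimension) against loraA->ne[1]. Mathematically the function applies the standard LoRA update W' = W + B·A, where for a d_out × d_in weight W the adapter stores A (r × d_in) and B (d_out × r) with small rank r. A minimal plain-array sketch of that update follows; the function name, the row-major layout, and the scale parameter (LoRA adapters usually carry an alpha/rank scaling) are illustrative assumptions, not code from llama.cpp:

    #include <cstddef>
    #include <vector>

    // Apply W += scale * (B * A) in place.
    // W: d_out x d_in, B: d_out x r, A: r x d_in, all row-major.
    // 'scale' stands in for the alpha/rank scaling many adapters carry (assumption).
    void apply_lora_delta(std::vector<float> & W,
                          const std::vector<float> & B,
                          const std::vector<float> & A,
                          size_t d_out, size_t d_in, size_t r,
                          float scale) {
        for (size_t i = 0; i < d_out; ++i) {
            for (size_t j = 0; j < d_in; ++j) {
                float acc = 0.0f;
                for (size_t k = 0; k < r; ++k) {
                    acc += B[i*r + k] * A[k*d_in + j]; // (B*A)[i][j]
                }
                W[i*d_in + j] += scale * acc; // low-rank in-place update
            }
        }
    }

Because r is small (typically 8-64), the delta costs O(d_out * d_in * r) instead of materializing a full fine-tuned weight matrix.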
@@ -1837,7 +1841,11 @@ int llama_apply_lora_from_file(struct llama_context * ctx, const char * path_lor
             fprintf(stderr, ".");
         }
     }
-    fprintf(stderr, " done\n");
+
+    ggml_free(lora_ctx);
+
+    const int64_t t_lora_us = ggml_time_us() - t_start_lora_us;
+    fprintf(stderr, " done (%.2f ms)\n", t_lora_us / 1000.0);
 
     return 0;
 }
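The final hunk frees the temporary lora_ctx before returning, and replaces the bare "done" message with the elapsed-milliseconds report started in the first hunk. Since the function returns 0 on success, callers can gate on the return value. A hedged usage sketch of the entry point follows; llama_init_from_file, llama_context_default_params, and llama_free are assumed from the llama.h of this era, and both file paths are placeholders:

    #include <cstdio>

    #include "llama.h"

    int main(void) {
        llama_context_params params = llama_context_default_params();

        // placeholder model path (assumption, not from the diff)
        struct llama_context * ctx = llama_init_from_file("models/7B/ggml-model-q4_0.bin", params);
        if (ctx == NULL) {
            fprintf(stderr, "failed to load model\n");
            return 1;
        }

        // apply the adapter with 4 worker threads; returns 0 on success
        if (llama_apply_lora_from_file(ctx, "lora/ggml-adapter-model.bin", 4) != 0) {
            fprintf(stderr, "failed to apply lora adapter\n");
            return 1;
        }

        // ... run inference against the patched weights ...

        llama_free(ctx);
        return 0;
    }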