1 file changed: 10 additions, 3 deletions

@@ -1843,9 +1843,8 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
         model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, false));
     }

-    fprintf(stderr, "%s: ", __func__);
-
     // read tensors and apply
+    bool warned = false;
     int n_tensors = 0;
     while (true) {
         int32_t n_dims;
@@ -1938,6 +1937,14 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
             base_t = dest_t;
         }

+        if (base_t->type == GGML_TYPE_Q4_0 || base_t->type == GGML_TYPE_Q4_1) {
+            if (!warned) {
+                fprintf(stderr, "%s: warning: using a lora adapter with a quantized model may result in poor quality, "
+                                "use a f16 or f32 base model with --lora-base\n", __func__);
+                warned = true;
+            }
+        }
+
         ggml_tensor * loraA = lora_tensors[base_name + ".loraA"];
         ggml_tensor * loraB = lora_tensors[base_name + ".loraB"];

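The hunk above is a standard warn-once guard: the type check runs for every tensor in the loop, but the warned flag (declared once before the loop in the first hunk) ensures the message prints at most once per call. The split string literal in the fprintf is ordinary C/C++ adjacent-string concatenation; the compiler joins the two pieces into one format string. A minimal standalone sketch of the same pattern, with hypothetical names (TensorType, process_tensor, apply_all) standing in for the llama.cpp specifics:

#include <cstdio>
#include <vector>

enum class TensorType { F32, F16, Q4_0, Q4_1 };

// Hypothetical stand-in for the per-tensor work in the real loop.
static void process_tensor(TensorType) {}

static void apply_all(const std::vector<TensorType> & tensors) {
    bool warned = false;  // declared once, outside the loop
    for (TensorType t : tensors) {
        // Warn on the first quantized tensor only; later hits stay silent.
        if ((t == TensorType::Q4_0 || t == TensorType::Q4_1) && !warned) {
            std::fprintf(stderr, "%s: warning: quantized base tensor, expect reduced quality\n", __func__);
            warned = true;
        }
        process_tensor(t);
    }
}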
@@ -1974,7 +1981,7 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
         lora_tensors.clear();

         n_tensors++;
-        if (n_tensors % 8 == 0)
+        if (n_tensors % 4 == 0)
             fprintf(stderr, ".");
     }
 }
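The last hunk only adjusts the progress indicator: a dot now prints after every 4 tensors instead of every 8, doubling the feedback rate. The pattern is a plain modulo throttle; a self-contained sketch, with the tensor count assumed purely for illustration:

#include <cstdio>

int main() {
    const int n_total = 32;  // assumed count, for illustration only
    int n_tensors = 0;
    for (int i = 0; i < n_total; ++i) {
        // ... per-tensor work would go here ...
        n_tensors++;
        if (n_tensors % 4 == 0) {  // one dot per 4 tensors processed
            std::fprintf(stderr, ".");
        }
    }
    std::fprintf(stderr, " done\n");
    return 0;
}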