
Commit 061f1ce

Show warning when using a quantized base model
1 parent ecd4827 commit 061f1ce

1 file changed (+10, -3 lines)

llama.cpp

Lines changed: 10 additions & 3 deletions
@@ -1842,9 +1842,8 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
         model_loader->mapping.reset(new llama_mmap(&model_loader->file_loaders.at(0)->file, false));
     }
 
-    fprintf(stderr, "%s: ", __func__);
-
     // read tensors and apply
+    bool warned = false;
     int n_tensors = 0;
     while (true) {
         int32_t n_dims;
@@ -1937,6 +1936,14 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
                 base_t = dest_t;
             }
 
+            if (base_t->type == GGML_TYPE_Q4_0 || base_t->type == GGML_TYPE_Q4_1) {
+                if (!warned) {
+                    fprintf(stderr, "%s: warning: using a lora adapter with a quantized model may result in poor quality, "
+                            "use a f16 or f32 base model with --lora-base\n", __func__);
+                    warned = true;
+                }
+            }
+
             ggml_tensor * loraA = lora_tensors[base_name + ".loraA"];
             ggml_tensor * loraB = lora_tensors[base_name + ".loraB"];
 
@@ -1973,7 +1980,7 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
             lora_tensors.clear();
 
             n_tensors++;
-            if (n_tensors % 8 == 0)
+            if (n_tensors % 4 == 0)
                 fprintf(stderr, ".");
         }
     }
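
For context, the type check added in the second hunk runs once for every tensor the adapter touches, so the new `warned` flag keeps the message from being printed repeatedly. Below is a minimal standalone sketch of that warn-once pattern; the type names, the loop, and the function names are illustrative stand-ins, not the real ggml/llama structures.

    // Warn-once sketch (illustrative only; not the actual llama.cpp code).
    #include <cstdio>

    enum class tensor_type { f16, f32, q4_0, q4_1 };

    static void apply_adapter(const tensor_type * types, int n_tensors) {
        bool warned = false; // set after the first warning so it prints only once
        for (int i = 0; i < n_tensors; ++i) {
            if ((types[i] == tensor_type::q4_0 || types[i] == tensor_type::q4_1) && !warned) {
                fprintf(stderr, "%s: warning: quantized base tensor, quality may suffer\n", __func__);
                warned = true;
            }
            // ... apply the LoRA delta to tensor i ...
        }
    }

    int main() {
        const tensor_type types[] = { tensor_type::f16, tensor_type::q4_0, tensor_type::q4_1 };
        apply_adapter(types, 3);
        return 0;
    }

The warning text itself points users at the --lora-base option, which lets a higher-precision (f16 or f32) model supply the base weights for the layers the adapter modifies instead of the quantized tensors.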
