Skip to content

Commit 0dd3907

Browse files
committed
qwen2 warning FA
1 parent 116d5fe commit 0dd3907

File tree

3 files changed

+7
-14
lines changed

3 files changed

+7
-14
lines changed

gpttype_adapter.cpp

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -826,17 +826,6 @@ static float CalcGradientAIRopeFreqBase(float original_rope_base, int n_ctx_trai
826826
}
827827
return rope_freq_base_with_positive_offset;
828828
}
829-
// else if(model_arch==GGUFArch::ARCH_MISTRAL_LLAMA_1_AND_2)
830-
// {
831-
// float extended_rope_negative_offset_value = 1 + ((log10f(chi_ctx_value) - log10f(chi_ctx_train_value)) / (3.14159265358979323846 * 3.14159265358979323846));
832-
// float rope_freq_base_with_negative_offset = gradient_ai_rope_freq_base_value / extended_rope_negative_offset_value;
833-
// if(debugmode==1)
834-
// {
835-
// printf("Extended RoPE Negative Offset (divisor) for Llama 1 and 2 based models. (value:%.3f).\n", extended_rope_negative_offset_value);
836-
// printf("RoPE base calculated via Gradient AI formula for Llama 1 and 2 based models. (value:%.1f).\n", rope_freq_base_with_negative_offset);
837-
// }
838-
// return rope_freq_base_with_negative_offset;
839-
// }
840829
else
841830
{
842831
return gradient_ai_rope_freq_base_value;
@@ -1087,6 +1076,10 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
10871076
printf("CUBLAS: Set main device to %d\n",cu_parseinfo_maindevice);
10881077
}
10891078
ggml_cuda_set_mul_mat_q(inputs.use_mmq);
1079+
if(file_format_meta.model_architecture == GGUFArch::ARCH_QWEN2 && !kcpp_params->flash_attn) // warn only when Flash Attention is off, matching the message printed below
1080+
{
1081+
printf("CUBLAS: Warning, you are running Qwen2 without Flash Attention and may observe incoherent output.\n");
1082+
}
10901083
#endif
10911084
model_params.main_gpu = cu_parseinfo_maindevice;
10921085

model_adapter.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -310,9 +310,9 @@ void print_tok_vec(std::vector<float> &embd)
310310
{
311311
fileformatmeta->model_architecture = GGUFArch::ARCH_SOLAR;
312312
}
313-
else if(modelarch=="llama" && freq_base_train==10000.0f)
313+
else if(modelarch=="qwen2")
314314
{
315-
fileformatmeta->model_architecture = GGUFArch::ARCH_MISTRAL_LLAMA_1_AND_2;
315+
fileformatmeta->model_architecture = GGUFArch::ARCH_QWEN2;
316316
}
317317
printf("Arch Category: %d\n",fileformatmeta->model_architecture);
318318

model_adapter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ enum GGUFArch
5757
ARCH_PHI = 2,
5858
ARCH_MAMBA = 3,
5959
ARCH_SOLAR = 4,
60-
ARCH_MISTRAL_LLAMA_1_AND_2 = 5,
60+
ARCH_QWEN2 = 5,
6161
};
6262

6363
struct FileFormatExtraMeta

0 commit comments

Comments
 (0)