1 parent 676b2db commit b81e2e4
src/llama-kv-cache.cpp
@@ -34,8 +34,6 @@ llama_kv_cache_unified::llama_kv_cache_unified(
     const bool is_mla = (hparams.n_embd_head_k_mla != 0 && hparams.n_embd_head_v_mla != 0);
 
-    is_mla_with_fa = model.arch != LLM_ARCH_DEEPSEEK2 || v_trans
-
     has_shift = false;
     can_shift = !is_mla || v_trans; // TODO: allow context shifting for MLA with flash attention