We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 0dccd87 commit 306ea4b (Copy full SHA for 306ea4b)
src/llama-graph.cpp
@@ -1491,8 +1491,8 @@ ggml_tensor * llm_graph_context::build_attn(
1491
// note: MLA with flash attention now uses the last 512 elements of K in place of V
1492
v = ggml_view_3d(ctx0, kv_self->k_l[il],
1493
n_embd_head_v, n_kv, n_head_kv,
1494
- ggml_row_size(kv_self->v_l[il]->type, n_embd_k_gqa),
1495
- ggml_row_size(kv_self->v_l[il]->type, n_embd_head_k),
+ ggml_row_size(kv_self->k_l[il]->type, n_embd_k_gqa),
+ ggml_row_size(kv_self->k_l[il]->type, n_embd_head_k),
1496
n_embd_head_k-n_embd_head_v); // offset by n_rot elements
1497
}
1498
0 commit comments