From 418a31fb60d9f0dcd548a1a767ab3b7aa8f44886 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Tue, 29 Apr 2025 00:01:22 +0200
Subject: [PATCH 1/3] llama-graph : fix text position for mrope

---
 src/llama-graph.cpp | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
index 2706ea2635444..c5c0ba3dc7035 100644
--- a/src/llama-graph.cpp
+++ b/src/llama-graph.cpp
@@ -55,13 +55,15 @@ void llm_graph_input_pos::set_input(const llama_ubatch * ubatch) {
     if (ubatch->pos && pos) {
         const int64_t n_tokens = ubatch->n_tokens;
 
-        if (ubatch->token && n_pos_per_embd > 1) {
-            // in case we're using M-RoPE with text tokens, convert the 1D positions to 4D
-            // the other dimensions are all 0, they are unused for text tokens
+        if (ubatch->token && n_pos_per_embd == 4) {
+            // in case we're using M-RoPE with text tokens, convert the 1D positions to 3D
+            // the other dimensions are the same, except for 4th dim which will be all 0
             std::vector<llama_pos> pos_data(n_tokens*n_pos_per_embd, 0);
             // copy the first dimension
             for (int i = 0; i < n_tokens; ++i) {
-                pos_data[i] = ubatch->pos[i];
+                pos_data[               i] = ubatch->pos[i];
+                pos_data[    n_tokens + i] = ubatch->pos[i];
+                pos_data[2 * n_tokens + i] = ubatch->pos[i];
             }
             ggml_backend_tensor_set(pos, pos_data.data(), 0, pos_data.size()*ggml_element_size(pos));
         } else {

From 492345d3df6a336f2a1c421c31cd89147fe99257 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Tue, 29 Apr 2025 00:07:10 +0200
Subject: [PATCH 2/3] fix typo

---
 src/llama-graph.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
index c5c0ba3dc7035..6aca9cdb336f5 100644
--- a/src/llama-graph.cpp
+++ b/src/llama-graph.cpp
@@ -56,8 +56,8 @@ void llm_graph_input_pos::set_input(const llama_ubatch * ubatch) {
         const int64_t n_tokens = ubatch->n_tokens;
 
         if (ubatch->token && n_pos_per_embd == 4) {
-            // in case we're using M-RoPE with text tokens, convert the 1D positions to 3D
-            // the other dimensions are the same, except for 4th dim which will be all 0
+            // in case we're using M-RoPE with text tokens, convert the 1D positions to 4D
+            // the 3 first dims are the same, and 4th dim is all 0
             std::vector<llama_pos> pos_data(n_tokens*n_pos_per_embd, 0);
             // copy the first dimension
             for (int i = 0; i < n_tokens; ++i) {

From bea76aaedf2443ea7953a3585e66c52826a04a37 Mon Sep 17 00:00:00 2001
From: Xuan Son Nguyen
Date: Tue, 29 Apr 2025 00:08:26 +0200
Subject: [PATCH 3/3] explicitly set 4th dim in the loop

---
 src/llama-graph.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp
index 6aca9cdb336f5..fabb9ca237653 100644
--- a/src/llama-graph.cpp
+++ b/src/llama-graph.cpp
@@ -58,12 +58,13 @@ void llm_graph_input_pos::set_input(const llama_ubatch * ubatch) {
         if (ubatch->token && n_pos_per_embd == 4) {
             // in case we're using M-RoPE with text tokens, convert the 1D positions to 4D
             // the 3 first dims are the same, and 4th dim is all 0
-            std::vector<llama_pos> pos_data(n_tokens*n_pos_per_embd, 0);
+            std::vector<llama_pos> pos_data(n_tokens*n_pos_per_embd);
             // copy the first dimension
             for (int i = 0; i < n_tokens; ++i) {
                 pos_data[               i] = ubatch->pos[i];
                 pos_data[    n_tokens + i] = ubatch->pos[i];
                 pos_data[2 * n_tokens + i] = ubatch->pos[i];
+                pos_data[3 * n_tokens + i] = 0; // 4th dim is 0
             }
             ggml_backend_tensor_set(pos, pos_data.data(), 0, pos_data.size()*ggml_element_size(pos));
         } else {