Skip to content

Commit cbe971a

Browse files
committed
kv-cache : restore find_slot impl
ggml-ci
1 parent 18fb95d commit cbe971a

File tree

1 file changed

+5
-8
lines changed

1 file changed

+5
-8
lines changed

src/llama-kv-cache-unified.cpp

Lines changed: 5 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -789,7 +789,7 @@ llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_
789789
res.s1 = std::max<llama_seq_id>(res.s1, seq_to_stream[seq_id]);
790790

791791
res.strm[s] = seq_to_stream[seq_id];
792-
res.idxs[s].resize(n_tokens);
792+
res.idxs[s].reserve(n_tokens);
793793

794794
const auto & cells = v_cells[seq_to_stream[seq_id]];
795795

@@ -806,7 +806,6 @@ llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_
806806
return { };
807807
}
808808

809-
uint32_t n_found = 0;
810809
uint32_t n_tested = 0;
811810

812811
// for continuous slots, we test that all tokens in the ubatch fit, starting from the current head
@@ -857,22 +856,20 @@ llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_
857856
}
858857

859858
if (can_use) {
860-
res.idxs[s][n_found] = idx;
861-
862-
n_found++;
859+
res.idxs[s].push_back(idx);
863860
} else {
864861
if (cont) {
865862
break;
866863
}
867864
}
868865
}
869866

870-
if (n_found == n_tokens) {
867+
if (res.idxs[s].size() == n_tokens) {
871868
break;
872869
}
873870

874871
if (cont) {
875-
n_found = 0;
872+
res.idxs[s].clear();
876873
}
877874

878875
if (n_tested >= cells.size()) {
@@ -882,7 +879,7 @@ llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_
882879
}
883880

884881
// we didn't find a suitable slot - return empty result
885-
if (n_found < n_tokens) {
882+
if (res.idxs[s].size() < n_tokens) {
886883
return { };
887884
}
888885
}

0 commit comments

Comments
 (0)