File tree Expand file tree Collapse file tree 1 file changed +5
-8
lines changed Expand file tree Collapse file tree 1 file changed +5
-8
lines changed Original file line number Diff line number Diff line change @@ -789,7 +789,7 @@ llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_
789
789
res.s1 = std::max<llama_seq_id>(res.s1 , seq_to_stream[seq_id]);
790
790
791
791
res.strm [s] = seq_to_stream[seq_id];
792
- res.idxs [s].resize (n_tokens);
792
+ res.idxs [s].reserve (n_tokens);
793
793
794
794
const auto & cells = v_cells[seq_to_stream[seq_id]];
795
795
@@ -806,7 +806,6 @@ llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_
806
806
return { };
807
807
}
808
808
809
- uint32_t n_found = 0 ;
810
809
uint32_t n_tested = 0 ;
811
810
812
811
// for continuous slots, we test that all tokens in the ubatch fit, starting from the current head
@@ -857,22 +856,20 @@ llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_
857
856
}
858
857
859
858
if (can_use) {
860
- res.idxs [s][n_found] = idx;
861
-
862
- n_found++;
859
+ res.idxs [s].push_back (idx);
863
860
} else {
864
861
if (cont) {
865
862
break ;
866
863
}
867
864
}
868
865
}
869
866
870
- if (n_found == n_tokens) {
867
+ if (res. idxs [s]. size () == n_tokens) {
871
868
break ;
872
869
}
873
870
874
871
if (cont) {
875
- n_found = 0 ;
872
+ res. idxs [s]. clear () ;
876
873
}
877
874
878
875
if (n_tested >= cells.size ()) {
@@ -882,7 +879,7 @@ llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_
882
879
}
883
880
884
881
// we didn't find a suitable slot - return empty result
885
- if (n_found < n_tokens) {
882
+ if (res. idxs [s]. size () < n_tokens) {
886
883
return { };
887
884
}
888
885
}
You can’t perform that action at this time.
0 commit comments