File tree: 1 file changed, +5 -5 lines changed
Original file line number | Diff line number | Diff line change
@@ -2055,24 +2055,24 @@ struct server_context {
2055
2055
}
2056
2056
}
2057
2057
2058
- // find the slot that has been least recently used
2058
+ // choose the slot with the fewest cached tokens
2059
2059
if (ret == nullptr ) {
2060
- int64_t t_last = ggml_time_us ();
2060
+ size_t smallest_cache_size = std::numeric_limits< size_t >:: max ();
2061
2061
for (server_slot & slot : slots) {
2062
2062
// skip the slot if it is not available
2063
2063
if (slot.is_processing ()) {
2064
2064
continue ;
2065
2065
}
2066
2066
2067
2067
// select the current slot if the criteria match
2068
- if (slot.t_last_used < t_last ) {
2069
- t_last = slot.t_last_used ;
2068
+ if (slot.cache_tokens . size () < smallest_cache_size ) {
2069
+ smallest_cache_size = slot.cache_tokens . size () ;
2070
2070
ret = &slot;
2071
2071
}
2072
2072
}
2073
2073
2074
2074
if (ret != nullptr ) {
2075
- SLT_DBG (*ret, " selected slot by lru, t_last = %" PRId64 " \n " , t_last );
2075
+ SLT_DBG (*ret, " selected slot by cache amount, cache_size = %" PRId64 " \n " , smallest_cache_size );
2076
2076
}
2077
2077
}
2078
2078
You can’t perform that action at this time.
0 commit comments