Skip to content

Commit e709dc0

Browse files
committed
Prefer smallest cache size
1 parent becc3c5 commit e709dc0

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

examples/server/server.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2055,24 +2055,24 @@ struct server_context {
20552055
}
20562056
}
20572057

2058-
// find the slot that has been least recently used
2058+
// choose the slot with the least amount of tokens cached
20592059
if (ret == nullptr) {
2060-
int64_t t_last = ggml_time_us();
2060+
size_t smallest_cache_size = std::numeric_limits<size_t>::max();
20612061
for (server_slot & slot : slots) {
20622062
// skip the slot if it is not available
20632063
if (slot.is_processing()) {
20642064
continue;
20652065
}
20662066

20672067
// select the current slot if the criteria match
2068-
if (slot.t_last_used < t_last) {
2069-
t_last = slot.t_last_used;
2068+
if (slot.cache_tokens.size() < smallest_cache_size) {
2069+
smallest_cache_size = slot.cache_tokens.size();
20702070
ret = &slot;
20712071
}
20722072
}
20732073

20742074
if (ret != nullptr) {
2075-
SLT_DBG(*ret, "selected slot by lru, t_last = %" PRId64 "\n", t_last);
2075+
SLT_DBG(*ret, "selected slot by cache amount, cache_size = %" PRId64 "\n", smallest_cache_size);
20762076
}
20772077
}
20782078

0 commit comments

Comments
 (0)