Skip to content

Commit a9baa1f

Browse files
authored
[Fix] Disable FlashInfer when sliding window is enabled (#3026)
This PR fixes the function table initialization so that the FlashInfer attention kernel is not selected when sliding window is enabled.
1 parent 53966dd commit a9baa1f

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

cpp/serve/function_table.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -232,7 +232,7 @@ void FunctionTable::_InitFunctions() {
232232
this->apply_bitmask_func_ = mod->GetFunction("apply_bitmask_inplace", true);
233233
this->alloc_embedding_tensor_func_ = mod_get_func("alloc_embedding_tensor");
234234
this->create_kv_cache_func_ = mod_get_func("create_flashinfer_paged_kv_cache");
235-
if (!this->create_kv_cache_func_.defined()) {
235+
if (this->model_metadata_.sliding_window_size != -1 || !this->create_kv_cache_func_.defined()) {
236236
PackedFunc f_create_rnn_state = mod_get_func("create_rnn_state");
237237
if (f_create_rnn_state.defined()) {
238238
this->create_kv_cache_func_ = f_create_rnn_state;

0 commit comments

Comments
 (0)