[Fix] Fix attn kernel build issue (#2545)

MasterJH5574 · web-flow · commit 6bd049ec3648 · 2024-06-07T11:28:54.000-04:00
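This PR fixes TIR build issues in the attention kernels (`batch_prefill_paged_kv`, `batch_prefill_ragged_kv`, and `batch_tree_attn`): `q_indptr[b_idx]` is now read once into a local `q_indptr_val`, which is then used to compute `cur_L` instead of indexing `q_indptr` inline at that point.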
diff --git a/python/mlc_llm/nn/kv_cache.py b/python/mlc_llm/nn/kv_cache.py
@@ -641,6 +641,7 @@ def batch_prefill_paged_kv(
                                 if T.tvm_thread_invariant(batch_idx[0] < batch_size):
                                     b_idx: T.int32 = batch_idx[0]
                                     LH_start: T.int32 = tile_id[0] * tile_x
+                                    q_indptr_val: T.int32 = q_indptr[b_idx]
 
                                     cur_page_indptr_begin: T.int32 = page_indptr[b_idx]
                                     cur_page_indptr_end: T.int32 = page_indptr[b_idx + 1]
@@ -670,7 +671,7 @@ def batch_prefill_paged_kv(
                                             i, j = T.axis.remap("SS", [li, lj])
                                             T.reads()
                                             T.writes()
-                                            cur_L = q_indptr[b_idx] + (LH_start + i) // group_size
+                                            cur_L = q_indptr_val + (LH_start + i) // group_size
                                             cur_H_qo = by * group_size + (LH_start + i) % group_size
                                             if cur_L < q_indptr[b_idx + 1]:
                                                 Q_smem[i, j] = T.if_then_else(
@@ -1316,6 +1317,7 @@ def batch_prefill_ragged_kv(  # pylint: disable=too-many-branches
 
                                 if T.tvm_thread_invariant(batch_idx[0] < batch_size):
                                     b_idx: T.int32 = batch_idx[0]
+                                    q_indptr_val: T.int32 = q_indptr[b_idx]
                                     LH_start: T.int32 = tile_id[0] * tile_x
 
                                     kv_chunk_len[0] = kv_indptr[b_idx + 1] - kv_indptr[b_idx]
@@ -1340,7 +1342,7 @@ def batch_prefill_ragged_kv(  # pylint: disable=too-many-branches
                                             i, j = T.axis.remap("SS", [li, lj])
                                             T.reads()
                                             T.writes()
-                                            cur_L = q_indptr[b_idx] + (LH_start + i) // group_size
+                                            cur_L = q_indptr_val + (LH_start + i) // group_size
                                             cur_H_qo = by * group_size + (LH_start + i) % group_size
                                             if cur_L < q_indptr[b_idx + 1]:
                                                 Q_smem[i, j] = T.if_then_else(
diff --git a/python/mlc_llm/op/tree_attn.py b/python/mlc_llm/op/tree_attn.py
@@ -170,6 +170,7 @@ def batch_tree_attn(  # pylint: disable=too-many-branches
                                 if T.tvm_thread_invariant(batch_idx[0] < batch_size):
                                     b_idx: T.int32 = batch_idx[0]
                                     LH_start: T.int32 = tile_id[0] * tile_x
+                                    q_indptr_val: T.int32 = q_indptr[b_idx]
 
                                     kv_chunk_len[0] = kv_indptr[b_idx + 1] - kv_indptr[b_idx]
                                     T.tvm_storage_sync("shared")
@@ -193,7 +194,7 @@ def batch_tree_attn(  # pylint: disable=too-many-branches
                                             i, j = T.axis.remap("SS", [li, lj])
                                             T.reads()
                                             T.writes()
-                                            cur_L = q_indptr[b_idx] + (LH_start + i) // group_size
+                                            cur_L = q_indptr_val + (LH_start + i) // group_size
                                             cur_H_qo = by * group_size + (LH_start + i) % group_size
                                             if cur_L < q_indptr[b_idx + 1]:
                                                 Q_smem[i, j] = T.if_then_else(