Commit 376bcde

Address comment: add expert_map==None assertion in pplx_prepare_finalize
Signed-off-by: Ming Yang <yming@meta.com>
1 parent 7c57bb0 commit 376bcde

File tree: 2 files changed, +4 −2 lines changed

vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py (1 addition, 1 deletion)

@@ -65,7 +65,7 @@ def max_num_tokens_per_rank(self) -> Optional[int]:
         return self.max_tokens_per_rank

     def topk_indices_dtype(self) -> Optional[torch.dtype]:
-        return torch.int64
+        return torch.int32

     def _do_quant(
         self,
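The hunk above narrows the routing-index dtype from int64 to int32, presumably because the downstream DeepEP low-latency kernels consume 32-bit indices. A minimal sketch of the cast (not vLLM code; it assumes the ids originate from torch.topk, which returns int64 by default):

```python
import torch

# Routing: pick the top-2 experts per token from a score matrix.
scores = torch.rand(4, 8)              # 4 tokens, 8 experts
_, topk_ids = torch.topk(scores, k=2, dim=-1)
assert topk_ids.dtype == torch.int64   # torch.topk yields int64 indices

# Cast down to the dtype advertised by topk_indices_dtype() after this commit.
topk_ids = topk_ids.to(torch.int32)
assert topk_ids.dtype == torch.int32
```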

vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py (3 additions, 1 deletion)

@@ -100,7 +100,9 @@ def prepare(
         hidden_dim = a1.size(-1)  # K

         assert topk_ids.size(0) == num_tokens
-        # assert expert_map is None, "NYI"
+        assert expert_map is None, """with expert map, -1 id is used for
+            non-local token; this causes error when casting ids to the
+            topk_indices_dtype() uint32"""

         # Is this always going to be a1.device?
         device = a1.device
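The new assertion guards against exactly the failure its message describes: an expert map marks non-local tokens with id -1, and casting -1 to an unsigned 32-bit index dtype wraps around to 2**32 - 1, an out-of-range expert id. A hedged illustration of the wraparound (numpy stand-in, not vLLM code):

```python
import numpy as np

# expert_map-style ids: -1 marks a token routed to a non-local expert.
topk_ids = np.array([3, -1, 7], dtype=np.int64)

# Casting to an unsigned 32-bit dtype silently wraps the sentinel value.
wrapped = topk_ids.astype(np.uint32)
assert wrapped[1] == 2**32 - 1  # 4294967295: no longer a valid expert id
```

Asserting expert_map is None up front turns this silent corruption into an explicit, actionable error.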
