Commit 639b868

fix comments + lint

Signed-off-by: Bill Nell <bnell@redhat.com>
1 parent: 3e0acf9

4 files changed (+22 / -17 lines)

tests/kernels/moe/test_batched_moe.py

Lines changed: 3 additions & 0 deletions
@@ -77,6 +77,9 @@ def ref_impl(
     B_scale: Optional[torch.Tensor],
     block_shape: Optional[list[int]],
 ) -> torch.Tensor:
+    assert (A.dtype.itemsize > 1
+            or (A_scale is not None and B_scale is not None))
+
     num_expert_tokens_cpu = num_expert_tokens.clone()
     num_expert_tokens_cpu = num_expert_tokens_cpu.to(device="cpu")
     num_experts = num_expert_tokens.size(0)
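
The new assert makes the fp8 precondition explicit: a one-byte activation dtype is only valid when both scale tensors are supplied. A minimal standalone sketch of the same guard, assuming a PyTorch build with fp8 dtypes (2.1+):

    import torch

    # Wide dtypes satisfy the first clause on their own: no scales needed.
    a = torch.zeros(4, 8, dtype=torch.bfloat16)
    assert a.dtype.itemsize > 1

    # One-byte fp8 dtypes fail the first clause, so scales must be present.
    a_q = torch.zeros(4, 8, dtype=torch.float8_e4m3fn)
    a_scale = torch.ones(4, 1)  # hypothetical per-row scales
    b_scale = torch.ones(8, 1)
    assert (a_q.dtype.itemsize > 1
            or (a_scale is not None and b_scale is not None))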

tests/kernels/moe/utils.py

Lines changed: 8 additions & 8 deletions
@@ -167,31 +167,31 @@ def make_test_weights(
         assert quant_dtype == torch.float8_e4m3fn, "only fp8 supported"
         w1_l = [None] * e
         w2_l = [None] * e
-        w1_s = [None] * e
-        w2_s = [None] * e
+        w1_s_l = [None] * e
+        w2_s_l = [None] * e
         for idx in range(e):
             if block_shape is not None:
-                w1_l[idx], w1_s[idx] = per_block_cast_to_fp8(
+                w1_l[idx], w1_s_l[idx] = per_block_cast_to_fp8(
                     w1_16[idx],
                     block_shape[1],
                 )
-                w2_l[idx], w2_s[idx] = per_block_cast_to_fp8(
+                w2_l[idx], w2_s_l[idx] = per_block_cast_to_fp8(
                     w2_16[idx],
                     block_shape[1],
                 )
             else:
-                tmp, w1_s[idx] = per_token_group_quant_fp8(
+                tmp, w1_s_l[idx] = per_token_group_quant_fp8(
                     w1_16[idx].view(1, -1), w1_16[idx].numel())
                 w1_l[idx] = tmp.view(*w1_16[idx].shape)
 
-                tmp, w2_s[idx] = per_token_group_quant_fp8(
+                tmp, w2_s_l[idx] = per_token_group_quant_fp8(
                     w2_16[idx].view(1, -1), w2_16[idx].numel())
                 w2_l[idx] = tmp.view(*w2_16[idx].shape)
 
         w1 = torch.stack(w1_l)
         w2 = torch.stack(w2_l)
-        w1_s = torch.stack(w1_s)
-        w2_s = torch.stack(w2_s)
+        w1_s = torch.stack(w1_s_l)
+        w2_s = torch.stack(w2_s_l)
         if w1_s.ndim == 2:
             assert w1_s.shape[-1] == 1
             w1_s = w1_s.view(-1, 1, 1)
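
The rename is a lint fix: w1_s and w2_s were first bound to Python lists and later rebound to the stacked tensors, an incompatible redefinition that type checkers flag. Giving the lists their own names (w1_s_l, w2_s_l) leaves each name with a single type. A sketch of the pattern, with hypothetical shapes:

    import torch

    e = 4                                          # hypothetical expert count
    w1_s_l = [torch.ones(2, 3) for _ in range(e)]  # per-expert scales (list)
    w1_s = torch.stack(w1_s_l)                     # stacked scales (Tensor)
    assert w1_s.shape == (e, 2, 3)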

tests/kernels/utils.py

Lines changed: 4 additions & 0 deletions
@@ -1075,6 +1075,10 @@ def torch_experts(
             or (global_num_experts == w1.shape[0] and expert_map is None)
             or (expert_map is not None
                 and global_num_experts == expert_map.shape[0]))
+
+    assert (quant_dtype is None
+            or (w1_scale is not None and w2_scale is not None))
+
     M, K = a.shape
     #N = w1.shape[1]
     topk = topk_ids.shape[1]
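
As in the batched-MoE test, this assert encodes the quantization contract: a non-None quant_dtype implies both weight scales are provided. A minimal sketch of the same implication (check_scales is a hypothetical helper, not part of the test code):

    from typing import Optional

    import torch

    def check_scales(quant_dtype: Optional[torch.dtype],
                     w1_scale: Optional[torch.Tensor],
                     w2_scale: Optional[torch.Tensor]) -> None:
        # A quantized dtype request requires both scale tensors.
        assert (quant_dtype is None
                or (w1_scale is not None and w2_scale is not None))

    check_scales(None, None, None)                    # unquantized: fine
    check_scales(torch.float8_e4m3fn,                 # quantized with scales
                 torch.ones(1, 1), torch.ones(1, 1))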

vllm/model_executor/layers/fused_moe/modular_kernel.py

Lines changed: 7 additions & 9 deletions
@@ -86,17 +86,13 @@ def _moe_problem_size(
 
 class FusedMoEActivationFormat(Enum):
     """
-    Add comment
+    The standard activation format (num_tokens, hidden dim).
     """
     Standard = "standard",
     """
-    Add comment
+    The batched experts format (num experts, max tokens per expert, hidden dim)
     """
-    TopkReplicated = "topk_replicated",
-    """
-    Add comment
-    """
-    BatchedExperts = "standard",
+    BatchedExperts = "batched_experts",
 
 
 # TODO: pass FusedMoEParallelConfig in as ctor parameter?
@@ -171,7 +167,8 @@ def finalize(
     @abstractmethod
     def activation_format(self) -> FusedMoEActivationFormat:
         """
-        Add comment
+        A property indicating the output format of the activations for the
+        'prepare' method.
         """
         raise NotImplementedError
 
@@ -217,7 +214,8 @@ def __init__(
     def activation_formats(
            self) -> tuple[FusedMoEActivationFormat, FusedMoEActivationFormat]:
         """
-        Add comment
+        A property which is a tuple of the input and output activation formats
+        for the 'apply' method.
         """
         raise NotImplementedError
 
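
Besides replacing the "Add comment" placeholders with real docstrings, this hunk fixes a genuine bug: BatchedExperts previously reused the value "standard", and Python's Enum treats members with equal values as aliases of one another, so Standard and BatchedExperts were silently the same member. A minimal sketch of the aliasing behavior, independent of vLLM:

    from enum import Enum

    class Broken(Enum):
        Standard = "standard",        # trailing comma: the value is a tuple
        BatchedExperts = "standard",  # equal value => alias of Standard

    assert Broken.BatchedExperts is Broken.Standard   # one member, two names

    class Fixed(Enum):
        Standard = "standard",
        BatchedExperts = "batched_experts",

    assert Fixed.BatchedExperts is not Fixed.Standard  # now distinct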
