Add option to exclude low flop mms from every-other-mm sac policy #1372

Closed · wants to merge 4 commits
6 changes: 6 additions & 0 deletions torchtitan/config_manager.py
@@ -486,6 +486,12 @@ class ActivationCheckpoint:
Selective activation checkpointing options ['int', 'op'].
'int' (e.g., 2) for every nth layer, or 'op' for op level ac.
"""
selective_op_ac_mm_flops_threshold: int = 0
"""
When selective_ac_option is 'op', this threshold is used to determine whether to
save a given mm: mms with fewer flops than the threshold (e.g., < 1e5) are always
recomputed, and the every-other-mm rule is then applied only to the remaining mms.
"""


@dataclass
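For illustration, a minimal sketch of enabling the new field together with op-level SAC. The `mode` and `selective_ac_option` fields already exist on `ActivationCheckpoint`; the values below are examples, not defaults from this PR:

```python
# Illustrative only: op-level SAC plus the new flops threshold.
ac_config = ActivationCheckpoint(
    mode="selective",
    selective_ac_option="op",
    # mms below ~1e5 flops are always recomputed; the every-other-mm rule
    # then applies only to the remaining, larger mms.
    selective_op_ac_mm_flops_threshold=int(1e5),
)
```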
9 changes: 9 additions & 0 deletions torchtitan/models/llama3/infra/parallelize.py
@@ -265,6 +265,15 @@ def _get_custom_policy(meta):
    def _custom_policy(ctx, func, *args, **kwargs):
        mode = "recompute" if ctx.is_recompute else "forward"
        mm_count_key = f"{mode}_mm_count"

        if func == torch.ops.aten.mm.default:
            # An (m, k) @ (k, n) matmul costs roughly 2 * m * k * n flops.
            m, k = args[0].shape
            k2, n = args[1].shape
            assert k == k2
            flops = m * n * 2 * k
            if flops < ac_config.selective_op_ac_mm_flops_threshold:
                return CheckpointPolicy.PREFER_RECOMPUTE
Contributor:
Is there a strong reason we use "always recompute" instead of "always save" for these small matmuls?

Contributor:
Hmm, I'm guessing it's because the FLOPs required for a matmul grow cubically / O(n^3), while the output activation grows only quadratically / O(n^2), so when n is small the FLOPs required to recompute are relatively small compared to the size of the output activation. In contrast, when n is larger, the FLOPs to recompute have grown cubically while the memory saved has only grown quadratically, so the trade-off becomes more favorable for saving instead of recomputing.

If so, I think this change is a good idea, as long as it is configurable and has a sensible default that is documented clearly.
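
A back-of-the-envelope illustration of that scaling (numbers below are purely illustrative, not from this PR):

```python
# For a square n x n matmul: recompute cost ~2n^3 flops, saved output n^2 elements.
for n in (64, 1024, 8192):
    recompute_flops = 2 * n**3
    saved_elements = n**2
    # Flops paid per element of activation memory saved grows linearly with n (= 2n),
    # so recomputing small matmuls is comparatively cheap.
    print(n, recompute_flops // saved_elements)
```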

Contributor Author (@soulitzer), Jul 9, 2025:
> Is there a strong reason we use "always recompute" instead of "always save" for these small matmuls?

Good question. Relative to non-matmuls, small matmuls perhaps still do a disproportionate amount of compute relative to their output size, so there could definitely be a benefit to saving them as well.

Although, as Daniel mentions, relative to large matmuls, small matmuls do have a more favorable memory/compute trade-off for recomputing, so intuitively it might be more Pareto-optimal to recompute. For example:

If we were to follow a greedy heuristic of only saving the op with the next best trade-off, then since we're currently only saving every other large matmul, the most profitable next tensor to save should be another large matmul, rather than spending that same amount of memory saving a bunch of smaller ones.
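
To put rough numbers on that intuition (illustrative only): avoiding recompute of an (m, k) @ (k, n) mm saves about 2k flops per stored output element, so a fixed memory budget avoids far more recompute when spent on large-k matmuls:

```python
# Illustrative only: spend the same memory budget (saved output elements) two ways.
budget_elements = 4096 * 4096          # e.g., room for one more (4096, 4096) output

flops_avoided_large = 2 * 4096 * budget_elements   # one more large mm, k = 4096
flops_avoided_small = 2 * 64 * budget_elements     # many small mms, k = 64 each

print(flops_avoided_large // flops_avoided_small)  # 64x more recompute avoided
```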

Contributor:

> intuitively it might be more Pareto-optimal to recompute. For example:
> If we were to follow a greedy heuristic of only saving the op with the next best trade-off, since we're currently only saving every other large matmul, the most profitable next tensor to save should be another large matmul

Oh, this is a great argument; I get the idea.


        if func == torch.ops.aten.mm.default:
            meta[mm_count_key] += 1
        # Saves output of all compute ops, except every second mm
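
Since the diff above is truncated, here is a rough sketch of how the full custom policy might read with the new branch in place. The `_save_list` contents, the `meta` wiring, and the exact function signatures are approximations of the surrounding torchtitan code, not a verbatim copy:

```python
from collections import defaultdict

import torch
from torch.utils.checkpoint import (
    CheckpointPolicy,
    create_selective_checkpoint_contexts,
)

# Approximation: the real _save_list in torchtitan contains more ops.
_save_list = {
    torch.ops.aten.mm.default,
    torch.ops.aten._scaled_dot_product_flash_attention.default,
}


def _get_custom_policy(meta, ac_config):
    def _custom_policy(ctx, func, *args, **kwargs):
        mode = "recompute" if ctx.is_recompute else "forward"
        mm_count_key = f"{mode}_mm_count"

        if func == torch.ops.aten.mm.default:
            m, k = args[0].shape
            k2, n = args[1].shape
            assert k == k2
            flops = m * n * 2 * k
            # Small matmuls below the threshold are always recomputed and do
            # not participate in the every-other-mm bookkeeping below.
            if flops < ac_config.selective_op_ac_mm_flops_threshold:
                return CheckpointPolicy.PREFER_RECOMPUTE

        if func == torch.ops.aten.mm.default:
            meta[mm_count_key] += 1
        # Saves output of all compute ops, except every second mm.
        to_save = func in _save_list and not (
            func == torch.ops.aten.mm.default and meta[mm_count_key] % 2 == 0
        )
        return (
            CheckpointPolicy.MUST_SAVE if to_save else CheckpointPolicy.PREFER_RECOMPUTE
        )

    return _custom_policy


def selective_checkpointing_context_fn(ac_config):
    # meta keeps separate forward/recompute counters so the every-other-mm
    # decision stays consistent between the forward and the recompute pass.
    meta = defaultdict(int)
    return create_selective_checkpoint_contexts(_get_custom_policy(meta, ac_config))
```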