fix llama4 parallelize syntax bug (#1316)

tianyu-l · web-flow · commit f7084fc78567 · 2025-06-17T17:30:11.000-07:00
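Landing the fix from #1307 on its own, as I'm hoping to publish a release today. The fix replaces the `is not None` check on `parallel_dims.dp_cp_enabled` with a plain truthiness check.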
diff --git a/torchtitan/experiments/llama4/infra/parallelize.py b/torchtitan/experiments/llama4/infra/parallelize.py
@@ -118,7 +118,7 @@ def parallelize_llama(
         )
 
     # for MoE auxiliary-loss-free load balancing
-    if parallel_dims.dp_cp_enabled is not None:
+    if parallel_dims.dp_cp_enabled:
         # NOTE: Currently this sync is blocking (thus exposed) and happens on the
         # default compute stream. Need to assess if this is OK performance-wise.
         dp_cp_mesh = world_mesh["dp_cp"]

Original file line number	Diff line number	Diff line change
`@@ -118,7 +118,7 @@ def parallelize_llama(`
`118`	`118`	`)`
`119`	`119`
`120`	`120`	`# for MoE auxiliary-loss-free load balancing`
`121`		`- if parallel_dims.dp_cp_enabled is not None:`
	`121`	`+ if parallel_dims.dp_cp_enabled:`
`122`	`122`	`# NOTE: Currently this sync is blocking (thus exposed) and happens on the`
`123`	`123`	`# default compute stream. Need to assess if this is OK performance-wise.`
`124`	`124`	`dp_cp_mesh = world_mesh["dp_cp"]`