From 5ba9e6d780d919a2b7642ca7cc031169dec39ac6 Mon Sep 17 00:00:00 2001
From: Daniel Vega-Myhre
Date: Tue, 1 Jul 2025 14:57:12 -0700
Subject: [PATCH] validate float8 moe parallelism config

---
 torchtitan/components/quantization/float8.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/torchtitan/components/quantization/float8.py b/torchtitan/components/quantization/float8.py
index 782889716..c7466a34c 100644
--- a/torchtitan/components/quantization/float8.py
+++ b/torchtitan/components/quantization/float8.py
@@ -55,6 +55,18 @@ def __init__(self, job_config: JobConfig, parallel_dims: ParallelDims):
         self.filter_fqns = float8_config.filter_fqns
         self.moe_fqns = float8_config.moe_fqns_prototype
 
+        # Validate MoE training prototype limitations.
+        if self.moe_fqns:
+            assert (
+                job_config.parallelism.tensor_parallel_degree == 1
+            ), "Float8 MoE training prototype does not yet support tensor parallelism"
+            assert (
+                job_config.parallelism.pipeline_parallel_degree == 1
+            ), "Float8 MoE training prototype does not yet support pipeline parallelism"
+            assert (
+                job_config.parallelism.context_parallel_degree == 1
+            ), "Float8 MoE training prototype does not yet support context parallelism"
+
         if float8_config.recipe_name is not None:
             assert (
                 not float8_config.enable_fsdp_float8_all_gather
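
For reviewers, a minimal standalone sketch of how the new guard behaves. The Parallelism and JobConfig dataclasses below are simplified stand-ins for torchtitan's real config objects (only the degree fields used by the check are modeled), and the moe_fqns value in the example is hypothetical; the assert logic itself mirrors the lines added in this patch.

from dataclasses import dataclass, field


@dataclass
class Parallelism:
    tensor_parallel_degree: int = 1
    pipeline_parallel_degree: int = 1
    context_parallel_degree: int = 1


@dataclass
class JobConfig:
    parallelism: Parallelism = field(default_factory=Parallelism)


def validate_float8_moe(job_config: JobConfig, moe_fqns: list[str]) -> None:
    # Mirrors the asserts added to Float8.__init__: the float8 MoE training
    # prototype does not yet support TP, PP, or CP, so their degrees must be 1.
    if not moe_fqns:
        return
    assert (
        job_config.parallelism.tensor_parallel_degree == 1
    ), "Float8 MoE training prototype does not yet support tensor parallelism"
    assert (
        job_config.parallelism.pipeline_parallel_degree == 1
    ), "Float8 MoE training prototype does not yet support pipeline parallelism"
    assert (
        job_config.parallelism.context_parallel_degree == 1
    ), "Float8 MoE training prototype does not yet support context parallelism"


# Example: enabling float8 MoE together with tensor parallelism trips the guard.
cfg = JobConfig(parallelism=Parallelism(tensor_parallel_degree=2))
try:
    validate_float8_moe(cfg, moe_fqns=["layers.moe.experts"])  # hypothetical FQN
except AssertionError as err:
    print(err)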