fix dtype bug

danielvegamyhre · danielvegamyhre · commit f673fb930fb1 · 2025-06-27T14:59:51.000-07:00
diff --git a/torchao/prototype/moe_training/scaled_grouped_mm.py b/torchao/prototype/moe_training/scaled_grouped_mm.py
@@ -40,7 +40,7 @@ def _scaled_grouped_mm(
         offs (int32 torch.Tensor): The offsets to use to mark the starting index of each group along dim0 of the A tensor.
         out_dtype (Optional[torch.dtype]): The dtype of the output tensor. Currently only torch.bfloat16 is supported.
     """
-    logger.info("Using scaled_grouped_mm")
+    logger.info("Using differentiable _scaled_grouped_mm")
     return _Float8GroupedMM.apply(
         A,
         B_t,
diff --git a/torchao/prototype/moe_training/tensor.py b/torchao/prototype/moe_training/tensor.py
@@ -4,6 +4,7 @@
 # This source code is licensed under the BSD 3-Clause license found in the
 # LICENSE file in the root directory of this source tree.
 
+import logging
 from typing import Any, Optional, Tuple
 
 import torch
@@ -18,6 +19,7 @@
 
 logger: logging.Logger = logging.getLogger(__name__)
 
+
 _ops_to_preserve_subclass = {
     torch.ops.aten.empty_like.default,
     torch.ops.aten.new_zeros.default,
@@ -96,6 +101,7 @@ def __torch_function__(cls, func, types, args, kwargs={}):
 
     @classmethod
     def __torch_dispatch__(cls, func, types, args, kwargs={}):
+        logger.debug(f"{func.__name__}, args={args}, kwargs={kwargs}")
         # detach is special case
         if func == torch.ops.aten.detach.default:
             return ScaledGroupedMMTensor(args[0]._data, args[0]._dtype)
@@ -135,6 +141,7 @@ def __repr__(self):
     def __tensor_flatten__(self):
         return ["_data"], {"_dtype": self._dtype}
 
+
     @staticmethod
     def __tensor_unflatten__(inner_tensors, flatten_spec, outer_size, outer_stride):
         return ScaledGroupedMMTensor(