Update

jainapurva · jainapurva · commit 36b3639f88d8 · 2025-04-07T11:08:57.000-07:00
[ghstack-poisoned]
diff --git a/benchmarks/microbenchmarks/test/benchmark_config.yml b/benchmarks/microbenchmarks/test/benchmark_config.yml
@@ -2,27 +2,29 @@
 benchmark_mode: "inference"
 quantization_config_recipe_names:
   # Will run a baseline inference for model by default, without quantization for comparison
-  # - "int4wo-32"
+  - "int4wo-32"
   # - "marlin"
   - "int8wo"
+  - "int8dq"
+  - "float8dq"
 # sparsity_config_recipe_names:
   # Will run a baseline inference for model by default, without sparsity for comparison
   # - "semi-sparse"
   # - "block"
 output_dir: "benchmarks/microbenchmarks/results"
 model_params:
-  # - name: "small_bf16_linear"
-  #   matrix_shapes:
-  #     - name: "custom"
-  #       shapes: [
-  #         [1024, 1024, 1024],  # [m, k, n]
-  #       ]
-  #   high_precision_dtype: "torch.bfloat16"
-  #   use_torch_compile: true
-  #   torch_compile_mode: "max-autotune"
-  #   device: "cuda"
-  #   model_type: "linear"
-  #   enable_profiler: true  # Enable profiling for this model
+  - name: "small_bf16_linear"
+    matrix_shapes:
+      - name: "custom"
+        shapes: [
+          [1024, 1024, 1024],  # [m, k, n]
+        ]
+    high_precision_dtype: "torch.bfloat16"
+    use_torch_compile: true
+    torch_compile_mode: "max-autotune"
+    device: "cuda"
+    model_type: "linear"
+    enable_profiler: true  # Enable profiling for this model
 
   - name: "large_bf16_ln_linear"
     matrix_shapes:
@@ -65,30 +67,30 @@ model_params:
   #   model_type: "linear"
   #   enable_profiler: true  # Enable profiling for this model
 
-  - name: "bf16_rms_norm_linear_activation"
-    matrix_shapes:
-      - name: "custom"
-        shapes: [
-          [2048, 4096, 1024],
-        ]
-    high_precision_dtype: "torch.bfloat16"
-    use_torch_compile: true
-    torch_compile_mode: "max-autotune"
-    device: "cuda"
-    model_type: "rms_norm_linear_activation"
-    enable_profiler: true
-    enable_memory_profile: true
+  # - name: "bf16_rms_norm_linear_activation"
+  #   matrix_shapes:
+  #     - name: "custom"
+  #       shapes: [
+  #         [2048, 4096, 1024],
+  #       ]
+  #   high_precision_dtype: "torch.bfloat16"
+  #   use_torch_compile: true
+  #   torch_compile_mode: "max-autotune"
+  #   device: "cuda"
+  #   model_type: "rms_norm_linear_activation"
+  #   enable_profiler: true
+  #   enable_memory_profile: true
 
-  - name: "bf16_transformer_block"
-    matrix_shapes:
-      - name: "custom"
-        shapes: [
-          [2048, 4096, 1024],  # For transformer_block, k is the hidden dimension
-        ]
-    high_precision_dtype: "torch.bfloat16"
-    use_torch_compile: true
-    torch_compile_mode: "max-autotune"
-    device: "cuda"
-    model_type: "transformer_block"
-    enable_profiler: true
-    enable_memory_profile: true
+  # - name: "bf16_transformer_block"
+  #   matrix_shapes:
+  #     - name: "custom"
+  #       shapes: [
+  #         [2048, 4096, 1024],  # For transformer_block, k is the hidden dimension
+  #       ]
+  #   high_precision_dtype: "torch.bfloat16"
+  #   use_torch_compile: true
+  #   torch_compile_mode: "max-autotune"
+  #   device: "cuda"
+  #   model_type: "transformer_block" # TODO: Add a custom model (Figure out how to do this, maybe pass a .py file with model definition)
+  #   enable_profiler: true
+  #   enable_memory_profile: true
diff --git a/benchmarks/microbenchmarks/utils.py b/benchmarks/microbenchmarks/utils.py
@@ -359,6 +359,8 @@ def to_dict(self) -> Dict[str, Any]:
         return result_dict
 
 
+# TODO: MOE block (Maybe)
+# TODO: Move stuff to torchao/testing
 class ToyLinearModel(torch.nn.Module):
     def __init__(self, k=64, n=32, dtype=torch.bfloat16):
         super().__init__()
@@ -369,12 +371,14 @@ def forward(self, x):
         return x
 
 
+# TODO: Maybe we can let users specify a custom (DIY) activation function and use it in the model
+# TODO: MLP block
 class LNLinearSigmoid(torch.nn.Module):
     def __init__(self, fc_dim1, fc_dim2, dtype=torch.bfloat16):
         super().__init__()
         self.ln = torch.nn.LayerNorm(fc_dim1, elementwise_affine=False)
         self.fc = torch.nn.Linear(fc_dim1, fc_dim2, bias=False).to(dtype)
-        self.sigmoid = torch.nn.Sigmoid()
+        self.sigmoid = torch.nn.Sigmoid()  # TODO: Find a way to make the activation function configurable
 
     def forward(self, x):
         x = self.ln(x)
@@ -383,6 +387,7 @@ def forward(self, x):
         return x
 
 
+# TODO: We might not need this; need to figure out if it's relevant to any technique
 class RMSNorm(torch.nn.Module):
     def __init__(self, dim, eps=1e-6, dtype=torch.bfloat16):
         super().__init__()