
Commit 36b3639

Update
[ghstack-poisoned]
1 parent a886a27 commit 36b3639

2 files changed: +47 −40 lines changed

benchmarks/microbenchmarks/test/benchmark_config.yml

Lines changed: 41 additions & 39 deletions
@@ -2,27 +2,29 @@
 benchmark_mode: "inference"
 quantization_config_recipe_names:
   # Will run a baseline inference for model by default, without quantization for comparison
-  # - "int4wo-32"
+  - "int4wo-32"
   # - "marlin"
   - "int8wo"
+  - "int8dq"
+  - "float8dq"
 # sparsity_config_recipe_names:
   # Will run a baseline inference for model by default, without sparsity for comparison
   # - "semi-sparse"
   # - "block"
 output_dir: "benchmarks/microbenchmarks/results"
 model_params:
-  # - name: "small_bf16_linear"
-  #   matrix_shapes:
-  #     - name: "custom"
-  #       shapes: [
-  #         [1024, 1024, 1024], # [m, k, n]
-  #       ]
-  #   high_precision_dtype: "torch.bfloat16"
-  #   use_torch_compile: true
-  #   torch_compile_mode: "max-autotune"
-  #   device: "cuda"
-  #   model_type: "linear"
-  #   enable_profiler: true # Enable profiling for this model
+  - name: "small_bf16_linear"
+    matrix_shapes:
+      - name: "custom"
+        shapes: [
+          [1024, 1024, 1024], # [m, k, n]
+        ]
+    high_precision_dtype: "torch.bfloat16"
+    use_torch_compile: true
+    torch_compile_mode: "max-autotune"
+    device: "cuda"
+    model_type: "linear"
+    enable_profiler: true # Enable profiling for this model

   - name: "large_bf16_ln_linear"
     matrix_shapes:
@@ -65,30 +67,30 @@ model_params:
   #   model_type: "linear"
   #   enable_profiler: true # Enable profiling for this model

-  - name: "bf16_rms_norm_linear_activation"
-    matrix_shapes:
-      - name: "custom"
-        shapes: [
-          [2048, 4096, 1024],
-        ]
-    high_precision_dtype: "torch.bfloat16"
-    use_torch_compile: true
-    torch_compile_mode: "max-autotune"
-    device: "cuda"
-    model_type: "rms_norm_linear_activation"
-    enable_profiler: true
-    enable_memory_profile: true
+  # - name: "bf16_rms_norm_linear_activation"
+  #   matrix_shapes:
+  #     - name: "custom"
+  #       shapes: [
+  #         [2048, 4096, 1024],
+  #       ]
+  #   high_precision_dtype: "torch.bfloat16"
+  #   use_torch_compile: true
+  #   torch_compile_mode: "max-autotune"
+  #   device: "cuda"
+  #   model_type: "rms_norm_linear_activation"
+  #   enable_profiler: true
+  #   enable_memory_profile: true

-  - name: "bf16_transformer_block"
-    matrix_shapes:
-      - name: "custom"
-        shapes: [
-          [2048, 4096, 1024], # For transformer_block, k is the hidden dimension
-        ]
-    high_precision_dtype: "torch.bfloat16"
-    use_torch_compile: true
-    torch_compile_mode: "max-autotune"
-    device: "cuda"
-    model_type: "transformer_block"
-    enable_profiler: true
-    enable_memory_profile: true
+  # - name: "bf16_transformer_block"
+  #   matrix_shapes:
+  #     - name: "custom"
+  #       shapes: [
+  #         [2048, 4096, 1024], # For transformer_block, k is the hidden dimension
+  #       ]
+  #   high_precision_dtype: "torch.bfloat16"
+  #   use_torch_compile: true
+  #   torch_compile_mode: "max-autotune"
+  #   device: "cuda"
+  #   model_type: "transformer_block" # TODO: Add a custom model (Figure out how to do this, maybe pass a .py file with model definition)
+  #   enable_profiler: true
+  #   enable_memory_profile: true
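
For orientation, the recipe strings enabled above map onto torchao's one-line quantization configs. Below is a minimal sketch of what each name roughly corresponds to, assuming the stock torchao.quantization constructors; the benchmark runner owns the actual string-to-config mapping, so treat the pairings as illustrative rather than definitive.

# Illustrative only: rough mapping from the recipe names in the YAML above to
# torchao quantization configs. The real mapping lives in the benchmark runner.
import torch
from torchao.quantization import (
    quantize_,
    int4_weight_only,                         # "int4wo-32" (weight-only int4, group_size=32)
    int8_weight_only,                         # "int8wo"
    int8_dynamic_activation_int8_weight,      # "int8dq"
    float8_dynamic_activation_float8_weight,  # "float8dq" (assumed pairing)
)

# Toy model standing in for the configured "linear" model_type.
model = torch.nn.Sequential(torch.nn.Linear(1024, 1024)).to(torch.bfloat16).to("cuda")
quantize_(model, int4_weight_only(group_size=32))  # apply one recipe in place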

benchmarks/microbenchmarks/utils.py

Lines changed: 6 additions & 1 deletion
@@ -359,6 +359,8 @@ def to_dict(self) -> Dict[str, Any]:
         return result_dict


+# TODO: MOE block (Maybe)
+# TODO: Move stuff to torchao/testing
 class ToyLinearModel(torch.nn.Module):
     def __init__(self, k=64, n=32, dtype=torch.bfloat16):
         super().__init__()
@@ -369,12 +371,14 @@ def forward(self, x):
         return x


+# TODO: Maybe we can specify a diy for activation function and use it in the model
+# TODO: MLP block
 class LNLinearSigmoid(torch.nn.Module):
     def __init__(self, fc_dim1, fc_dim2, dtype=torch.bfloat16):
         super().__init__()
         self.ln = torch.nn.LayerNorm(fc_dim1, elementwise_affine=False)
         self.fc = torch.nn.Linear(fc_dim1, fc_dim2, bias=False).to(dtype)
-        self.sigmoid = torch.nn.Sigmoid()
+        self.sigmoid = torch.nn.Sigmoid()  # TODO: Find a way to make it configurable

     def forward(self, x):
         x = self.ln(x)
@@ -383,6 +387,7 @@ def forward(self, x):
         return x


+# TODO: We might not need it, need to figure of it's relevant in any technique
 class RMSNorm(torch.nn.Module):
     def __init__(self, dim, eps=1e-6, dtype=torch.bfloat16):
         super().__init__()
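
As a reference point, here is a minimal standalone sketch of exercising one of these toy modules the way the config file describes. The shape, device, and torch.compile call mirror the YAML fields (matrix_shapes, device, use_torch_compile, torch_compile_mode) but are assumptions here, not the benchmark runner's actual code path.

import torch

# Assumes the class from benchmarks/microbenchmarks/utils.py is importable;
# adjust the import path to your checkout layout.
from benchmarks.microbenchmarks.utils import LNLinearSigmoid

# Assumed [m, k, n] = [2048, 4096, 1024], matching the example shapes in the config.
m, k, n = 2048, 4096, 1024
model = LNLinearSigmoid(fc_dim1=k, fc_dim2=n, dtype=torch.bfloat16).to("cuda")
x = torch.randn(m, k, dtype=torch.bfloat16, device="cuda")

compiled = torch.compile(model, mode="max-autotune")  # mirrors use_torch_compile / torch_compile_mode
with torch.no_grad():
    y = compiled(x)
print(y.shape)  # torch.Size([2048, 1024])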
