Skip to content

Commit c663e30

Browse files
authored
Add additional info to dashboard (#2494)
1 parent aee0795 commit c663e30

File tree

5 files changed

+72
-14
lines changed

5 files changed

+72
-14
lines changed

benchmarks/dashboard/ci_microbenchmark_runner.py

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ def create_benchmark_result(
3939
metric_values: List[float],
4040
quant_type: str,
4141
device: str,
42+
torch_compile_mode: str,
43+
metric_extra_info: Dict[str, Any] = {},
4244
) -> Dict[str, Any]:
4345
"""Create a benchmark result in the PyTorch OSS benchmark database format.
4446
@@ -77,6 +79,7 @@ def create_benchmark_result(
7779
"extra_info": {
7880
"device": device,
7981
"arch": benchmark_device,
82+
"torch_compile_mode": torch_compile_mode,
8083
},
8184
},
8285
"model": {
@@ -85,9 +88,12 @@ def create_benchmark_result(
8588
"origins": ["torchao"],
8689
},
8790
"metric": {
88-
"name": f"{metric_name}(wrt bf16)", # name with unit
91+
"name": f"{metric_name}", # name with unit
8992
"benchmark_values": metric_values, # benchmark_values
9093
"target_value": 0.0, # TODO: Will need to define the target value
94+
"extra_info": {
95+
**metric_extra_info,
96+
},
9197
},
9298
"runners": [],
9399
"dependencies": {},
@@ -115,15 +121,55 @@ def run_ci_benchmarks(config_path: str) -> List[Dict[str, Any]]:
115121

116122
if result is not None:
117123
# Create benchmark result in OSS format
118-
benchmark_result = create_benchmark_result(
124+
speedup_result = create_benchmark_result(
119125
benchmark_name="TorchAO Quantization Benchmark",
120126
shape=[config.m, config.k, config.n],
121-
metric_name="speedup",
127+
metric_name="Fwd Speedup (x)",
122128
metric_values=[result.speedup],
123129
quant_type=config.quantization,
124130
device=config.device,
131+
torch_compile_mode=config.torch_compile_mode,
132+
)
133+
results.append(speedup_result)
134+
baseline_time_result = create_benchmark_result(
135+
benchmark_name="TorchAO Quantization Benchmark",
136+
shape=[config.m, config.k, config.n],
137+
metric_name="Bfloat16 Fwd Time (ms)",
138+
metric_values=[result.baseline_inference_time_in_ms],
139+
quant_type=config.quantization,
140+
device=config.device,
141+
torch_compile_mode=config.torch_compile_mode,
142+
metric_extra_info={
143+
"unit": "ms",
144+
},
145+
)
146+
results.append(baseline_time_result)
147+
quantize_time_result = create_benchmark_result(
148+
benchmark_name="TorchAO Quantization Benchmark",
149+
shape=[config.m, config.k, config.n],
150+
metric_name="Quantized Fwd Time (ms)",
151+
metric_values=[result.model_inference_time_in_ms],
152+
quant_type=config.quantization,
153+
device=config.device,
154+
torch_compile_mode=config.torch_compile_mode,
155+
metric_extra_info={
156+
"unit": "ms",
157+
},
158+
)
159+
results.append(quantize_time_result)
160+
allocated_memory_result = create_benchmark_result(
161+
benchmark_name="TorchAO Quantization Benchmark",
162+
shape=[config.m, config.k, config.n],
163+
metric_name="Allocated Memory (MB)",
164+
metric_values=[result.memory_stats["allocated_bytes.all.peak"]],
165+
quant_type=config.quantization,
166+
device=config.device,
167+
torch_compile_mode=config.torch_compile_mode,
168+
metric_extra_info={
169+
"unit": "MB",
170+
},
125171
)
126-
results.append(benchmark_result)
172+
results.append(allocated_memory_result)
127173

128174
return results
129175

benchmarks/dashboard/microbenchmark_quantization_config.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@ model_params:
1818
torch_compile_mode: "max-autotune"
1919
device: "cuda"
2020
model_type: "linear"
21+
enable_memory_profiler: true

benchmarks/microbenchmarks/benchmark_inference.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -149,13 +149,15 @@ def run(config: BenchmarkConfig) -> BenchmarkResult:
149149
os.makedirs(memory_profiler_dir, exist_ok=True)
150150

151151
# Save memory profile with .pickle extension
152-
result.memory_profile_path = generate_memory_profile(
153-
model=m_copy,
154-
input_data=input_data,
155-
profile_file_path=os.path.join(
156-
memory_profiler_dir,
157-
f"{config._file_name}_memory_profile.pickle",
158-
),
152+
result.memory_profile_path, result.memory_stats = (
153+
generate_memory_profile(
154+
model=m_copy,
155+
input_data=input_data,
156+
profile_file_path=os.path.join(
157+
memory_profiler_dir,
158+
f"{config._file_name}_memory_profile.pickle",
159+
),
160+
)
159161
)
160162

161163
if result.memory_profile_path:

benchmarks/microbenchmarks/profiler.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ def generate_memory_profile(model, input_data, profile_file_path):
9191

9292
# Create parent directory if it doesn't exist
9393
os.makedirs(os.path.dirname(profile_file_path), exist_ok=True)
94+
memory_stats = dict()
9495

9596
try:
9697
torch.cuda.empty_cache()
@@ -130,11 +131,19 @@ def generate_memory_profile(model, input_data, profile_file_path):
130131
print(f"Attempt {i + 1}/5: {e}, retrying...")
131132
time.sleep(3.0)
132133

134+
# Record memory stats
135+
_memory_stats = torch.cuda.memory_stats()
136+
memory_stats = {
137+
"allocated_bytes.all.peak": _memory_stats["allocated_bytes.all.peak"] / 1e6,
138+
"active_bytes.all.peak": _memory_stats["active_bytes.all.peak"] / 1e6,
139+
"reserved_bytes.all.peak": _memory_stats["reserved_bytes.all.peak"] / 1e6,
140+
}
141+
133142
except Exception as e:
134143
print(f"Error in memory profiling: {e}")
135144

136145
# Return the file path for consistency with other profiler functions
137-
return profile_file_path
146+
return profile_file_path, memory_stats
138147

139148

140149
def visualize_memory_profile(profile_file_path):

benchmarks/microbenchmarks/test/test_benchmark_profiler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ def test_memory_profiler_enabled(self):
178178
)
179179

180180
# Generate memory profile
181-
result_path = generate_memory_profile(
181+
result_path, memory_stats = generate_memory_profile(
182182
self.model, self.input_data, memory_profile_path
183183
)
184184

@@ -271,7 +271,7 @@ def test_memory_profiler_cuda_unavailable(self):
271271
)
272272

273273
# Generate memory profile
274-
result = generate_memory_profile(
274+
result, memory_stats = generate_memory_profile(
275275
self.model, self.input_data, memory_profile_path
276276
)
277277

0 commit comments

Comments (0)