Skip to content

Commit c663e30

Browse files
authored
Add additional info to dashboard (#2494)
1 parent aee0795 commit c663e30

File tree

5 files changed

+72
-14
lines changed

5 files changed

+72
-14
lines changed

benchmarks/dashboard/ci_microbenchmark_runner.py

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,8 @@ def create_benchmark_result(
3939
metric_values: List[float],
4040
quant_type: str,
4141
device: str,
42+
torch_compile_mode: str,
43+
metric_extra_info: Dict[str, Any] = {},
4244
) -> Dict[str, Any]:
4345
"""Create a benchmark result in the PyTorch OSS benchmark database format.
4446
@@ -77,6 +79,7 @@ def create_benchmark_result(
7779
"extra_info": {
7880
"device": device,
7981
"arch": benchmark_device,
82+
"torch_compile_mode": torch_compile_mode,
8083
},
8184
},
8285
"model": {
@@ -85,9 +88,12 @@ def create_benchmark_result(
8588
"origins": ["torchao"],
8689
},
8790
"metric": {
88-
"name": f"{metric_name}(wrt bf16)", # name with unit
91+
"name": f"{metric_name}", # name with unit
8992
"benchmark_values": metric_values, # benchmark_values
9093
"target_value": 0.0, # TODO: Will need to define the target value
94+
"extra_info": {
95+
**metric_extra_info,
96+
},
9197
},
9298
"runners": [],
9399
"dependencies": {},
@@ -115,15 +121,55 @@ def run_ci_benchmarks(config_path: str) -> List[Dict[str, Any]]:
115121

116122
if result is not None:
117123
# Create benchmark result in OSS format
118-
benchmark_result = create_benchmark_result(
124+
speedup_result = create_benchmark_result(
119125
benchmark_name="TorchAO Quantization Benchmark",
120126
shape=[config.m, config.k, config.n],
121-
metric_name="speedup",
127+
metric_name="Fwd Speedup (x)",
122128
metric_values=[result.speedup],
123129
quant_type=config.quantization,
124130
device=config.device,
131+
torch_compile_mode=config.torch_compile_mode,
132+
)
133+
results.append(speedup_result)
134+
baseline_time_result = create_benchmark_result(
135+
benchmark_name="TorchAO Quantization Benchmark",
136+
shape=[config.m, config.k, config.n],
137+
metric_name="Bfloat16 Fwd Time (ms)",
138+
metric_values=[result.baseline_inference_time_in_ms],
139+
quant_type=config.quantization,
140+
device=config.device,
141+
torch_compile_mode=config.torch_compile_mode,
142+
metric_extra_info={
143+
"unit": "ms",
144+
},
145+
)
146+
results.append(baseline_time_result)
147+
quantize_time_result = create_benchmark_result(
148+
benchmark_name="TorchAO Quantization Benchmark",
149+
shape=[config.m, config.k, config.n],
150+
metric_name="Quantized Fwd Time (ms)",
151+
metric_values=[result.model_inference_time_in_ms],
152+
quant_type=config.quantization,
153+
device=config.device,
154+
torch_compile_mode=config.torch_compile_mode,
155+
metric_extra_info={
156+
"unit": "ms",
157+
},
158+
)
159+
results.append(quantize_time_result)
160+
allocated_memory_result = create_benchmark_result(
161+
benchmark_name="TorchAO Quantization Benchmark",
162+
shape=[config.m, config.k, config.n],
163+
metric_name="Allocated Memory (MB)",
164+
metric_values=[result.memory_stats["allocated_bytes.all.peak"]],
165+
quant_type=config.quantization,
166+
device=config.device,
167+
torch_compile_mode=config.torch_compile_mode,
168+
metric_extra_info={
169+
"unit": "MB",
170+
},
125171
)
126-
results.append(benchmark_result)
172+
results.append(allocated_memory_result)
127173

128174
return results
129175

benchmarks/dashboard/microbenchmark_quantization_config.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,4 @@ model_params:
1818
torch_compile_mode: "max-autotune"
1919
device: "cuda"
2020
model_type: "linear"
21+
enable_memory_profiler: true

benchmarks/microbenchmarks/benchmark_inference.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -149,13 +149,15 @@ def run(config: BenchmarkConfig) -> BenchmarkResult:
149149
os.makedirs(memory_profiler_dir, exist_ok=True)
150150

151151
# Save memory profile with .pickle extension
152-
result.memory_profile_path = generate_memory_profile(
153-
model=m_copy,
154-
input_data=input_data,
155-
profile_file_path=os.path.join(
156-
memory_profiler_dir,
157-
f"{config._file_name}_memory_profile.pickle",
158-
),
152+
result.memory_profile_path, result.memory_stats = (
153+
generate_memory_profile(
154+
model=m_copy,
155+
input_data=input_data,
156+
profile_file_path=os.path.join(
157+
memory_profiler_dir,
158+
f"{config._file_name}_memory_profile.pickle",
159+
),
160+
)
159161
)
160162

161163
if result.memory_profile_path:

benchmarks/microbenchmarks/profiler.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ def generate_memory_profile(model, input_data, profile_file_path):
9191

9292
# Create parent directory if it doesn't exist
9393
os.makedirs(os.path.dirname(profile_file_path), exist_ok=True)
94+
memory_stats = dict()
9495

9596
try:
9697
torch.cuda.empty_cache()
@@ -130,11 +131,19 @@ def generate_memory_profile(model, input_data, profile_file_path):
130131
print(f"Attempt {i + 1}/5: {e}, retrying...")
131132
time.sleep(3.0)
132133

134+
# Record memory stats
135+
_memory_stats = torch.cuda.memory_stats()
136+
memory_stats = {
137+
"allocated_bytes.all.peak": _memory_stats["allocated_bytes.all.peak"] / 1e6,
138+
"active_bytes.all.peak": _memory_stats["active_bytes.all.peak"] / 1e6,
139+
"reserved_bytes.all.peak": _memory_stats["reserved_bytes.all.peak"] / 1e6,
140+
}
141+
133142
except Exception as e:
134143
print(f"Error in memory profiling: {e}")
135144

136145
# Return the file path for consistency with other profiler functions
137-
return profile_file_path
146+
return profile_file_path, memory_stats
138147

139148

140149
def visualize_memory_profile(profile_file_path):

benchmarks/microbenchmarks/test/test_benchmark_profiler.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,7 @@ def test_memory_profiler_enabled(self):
178178
)
179179

180180
# Generate memory profile
181-
result_path = generate_memory_profile(
181+
result_path, memory_stats = generate_memory_profile(
182182
self.model, self.input_data, memory_profile_path
183183
)
184184

@@ -271,7 +271,7 @@ def test_memory_profiler_cuda_unavailable(self):
271271
)
272272

273273
# Generate memory profile
274-
result = generate_memory_profile(
274+
result, memory_stats = generate_memory_profile(
275275
self.model, self.input_data, memory_profile_path
276276
)
277277

0 commit comments

Comments (0)