
Commit 8401e91

Add benchmark numbers to dashboard (#2260)
1 parent 0da65f8 · commit 8401e91

File tree: 3 files changed (+250, -0 lines)

New file: GitHub Actions workflow (Microbenchmarks-Perf-Nightly) — 70 additions & 0 deletions

name: Microbenchmarks-Perf-Nightly
# Dashboard: https://hud.pytorch.org/benchmark/llms?repoName=pytorch%2Fao&benchmarkName=micro-benchmark+api

on:
  pull_request:
  push:
    tags:
      - ciflow/benchmark/*
  workflow_dispatch:
  schedule:
    - cron: '0 3 * * *' # Run daily at 3 AM UTC

jobs:
  benchmark:
    runs-on: linux.aws.h100
    strategy:
      matrix:
        torch-spec:
          - '--pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu126'
    steps:
      - uses: actions/checkout@v4

      - name: Setup miniconda
        uses: pytorch/test-infra/.github/actions/setup-miniconda@main
        with:
          python-version: "3.9"

      - name: Run benchmark
        shell: bash
        run: |
          set -eux

          # Upgrade pip
          ${CONDA_RUN} python -m pip install --upgrade pip

          ${CONDA_RUN} ls
          ${CONDA_RUN} bash -c 'pwd'
          ${CONDA_RUN} bash -c 'echo $PYTHONPATH'

          # Install dependencies
          ${CONDA_RUN} pip install ${{ matrix.torch-spec }}
          ${CONDA_RUN} pip install -r dev-requirements.txt
          ${CONDA_RUN} pip install .

          ${CONDA_RUN} ls
          ${CONDA_RUN} bash -c 'pwd'
          ${CONDA_RUN} bash -c 'echo $PYTHONPATH'

          # Set PYTHONPATH to the current directory (.) if not already set, and include the benchmarks directory
          ${CONDA_RUN} export PYTHONPATH="${PYTHONPATH:-$(pwd)}:$(pwd)/benchmarks"

          # Create benchmark results directory
          mkdir -p ${{ runner.temp }}/benchmark-results

          # Run microbenchmarks for dashboard
          ${CONDA_RUN} bash -c '
            export PYTHONPATH="${PYTHONPATH:-$(pwd)}:$(pwd)/benchmarks"
            echo "PYTHONPATH is: $PYTHONPATH"
            echo "Current directory is: $(pwd)"
            python benchmarks/dashboard/ci_microbenchmark_runner.py \
              --config benchmarks/dashboard/microbenchmark_quantization_config.yml \
              --output "$RUNNER_TEMP/benchmark-results/microbenchmark-results.json"'

      - name: Upload the benchmark results to the OSS benchmark database for the dashboard
        uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
        with:
          benchmark-results-dir: ${{ runner.temp }}/benchmark-results
          dry-run: false
          schema-version: v3
          github-token: ${{ secrets.GITHUB_TOKEN }}
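
For local testing, the "Run benchmark" step above can be reproduced outside CI. The following is a minimal sketch (hypothetical, not part of this commit) that assumes it is run from the torchao repository root with the package and dev-requirements already installed and a CUDA device available:

# Hypothetical local reproduction of the CI "Run benchmark" step; it mirrors
# the command and PYTHONPATH setup from the workflow above.
import os
import subprocess
import sys

os.makedirs("benchmark-results", exist_ok=True)
env = dict(os.environ)
env["PYTHONPATH"] = f"{os.getcwd()}:{os.getcwd()}/benchmarks"
subprocess.run(
    [
        sys.executable,
        "benchmarks/dashboard/ci_microbenchmark_runner.py",
        "--config", "benchmarks/dashboard/microbenchmark_quantization_config.yml",
        "--output", "benchmark-results/microbenchmark-results.json",
    ],
    check=True,
    env=env,
)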
New file: ci_microbenchmark_runner.py — 160 additions & 0 deletions

#!/usr/bin/env python3
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.

# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

"""
CI Microbenchmark Runner for PyTorch OSS Benchmark Database

This script runs microbenchmarks for a given config file
and outputs results in the format required by the PyTorch OSS benchmark database.
It reuses functionality from benchmark_runner.py and only adds CI-specific code.

Usage:
    python ci_microbenchmark_runner.py --config benchmark_config.yml

The YAML file should contain all necessary configuration parameters for the benchmarks.
"""

import argparse
import json
import platform
from typing import Any, Dict, List

import torch

from benchmarks.microbenchmarks.benchmark_inference import run as run_inference
from benchmarks.microbenchmarks.benchmark_runner import (
    load_benchmark_configs,
)
from benchmarks.microbenchmarks.utils import clean_caches


def create_benchmark_result(
    benchmark_name: str,
    shape: List[int],
    metric_name: str,
    metric_values: List[float],
    quant_type: str,
    device: str,
) -> Dict[str, Any]:
    """Create a benchmark result in the PyTorch OSS benchmark database format.

    Args:
        benchmark_name: Name of the benchmark
        shape: List of shape dimensions [M, K, N]
        metric_name: Name of the metric
        metric_values: List of metric values
        quant_type: Quantization type
        device: Device type (cuda/cpu)

    Returns:
        Dictionary containing the benchmark result in the required format
    """
    print(
        f"Creating benchmark result for {benchmark_name} with shape {shape} and metric {metric_name}"
    )

    # Map device to benchmark device name
    benchmark_device = (
        torch.cuda.get_device_name(0)
        if device == "cuda"
        else platform.processor()
        if device == "cpu"
        else "unknown"
    )

    # Format shape as M-K-N
    mkn_name = f"{shape[0]}-{shape[1]}-{shape[2]}" if len(shape) == 3 else "unknown"

    return {
        "benchmark": {
            "name": "micro-benchmark api",
            "mode": "inference",
            "dtype": quant_type,
            "extra_info": {
                "device": device,
                "arch": benchmark_device,
            },
        },
        "model": {
            "name": mkn_name,  # name in M-K-N format
            "type": "micro-benchmark custom layer",  # type
            "origins": ["torchao"],
        },
        "metric": {
            "name": f"{metric_name}(wrt bf16)",  # name with unit
            "benchmark_values": metric_values,  # benchmark_values
            "target_value": 0.0,  # TODO: Will need to define the target value
        },
        "runners": [],
        "dependencies": {},
    }


def run_ci_benchmarks(config_path: str) -> List[Dict[str, Any]]:
    """Run benchmarks using configurations from YAML file and return results in OSS format.

    Args:
        config_path: Path to the benchmark configuration file

    Returns:
        List of benchmark results in the PyTorch OSS benchmark database format
    """
    # Load configuration using existing function
    configs = load_benchmark_configs(argparse.Namespace(config=config_path))
    results = []

    # Run benchmarks for each config
    for config in configs:
        # Run benchmark using existing function
        clean_caches()
        result = run_inference(config)

        if result is not None:
            # Create benchmark result in OSS format
            benchmark_result = create_benchmark_result(
                benchmark_name="TorchAO Quantization Benchmark",
                shape=[config.m, config.k, config.n],
                metric_name="speedup",
                metric_values=[result.speedup],
                quant_type=config.quantization,
                device=config.device,
            )
            results.append(benchmark_result)

    return results


def main():
    parser = argparse.ArgumentParser(
        description="Run microbenchmarks and output results in PyTorch OSS benchmark database format"
    )
    parser.add_argument(
        "--config",
        type=str,
        required=True,
        help="Path to benchmark configuration file",
    )
    parser.add_argument(
        "--output",
        type=str,
        default="benchmark_results.json",
        help="Path to output JSON file",
    )
    args = parser.parse_args()

    # Run benchmarks
    results = run_ci_benchmarks(args.config)

    # Save results to JSON file
    with open(args.output, "w") as f:
        json.dump(results, f, indent=2)

    print(f"Benchmark results saved to {args.output}")


if __name__ == "__main__":
    main()
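
Each element returned by run_ci_benchmarks() follows the dictionary layout built in create_benchmark_result(), so the JSON written by main() can be inspected with standard tooling. A minimal sketch (illustrative only, not part of this commit; the results path is a placeholder) that prints the recorded speedup per M-K-N shape:

# Illustrative reader for the results JSON written by main(); field names
# follow create_benchmark_result() above. The input path is a placeholder.
import json

with open("microbenchmark-results.json") as f:
    results = json.load(f)

for entry in results:
    shape = entry["model"]["name"]                # "M-K-N", e.g. "1024-1024-1024"
    quant = entry["benchmark"]["dtype"]           # quantization recipe, e.g. "int8wo"
    metric = entry["metric"]["name"]              # "speedup(wrt bf16)"
    values = entry["metric"]["benchmark_values"]
    print(f"{shape} [{quant}] {metric}: {values}")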
New file: microbenchmark_quantization_config.yml — 20 additions & 0 deletions

# Benchmark configuration for microbenchmarks
benchmark_mode: "inference"
quantization_config_recipe_names: # Will run a baseline inference for model by default, without quantization for comparison
  - "int8wo"
  - "int8dq"
  - "float8dq-tensor"
  - "float8dq-row"
  - "float8wo"
output_dir: "benchmarks/microbenchmarks/results"
model_params:
  - name: "small_bf16_linear"
    matrix_shapes:
      - name: "small_sweep"
        min_power: 10
        max_power: 15
    high_precision_dtype: "torch.bfloat16"
    use_torch_compile: true
    torch_compile_mode: "max-autotune"
    device: "cuda"
    model_type: "linear"
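
The runner consumes this file through load_benchmark_configs(), but the raw keys can also be read directly; a minimal sketch (assumes PyYAML is installed; illustrative only, not part of this commit):

# Illustrative: inspect the quantization recipes and shape-sweep bounds
# straight from the YAML above (assumes PyYAML is available).
import yaml

with open("benchmarks/dashboard/microbenchmark_quantization_config.yml") as f:
    cfg = yaml.safe_load(f)

print(cfg["benchmark_mode"])                    # "inference"
print(cfg["quantization_config_recipe_names"])  # ["int8wo", "int8dq", ...]
sweep = cfg["model_params"][0]["matrix_shapes"][0]
print(sweep["name"], sweep["min_power"], sweep["max_power"])  # small_sweep 10 15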
