
Commit a130026

fix fbgemm (#222)

Authored by xuzhao9, committed by facebook-github-bot

Summary: FBGEMM changed the installation command for the genai variant: https://docs.pytorch.org/FBGEMM/fbgemm_genai/development/BuildInstructions.html#cuda-build

Pull Request resolved: #222

Test Plan: https://github.com/pytorch-labs/tritonbench/actions/runs/14963093343

Reviewed By: njriasan

Differential Revision: D74528038

Pulled By: xuzhao9

fbshipit-source-id: 2e6982b0247e28b60d86d6f38785651fcb8f8f3b
1 parent d835d3d · commit a130026
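As a quick reference, the new-style genai build invocation after this change looks roughly like the following minimal sketch; the FBGEMM checkout path is an assumed placeholder, and the full logic lives in install.py below:

```python
import subprocess
import sys
from pathlib import Path

# Assumed location of the FBGEMM GPU checkout; install.py resolves this as FBGEMM_PATH.
FBGEMM_PATH = Path("submodules/FBGEMM/fbgemm_gpu")

cmd = [
    sys.executable,
    "setup.py",
    "install",
    "--build-target=genai",  # replaces the old "--package_variant=genai"
    "-DTORCH_CUDA_ARCH_LIST=8.0;9.0;9.0a",
]
subprocess.check_call(cmd, cwd=str(FBGEMM_PATH.resolve()))
```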

File tree

4 files changed: +71 / -9 lines changed
Lines changed: 2 additions & 2 deletions

@@ -1,3 +1,3 @@
-benchmarks:
-- benchmark_name: bf16_ragged_attention_fwd
+bf16_ragged_attention_fwd:
+  op: ragged_attention
   args: --op ragged_attention --only hstu --num-inputs 1 --input-id 2 --metrics tflops --cudagraph --simple-output
Lines changed: 62 additions & 0 deletions

@@ -0,0 +1,62 @@
+# default base image: ghcr.io/actions/actions-runner:latest
+# base image: Ubuntu 22.04 jammy
+# Prune CUDA to only keep gencode >= A100
+ARG BASE_IMAGE=ghcr.io/actions/actions-runner:latest
+FROM ${BASE_IMAGE}
+
+ENV LANG=C.UTF-8 LC_ALL=C.UTF-8
+ARG OVERRIDE_GENCODE="-gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90a,code=sm_90a"
+ARG OVERRIDE_GENCODE_CUDNN="-gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_90a,code=sm_90a"
+
+RUN sudo apt-get -y update && sudo apt -y update
+# fontconfig: required by model doctr_det_predictor
+# libjpeg and libpng: optionally required by torchvision (vision#8342)
+RUN sudo apt-get install -y git jq gcc g++ \
+    vim wget curl ninja-build cmake \
+    libgl1-mesa-glx libsndfile1-dev kmod libxml2-dev libxslt1-dev \
+    fontconfig libfontconfig1-dev \
+    libpango-1.0-0 libpangoft2-1.0-0 \
+    libsdl2-dev libsdl2-2.0-0 \
+    libjpeg-dev libpng-dev zlib1g-dev
+
+# get switch-cuda utility
+RUN sudo wget -q https://raw.githubusercontent.com/phohenecker/switch-cuda/master/switch-cuda.sh -O /usr/bin/switch-cuda.sh
+RUN sudo chmod +x /usr/bin/switch-cuda.sh
+
+RUN sudo mkdir -p /workspace; sudo chown runner:runner /workspace
+
+# GKE version: 1.28.5-gke.1217000
+# NVIDIA driver version: 535.104.05
+# NVIDIA drivers list available at gs://ubuntu_nvidia_packages/
+# We assume that the host NVIDIA driver binaries and libraries are mapped to the docker filesystem
+
+# Install CUDA 12.8 build toolchains
+RUN cd /workspace; mkdir -p pytorch-ci; cd pytorch-ci; wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_cuda.sh
+RUN cd /workspace/pytorch-ci; wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_cudnn.sh && \
+    wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_nccl.sh && \
+    wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/common/install_cusparselt.sh && \
+    mkdir ci_commit_pins && cd ci_commit_pins && \
+    wget https://raw.githubusercontent.com/pytorch/pytorch/main/.ci/docker/ci_commit_pins/nccl-cu12.txt
+RUN sudo bash -c "set -x;export OVERRIDE_GENCODE=\"${OVERRIDE_GENCODE}\" OVERRIDE_GENCODE_CUDNN=\"${OVERRIDE_GENCODE_CUDNN}\"; cd /workspace/pytorch-ci; bash install_cuda.sh 12.8"

+
+# Install miniconda
+RUN wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /workspace/Miniconda3-latest-Linux-x86_64.sh
+RUN cd /workspace && \
+    chmod +x Miniconda3-latest-Linux-x86_64.sh && \
+    bash ./Miniconda3-latest-Linux-x86_64.sh -b -u -p /workspace/miniconda3
+
+# Test activate miniconda
+RUN . /workspace/miniconda3/etc/profile.d/conda.sh && \
+    conda activate base && \
+    conda init
+
+RUN echo "\
+. /workspace/miniconda3/etc/profile.d/conda.sh\n\
+conda activate base\n\
+export CONDA_HOME=/workspace/miniconda3\n\
+export CUDA_HOME=/usr/local/cuda\n\
+export PATH=/home/runner/bin\${PATH:+:\${PATH}}\n\
+export LD_LIBRARY_PATH=\${CUDA_HOME}/lib64\${LD_LIBRARY_PATH:+:\${LD_LIBRARY_PATH}}\n\
+export LIBRARY_PATH=\${CUDA_HOME}/lib64\${LIBRARY_PATH:+:\${LIBRARY_PATH}}\n" >> /workspace/setup_instance.sh
+
+RUN echo ". /workspace/setup_instance.sh\n" >> ${HOME}/.bashrc

install.py

Lines changed: 2 additions & 2 deletions

@@ -62,15 +62,15 @@ def install_fbgemm(genai=True):
             sys.executable,
             "setup.py",
             "install",
-            "--package_variant=genai",
+            "--build-target=genai",
             "-DTORCH_CUDA_ARCH_LIST=8.0;9.0;9.0a",
         ]
     else:
         cmd = [
             sys.executable,
             "setup.py",
             "install",
-            "--package_variant=cuda",
+            "--build-target=cuda",
             "-DTORCH_CUDA_ARCH_LIST=8.0;9.0;9.0a",
         ]
     subprocess.check_call(cmd, cwd=str(FBGEMM_PATH.resolve()))

tritonbench/utils/run_utils.py

Lines changed: 5 additions & 5 deletions

@@ -91,11 +91,11 @@ def run_config(config_file: str):
     assert Path(config_file).exists(), f"Config file {config_file} must exist."
     with open(config_file, "r") as fp:
         config = yaml.safe_load(fp)
-    benchmarks = config["benchmarks"]
-    for benchmark in benchmarks:
-        op_args = benchmark["args"].split(" ")
-        benchmark_name = benchmark["benchmark_name"]
-        run_in_task(op=None, op_args=op_args, benchmark_name=benchmark_name)
+    for benchmark_name in config:
+        benchmark_config = config[benchmark_name]
+        op_name = benchmark_config["op"]
+        op_args = benchmark_config["args"].split(" ")
+        run_in_task(op=op_name, op_args=op_args, benchmark_name=benchmark_name)


 def run_in_task(
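Putting the two changes together, here is a minimal self-contained sketch of how the new flat config format is consumed; the embedded YAML mirrors the benchmark config change above, and the print call stands in for run_in_task:

```python
import yaml

# Config in the new flat format: one top-level key per benchmark,
# each with an "op" name and the argument string passed to the operator runner.
EXAMPLE_CONFIG = """
bf16_ragged_attention_fwd:
  op: ragged_attention
  args: --op ragged_attention --only hstu --num-inputs 1 --input-id 2 --metrics tflops --cudagraph --simple-output
"""

config = yaml.safe_load(EXAMPLE_CONFIG)

# Mirrors the updated run_config(): iterate top-level keys instead of a "benchmarks" list.
for benchmark_name, benchmark_config in config.items():
    op_name = benchmark_config["op"]
    op_args = benchmark_config["args"].split(" ")
    print(benchmark_name, op_name, op_args)  # run_in_task(op=op_name, ...) in the real code
```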
