From d59abc4928b793395885e7eed8f800a333a94c2f Mon Sep 17 00:00:00 2001
From: Camyll Harajli
Date: Mon, 14 Apr 2025 14:49:41 -0700
Subject: [PATCH 1/3] move workflow cli from testinfra to executorch

---
 extension/benchmark/benchmark_workflow_cli.py | 137 ++++++++++++++++++
 1 file changed, 137 insertions(+)
 create mode 100644 extension/benchmark/benchmark_workflow_cli.py

diff --git a/extension/benchmark/benchmark_workflow_cli.py b/extension/benchmark/benchmark_workflow_cli.py
new file mode 100644
index 00000000000..fde624c8cba
--- /dev/null
+++ b/extension/benchmark/benchmark_workflow_cli.py
@@ -0,0 +1,137 @@
+#!/usr/bin/env python3
+import json
+import logging
+import os
+from argparse import ArgumentParser
+from logging import info
+from re import A
+from typing import Any
+
+import requests
+
+
+GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")
+logging.basicConfig(level=logging.INFO)
+
+
+def parse_args() -> Any:
+    parser = ArgumentParser(
+        "Run Android and iOS tests on AWS Device Farm via github actions workflow run"
+    )
+
+    parser.add_argument(
+        "--branch",
+        type=str,
+        default="main",
+        required=False,
+        help="what gh branch to use in pytorch/executorch",
+    )
+
+    app_type = parser.add_mutually_exclusive_group(required=True)
+    app_type.add_argument(
+        "--android",
+        action="store_true",
+        required=False,
+        help="run the test on Android",
+    )
+    app_type.add_argument(
+        "--ios",
+        action="store_true",
+        required=False,
+        help="run the test on iOS",
+    )
+
+    parser.add_argument(
+        "--models",
+        type=str,
+        required=False,
+        default="llama",
+        help="the model to run on. Default is llama. See https://github.com/pytorch/executorch/blob/0342babc505bcb90244874e9ed9218d90dd67b87/examples/models/__init__.py#L53 for more model options",
+    )
+
+    parser.add_argument(
+        "--devices",
+        type=str,
+        required=False,
+        default="",
+        choices=[
+            "apple_iphone_15",
+            "apple_iphone_15+ios_18",
+            "samsung_galaxy_s22",
+            "samsung_galaxy_s24",
+            "google_pixel_8_pro",
+        ],
+        help="specific devices to run on. Default is s22 for android and iphone 15 for ios.",
+    )
+
+    parser.add_argument(
+        "--benchmark_configs",
+        type=str,
+        required=False,
+        choices=["xplat", "android", "ios"],
+        default="",
+        help="The list of configs used in the benchmark",
+    )
+
+    args, unknown = parser.parse_known_args()
+    if len(unknown) > 0:
+        info(f"detected unknown flags: {unknown}")
+    return args
+
+
+def run_workflow(app_type, branch, models, devices, benchmark_configs):
+    dispatch_hook = "/dispatches"
+    if app_type == "android":
+        url = f"https://api.github.com/repos/pytorch/executorch/actions/workflows/android-perf.yml"
+    else:
+        url = f"https://api.github.com/repos/pytorch/executorch/actions/workflows/apple-perf.yml"
+
+    headers = {
+        "Accept": "application/vnd.github.v3+json",
+        "Authorization": f"Bearer {GITHUB_TOKEN}",
+        "X-GitHub-Api-Version": "2022-11-28",
+    }
+
+    data = {
+        "ref": f"{branch}",
+        "inputs": {
+            "models": f"{models}",
+            "devices": f"{devices}",
+            "benchmark_configs": f"{benchmark_configs}",
+        },
+    }
+
+    resp = requests.post(url + dispatch_hook, headers=headers, data=json.dumps(data))
+    if resp.status_code != 204:
+        raise Exception(f"Failed to start workflow: {resp.text}")
+    else:
+        print("Workflow started successfully.")
+        if app_type == "android":
+            print(
+                "Find your workflow run here: https://github.com/pytorch/executorch/actions/workflows/android-perf.yml"
+            )
+        else:
+            print(
+                "Find your workflow run here: https://github.com/pytorch/executorch/actions/workflows/apple-perf.yml"
+            )
+
+
+def main() -> None:
+    args = parse_args()
+    app_type = None
+    if args.android:
+        app_type = "android"
+    elif args.ios:
+        app_type = "ios"
+    if app_type:
+        resp = run_workflow(
+            app_type, args.branch, args.models, args.devices, args.benchmark_configs
+        )
+    else:
+        raise Exception(
+            "No app type specified. Please specify either --android or --ios."
+        )
+
+
+if __name__ == "__main__":
+    main()

From ec3e3f140b8e2cc98e7532382b61e2a665b7ac8b Mon Sep 17 00:00:00 2001
From: Camyll Harajli
Date: Mon, 21 Apr 2025 15:50:55 -0700
Subject: [PATCH 2/3] update readme and make new required arguments

---
 extension/benchmark/README.md                 | 13 +++++
 .../benchmark.py}                             | 52 +++++++++++--------
 2 files changed, 43 insertions(+), 22 deletions(-)
 rename extension/benchmark/{benchmark_workflow_cli.py => scripts/benchmark.py} (68%)

diff --git a/extension/benchmark/README.md b/extension/benchmark/README.md
index a9918864e9c..b22e453e4b2 100644
--- a/extension/benchmark/README.md
+++ b/extension/benchmark/README.md
@@ -51,11 +51,24 @@ The benchmarking infrastructure currently supports two major use-cases:
 
 ## Scheduling On-Demand Benchmarking
 
+### Via GitHub
 Users can schedule a benchmarking workflow on a pull request through GitHub Actions using the workflow dispatch UI. Follow the steps below to trigger benchmarking:
 
 1. Access `pytorch/executorch` repository on GitHub and navigate to the "Actions" tab.
 2. Select `android-perf` or `apple-perf` workflow from the list of workflows.
 3. Click "Run workflow" and fill in the required parameters for the model you want to benchmark, e.g. branch name, model name and delegate, and device pool, etc.
 
+### Via Command Line
+1. From this folder, navigate to `/scripts`.
+2. Make sure you have your `GITHUB_TOKEN` set (classic personal access token, not fine-grained): `export GITHUB_TOKEN=`
+3. Run `python benchmark.py` with either `--android` or `--ios` as a required platform argument.
+   - Other **Required** arguments:
+     - `--branch`: Branch name to run the benchmark on. Ideally your local branch with changes committed. For example, `--branch main`
+     - `--models`: Comma-separated list of models to benchmark. For example, `--models llama2,metanet` (see [list](https://github.com/pytorch/executorch/blob/0342babc505bcb90244874e9ed9218d90dd67b87/examples/models/__init__.py#L53) for more options or use a valid Hugging Face model name, e.g. "meta-llama/Llama-3.2-1B")
+4. Use `--help` to see other optional arguments:
+   - `--devices`: Comma-separated list of specific devices to run the benchmark on. Defaults to device pools for the appropriate platform. For example, `--devices samsung_galaxy_s22,samsung_galaxy_s24`
+   - `--benchmark-configs`: Comma-separated list of benchmark configs to use. For example, `--benchmark-configs xnnpack_q8,hf_xnnpack_fp32,llama3_fb16` (See [list](https://github.com/pytorch/executorch/blob/main/.ci/scripts/gather_benchmark_configs.py#L29-L47) for options)
+
+
 
 > **Note:** Write permission to the repo will be needed in order to run the on-demand workflow.

diff --git a/extension/benchmark/benchmark_workflow_cli.py b/extension/benchmark/scripts/benchmark.py
similarity index 68%
rename from extension/benchmark/benchmark_workflow_cli.py
rename to extension/benchmark/scripts/benchmark.py
index fde624c8cba..2087998943f 100644
--- a/extension/benchmark/benchmark_workflow_cli.py
+++ b/extension/benchmark/scripts/benchmark.py
@@ -5,6 +5,7 @@
 from argparse import ArgumentParser
 from logging import info
 from re import A
+from shutil import Error
 from typing import Any
 
 import requests
@@ -22,19 +23,18 @@ def parse_args() -> Any:
     parser.add_argument(
         "--branch",
         type=str,
-        default="main",
-        required=False,
-        help="what gh branch to use in pytorch/executorch",
+        required=True,
+        help="what (non-fork) gh branch to use in pytorch/executorch",
     )
 
-    app_type = parser.add_mutually_exclusive_group(required=True)
-    app_type.add_argument(
+    platform = parser.add_mutually_exclusive_group(required=True)
+    platform.add_argument(
         "--android",
         action="store_true",
         required=False,
         help="run the test on Android",
     )
-    app_type.add_argument(
+    platform.add_argument(
         "--ios",
         action="store_true",
         required=False,
@@ -42,11 +42,10 @@ def parse_args() -> Any:
     )
 
     parser.add_argument(
-        "--models",
+        "--modesl",
         type=str,
-        required=False,
-        default="llama",
-        help="the model to run on. Default is llama. See https://github.com/pytorch/executorch/blob/0342babc505bcb90244874e9ed9218d90dd67b87/examples/models/__init__.py#L53 for more model options",
+        required=True,
+        help='Comma separated list of Models for benchmarking. Model options: https://github.com/pytorch/executorch/blob/0342babc505bcb90244874e9ed9218d90dd67b87/examples/models/__init__.py#L53 or ok to use HuggingFace model name, e.g. "meta-llama/Llama-3.2-1B"',
"meta-llama/Llama-3.2-1B"', ) parser.add_argument( @@ -68,7 +67,17 @@ def parse_args() -> Any: "--benchmark_configs", type=str, required=False, - choices=["xplat", "android", "ios"], + choices=[ + "xnnpack_q8", + "hf_xnnpack_fp32", + "llama3_fb16", + "llama3_spinquant", + "llama3_qlora", + "qnn_q8", + "coreml_fp16", + "mps", + "llama3_coreml_ane", + ], default="", help="The list of configs used in the benchmark", ) @@ -79,13 +88,14 @@ def parse_args() -> Any: return args -def run_workflow(app_type, branch, models, devices, benchmark_configs): +def run_workflow(platform, branch, models, devices, benchmark_configs): dispatch_hook = "/dispatches" - if app_type == "android": + if platform == "android": url = f"https://api.github.com/repos/pytorch/executorch/actions/workflows/android-perf.yml" else: url = f"https://api.github.com/repos/pytorch/executorch/actions/workflows/apple-perf.yml" + # see github workflow dispatch for header details https://docs.github.com/en/rest/actions/workflows#create-a-workflow-dispatch-event headers = { "Accept": "application/vnd.github.v3+json", "Authorization": f"Bearer {GITHUB_TOKEN}", @@ -106,7 +116,7 @@ def run_workflow(app_type, branch, models, devices, benchmark_configs): raise Exception(f"Failed to start workflow: {resp.text}") else: print("Workflow started successfully.") - if app_type == "android": + if platform == "android": print( "Find your workflow run here: https://github.com/pytorch/executorch/actions/workflows/android-perf.yml" ) @@ -118,19 +128,17 @@ def run_workflow(app_type, branch, models, devices, benchmark_configs): def main() -> None: args = parse_args() - app_type = None + platform = None if args.android: - app_type = "android" + platform = "android" elif args.ios: - app_type = "ios" - if app_type: + platform = "ios" + if platform: resp = run_workflow( - app_type, args.branch, args.models, args.devices, args.benchmark_configs + platform, args.branch, args.models, args.devices, args.benchmark_configs ) else: - raise Exception( - "No app type specified. Please specify either --android or --ios." - ) + raise Error("No app type specified. Please specify either --android or --ios.") if __name__ == "__main__": From c7f9ea812b9039df245c73b813bdede0bcbf99a2 Mon Sep 17 00:00:00 2001 From: Camyll Harajli Date: Mon, 21 Apr 2025 16:11:08 -0700 Subject: [PATCH 3/3] fix typos --- extension/benchmark/scripts/benchmark.py | 27 +++++------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/extension/benchmark/scripts/benchmark.py b/extension/benchmark/scripts/benchmark.py index 2087998943f..225c753c91b 100644 --- a/extension/benchmark/scripts/benchmark.py +++ b/extension/benchmark/scripts/benchmark.py @@ -42,7 +42,7 @@ def parse_args() -> Any: ) parser.add_argument( - "--modesl", + "--models", type=str, required=True, help='Comma separated list of Models for benchmarking. Model options: https://github.com/pytorch/executorch/blob/0342babc505bcb90244874e9ed9218d90dd67b87/examples/models/__init__.py#L53 or ok to use HuggingFace model name, e.g. "meta-llama/Llama-3.2-1B"', @@ -53,33 +53,16 @@ def parse_args() -> Any: type=str, required=False, default="", - choices=[ - "apple_iphone_15", - "apple_iphone_15+ios_18", - "samsung_galaxy_s22", - "samsung_galaxy_s24", - "google_pixel_8_pro", - ], - help="specific devices to run on. Default is s22 for android and iphone 15 for ios.", + # TODO update example or add choices once we establish custom device pools + help="Comma-separated list of specific devices to run the benchmark on. 
     )
 
     parser.add_argument(
-        "--benchmark_configs",
+        "--benchmark-configs",
         type=str,
         required=False,
-        choices=[
-            "xnnpack_q8",
-            "hf_xnnpack_fp32",
-            "llama3_fb16",
-            "llama3_spinquant",
-            "llama3_qlora",
-            "qnn_q8",
-            "coreml_fp16",
-            "mps",
-            "llama3_coreml_ane",
-        ],
         default="",
-        help="The list of configs used in the benchmark",
+        help="Comma-separated list of benchmark configs to use. For example, `--benchmark-configs xnnpack_q8,hf_xnnpack_fp32,llama3_fb16` (See https://github.com/pytorch/executorch/blob/main/.ci/scripts/gather_benchmark_configs.py#L29-L47 for options)",
     )
 
     args, unknown = parser.parse_known_args()
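
A note on the dispatch API used by `run_workflow` above: `POST .../dispatches` returns HTTP 204 with an empty body, which is why the script can only print the workflow's generic landing page rather than a direct link to the run it just started. The sketch below is a hypothetical companion snippet, not part of these patches: the helper name `find_recent_dispatch_runs` and its defaults are assumptions, though the `GET .../actions/workflows/{workflow}/runs` endpoint and its `branch`/`event` filters are part of the standard GitHub REST API. It shows one way to locate the newly dispatched run:

```python
#!/usr/bin/env python3
# Hypothetical companion snippet, not part of the patches above: locate the
# run created by a workflow_dispatch event, since POST .../dispatches returns
# 204 with no body and therefore no run URL.
import os

import requests

GITHUB_TOKEN = os.environ.get("GITHUB_TOKEN")


def find_recent_dispatch_runs(workflow: str, branch: str, limit: int = 5) -> list:
    """Return html_url links for the newest workflow_dispatch runs of `workflow` on `branch`."""
    url = f"https://api.github.com/repos/pytorch/executorch/actions/workflows/{workflow}/runs"
    headers = {
        "Accept": "application/vnd.github.v3+json",
        "Authorization": f"Bearer {GITHUB_TOKEN}",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    # The runs endpoint supports server-side filtering by branch and event type.
    params = {"branch": branch, "event": "workflow_dispatch", "per_page": limit}
    resp = requests.get(url, headers=headers, params=params)
    resp.raise_for_status()
    # Runs are returned newest first, so the dispatched run should be at the top.
    return [run["html_url"] for run in resp.json().get("workflow_runs", [])]


if __name__ == "__main__":
    # e.g. after `python benchmark.py --android --branch main --models llama`
    for link in find_recent_dispatch_runs("android-perf.yml", "main"):
        print(link)
```

Because GitHub can take a few seconds to materialize a dispatched run, polling this listing briefly after the `POST` and taking the newest entry is usually enough to print a direct run URL instead of the generic workflows page.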