trunk #12508
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: trunk | |
on: | |
push: | |
branches: | |
- main | |
- release/* | |
tags: | |
- ciflow/trunk/* | |
pull_request: | |
paths: | |
- .ci/docker/ci_commit_pins/pytorch.txt | |
- .ci/scripts/** | |
workflow_dispatch: | |
concurrency: | |
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ github.event_name == 'workflow_dispatch' }}-${{ github.event_name == 'schedule' }} | |
cancel-in-progress: true | |
jobs: | |
test-models-macos: | |
name: test-models-macos | |
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main | |
strategy: | |
matrix: | |
# Mac runners are expensive and limited, and non reliable. | |
# Do some basic testing for macos jobs, and rely mostly on | |
# test-models-linux-aarch64 job instead. | |
model: [emformer_join, ic4, llama2, mobilebert, mv3, resnet50, vit, w2l] | |
backend: [xnnpack-quantization-delegation] | |
include: | |
- model: efficient_sam | |
backend: portable | |
- model: llama | |
backend: portable | |
- model: llama3_2_vision_encoder | |
backend: portable | |
- model: mv3 | |
backend: portable | |
fail-fast: false | |
with: | |
runner: macos-m1-stable | |
python-version: '3.11' | |
submodules: 'recursive' | |
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
timeout: 90 | |
script: | | |
MODEL_NAME=${{ matrix.model }} | |
BUILD_TOOL=cmake | |
BACKEND=${{ matrix.backend }} | |
bash .ci/scripts/setup-conda.sh | |
# Setup MacOS dependencies as there is no Docker support on MacOS atm | |
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" | |
# Build and test executorch | |
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" | |
test-models-linux-aarch64: | |
name: test-models-linux-aarch64 | |
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main | |
permissions: | |
id-token: write | |
contents: read | |
strategy: | |
matrix: | |
model: [linear, add, add_mul, ic3, ic4, mv2, mv3, resnet18, resnet50, vit, w2l, mobilebert, emformer_join, emformer_transcribe] | |
backend: [portable, xnnpack-quantization-delegation] | |
runner: [linux.arm64.2xlarge] | |
include: | |
- model: lstm | |
backend: portable | |
runner: linux.arm64.2xlarge | |
- model: mul | |
backend: portable | |
runner: linux.arm64.2xlarge | |
- model: softmax | |
backend: portable | |
runner: linux.arm64.2xlarge | |
- model: phi_4_mini | |
backend: portable | |
runner: linux.arm64.m7g.4xlarge | |
- model: qwen2_5 | |
backend: portable | |
runner: linux.arm64.2xlarge | |
- model: llama3_2_vision_encoder | |
backend: portable | |
runner: linux.arm64.2xlarge | |
fail-fast: false | |
with: | |
runner: ${{ matrix.runner }} | |
docker-image: executorch-ubuntu-22.04-gcc11-aarch64 | |
submodules: 'recursive' | |
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
timeout: 90 | |
script: | | |
# The generic Linux job chooses to use base env, not the one setup by the image | |
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") | |
conda activate "${CONDA_ENV}" | |
MODEL_NAME=${{ matrix.model }} | |
BUILD_TOOL="cmake" | |
BACKEND=${{ matrix.backend }} | |
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" | |
# Build and test ExecuTorch | |
PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "${BACKEND}" | |
test-custom-ops-macos: | |
name: test-custom-ops-macos | |
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main | |
strategy: | |
matrix: | |
include: | |
- build-tool: cmake | |
fail-fast: false | |
with: | |
runner: macos-m1-stable | |
python-version: '3.11' | |
submodules: 'recursive' | |
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
script: | | |
BUILD_TOOL=${{ matrix.build-tool }} | |
bash .ci/scripts/setup-conda.sh | |
# Setup MacOS dependencies as there is no Docker support on MacOS atm | |
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" | |
# Build and test custom ops | |
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/portable/custom_ops/test_custom_ops.sh "${BUILD_TOOL}" | |
test-selective-build-macos: | |
name: test-selective-build-macos | |
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main | |
strategy: | |
matrix: | |
include: | |
- build-tool: cmake | |
fail-fast: false | |
with: | |
runner: macos-m1-stable | |
python-version: '3.11' | |
submodules: 'recursive' | |
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
script: | | |
BUILD_TOOL=${{ matrix.build-tool }} | |
bash .ci/scripts/setup-conda.sh | |
# Setup MacOS dependencies as there is no Docker support on MacOS atm | |
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" | |
# Build and test selective build | |
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/selective_build/test_selective_build.sh "${BUILD_TOOL}" | |
test-demo-backend-delegation: | |
name: test-demo-backend-delegation | |
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main | |
permissions: | |
id-token: write | |
contents: read | |
strategy: | |
matrix: | |
include: | |
- build-tool: buck2 | |
- build-tool: cmake | |
fail-fast: false | |
with: | |
runner: linux.2xlarge | |
docker-image: executorch-ubuntu-22.04-clang12 | |
submodules: 'recursive' | |
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
script: | | |
# The generic Linux job chooses to use base env, not the one setup by the image | |
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") | |
conda activate "${CONDA_ENV}" | |
BUILD_TOOL=${{ matrix.build-tool }} | |
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" | |
# Test selective build | |
PYTHON_EXECUTABLE=python bash examples/portable/scripts/test_demo_backend_delegation.sh "${BUILD_TOOL}" | |
test-arm-backend: | |
name: test-arm-backend | |
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main | |
permissions: | |
id-token: write | |
contents: read | |
strategy: | |
matrix: | |
include: | |
- test_arm_baremetal: test_pytest_ops_ethosu_fvp | |
- test_arm_baremetal: test_pytest_models_ethosu_fvp | |
- test_arm_baremetal: test_run_ethosu_fvp | |
- test_arm_baremetal: test_models_tosa | |
- test_arm_baremetal: test_models_ethos-u55 | |
- test_arm_baremetal: test_models_ethos-u85 | |
fail-fast: false | |
with: | |
runner: linux.2xlarge.memory | |
docker-image: executorch-ubuntu-22.04-arm-sdk | |
submodules: 'recursive' | |
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
timeout: 90 | |
script: | | |
# The generic Linux job chooses to use base env, not the one setup by the image | |
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") | |
conda activate "${CONDA_ENV}" | |
source .ci/scripts/utils.sh | |
install_executorch "--use-pt-pinned-commit" | |
.ci/scripts/setup-arm-baremetal-tools.sh | |
# Increase number of files user can monitor to bypass buck failures. | |
# Hopefully this is high enough for this setup. | |
sudo sysctl fs.inotify.max_user_watches=1048576 # 1024 * 1024 | |
ARM_TEST=${{ matrix.test_arm_baremetal }} | |
# Test test_arm_baremetal.sh with test | |
backends/arm/test/test_arm_baremetal.sh "${ARM_TEST}" | |
test-arm-cortex-m-size-test: | |
name: test-arm-cortex-m-size-test | |
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main | |
permissions: | |
id-token: write | |
contents: read | |
with: | |
runner: linux.2xlarge | |
docker-image: executorch-ubuntu-22.04-arm-sdk | |
submodules: 'recursive' | |
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
timeout: 90 | |
script: | | |
# The generic Linux job chooses to use base env, not the one setup by the image | |
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") | |
conda activate "${CONDA_ENV}" | |
source .ci/scripts/utils.sh | |
install_executorch "--use-pt-pinned-commit" | |
.ci/scripts/setup-arm-baremetal-tools.sh | |
source examples/arm/ethos-u-scratch/setup_path.sh | |
# User baremetal toolchain | |
arm-none-eabi-c++ --version | |
toolchain_cmake=examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake | |
toolchain_cmake=$(realpath ${toolchain_cmake}) | |
# Build and test size test | |
bash test/build_size_test.sh "-DCMAKE_TOOLCHAIN_FILE=${toolchain_cmake} -DEXECUTORCH_BUILD_ARM_BAREMETAL=ON" | |
elf="cmake-out/test/size_test" | |
# Dump basic info | |
ls -al ${elf} | |
arm-none-eabi-size ${elf} | |
# Dump symbols | |
python .github/scripts/run_nm.py -e ${elf} | |
python .github/scripts/run_nm.py -e ${elf} -f "executorch" -p "arm-none-eabi-" | |
python .github/scripts/run_nm.py -e ${elf} -f "executorch_text" -p "arm-none-eabi-" | |
# Add basic guard - TODO: refine this! | |
arm-none-eabi-strip ${elf} | |
output=$(ls -la ${elf}) | |
arr=($output) | |
size=${arr[4]} | |
threshold="103268" # ~100KiB | |
echo "size: $size, threshold: $threshold" | |
if [[ "$size" -le "$threshold" ]]; then | |
echo "Success $size <= $threshold" | |
else | |
echo "Fail $size > $threshold" | |
exit 1 | |
fi | |
test-coreml-delegate: | |
name: test-coreml-delegate | |
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main | |
with: | |
runner: macos-latest-xlarge | |
python-version: '3.11' | |
submodules: 'recursive' | |
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
timeout: 90 | |
script: | | |
BUILD_TOOL=cmake | |
bash .ci/scripts/setup-conda.sh | |
# Setup MacOS dependencies as there is no Docker support on MacOS atm | |
GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" | |
# Build and test coreml delegate | |
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/build_all.sh | |
test-static-llama-ane: | |
name: test-static-llama-ane | |
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main | |
with: | |
runner: macos-m1-stable | |
python-version: '3.11' | |
submodules: 'recursive' | |
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
script: | | |
set -eux | |
bash .ci/scripts/setup-conda.sh | |
eval "$(conda shell.bash hook)" | |
# Install requirements | |
${CONDA_RUN} sh install_requirements.sh | |
${CONDA_RUN} sh backends/apple/coreml/scripts/install_requirements.sh | |
${CONDA_RUN} python install_executorch.py | |
${CONDA_RUN} sh examples/models/llama/install_requirements.sh | |
# Test ANE llama | |
${CONDA_RUN} sh .ci/scripts/test_ane_static_llama.sh | |
test-llama-torchao-lowbit: | |
name: test-llama-torchao-lowbit | |
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main | |
with: | |
runner: macos-m1-stable | |
python-version: '3.11' | |
submodules: 'recursive' | |
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
script: | | |
set -eux | |
bash .ci/scripts/setup-conda.sh | |
eval "$(conda shell.bash hook)" | |
# Install requirements | |
${CONDA_RUN} python install_executorch.py | |
${CONDA_RUN} sh examples/models/llama/install_requirements.sh | |
# Run test | |
${CONDA_RUN} sh .ci/scripts/test_llama_torchao_lowbit.sh | |
test-llama-runner-linux: | |
# Test Both linux x86 and linux aarch64 | |
name: test-llama-runner-linux | |
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main | |
permissions: | |
id-token: write | |
contents: read | |
strategy: | |
matrix: | |
dtype: [fp32] | |
mode: [portable, xnnpack+custom] | |
runner: [linux.2xlarge, linux.arm64.2xlarge] | |
docker-image: [executorch-ubuntu-22.04-clang12, executorch-ubuntu-22.04-gcc11-aarch64] | |
include: | |
- dtype: bf16 | |
mode: portable | |
runner: linux.2xlarge | |
docker-image: executorch-ubuntu-22.04-clang12 | |
- dtype: bf16 | |
mode: portable | |
runner: linux.arm64.2xlarge | |
docker-image: executorch-ubuntu-22.04-gcc11-aarch64 | |
- dtype: bf16 | |
mode: custom | |
runner: linux.arm64.2xlarge | |
docker-image: executorch-ubuntu-22.04-gcc11-aarch64 | |
# Excluding specific runner + docker image combinations that don't make sense: | |
# - Excluding the ARM64 gcc image on the x86 runner (linux.2xlarge) | |
# - Excluding the x86 clang image on the ARM64 runner (linux.arm64.2xlarge) | |
exclude: | |
- runner: linux.2xlarge | |
docker-image: executorch-ubuntu-22.04-gcc11-aarch64 | |
- runner: linux.arm64.2xlarge | |
docker-image: executorch-ubuntu-22.04-clang12 | |
fail-fast: false | |
with: | |
runner: ${{ matrix.runner }} | |
docker-image: ${{ matrix.docker-image }} | |
submodules: 'recursive' | |
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
timeout: 900 | |
script: | | |
# The generic Linux job chooses to use base env, not the one setup by the image | |
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") | |
conda activate "${CONDA_ENV}" | |
DTYPE=${{ matrix.dtype }} | |
BUILD_TOOL="cmake" | |
MODE=${{ matrix.mode }} | |
ARTIFACTS_DIR_NAME="artifacts-to-be-uploaded/${DTYPE}-${MODE}" | |
ARTIFACTS_DIR_NAME="${ARTIFACTS_DIR_NAME/+/-}" | |
# Setup executorch | |
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" | |
# Install requirements for export_llama | |
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh | |
# Test llama2 | |
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -dtype "${DTYPE}" -mode "${MODE}" -upload "${ARTIFACTS_DIR_NAME}" | |
test-llama-runner-macos: | |
name: test-llama-runner-mac | |
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main | |
strategy: | |
matrix: | |
dtype: [fp32] | |
mode: [mps, coreml, xnnpack+custom+quantize_kv] | |
fail-fast: false | |
with: | |
runner: macos-m1-stable | |
python-version: '3.11' | |
submodules: 'recursive' | |
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
timeout: 900 | |
script: | | |
DTYPE=${{ matrix.dtype }} | |
MODE=${{ matrix.mode }} | |
bash .ci/scripts/setup-conda.sh | |
# Setup executorch | |
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool cmake | |
if [[ "${MODE}" == "coreml" ]]; then | |
# Install coreml delegate | |
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh | |
echo "Finishing installing coreml." | |
fi | |
# Install requirements for export_llama | |
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash examples/models/llama/install_requirements.sh | |
# Test llama2 | |
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llama.sh -model stories110M -build_tool cmake -dtype "${DTYPE}" -mode "${MODE}" | |
# # TODO(jackzhxng): Runner consistently runs out of memory before test finishes. Try to find a more powerful runner. | |
# test-llava-runner-macos: | |
# name: test-llava-runner-macos | |
# uses: pytorch/test-infra/.github/workflows/macos_job.yml@main | |
# strategy: | |
# fail-fast: false | |
# with: | |
# runner: macos-14-xlarge | |
# python-version: '3.11' | |
# submodules: 'recursive' | |
# ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
# timeout: 900 | |
# script: | | |
# BUILD_TOOL=cmake | |
# bash .ci/scripts/setup-conda.sh | |
# # Setup MacOS dependencies as there is no Docker support on MacOS atm | |
# GITHUB_RUNNER=1 PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" | |
# # install Llava requirements | |
# ${CONDA_RUN} bash examples/models/llama/install_requirements.sh | |
# ${CONDA_RUN} bash examples/models/llava/install_requirements.sh | |
# # run python unittest | |
# ${CONDA_RUN} python -m unittest examples.models.llava.test.test_llava | |
# # run e2e (export, tokenizer and runner) | |
# PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_llava.sh | |
test-qnn-model: | |
name: test-qnn-model | |
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main | |
permissions: | |
id-token: write | |
contents: read | |
strategy: | |
matrix: | |
dtype: [fp32] | |
model: [dl3, mv3, mv2, ic4, ic3, vit, mb, w2l] | |
fail-fast: false | |
with: | |
runner: linux.2xlarge | |
docker-image: executorch-ubuntu-22.04-qnn-sdk | |
submodules: 'recursive' | |
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
timeout: 900 | |
script: | | |
# The generic Linux job chooses to use base env, not the one setup by the image | |
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") | |
conda activate "${CONDA_ENV}" | |
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake | |
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh | |
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh | |
PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn" | |
test-apple-model: | |
name: test-apple-model | |
uses: pytorch/test-infra/.github/workflows/macos_job.yml@main | |
strategy: | |
fail-fast: false | |
with: | |
runner: macos-m1-stable | |
python-version: '3.11' | |
submodules: 'recursive' | |
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
timeout: 90 | |
script: | | |
BUILD_TOOL=cmake | |
bash .ci/scripts/setup-conda.sh | |
# Setup MacOS dependencies as there is no Docker support on MacOS atm | |
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/setup-macos.sh --build-tool "${BUILD_TOOL}" | |
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash backends/apple/coreml/scripts/install_requirements.sh | |
echo "Finishing installing coreml." | |
# Build and test coreml model | |
MODELS=(mv3 ic4 resnet50 edsr mobilebert w2l) | |
for MODEL_NAME in "${MODELS[@]}"; do | |
echo "::group::Exporting coreml model: $MODEL_NAME" | |
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "coreml" | |
echo "::endgroup::" | |
echo "::group::Exporting mps model: $MODEL_NAME" | |
PYTHON_EXECUTABLE=python ${CONDA_RUN} bash .ci/scripts/test_model.sh "${MODEL_NAME}" "${BUILD_TOOL}" "mps" | |
echo "::endgroup::" | |
done | |
test-huggingface-transformers: | |
# NB: Don't run this on fork PRs because they won't have access to the secret and would fail anyway | |
if: ${{ !github.event.pull_request.head.repo.fork }} | |
name: test-huggingface-transformers | |
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main | |
permissions: | |
id-token: write | |
contents: read | |
secrets: inherit | |
strategy: | |
matrix: | |
hf_model_id: [ | |
google/gemma-3-1b-it, | |
Qwen/Qwen3-0.6B, | |
HuggingFaceTB/SmolLM2-135M, | |
meta-llama/Llama-3.2-1B, | |
allenai/OLMo-1B-hf, | |
] | |
fail-fast: false | |
with: | |
secrets-env: EXECUTORCH_HF_TOKEN | |
runner: linux.2xlarge.memory | |
docker-image: executorch-ubuntu-22.04-clang12 | |
submodules: 'recursive' | |
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
timeout: 90 | |
upload-artifact: profiling-artifacts-${{ strategy.job-index }} | |
script: | | |
echo "::group::Set up ExecuTorch" | |
# The generic Linux job chooses to use base env, not the one setup by the image | |
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") | |
conda activate "${CONDA_ENV}" | |
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake | |
# Build executor_runner with ETdump enabled | |
PYTHON_EXECUTABLE=python cmake -DPYTHON_EXECUTABLE=python \ | |
-DCMAKE_INSTALL_PREFIX=cmake-out \ | |
-DEXECUTORCH_ENABLE_LOGGING=1 \ | |
-DCMAKE_BUILD_TYPE=Release \ | |
-DEXECUTORCH_BUILD_EXTENSION_DATA_LOADER=ON \ | |
-DEXECUTORCH_BUILD_EXTENSION_FLAT_TENSOR=ON \ | |
-DEXECUTORCH_BUILD_EXTENSION_MODULE=ON \ | |
-DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON \ | |
-DEXECUTORCH_BUILD_XNNPACK=ON \ | |
-DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON \ | |
-DEXECUTORCH_BUILD_KERNELS_OPTIMIZED=ON \ | |
-DEXECUTORCH_BUILD_KERNELS_CUSTOM=ON \ | |
-DEXECUTORCH_BUILD_DEVTOOLS=ON \ | |
-DEXECUTORCH_ENABLE_EVENT_TRACER=ON \ | |
-Bcmake-out . | |
cmake --build cmake-out -j16 --target install --config Release | |
echo "::endgroup::" | |
echo "::group::Set up Hugging Face" | |
pip install -U "huggingface_hub[cli]" | |
huggingface-cli login --token $SECRET_EXECUTORCH_HF_TOKEN | |
git clone https://github.com/huggingface/optimum-executorch | |
pushd optimum-executorch | |
# There is no release yet, for CI stability, always test from the same commit on main | |
git checkout 1c653dc49812fc431a22312c7295d97005d22e12 | |
pip install .[tests] | |
pip install transformers==4.52.4 | |
popd | |
pip list | |
echo "::endgroup::" | |
echo "::group::Export to ExecuTorch" | |
# Pass matrix variable as environment variable | |
export MODEL_ID="${{ matrix.hf_model_id }}" | |
export OUTPUT_DIR="$(pwd)/${MODEL_ID}_custom_sdpa_kv_cache_8da4w" | |
pushd optimum-executorch | |
ARGS=( | |
"--model" "${MODEL_ID}" | |
"--task" "text-generation" | |
"--recipe" "xnnpack" | |
"--use_custom_sdpa" | |
"--qlinear" | |
"--qembedding" | |
"--output_dir" "${OUTPUT_DIR}" | |
) | |
# Add conditional arguments based on model | |
case "${MODEL_ID}" in | |
*"google/gemma-3-1b-it"*) | |
echo "--use_custom_kv_cache can not be used for HybridCache" | |
;; | |
*) | |
ARGS+=("--use_custom_kv_cache") | |
;; | |
esac | |
optimum-cli export executorch "${ARGS[@]}" | |
ls -FlAGhp ${OUTPUT_DIR} | |
popd | |
echo "::endgroup::" | |
echo "::group::Inference using python API" | |
pushd optimum-executorch | |
python -c " | |
import os | |
from optimum.executorch import ExecuTorchModelForCausalLM | |
from transformers import AutoTokenizer | |
model_id = os.getenv('MODEL_ID') | |
pte_dir = os.getenv('OUTPUT_DIR') | |
print(f'Loading model {model_id} from {pte_dir}.') | |
model = ExecuTorchModelForCausalLM.from_pretrained(pte_dir) | |
generated_text = model.text_generation( | |
tokenizer=AutoTokenizer.from_pretrained(model_id), | |
prompt='Simply put, the theory of relativity states that', | |
max_seq_len=64 | |
) | |
print(generated_text) | |
" | |
popd | |
echo "::endgroup::" | |
echo "::group::Inference using executor_runner with ETDump" | |
./cmake-out/executor_runner \ | |
--model_path ${OUTPUT_DIR}/model.pte \ | |
--etdump_path ${OUTPUT_DIR}/etdump.etdp | |
export TSV_PATH=artifacts-to-be-uploaded/${MODEL_ID}_op_prof.tsv | |
mkdir -p $(dirname "$TSV_PATH") | |
python3 -m devtools.inspector.inspector_cli \ | |
--etdump_path ${OUTPUT_DIR}/etdump.etdp \ | |
--tsv_path ${TSV_PATH} | |
echo "::endgroup::" | |
test-llama-runner-qnn-linux: | |
name: test-llama-runner-qnn-linux | |
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main | |
permissions: | |
id-token: write | |
contents: read | |
strategy: | |
matrix: | |
dtype: [fp32] | |
pt2e_quantize: [qnn_16a16w, qnn_8a8w] | |
mode: [qnn] | |
fail-fast: false | |
with: | |
runner: linux.2xlarge | |
docker-image: executorch-ubuntu-22.04-qnn-sdk | |
submodules: 'recursive' | |
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }} | |
timeout: 900 | |
script: | | |
# The generic Linux job chooses to use base env, not the one setup by the image | |
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]") | |
conda activate "${CONDA_ENV}" | |
BUILD_TOOL="cmake" | |
DTYPE=${{ matrix.dtype }} | |
MODE=${{ matrix.mode }} | |
PT2E_QUANTIZE=${{ matrix.pt2e_quantize }} | |
./install_requirements.sh --use-pt-pinned-commit | |
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh | |
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh | |
# Setup executorch | |
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool "${BUILD_TOOL}" | |
# Install requirements for export_llama | |
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh | |
# Test llama2 | |
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}" | |
unittest-release: | |
uses: ./.github/workflows/_unittest.yml | |
permissions: | |
id-token: write | |
contents: read | |
with: | |
build-mode: Release | |
build-tool: cmake | |
docker-image: executorch-ubuntu-22.04-clang12 |