Run the TorchRec CPU test suite against the FBGEMM_GPU wheel built in CI

* fbgemm_gpu_install.bash: list the wheel file before installing it (skipped
  when no wheel path was passed, so the usage message is still reached).
* test_torchrec.bash: stop creating its own Miniconda/Conda environment; run
  inside an existing environment selected with -b, import-check torch,
  torch.distributed and fbgemm_gpu, build and import TorchRec, then run the
  TorchRec pytest suite minus the tests listed in
  .github/scripts/tests_to_skip.txt.
* fbgemm_gpu_ci_cpu.yml: add the torchrec_cpu_tests job, which installs the
  wheel artifact and then invokes test_torchrec.bash from a torchrec checkout.

NOTE(review): this patch was reconstructed from a whitespace-collapsed copy.
Line breaks and leading indentation were re-derived (the old/new line counts
of every hunk match its header), but runs of spaces inside a line could not
be recovered, and the post-image blob ids on the `index` lines predate the
edits made to added lines here. Verify against the tree before applying.

diff --git a/.github/scripts/fbgemm_gpu_install.bash b/.github/scripts/fbgemm_gpu_install.bash
index b85af9214a..f6b525eeba 100644
--- a/.github/scripts/fbgemm_gpu_install.bash
+++ b/.github/scripts/fbgemm_gpu_install.bash
@@ -176,6 +176,7 @@ __fbgemm_gpu_post_install_checks () {
 install_fbgemm_gpu_wheel () {
   local env_name="$1"
   local wheel_path="$2"
+  if [ -n "${wheel_path}" ]; then ls -ls "${wheel_path}"; fi
   if [ "$wheel_path" == "" ]; then
     echo "Usage: ${FUNCNAME[0]} ENV_NAME WHEEL_NAME"
     echo "Example(s):"
diff --git a/.github/scripts/test_torchrec.bash b/.github/scripts/test_torchrec.bash
old mode 100644
new mode 100755
index 7d406ed409..0e6d0c01d2
--- a/.github/scripts/test_torchrec.bash
+++ b/.github/scripts/test_torchrec.bash
@@ -8,20 +8,15 @@
 # Exit on failure
 set -e
 
-# shellcheck source=/dev/null
-. "$(dirname "$(realpath -s "$0")")/setup_env.bash"
-
 verbose=0
-env_name=test_binary
 torchrec_package_name=""
 python_version=""
 cuda_version="x"
-fbgemm_wheel_path="x"
 miniconda_prefix="${HOME}/miniconda"
 
 usage () {
   # shellcheck disable=SC2086
-  echo "Usage: bash $(basename ${BASH_SOURCE[0]}) -o PACKAGE_NAME -p PYTHON_VERSION -P PYTORCH_CHANNEL_NAME -c CUDA_VERSION -w FBGEMM_WHEEL_PATH [-m MINICONDA_PREFIX] [-v] [-h]"
+  echo "Usage: bash $(basename ${BASH_SOURCE[0]}) -o PACKAGE_NAME -p PYTHON_VERSION -P PYTORCH_CHANNEL_NAME -c CUDA_VERSION -b BUILD_ENV [-m MINICONDA_PREFIX] [-v] [-h]"
   echo "-v : verbose"
   echo "-h : help"
   echo "PACKAGE_NAME : output package name of TorchRec (e.g., torchrec_nightly)"
@@ -30,14 +25,14 @@ usage () {
   echo "PYTHON_VERSION : Python version (e.g., 3.10)"
   echo "PYTORCH_CHANNEL_NAME: PyTorch's channel name (e.g., pytorch-nightly, pytorch-test (=pre-release), pytorch (=stable release))"
   echo "CUDA_VERSION : PyTorch's CUDA version (e.g., 12.4)"
-  echo "FBGEMM_WHEEL_PATH : path to FBGEMM_GPU's wheel file"
+  echo "BUILD_ENV : name of the existing Conda environment to run in (FBGEMM_GPU must already be installed there)"
   echo "MINICONDA_PREFIX : path to install Miniconda (default: \$HOME/miniconda)"
-  echo "Example: Python 3.10 + PyTorch nightly (CUDA 12.4), install miniconda at \$HOME/miniconda, using dist/fbgemm_gpu_nightly.whl"
+  echo "Example: Python 3.10 + PyTorch nightly (CUDA 12.4), running in the existing Conda environment build_binary"
   # shellcheck disable=SC2086
-  echo " bash $(basename ${BASH_SOURCE[0]}) -v -o torchrec_nightly -p 3.10 -P pytorch-nightly -c 11.7 -w dist/fbgemm_gpu_nightly.whl"
+  echo " bash $(basename ${BASH_SOURCE[0]}) -v -o torchrec_nightly -p 3.10 -P pytorch-nightly -c 12.4 -b build_binary"
 }
 
-while getopts vho:p:P:c:m:w: flag
+while getopts vho:p:P:c:m:b: flag
 do
   case "$flag" in
     v) verbose="1";;
@@ -46,7 +41,7 @@ do
     P) pytorch_channel_name="${OPTARG}";;
     c) cuda_version="${OPTARG}";;
     m) miniconda_prefix="${OPTARG}";;
-    w) fbgemm_wheel_path="${OPTARG}";;
+    b) build_env="${OPTARG}";;
     h) usage
        exit 0;;
     *) usage
@@ -54,10 +49,12 @@ do
   esac
 done
 
-if [ "$torchrec_package_name" == "" ] || [ "$python_version" == "" ] || [ "$cuda_version" == "x" ] || [ "$miniconda_prefix" == "" ] || [ "$pytorch_channel_name" == "" ] || [ "$fbgemm_wheel_path" == "" ]; then
+if [ "$torchrec_package_name" == "" ] || [ "$python_version" == "" ] || [ "$cuda_version" == "x" ] || [ "$miniconda_prefix" == "" ] || [ "$pytorch_channel_name" == "" ] || [ "$build_env" == "" ]; then
   usage
   exit 1
 fi
+
+env_name=$build_env
 python_tag="${python_version//\./}"
 
 if [ "$verbose" == "1" ]; then
@@ -74,44 +71,60 @@ if [ ! -d "torchrec" ]; then
   exit 1
 fi
 
-################################################################################
-echo "## 1. Set up Miniconda"
-################################################################################
-
-setup_miniconda "$miniconda_prefix"
+# Install PyTorch
+conda run -n "$env_name" pip install torch --index-url https://download.pytorch.org/whl/nightly/cpu
+conda run -n "$env_name" python -c "import torch"
 
-################################################################################
-echo "## 2. Create Conda environment"
-################################################################################
+# Import torch.distributed
+conda run -n "$env_name" python -c "import torch.distributed"
 
-if [ "${cuda_version}" != "" ]; then
-  pytorch_variant="cuda ${cuda_version}"
-else
-  pytorch_variant="cpu"
-fi
+# Import fbgemm_gpu
 
-# shellcheck disable=SC2086
-test_setup_conda_environment "$env_name" gcc "$python_version" pip "$pytorch_channel_name" $pytorch_variant
+echo "[CHECK] Verifying that FBGEMM-GPU is importable in Conda environment: ${env_name} ..."
+# cd ../fbgemm_gpu
+# conda run --no-capture-output -n "$env_name" python -m pip install -r requirements.txt
+# conda run -n "$env_name" python setup.py clean
+# conda run -n "$env_name" python setup.py bdist_wheel --python-tag="py${python_tag}"
+# conda run -n "$env_name" pip install fbgemm-gpu ../*.whl
+conda run -n "$env_name" python -c "import fbgemm_gpu"
+# cd ../torchrec
 
-# Comment out FBGEMM_GPU since we will install it from "$fbgemm_wheel_path"
+################################################################################
+echo "## 1. Install TorchRec Requirements"
+################################################################################
+# Comment out FBGEMM_GPU since we should pre-install it from the downloaded wheel file
 sed -i 's/fbgemm-gpu/#fbgemm-gpu/g' requirements.txt
 conda run -n "$env_name" python -m pip install -r requirements.txt
-# Install FBGEMM_GPU from a local wheel file.
-conda run -n "$env_name" python -m pip install "$fbgemm_wheel_path"
-conda run -n "$env_name" python -c "import fbgemm_gpu"
+
 
 ################################################################################
-echo "## 3. Build TorchRec"
+echo "## 2. Build TorchRec"
 ################################################################################
 
 rm -rf dist
-conda run -n "$env_name" python setup.py bdist_wheel --package_name "${torchrec_package_name}" --python-tag="py${python_tag}"
+conda run -n "$env_name" python setup.py bdist_wheel --python-tag="py${python_tag}"
 
 ################################################################################
-echo "## 4. Import TorchRec"
+echo "## 3. Import TorchRec"
 ################################################################################
 
-conda run -n "$env_name" python -m pip install dist/"${torchrec_package_name}"*.whl
 conda run -n "$env_name" python -c "import torchrec"
 
 echo "Test succeeded"
+
+################################################################################
+echo "## 4. Run TorchRec tests"
+################################################################################
+
+conda install -n "$env_name" -y pytest
+# Read the list of tests to skip from a file, ignoring empty lines and comments
+skip_expression=$(awk '!/^($|#)/ {printf " and not %s", $0}' ./.github/scripts/tests_to_skip.txt)
+# Check if skip_expression is effectively empty
+if [ -z "$skip_expression" ]; then
+  skip_expression=""
+else
+  skip_expression=${skip_expression:5} # Remove the leading " and "
+fi
+conda run -n "$env_name" \
+  python -m pytest torchrec -v -s -W ignore::pytest.PytestCollectionWarning --continue-on-collection-errors \
+  --ignore-glob=**/test_utils/ -k "$skip_expression"
diff --git a/.github/workflows/fbgemm_gpu_ci_cpu.yml b/.github/workflows/fbgemm_gpu_ci_cpu.yml
index 35c7add775..58fd4c306c 100644
--- a/.github/workflows/fbgemm_gpu_ci_cpu.yml
+++ b/.github/workflows/fbgemm_gpu_ci_cpu.yml
@@ -200,3 +200,80 @@ jobs:
       env:
         PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
       run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl
+
+
+  # Run torchrec CPU tests
+  torchrec_cpu_tests:
+    if: ${{ github.repository_owner == 'pytorch' }}
+    runs-on: ${{ matrix.host-machine.instance }}
+    container:
+      image: amazonlinux:2023
+      options: --user root
+    defaults:
+      run:
+        shell: bash
+    env:
+      PRELUDE: .github/scripts/setup_env.bash
+      BUILD_ENV: build_binary
+      BUILD_TARGET: ${{ matrix.build-target }}
+      BUILD_VARIANT: cpu
+    strategy:
+      fail-fast: false
+      matrix:
+        host-machine: [
+          # { arch: arm, instance: "linux.arm64.2xlarge", timeout: 30 },
+          { arch: x86, instance: "linux.4xlarge", timeout: 20 },
+        ]
+        build-target: [ "default" ]
+        python-version: [ "3.9", "3.10", "3.11", "3.12", "3.13" ]
+        compiler: [ "gcc", "clang" ]
+    needs: build_artifact
+
+    steps:
+    - name: Setup Build Container
+      run: yum update -y; yum install -y binutils findutils git pciutils sudo wget which
+
+    - name: Checkout the Repository
+      uses: actions/checkout@v4
+
+    - name: Display System Info
+      run: . $PRELUDE; print_system_info; print_ec2_info
+
+    - name: Display GPU Info
+      run: . $PRELUDE; print_gpu_info
+
+    - name: Setup Miniconda
+      run: . $PRELUDE; setup_miniconda $HOME/miniconda
+
+    - name: Create Conda Environment
+      run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
+
+    - name: Install C/C++ Compilers for Updated LIBGCC
+      run: . $PRELUDE; install_cxx_compiler $BUILD_ENV ${{ matrix.compiler }}
+
+    - name: Install PyTorch-CPU Nightly
+      run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ (github.event_name == 'workflow_dispatch' && github.event.inputs.pytorch_channel_version) || 'nightly' }} cpu
+
+    - name: Collect PyTorch Environment Info
+      if: ${{ success() || failure() }}
+      run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi
+
+    - name: Download Wheel Artifact from GHA
+      uses: actions/download-artifact@v4
+      with:
+        name: fbgemm_${{ matrix.build-target }}_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_cpu.whl
+
+    - name: Prepare FBGEMM_GPU Build
+      run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
+
+    - name: Install FBGEMM_GPU Wheel
+      run: . $PRELUDE; install_fbgemm_gpu_wheel $BUILD_ENV *.whl
+
+    - name: Clone torchrec
+      uses: actions/checkout@v4
+      with:
+        repository: pytorch/torchrec
+        path: torchrec
+
+    - name: Run torchrec CPU tests
+      run: . $PRELUDE; cd torchrec; ../.github/scripts/test_torchrec.bash -o torchrec_nightly -p ${{ matrix.python-version }} -P pytorch-test -c "" -b $BUILD_ENV -v