Skip to content

Commit 54556c6

Browse files
committed
Use Nova workflow to host all published wheel files at PyTorch site (#2016)
Summary: Pull Request resolved: #2016 To alleviate CUDA version mismatch issues, we aim to publish fbgemm-gpu-nightly with different CUDA versions. This diff uses Nova workflow and will host the published wheels at PyTorch site instead. #1947 Reviewed By: q10 Differential Revision: D49258503 fbshipit-source-id: a06d095b0c03df62d8cea8fb8db1b5018c9a9dd7
1 parent 2cbcb6e commit 54556c6

File tree

8 files changed

+213
-17
lines changed

8 files changed

+213
-17
lines changed

.github/scripts/fbgemm_gpu_build.bash

Lines changed: 21 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ prepare_fbgemm_gpu_build () {
3939
git submodule update --init --recursive
4040

4141
echo "[BUILD] Installing other build dependencies ..."
42-
(exec_with_retries conda run -n "${env_name}" python -m pip install -r requirements.txt) || return 1
42+
(exec_with_retries conda run --no-capture-output -n "${env_name}" python -m pip install -r requirements.txt) || return 1
4343

4444
(test_python_import_package "${env_name}" numpy) || return 1
4545
(test_python_import_package "${env_name}" skbuild) || return 1
@@ -122,7 +122,7 @@ __configure_fbgemm_gpu_build_cuda () {
122122
# Build only for CUDA compute capabilities 7.0 and 8.0 (i.e. V100 and A100) because of 100 MB binary size limits from PyPI.
123123
echo "[BUILD] Setting CUDA build args ..."
124124
# shellcheck disable=SC2155
125-
local nvml_lib_path=$(conda run -n "${env_name}" printenv NVML_LIB_PATH)
125+
local nvml_lib_path=$(conda run --no-capture-output -n "${env_name}" printenv NVML_LIB_PATH)
126126
build_args=(
127127
--nvml_lib_path="${nvml_lib_path}"
128128
-DTORCH_CUDA_ARCH_LIST="'${arch_list}'"
@@ -193,15 +193,15 @@ __build_fbgemm_gpu_common_pre_steps () {
193193

194194
# Extract the Python tag
195195
# shellcheck disable=SC2207
196-
python_version=($(conda run -n "${env_name}" python --version))
196+
python_version=($(conda run --no-capture-output -n "${env_name}" python --version))
197197
# shellcheck disable=SC2206
198198
python_version_arr=(${python_version[1]//./ })
199199
python_tag="py${python_version_arr[0]}${python_version_arr[1]}"
200200
echo "[BUILD] Extracted Python tag: ${python_tag}"
201201

202202
echo "[BUILD] Running pre-build cleanups ..."
203203
print_exec rm -rf dist
204-
print_exec conda run -n "${env_name}" python setup.py clean
204+
print_exec conda run --no-capture-output -n "${env_name}" python setup.py clean
205205

206206
echo "[BUILD] Printing git status ..."
207207
print_exec git status
@@ -305,10 +305,23 @@ build_fbgemm_gpu_package () {
305305
# See https://github.com/pypa/manylinux
306306
local plat_name="manylinux2014_${MACHINE_NAME}"
307307

308+
echo "[BUILD] Checking arch_list = ${arch_list}"
309+
echo "[BUILD] Checking build_args:"
310+
echo "${build_args[@]}"
311+
312+
# Read the core count from the last field of the lscpu line containing "Core(s)".
# The pipeline's exit status is awk's, which is 0 even when lscpu is unavailable
# or nothing matches, so decide on the captured value rather than the exit status.
core=$(lscpu | grep "Core(s)" | awk '{print $NF}')
if [[ -n "${core}" ]]; then echo "core = ${core}"; else echo "core not found"; fi
313+
# Read the socket count from the last field of the lscpu line containing "Socket(s)".
# The pipeline's exit status is awk's, which is 0 even when lscpu is unavailable
# or nothing matches, so decide on the captured value rather than the exit status.
sockets=$(lscpu | grep "Socket(s)" | awk '{print $NF}')
if [[ -n "${sockets}" ]]; then echo "sockets = ${sockets}"; else echo "sockets not found"; fi
314+
re='^[0-9]+$'
315+
run_multicore=""
316+
if [[ $core =~ $re && $sockets =~ $re ]] ; then
317+
n_core=$((core * sockets))
318+
run_multicore=" -j ${n_core}"
319+
fi
320+
308321
# Distribute Python extensions as wheels on Linux
309322
echo "[BUILD] Building FBGEMM-GPU wheel (VARIANT=${fbgemm_variant}) ..."
310-
print_exec conda run -n "${env_name}" \
311-
python setup.py bdist_wheel \
323+
# run_multicore is either empty or " -j <n>". It is expanded unquoted on purpose
# so that it contributes zero words when empty and the two words "-j" "<n>"
# otherwise; quoting it would hand setup.py a single " -j <n>" (or "") argument.
# shellcheck disable=SC2086
print_exec conda run --no-capture-output -n "${env_name}" \
324+
python setup.py ${run_multicore} bdist_wheel \
312325
--package_name="${package_name}" \
313326
--python-tag="${python_tag}" \
314327
--plat-name="${plat_name}" \
@@ -357,7 +370,7 @@ build_fbgemm_gpu_install () {
357370
# Parallelism may need to be limited to prevent the build from being
358371
# canceled for going over ulimits
359372
echo "[BUILD] Building + installing FBGEMM-GPU (VARIANT=${fbgemm_variant}) ..."
360-
print_exec conda run -n "${env_name}" \
373+
print_exec conda run --no-capture-output -n "${env_name}" \
361374
python setup.py install "${build_args[@]}"
362375

363376
# Run checks on the built libraries
@@ -401,7 +414,7 @@ build_fbgemm_gpu_develop () {
401414
# Parallelism may need to be limited to prevent the build from being
402415
# canceled for going over ulimits
403416
echo "[BUILD] Building (develop) FBGEMM-GPU (VARIANT=${fbgemm_variant}) ..."
404-
print_exec conda run -n "${env_name}" \
417+
print_exec conda run --no-capture-output -n "${env_name}" \
405418
python setup.py build develop "${build_args[@]}"
406419

407420
# Run checks on the built libraries

.github/scripts/fbgemm_gpu_test.bash

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ run_python_test () {
2828
echo "################################################################################"
2929
fi
3030

31-
if print_exec conda run -n "${env_name}" python -m pytest -v -rsx -s -W ignore::pytest.PytestCollectionWarning "${python_test_file}"; then
31+
if print_exec conda run --no-capture-output -n "${env_name}" python -m pytest -v -rsx -s -W ignore::pytest.PytestCollectionWarning "${python_test_file}"; then
3232
echo "[TEST] Python test suite PASSED: ${python_test_file}"
3333
echo ""
3434
else

.github/scripts/nova_dir.bash

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
#
4+
# This source code is licensed under the BSD-style license found in the
5+
# LICENSE file in the root directory of this source tree.
6+
7+
## Workaround for Nova Workflow to look for setup.py in fbgemm_gpu rather than root repo
8+
FBGEMM_DIR="/__w/FBGEMM/FBGEMM"
9+
export FBGEMM_REPO="${FBGEMM_DIR}/${REPOSITORY}"
10+
working_dir=$(pwd)
11+
if [[ "$working_dir" == "$FBGEMM_REPO" ]]; then cd fbgemm_gpu || echo "Failed to cd fbgemm_gpu from $(pwd)"; fi
12+
13+
## Build clean/wheel will be done in pre-script. Set flag such that setup.py will skip these steps in Nova workflow
14+
export BUILD_FROM_NOVA=1
15+
16+
## Overwrite existing ENV VAR in Nova
17+
if [[ "$CONDA_ENV" != "" ]]; then export CONDA_RUN="conda run --no-capture-output -p ${CONDA_ENV}" && echo "$CONDA_RUN"; fi
18+
if [[ "$CU_VERSION" == "cu118" ]]; then export TORCH_CUDA_ARCH_LIST='7.0;8.0' && echo "$TORCH_CUDA_ARCH_LIST"; fi
19+
if [[ "$CU_VERSION" == "cu121" ]]; then export TORCH_CUDA_ARCH_LIST='7.0;8.0;9.0' && echo "$TORCH_CUDA_ARCH_LIST"; fi

.github/scripts/nova_postscript.bash

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
#!/bin/bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
echo "Current working directory: $(pwd)"
9+
cd "${FBGEMM_REPO}" || echo "Failed to cd to ${FBGEMM_REPO}"
10+
PRELUDE="${FBGEMM_REPO}/.github/scripts/setup_env.bash"
11+
BUILD_ENV_NAME=base
12+
GITHUB_ENV=TRUE
13+
export GITHUB_ENV
14+
15+
# Install FBGEMM_GPU Nightly
16+
echo "Current working directory: $(pwd)"
17+
# shellcheck disable=SC1091
18+
# shellcheck source=.github/scripts/setup_env.bash
19+
. "${PRELUDE}";
20+
21+
install_fbgemm_gpu_wheel "${BUILD_ENV_NAME}" fbgemm_gpu/dist/*.whl
22+
23+
# Test with PyTest
24+
echo "Current working directory: $(pwd)"
25+
CPU_GPU="${CU_VERSION}"
26+
if [ "${CU_VERSION}" != 'cpu' ]; then
27+
CPU_GPU=""
28+
fi
29+
$CONDA_RUN python3 -c "import torch; print('cuda.is_available() ', torch.cuda.is_available()); print ('device_count() ',torch.cuda.device_count());"
30+
cd "${FBGEMM_REPO}/fbgemm_gpu/test" || { echo "Failed to cd to fbgemm_gpu/test from $(pwd)"; };
31+
run_fbgemm_gpu_tests "${BUILD_ENV_NAME}" "${CPU_GPU}"

.github/scripts/nova_prescript.bash

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#!/bin/bash
2+
# Copyright (c) Meta Platforms, Inc. and affiliates.
3+
# All rights reserved.
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
echo "Current working directory: $(pwd)"
9+
cd "${FBGEMM_REPO}" || echo "Failed to cd to ${FBGEMM_REPO}"
10+
PRELUDE="${FBGEMM_REPO}/.github/scripts/setup_env.bash"
11+
BUILD_ENV_NAME=base
12+
echo "--------------------------"
13+
echo "----- conda env list -----"
14+
conda env list
15+
echo "--------------------------"
16+
echo "PRELUDE = $PRELUDE"
17+
export PATH="${PATH}:/usr/sbin:/sbin"
18+
echo "CU_VERSION = ${CU_VERSION}"
19+
echo "PYTHON_VERSION = ${PYTHON_VERSION}"
20+
echo "python3 --version = $(python3 --version)"
21+
echo "ARCH = ${ARCH}"
22+
echo "---------------------------"
23+
# shellcheck disable=SC1091
24+
# shellcheck source=.github/scripts/setup_env.bash
25+
. "${PRELUDE}";
26+
27+
## Display System Info
28+
print_system_info
29+
30+
## Display GPU Info
31+
print_gpu_info
32+
33+
## Install C/C++ Compilers
34+
install_cxx_compiler "${BUILD_ENV_NAME}"
35+
36+
## Install Build Tools
37+
install_build_tools "${BUILD_ENV_NAME}"
38+
39+
## Install cuDNN
40+
CPU_GPU=${CU_VERSION}
41+
if [ "${CU_VERSION}" != 'cpu' ]; then
42+
## Nova $CU_VERSION is e.g., cu118
43+
cuda_version_num=$(echo "$CU_VERSION" | cut -c 3-)
44+
install_cudnn "${BUILD_ENV_NAME}" "$(pwd)/build_only/cudnn" "$cuda_version_num"
45+
echo "-------- Finding NVML_LIB_PATH -----------"
46+
echo "NVML_LIB_PATH = ${NVML_LIB_PATH}"
47+
echo "CONDA_ENV = ${CONDA_ENV}, CUDA_HOME = ${CUDA_HOME}"
48+
# Fall back to searching CUDA_HOME when NVML_LIB_PATH is unset or empty.
# find exits 0 even when nothing matches and may print several paths, so keep
# only the first match (-print -quit) and judge success by whether a path was
# captured rather than by find's exit status.
if [[ -z "${NVML_LIB_PATH}" ]]; then
  NVML_LIB_PATH=$(find "${CUDA_HOME}" -name libnvidia-ml.so -print -quit)
  if [[ -n "${NVML_LIB_PATH}" ]]; then
    export NVML_LIB_PATH && echo "looking in ${CUDA_HOME}"
  else
    echo "libnvidia-ml.so not found in ${CUDA_HOME}"
  fi
fi
49+
# Fall back to searching CONDA_ENV when NVML_LIB_PATH is still unset or empty.
# find exits 0 even when nothing matches and may print several paths, so keep
# only the first match (-print -quit) and judge success by whether a path was
# captured rather than by find's exit status.
if [[ -z "${NVML_LIB_PATH}" ]]; then
  NVML_LIB_PATH=$(find "${CONDA_ENV}" -name libnvidia-ml.so -print -quit)
  if [[ -n "${NVML_LIB_PATH}" ]]; then
    export NVML_LIB_PATH && echo "looking in ${CONDA_ENV}"
  else
    echo "libnvidia-ml.so not found in ${CONDA_ENV}"
  fi
fi
50+
echo "NVML_LIB_PATH = ${NVML_LIB_PATH}"
51+
echo "------------------------------------------"
52+
CPU_GPU="cuda"
53+
fi
54+
55+
cd "${FBGEMM_REPO}/fbgemm_gpu" || { echo "Failed to cd to fbgemm_gpu from $(pwd)"; }
56+
prepare_fbgemm_gpu_build "${BUILD_ENV_NAME}"
57+
58+
# reset NOVA flag to run setup.py
59+
BUILD_FROM_NOVA=0
60+
export BUILD_FROM_NOVA
61+
62+
## Build FBGEMM_GPU Nightly
63+
cd "${FBGEMM_REPO}/fbgemm_gpu" || echo "Failed to cd to ${FBGEMM_REPO}/fbgemm_gpu from $(pwd)"
64+
if [[ ${CHANNEL} == "" ]]; then CHANNEL="nightly"; fi #set nightly by default
65+
echo "----------------------------------------------"
66+
echo "build_fbgemm_gpu_package ${BUILD_ENV_NAME} ${CHANNEL} ${CPU_GPU}"
67+
build_fbgemm_gpu_package "${BUILD_ENV_NAME}" "${CHANNEL}" "${CPU_GPU}"
68+
echo "----------------------------------------------"
69+
70+
## Temporary workaround - copy dist/ to root repo for smoke test
71+
echo "Copying dist folder to root repo.."
72+
# Copy the built dist/ directory up to the repository root and report the outcome.
if cp -r "${FBGEMM_REPO}/fbgemm_gpu/dist" "${FBGEMM_REPO}"; then
  echo "dist folder has been copied to ${FBGEMM_REPO}"
else
  echo "Failed to copy dist/ folder to ${FBGEMM_REPO}"
fi
73+
echo "----------------------------------"
74+
ls -al "${FBGEMM_REPO}/dist"
75+
echo "----------------------------------"

.github/scripts/utils_system.bash

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,12 @@ free_disk_space () {
7777
################################################################################
7878

7979
print_gpu_info () {
80-
echo "################################################################################"
81-
echo "[INFO] Printing general display info ..."
82-
install_system_packages lshw
83-
print_exec sudo lshw -C display
80+
if [[ "${BUILD_FROM_NOVA}" != '1' ]]; then
81+
echo "################################################################################"
82+
echo "[INFO] Printing general display info ..."
83+
install_system_packages lshw
84+
print_exec sudo lshw -C display
85+
fi
8486

8587
echo "################################################################################"
8688
echo "[INFO] Printing NVIDIA GPU info ..."
@@ -131,11 +133,15 @@ __print_system_info_linux () {
131133
echo "################################################################################"
132134
echo "[INFO] Print CPU info ..."
133135
print_exec nproc
136+
print_exec lscpu
134137
print_exec cat /proc/cpuinfo
135138

136-
echo "################################################################################"
137-
echo "[INFO] Print PCI info ..."
138-
print_exec lspci -v
139+
140+
if [[ "${BUILD_FROM_NOVA}" != '1' ]]; then
141+
echo "################################################################################"
142+
echo "[INFO] Print PCI info ..."
143+
print_exec lspci -v
144+
fi
139145

140146
echo "################################################################################"
141147
echo "[INFO] Print Linux distribution info ..."
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
name: Build Linux Wheels
2+
3+
on:
4+
pull_request:
5+
push:
6+
branches:
7+
- nightly
8+
workflow_dispatch:
9+
10+
jobs:
11+
generate-matrix:
12+
uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main
13+
with:
14+
package-type: wheel
15+
os: linux
16+
test-infra-repository: pytorch/test-infra
17+
test-infra-ref: main
18+
with-cuda: enable
19+
with-rocm: disable
20+
with-cpu: enable
21+
build:
22+
needs: generate-matrix
23+
name: pytorch/FBGEMM
24+
uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
25+
with:
26+
repository: pytorch/FBGEMM
27+
ref: ""
28+
pre-script: ../.github/scripts/nova_prescript.bash
29+
post-script: ../.github/scripts/nova_postscript.bash
30+
smoke-test-script: ""
31+
env-var-script: .github/scripts/nova_dir.bash
32+
package-name: fbgemm_gpu
33+
test-infra-repository: pytorch/test-infra
34+
test-infra-ref: main
35+
build-matrix: ${{ needs.generate-matrix.outputs.matrix }}
36+
trigger-event: ${{ github.event_name }}
37+
secrets:
38+
AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID: ${{ secrets.AWS_PYTORCH_UPLOADER_ACCESS_KEY_ID }}
39+
AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY: ${{ secrets.AWS_PYTORCH_UPLOADER_SECRET_ACCESS_KEY }}

fbgemm_gpu/setup.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,19 @@ def main(argv: List[str]) -> None:
269269
if len(unknown) != 0 and (len(unknown) != 1 or unknown[0] != "clean"):
270270
print("Unknown Arguments: ", unknown)
271271

272+
# Skip Nova build steps since it will be done in pre-script
273+
if "BUILD_FROM_NOVA" in os.environ:
274+
build_from_nova = os.getenv("BUILD_FROM_NOVA")
275+
print("build_from_nova", build_from_nova)
276+
# Package name is the same for all variants in Nova
277+
package_name = "fbgemm_gpu"
278+
if str(build_from_nova) != "0":
279+
# Skip build clean and build wheel steps in Nova workflow since they are done in pre-script
280+
print("Build from Nova detected... exiting")
281+
sys.exit(0)
282+
else:
283+
package_name = args.package_name
284+
272285
if not args.cpu_only:
273286
set_cuda_environment_variables()
274287

@@ -282,7 +295,7 @@ def main(argv: List[str]) -> None:
282295
FbgemmGpuInstaller.generate_version_file(package_version)
283296

284297
setup(
285-
name=args.package_name,
298+
name=package_name,
286299
version=package_version,
287300
author="FBGEMM Team",
288301
author_email="packages@pytorch.org",

0 commit comments

Comments
 (0)