From 75f75fbfea9772085947981ee886e4be60160b68 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Wed, 9 Apr 2025 01:43:27 +0530 Subject: [PATCH 1/2] code changes for integrating nvidia v5.0 --- automation/utils.py | 4 +- script/add-custom-nvidia-system/meta.yaml | 10 ++ script/app-mlperf-inference-nvidia/meta.yaml | 116 ++++++++++++++---- script/app-mlperf-inference/meta.yaml | 55 +++++++++ script/build-dockerfile/customize.py | 4 + .../meta.yaml | 2 +- .../run.sh | 8 ++ .../meta.yaml | 5 + script/get-mlperf-inference-results/meta.yaml | 4 + script/get-nvidia-mitten/customize.py | 7 +- script/get-nvidia-mitten/meta.yaml | 2 +- .../patch/numpy-mitten-v5.0.patch | 13 ++ script/get-nvidia-mitten/run.sh | 3 + script/run-mlperf-inference-app/meta.yaml | 16 ++- 14 files changed, 217 insertions(+), 32 deletions(-) create mode 100644 script/get-nvidia-mitten/patch/numpy-mitten-v5.0.patch diff --git a/automation/utils.py b/automation/utils.py index 986888e87..98a3a3718 100644 --- a/automation/utils.py +++ b/automation/utils.py @@ -339,8 +339,8 @@ def compare_versions(i): # 3.9.6 vs 3.9 # 3.9 vs 3.9.6 - i_version1 = [int(v) if v.isdigit() else v for v in l_version1] - i_version2 = [int(v) if v.isdigit() else v for v in l_version2] + i_version1 = [int(v) for v in l_version1 if v.isdigit()] + i_version2 = [int(v) for v in l_version2 if v.isdigit()] comparison = 0 diff --git a/script/add-custom-nvidia-system/meta.yaml b/script/add-custom-nvidia-system/meta.yaml index 6dce8414d..b07ec8d5e 100644 --- a/script/add-custom-nvidia-system/meta.yaml +++ b/script/add-custom-nvidia-system/meta.yaml @@ -74,6 +74,11 @@ deps: # Detect pycuda - tags: get,generic-python-lib,_pycuda + - tags: get,generic-python-lib,_package.typeguard + enable_if_env: + MLC_MLPERF_INFERENCE_VERSION: + - "5.0" + variations: nvidia-only: group: code @@ -124,3 +129,8 @@ versions: add_deps_recursive: nvidia-inference-common-code: version: r4.0 + + r5.0: + add_deps_recursive: + nvidia-inference-common-code: + version: 
r5.0 diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml index 472eb9383..f803b6a03 100644 --- a/script/app-mlperf-inference-nvidia/meta.yaml +++ b/script/app-mlperf-inference-nvidia/meta.yaml @@ -272,7 +272,9 @@ deps: - run_harness - tags: get,generic-python-lib,_package.pycuda - version: "2022.2.2" + names: + - pycuda + version: "2023.1" - tags: get,generic-python-lib,_package.nvmitten update_tags_from_env_with_prefix: @@ -281,11 +283,10 @@ deps: enable_if_env: MLC_RUN_STATE_DOCKER: - 'yes' + MLC_ENV_NVMITTEN_DOCKER_WHEEL_PATH: + - 'yes' - tags: get,nvidia,mitten - skip_if_env: - MLC_RUN_STATE_DOCKER: - - 'yes' enable_if_env: MLC_NVIDIA_MITTEN_FROM_SRC: - 'yes' @@ -351,6 +352,18 @@ post_deps: # Variations to customize dependencies variations: # MLPerf inference version + v5.0: + group: version + env: + MLC_MLPERF_INFERENCE_CODE_VERSION: "v5.0" + MLC_MLPERF_GPTJ_MODEL_FP8_PATH_SUFFIX: GPTJ-FP8-quantized + MLC_NVIDIA_MITTEN_FROM_SRC: "yes" + MLC_GIT_CHECKOUT: "98bb85df8e936219ec7acd10ce1d702147fb1e21" + adr: + pytorch: + tags: _for-nvidia-mlperf-inference-v5.0 + pycuda: + version_min: "2024.1" v4.1: group: version env: @@ -435,9 +448,20 @@ variations: - tags: get,generic-python-lib,_numpy - tags: get,generic-python-lib,_pycocotools - tags: get,generic-python-lib,_onnx-graphsurgeon + - tags: get,generic,sys-util,_cmake + - tags: get,generic-python-lib,_package.cmake + - tags: get,generic-python-lib,_package.sympy + + retinanet,v5.0: + deps: + - tags: get,generic-python-lib,_package.onnx + version: 1.17.0 + + retinanet,v4.0: + deps: - tags: get,generic-python-lib,_package.onnx version: 1.14.1 - - tags: get,generic-python-lib,_package.sympy + sdxl: new_env_keys: @@ -481,8 +505,8 @@ variations: names: - nvtx - tags: get,generic-python-lib,_package.cuda-python - version_max: 12.6.2 - version_max_usable: 12.6.2 + version_max: "12.6.2" + version_max_usable: "12.6.2" names: - cuda-python - tags: 
get,generic-python-lib,_package.ninja @@ -494,38 +518,78 @@ variations: - tags: get,generic-python-lib,_package.colored names: - colored - - tags: get,generic-python-lib,_package.nvidia-ammo - names: - - nvidia-ammo - version: 0.7.4 - env: - MLC_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL: "https://pypi.nvidia.com" - MLC_GENERIC_PYTHON_PIP_EXTRA: "--no-cache-dir" - tags: get,generic-python-lib,_package.optimum names: - optimum - - tags: get,generic-python-lib,_package.onnx - names: - - onnx - version: 1.14.0 - tags: get,generic-python-lib,_package.scipy names: - scipy - version: 1.10.1 - - tags: get,generic-python-lib,_package.numpy - names: - - numpy - version_max: 1.22.99 - version_max_usable: "1.22" + sdxl,v4.0: + deps: + - tags: get,generic-python-lib,_package.onnx + names: + - onnx + version: "1.14.0" + - tags: get,generic-python-lib,_package.numpy + names: + - numpy + version_max: "1.22.99" + version_max_usable: "1.22" + - tags: get,generic-python-lib,_package.nvidia-ammo + names: + - nvidia-ammo + version: "0.7.4" + env: + MLC_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL: "https://pypi.nvidia.com" + MLC_GENERIC_PYTHON_PIP_EXTRA: "--no-cache-dir" + sdxl,v4.1: deps: - tags: get,generic-python-lib,_package.torchrec - version: 0.4.0 + version: "0.4.0" - tags: get,generic-python-lib,_package.torchmetrics - version: 1.0.3 + version: "1.0.3" - tags: get,generic-python-lib,_package.typeguard + - tags: get,generic-python-lib,_package.onnx + names: + - onnx + version: "1.14.0" + - tags: get,generic-python-lib,_package.numpy + names: + - numpy + version_max: "1.22.99" + version_max_usable: "1.22" + - tags: get,generic-python-lib,_package.nvidia-ammo + names: + - nvidia-ammo + version: "0.7.4" + env: + MLC_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL: "https://pypi.nvidia.com" + MLC_GENERIC_PYTHON_PIP_EXTRA: "--no-cache-dir" + - tags: get,generic-python-lib,_package.scipy + names: + - scipy + version: "1.10.1" + sdxl,v5.0: + # nvidia-ammo is decommissioned and model-opt is being used which is built
with TRTLLM + deps: + - tags: get,generic-python-lib,_package.torchrec + version: "0.6.0" + - tags: get,generic-python-lib,_package.torchmetrics + version: "1.0.3" + - tags: get,generic-python-lib,_package.typeguard + - tags: get,generic-python-lib,_package.onnx + names: + - onnx + version: "1.17.0" + - tags: get,generic-python-lib,_package.numpy + names: + - numpy + version_max: "1.26.99" + version_max_usable: "1.26.4" + bert_: deps: - tags: get,generic-python-lib,_transformers diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml index 6a4b6cf3b..c1c177d61 100644 --- a/script/app-mlperf-inference/meta.yaml +++ b/script/app-mlperf-inference/meta.yaml @@ -382,6 +382,24 @@ variations: env: MLC_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp310-cp310-linux_aarch64.whl' + nvidia-original,r5.0_default: + env: + MLC_NVIDIA_MITTEN_FROM_SRC: 'yes' + docker: + build_deps: + - tags: detect,os + image_name: mlperf-inference-nvidia-v5.0-common + update_meta_if_env: + - enable_if_env: + MLC_HOST_PLATFORM_FLAVOR: + - x86_64 + docker: + base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v5.0-cuda12.8-pytorch25.01-ubuntu24.04-x86_64-release + - skip_if_env: + MLC_HOST_PLATFORM_FLAVOR: + - x86_64 + docker: + base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v5.0-cuda12.8-pytorch25.01-ubuntu24.04-aarch64-Grace-release nvidia-original,gptj_: env: @@ -424,6 +442,14 @@ variations: update_tags_from_env_with_prefix: _tp-size.: - MLC_NVIDIA_TP_SIZE + + nvidia-original,r5.0_default,gptj_: + docker: + deps: + - tags: get,ml-model,gptj,_nvidia,_fp8 + update_tags_from_env_with_prefix: + _tp-size.: + - MLC_NVIDIA_TP_SIZE nvidia-original,r4.1-dev_default,llama2-70b_: @@ -446,6 +472,14 @@ variations: - MLC_NVIDIA_TP_SIZE env: BUILD_TRTLLM: 1 + + nvidia-original,r5.0_default,llama2-70b_: + docker: + deps: + - tags: get,ml-model,llama2-70b,_nvidia,_fp8 + update_tags_from_env_with_prefix: + _tp-size.: + - MLC_NVIDIA_TP_SIZE 
nvidia-original: docker: @@ -1813,6 +1847,27 @@ variations: MLC_REGENERATE_MEASURE_FILES: 'yes' env: MLC_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' + + r5.0_default: + group: + reproducibility + add_deps_recursive: + nvidia-inference-common-code: + version: r5.0 + tags: _mlcommons + nvidia-inference-server: + version: r5.0 + tags: _mlcommons + intel-harness: + tags: _v4.1 + inference-src: + version: r5.0 + nvidia-scratch-space: + tags: _version.5.0 + default_env: + MLC_SKIP_SYS_UTILS: 'yes' + MLC_REGENERATE_MEASURE_FILES: 'yes' + MLC_MLPERF_INFERENCE_VERSION: '5.0' invalid_variation_combinations: diff --git a/script/build-dockerfile/customize.py b/script/build-dockerfile/customize.py index 8a01ef753..6feeb95cf 100644 --- a/script/build-dockerfile/customize.py +++ b/script/build-dockerfile/customize.py @@ -248,6 +248,10 @@ def preprocess(i): for cmd in config['RUN_CMDS']: f.write('RUN ' + cmd + EOL) + if env.get('MLC_MLPERF_IMPLEMENTATION', '') == "nvidia" and env.get( + 'MLC_MLPERF_INFERENCE_VERSION', '') == "5.0": + f.write('ENV ' + 'ENV' + "=\"" + 'release' + "\"" + EOL) + f.write(EOL + '# Setup docker user' + EOL) docker_user = get_value(env, config, 'USER', 'MLC_DOCKER_USER') docker_group = get_value(env, config, 'GROUP', 'MLC_DOCKER_GROUP') diff --git a/script/build-mlperf-inference-server-nvidia/meta.yaml b/script/build-mlperf-inference-server-nvidia/meta.yaml index e8e49748b..022fb9c9b 100644 --- a/script/build-mlperf-inference-server-nvidia/meta.yaml +++ b/script/build-mlperf-inference-server-nvidia/meta.yaml @@ -111,7 +111,7 @@ deps: # Detect pycuda - tags: get,generic-python-lib,_pycuda - version: "2022.2.2" + version: "2023.1" skip_if_env: MLC_RUN_STATE_DOCKER: - 'yes' diff --git a/script/build-mlperf-inference-server-nvidia/run.sh b/script/build-mlperf-inference-server-nvidia/run.sh index ac990aa62..c5f4e1a8a 100644 --- a/script/build-mlperf-inference-server-nvidia/run.sh +++ 
b/script/build-mlperf-inference-server-nvidia/run.sh @@ -8,9 +8,17 @@ if [[ ${MLC_MAKE_CLEAN} == "yes" ]]; then fi if [[ ${MLC_MLPERF_DEVICE} == "inferentia" ]]; then + echo "inferencia" make prebuild fi +# Perform sed replacement only if version is 5.0 +if [[ "${MLC_MLPERF_INFERENCE_VERSION}" == "5.0" ]]; then + echo "Replacing /work/ with ${MLC_MLPERF_INFERENCE_NVIDIA_CODE_PATH} in all files..." + find . -type f -exec sed -i "s|/work/|${MLC_MLPERF_INFERENCE_NVIDIA_CODE_PATH}/|g" {} + +fi + +echo ${MLC_MAKE_BUILD_COMMAND} SKIP_DRIVER_CHECK=1 make ${MLC_MAKE_BUILD_COMMAND} test $? -eq 0 || exit $? diff --git a/script/get-mlperf-inference-nvidia-common-code/meta.yaml b/script/get-mlperf-inference-nvidia-common-code/meta.yaml index 3c0657dda..8d0be3d3a 100644 --- a/script/get-mlperf-inference-nvidia-common-code/meta.yaml +++ b/script/get-mlperf-inference-nvidia-common-code/meta.yaml @@ -63,3 +63,8 @@ versions: mlperf-inference-results: version: v4.0 tags: _code-only-for-v5.0 + r5.0: + add_deps_recursive: + mlperf-inference-results: + version: v5.0 + tags: _code-only diff --git a/script/get-mlperf-inference-results/meta.yaml b/script/get-mlperf-inference-results/meta.yaml index 8b1da1310..7b8397c0a 100644 --- a/script/get-mlperf-inference-results/meta.yaml +++ b/script/get-mlperf-inference-results/meta.yaml @@ -86,3 +86,7 @@ versions: env: MLC_GIT_URL: https://github.com/<<>>/inference_results_v4.1.git MLC_MLPERF_INFERENCE_RESULTS_VERSION_NAME: v4.1 + v5.0: + env: + MLC_GIT_URL: https://github.com/<<>>/inference_results_v5.0.git + MLC_MLPERF_INFERENCE_RESULTS_VERSION_NAME: v5.0 diff --git a/script/get-nvidia-mitten/customize.py b/script/get-nvidia-mitten/customize.py index a2acfde43..77e4bac46 100644 --- a/script/get-nvidia-mitten/customize.py +++ b/script/get-nvidia-mitten/customize.py @@ -5,8 +5,13 @@ def preprocess(i): os_info = i['os_info'] + env = i['env'] + script_path = i['artifact'].path - # TBD + if env.get('MLC_MLPERF_INFERENCE_VERSION', '') == "5.0": + 
extra_run_cmd = 'patch -p1 < {}'.format(os.path.join( + script_path, 'patch', 'numpy-mitten-v5.0.patch')) + env['EXTRA_RUN_CMD'] = extra_run_cmd return {'return': 0} diff --git a/script/get-nvidia-mitten/meta.yaml b/script/get-nvidia-mitten/meta.yaml index 3073438bb..09bac19b9 100644 --- a/script/get-nvidia-mitten/meta.yaml +++ b/script/get-nvidia-mitten/meta.yaml @@ -11,7 +11,7 @@ deps: - python tags: get,python3 - tags: get,generic-python-lib,_pycuda - version: 2022.2.2 + version: "2023.1" - env: MLC_GIT_CHECKOUT_PATH_ENV_NAME: MLC_NVIDIA_MITTEN_SRC extra_cache_tags: nvidia,mitten,src diff --git a/script/get-nvidia-mitten/patch/numpy-mitten-v5.0.patch b/script/get-nvidia-mitten/patch/numpy-mitten-v5.0.patch new file mode 100644 index 000000000..b89abe2ab --- /dev/null +++ b/script/get-nvidia-mitten/patch/numpy-mitten-v5.0.patch @@ -0,0 +1,13 @@ +diff --git a/setup.cfg b/setup.cfg +index 4976354..798175e 100644 +--- a/setup.cfg ++++ b/setup.cfg +@@ -21,7 +21,7 @@ install_requires = + graphlib_backport >=1.0.3;python_version<'3.9' + requests >=2.28.1 + tqdm >=4.65.0 +- numpy >=1.22.0, <1.24.0 ++ numpy >=1.26.4 + GitPython >=3.1.31 + pandas + opencv-python diff --git a/script/get-nvidia-mitten/run.sh b/script/get-nvidia-mitten/run.sh index ac0dc16b2..81f38f829 100644 --- a/script/get-nvidia-mitten/run.sh +++ b/script/get-nvidia-mitten/run.sh @@ -1,4 +1,7 @@ #!/bin/bash cd ${MLC_NVIDIA_MITTEN_SRC} +echo "EXTRA_RUN_CMD = ${EXTRA_RUN_CMD}" +eval "${EXTRA_RUN_CMD}" +test $? -eq 0 || exit $? ${MLC_PYTHON_BIN_WITH_PATH} -m pip install . test $? -eq 0 || exit $? 
diff --git a/script/run-mlperf-inference-app/meta.yaml b/script/run-mlperf-inference-app/meta.yaml index b19362ecc..d2a0aaf56 100644 --- a/script/run-mlperf-inference-app/meta.yaml +++ b/script/run-mlperf-inference-app/meta.yaml @@ -366,7 +366,6 @@ variations: group: benchmark-version r5.0-dev: - default: true env: MLC_MLPERF_INFERENCE_VERSION: '5.0-dev' MLC_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r5.0-dev_default @@ -380,6 +379,21 @@ variations: mlperf-inference-nvidia-scratch-space: tags: _version.r5.0-dev + r5.0: + default: true + env: + MLC_MLPERF_INFERENCE_VERSION: '5.0' + MLC_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r5.0_default + MLC_MLPERF_SUBMISSION_CHECKER_VERSION: v5.0 + group: benchmark-version + adr: + get-mlperf-inference-results-dir: + tags: _version.r5.0 + get-mlperf-inference-submission-dir: + tags: _version.r5.0 + mlperf-inference-nvidia-scratch-space: + tags: _version.r5.0 + short: add_deps_recursive: submission-checker: From f33761bd5cab4510e9aaa1f9ddcd051d9351d867 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Sun, 18 May 2025 19:02:51 +0530 Subject: [PATCH 2/2] code clean for pycuda --- script/app-mlperf-inference-nvidia/meta.yaml | 2 +- script/build-mlperf-inference-server-nvidia/meta.yaml | 10 ++++++++-- script/run-mlperf-inference-app/meta.yaml | 2 +- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml index f803b6a03..110b792bd 100644 --- a/script/app-mlperf-inference-nvidia/meta.yaml +++ b/script/app-mlperf-inference-nvidia/meta.yaml @@ -274,7 +274,7 @@ deps: - tags: get,generic-python-lib,_package.pycuda names: - pycuda - version: "2023.1" + version: "2022.2.2" - tags: get,generic-python-lib,_package.nvmitten update_tags_from_env_with_prefix: diff --git a/script/build-mlperf-inference-server-nvidia/meta.yaml b/script/build-mlperf-inference-server-nvidia/meta.yaml index 022fb9c9b..37e038d6d 100644 --- 
a/script/build-mlperf-inference-server-nvidia/meta.yaml +++ b/script/build-mlperf-inference-server-nvidia/meta.yaml @@ -111,7 +111,9 @@ deps: # Detect pycuda - tags: get,generic-python-lib,_pycuda - version: "2023.1" + names: + - pycuda + version: "2022.2.2" skip_if_env: MLC_RUN_STATE_DOCKER: - 'yes' @@ -333,7 +335,11 @@ versions: r4.1: default_env: BUILD_TRTLLM: 1 - + + r5.0: + add_deps_recursive: + pycuda: + version: "2024.1" docker: skip_run_cmd: 'no' all_gpus: 'yes' diff --git a/script/run-mlperf-inference-app/meta.yaml b/script/run-mlperf-inference-app/meta.yaml index d2a0aaf56..5c13dfa16 100644 --- a/script/run-mlperf-inference-app/meta.yaml +++ b/script/run-mlperf-inference-app/meta.yaml @@ -366,6 +366,7 @@ variations: group: benchmark-version r5.0-dev: + default: true env: MLC_MLPERF_INFERENCE_VERSION: '5.0-dev' MLC_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r5.0-dev_default @@ -380,7 +381,6 @@ variations: tags: _version.r5.0-dev r5.0: - default: true env: MLC_MLPERF_INFERENCE_VERSION: '5.0' MLC_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r5.0_default