From 13e4a72be477d38a1bcc9d1d6d6d4f2ba909a870 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 7 Apr 2025 15:38:42 +0530 Subject: [PATCH 1/4] Added version fix for nvidia TRTLLM --- script/build-mlperf-inference-server-nvidia/meta.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/script/build-mlperf-inference-server-nvidia/meta.yaml b/script/build-mlperf-inference-server-nvidia/meta.yaml index 3530c9482..c27b99bf1 100644 --- a/script/build-mlperf-inference-server-nvidia/meta.yaml +++ b/script/build-mlperf-inference-server-nvidia/meta.yaml @@ -283,6 +283,14 @@ versions: deps: - tags: get,generic,sys-util,_nlohmann-json3-dev - tags: get,generic,sys-util,_git-lfs + - tags: get,generic-python-lib,_package.cuda-python + names: + - cuda-python + version_max: 12.6 + version_max_usable: '12.3.0' + - tags: get,generic-python-lib,_package.networkx + version_max: '2.9.99' + version_max_usable: '2.8.8' - tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v4.0 names: - pytorch From c35256e09b916b70c9852748e576ae19e8024d8b Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 7 Apr 2025 16:57:37 +0530 Subject: [PATCH 2/4] Added version fix for nvidia TRTLLM --- script/build-mlperf-inference-server-nvidia/meta.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/script/build-mlperf-inference-server-nvidia/meta.yaml b/script/build-mlperf-inference-server-nvidia/meta.yaml index c27b99bf1..a80b28ea1 100644 --- a/script/build-mlperf-inference-server-nvidia/meta.yaml +++ b/script/build-mlperf-inference-server-nvidia/meta.yaml @@ -276,6 +276,14 @@ versions: - x86_64 MLC_PYTHON_MINOR_VERSION: - 8 + - tags: get,generic-python-lib,_package.cuda-python + names: + - cuda-python + version_max: 12.6 + version_max_usable: '12.3.0' + - tags: get,generic-python-lib,_package.networkx + version_max: '2.9.99' + version_max_usable: '2.8.8' r4.1-dev: default_env: From 0564c3992ca983047679bcc698eb495b15d68829 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 7 Apr 2025 17:09:25 +0530 Subject: [PATCH 3/4] Added version fix for nvidia TRTLLM --- script/build-mlperf-inference-server-nvidia/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/script/build-mlperf-inference-server-nvidia/meta.yaml b/script/build-mlperf-inference-server-nvidia/meta.yaml index a80b28ea1..e8e49748b 100644 --- a/script/build-mlperf-inference-server-nvidia/meta.yaml +++ b/script/build-mlperf-inference-server-nvidia/meta.yaml @@ -279,7 +279,7 @@ versions: - tags: get,generic-python-lib,_package.cuda-python names: - cuda-python - version_max: 12.6 + version_max: '12.6' version_max_usable: '12.3.0' - tags: get,generic-python-lib,_package.networkx version_max: '2.9.99' @@ -294,7 +294,7 @@ versions: - tags: get,generic-python-lib,_package.cuda-python names: - cuda-python - version_max: 12.6 + version_max: '12.6' version_max_usable: '12.3.0' - tags: get,generic-python-lib,_package.networkx version_max: '2.9.99' From af8737017a4f1bfeb4f1a7e4a0458c72f8edd765 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 7 Apr 2025 19:26:40 +0530 Subject: [PATCH 4/4] Fix nvidia gptj --- script/app-mlperf-inference-nvidia/customize.py | 2 +- script/app-mlperf-inference/meta.yaml | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py index 70e5d3710..2cb95e75f 100644 --- a/script/app-mlperf-inference-nvidia/customize.py +++ b/script/app-mlperf-inference-nvidia/customize.py @@ -300,7 +300,7 @@ def preprocess(i): if not os.path.exists(fp32_model_path): # download via prehook_deps env['MLC_REQUIRE_GPTJ_MODEL_DOWNLOAD'] = 'yes' - if make_command == "build_engine": + if make_command in ["build_engine", "preprocess_data"]: cmds.append( f"cp -r $MLC_ML_MODEL_FILE_WITH_PATH {fp32_model_path}") diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml index 846936234..6a4b6cf3b 100644 --- a/script/app-mlperf-inference/meta.yaml +++ b/script/app-mlperf-inference/meta.yaml @@ -404,6 +404,15 @@ variations: _tp-size.: - MLC_NVIDIA_TP_SIZE + nvidia-original,r5.0-dev_default,gptj_: + docker: + image_name: mlperf-inference-nvidia-v4.1-dev-llm + deps: + - tags: get,ml-model,gptj,_nvidia,_fp8 + update_tags_from_env_with_prefix: + _tp-size.: + - MLC_NVIDIA_TP_SIZE + nvidia-original,r4.1_default: docker: base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.1-cuda12.4-pytorch24.04-ubuntu22.04-x86_64-release