From 32d2b7335f0a8c4814250961f64c1ec69d609916 Mon Sep 17 00:00:00 2001 From: Nathan Wasson Date: Wed, 4 Dec 2024 12:14:45 -0600 Subject: [PATCH 1/8] Update CODEOWNERS --- .github/CODEOWNERS | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 9b7df2c6c7..ecc15005e3 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -2,8 +2,8 @@ # Unless a later match takes precedence,they will be requested for review when someone opens a pull request. * @mlcommons/wg-benchmark-infra -#/.github/CODEOWNERS @mlcommons/systems +/.github/CODEOWNERS @mlcommons/systems -#/.github/workflows/cla.yml @mlcommons/systems +/.github/workflows/cla.yml @mlcommons/systems -#/LICENSE.md @mlcommons/systems +/LICENSE.md @mlcommons/systems From b37fda0eafb5129a8f912fcb923c9c3d5ffec4cd Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 5 Dec 2024 10:38:46 +0530 Subject: [PATCH 2/8] Cleanup of format.yml github action file (#2) * Update format.yml * Update build_wheel.yml --- .github/workflows/build_wheel.yml | 2 ++ .github/workflows/format.yml | 19 +++++++------------ 2 files changed, 9 insertions(+), 12 deletions(-) diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index e2941b3ac6..16c2016c77 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -75,6 +75,7 @@ jobs: run: | git add VERSION git commit -m "Increment version to ${{ steps.do_version_increment.outputs.new_version }}" + git pull --rebase git push # Step 6: Update the git hash in the repository @@ -83,6 +84,7 @@ jobs: python3 get_git_version.py > git_commit_hash.txt git add git_commit_hash.txt git commit -m "Updated git_commit_hash.txt" + git pull --rebase git push # Step 7: Install required dependencies diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 66a1318632..f11721be25 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -22,9 +22,11 @@ jobs: python-version: ${{ env.python_version }} - name: Format modified python files + env: + filter: ${{ github.event.before }} run: | python3 -m pip install autopep8 - for FILE in $(git diff --name-only ${{ github.event.before }} | grep -E '.*\.py$') + for FILE in $(git diff --name-only $filter | grep -E '.*\.py$') do # Check if the file still exists in the working tree if [ -f "$FILE" ]; then @@ -34,8 +36,10 @@ jobs: done - name: Format modified C++ files + env: + filter: ${{ github.event.before }} run: | - for FILE in $(git diff --name-only ${{ github.event.before }} | grep -E '.*\.(cc|cpp|h|hpp)$') + for FILE in $(git diff --name-only $filter | grep -E '.*\.(cc|cpp|h|hpp)$') do # Check if the file still exists in the working tree if [ -f "$FILE" ]; then @@ -44,7 +48,7 @@ jobs: fi done - - name: Commit and create PR + - name: Commit and Push run: | HAS_CHANGES=$(git diff --staged --name-only) if [ ${#HAS_CHANGES} -gt 0 ]; then @@ -54,13 +58,4 @@ jobs: git commit -m '[Automated Commit] Format Codebase' git push - # Push changes to a new branch - #BRANCH_NAME="auto/code-format" - #git branch $BRANCH_NAME - #git push origin $BRANCH_NAME --force - - # Create a pull request to the "code-format" branch - #gh pr create --base code-format --head $BRANCH_NAME --title "[Automated PR] Format Codebase" --body "This pull request contains automated code formatting changes." fi - # env: - # GH_TOKEN: ${{ secrets.ACCESS_TOKEN }} From 953c6268b1ce42349b163de9eeb5501d78bf5a54 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Tue, 10 Dec 2024 00:30:53 +0530 Subject: [PATCH 3/8] changes for rgat reference implementation --- rgat.patch | 401 ++++++++++++++++++ .../_cm.yaml | 46 +- .../customize.py | 17 +- script/app-mlperf-inference/_cm.yaml | 35 ++ script/get-cudnn/_cm.yaml | 1 + .../_cm.yaml | 14 +- .../customize.py | 10 +- script/get-ml-model-rgat/_cm.yaml | 2 +- script/get-ml-model-rgat/customize.py | 8 +- script/get-mlperf-inference-src/_cm.yaml | 6 + script/get-mlperf-inference-src/customize.py | 2 +- script/process-mlperf-accuracy/_cm.yaml | 4 + script/process-mlperf-accuracy/customize.py | 10 + script/run-mlperf-inference-app/_cm.yaml | 13 + 14 files changed, 533 insertions(+), 36 deletions(-) create mode 100644 rgat.patch diff --git a/rgat.patch b/rgat.patch new file mode 100644 index 0000000000..7d8d956281 --- /dev/null +++ b/rgat.patch @@ -0,0 +1,401 @@ +diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml +index 85fddc989..8fa3df206 100644 +--- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml ++++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml +@@ -482,13 +482,12 @@ deps: + ## RGAT + - tags: get,ml-model,rgat + names: +- - ml-model + - rgat-model + enable_if_env: + CM_MODEL: + - rgat + skip_if_env: +- RGAT_CHECKPOINT_PATH: ++ CM_ML_MODEL_RGAT_CHECKPOINT_PATH: + - 'on' + + ######################################################################## +@@ -620,6 +619,9 @@ deps: + enable_if_env: + CM_MODEL: + - rgat ++ skip_if_env: ++ CM_DATASET_IGBH_PATH: ++ - "on" + + ######################################################################## + # Install MLPerf inference dependencies +@@ -1224,27 +1226,45 @@ variations: + group: models + env: + CM_MODEL: rgat ++ adr: ++ pytorch: ++ version: 2.1.0 + deps: + - tags: get,generic-python-lib,_package.colorama + - tags: get,generic-python-lib,_package.tqdm + - tags: get,generic-python-lib,_package.requests + - tags: get,generic-python-lib,_package.torchdata +- - tags: get,generic-python-lib,_package.torch-geometric +- - tags: get,generic-python-lib,_package.torch-scatter +- - tags: get,generic-python-lib,_package.torch-sparse ++ version: 0.7.0 ++ - tags: get,generic-python-lib,_package.torchvision ++ version: 0.16.0 + - tags: get,generic-python-lib,_package.pybind11 + - tags: get,generic-python-lib,_package.PyYAML ++ - tags: get,generic-python-lib,_package.numpy ++ version: 1.26.4 + - tags: get,generic-python-lib,_package.pydantic + - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/IllinoisGraphBenchmark/IGB-Datasets.git +- - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/repo.html +- enable_if_env: +- CM_MLPERF_DEVICE: +- - cpu ++ ++ rgat,cuda: ++ deps: + - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/cu121/repo.html +- enable_if_env: +- CM_MLPERF_DEVICE: +- - gpu +- ++ - tags: get,generic-python-lib,_package.torch-scatter ++ - tags: get,generic-python-lib,_package.torch-sparse ++ - tags: get,generic-python-lib,_package.torch-geometric ++ env: ++ CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>.html" ++ ++ rgat,cpu: ++ deps: ++ - tags: get,generic-python-lib,_package.torch-geometric ++ env: ++ CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" ++ - tags: get,generic-python-lib,_package.torch-scatter ++ env: ++ CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" ++ - tags: get,generic-python-lib,_package.torch-sparse ++ env: ++ CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" ++ - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/repo.html + + # Target devices + cpu: +diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py +index 8cd17e7de..dcffa5672 100644 +--- a/script/app-mlperf-inference-mlcommons-python/customize.py ++++ b/script/app-mlperf-inference-mlcommons-python/customize.py +@@ -115,10 +115,12 @@ def preprocess(i): + scenario_extra_options = '' + + NUM_THREADS = env['CM_NUM_THREADS'] +- if int(NUM_THREADS) > 2 and env['CM_MLPERF_DEVICE'] == "gpu": ++ if int( ++ NUM_THREADS) > 2 and env['CM_MLPERF_DEVICE'] == "gpu" and env['CM_MODEL'] != "rgat": + NUM_THREADS = "2" # Don't use more than 2 threads when run on GPU + +- if env['CM_MODEL'] in ['resnet50', 'retinanet', 'stable-diffusion-xl']: ++ if env['CM_MODEL'] in ['resnet50', 'retinanet', ++ 'stable-diffusion-xl', 'rgat']: + scenario_extra_options += " --threads " + NUM_THREADS + + ml_model_name = env['CM_MODEL'] +@@ -485,15 +487,16 @@ def get_run_cmd_reference( + # have to add the condition for running in debug mode or real run mode + cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " main.py " \ + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + \ +- " --dataset-path " + env['CM_IGBH_DATASET_PATH'] + \ +- " --device " + device.replace("cuda", "cuda:0") + \ ++ " --dataset-path " + env['CM_DATASET_IGBH_PATH'] + \ ++ " --device " + device.replace("cuda", "gpu") + \ + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ + scenario_extra_options + mode_extra_options + \ + " --output " + env['CM_MLPERF_OUTPUT_DIR'] + \ + ' --dtype ' + dtype_rgat + \ +- " --model-path " + env['RGAT_CHECKPOINT_PATH'] + \ +- " --mlperf_conf " + \ +- os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "mlperf.conf") ++ " --model-path " + env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] ++ ++ if env.get('CM_ACTIVATE_RGAT_IN_MEMORY', '') == "yes": ++ cmd += " --in-memory " + + if env.get('CM_NETWORK_LOADGEN', '') in ["lon", "sut"]: + cmd = cmd + " " + "--network " + env['CM_NETWORK_LOADGEN'] +diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml +index ffb4a26b8..4c368346e 100644 +--- a/script/app-mlperf-inference/_cm.yaml ++++ b/script/app-mlperf-inference/_cm.yaml +@@ -767,6 +767,20 @@ variations: + env: + CM_MODEL: + rgat ++ posthook_deps: ++ - enable_if_env: ++ CM_MLPERF_LOADGEN_MODE: ++ - accuracy ++ - all ++ CM_MLPERF_ACCURACY_RESULTS_DIR: ++ - 'on' ++ skip_if_env: ++ CM_MLPERF_IMPLEMENTATION: ++ - nvidia ++ names: ++ - mlperf-accuracy-script ++ - 3d-unet-accuracy-script ++ tags: run,accuracy,mlperf,_igbh + + sdxl: + group: +@@ -1645,6 +1659,25 @@ variations: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' + CM_MLPERF_INFERENCE_VERSION: '4.1' + ++ r5.0-dev_default: ++ group: ++ reproducibility ++ add_deps_recursive: ++ nvidia-inference-common-code: ++ version: r4.1 ++ tags: _mlcommons ++ nvidia-inference-server: ++ version: r4.1 ++ tags: _mlcommons ++ intel-harness: ++ tags: _v4.1 ++ default_env: ++ CM_SKIP_SYS_UTILS: 'yes' ++ CM_REGENERATE_MEASURE_FILES: 'yes' ++ env: ++ CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' ++ ++ + invalid_variation_combinations: + - + - retinanet +@@ -1768,6 +1801,8 @@ docker: + - "${{ CM_NVIDIA_LLAMA_DATASET_FILE_PATH }}:${{ CM_NVIDIA_LLAMA_DATASET_FILE_PATH }}" + - "${{ SDXL_CHECKPOINT_PATH }}:${{ SDXL_CHECKPOINT_PATH }}" + - "${{ CM_DATASET_KITS19_PREPROCESSED_PATH }}:${{ CM_DATASET_KITS19_PREPROCESSED_PATH }}" ++ - "${{ CM_DATASET_IGBH_PATH }}:${{ CM_DATASET_IGBH_PATH }}" ++ - "${{ CM_ML_MODEL_RGAT_CHECKPOINT_PATH }}:${{ CM_ML_MODEL_RGAT_CHECKPOINT_PATH }}" + skip_run_cmd: 'no' + shm_size: '32gb' + interactive: True +diff --git a/script/get-cudnn/_cm.yaml b/script/get-cudnn/_cm.yaml +index b01506f6d..fa5ccd2c7 100644 +--- a/script/get-cudnn/_cm.yaml ++++ b/script/get-cudnn/_cm.yaml +@@ -19,6 +19,7 @@ default_env: + + deps: + - tags: detect,os ++- tags: detect,sudo + - names: + - cuda + skip_if_env: +diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml +index c3e78b464..4750f3ff5 100644 +--- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml ++++ b/script/get-dataset-mlperf-inference-igbh/_cm.yaml +@@ -11,7 +11,8 @@ tags: + - inference + uid: 824e61316c074253 + new_env_keys: +- - CM_IGBH_DATASET_PATH ++ - CM_DATASET_IGBH_PATH ++ - CM_DATASET_IGBH_SIZE + input_mapping: + out_path: CM_IGBH_DATASET_OUT_PATH + deps: +@@ -21,6 +22,9 @@ deps: + - tags: get,python + names: + - get-python ++ - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/anandhu-eng/IGB-Datasets.git ++ - tags: get,generic-python-lib,_package.colorama ++ - tags: get,generic-python-lib,_package.tqdm + prehook_deps: + #paper + - env: +@@ -359,13 +363,13 @@ variations: + default: true + group: dataset-type + env: +- CM_IGBH_DATASET_TYPE: debug +- CM_IGBH_DATASET_SIZE: tiny ++ CM_DATASET_IGBH_TYPE: debug ++ CM_DATASET_IGBH_SIZE: tiny + full: + group: dataset-type + env: +- CM_IGBH_DATASET_TYPE: full +- CM_IGBH_DATASET_SIZE: full ++ CM_DATASET_IGBH_TYPE: debug ++ CM_DATASET_IGBH_SIZE: tiny + glt: + env: + CM_IGBH_GRAPH_COMPRESS: yes +diff --git a/script/get-dataset-mlperf-inference-igbh/customize.py b/script/get-dataset-mlperf-inference-igbh/customize.py +index 9d4240209..a0e6f24a6 100644 +--- a/script/get-dataset-mlperf-inference-igbh/customize.py ++++ b/script/get-dataset-mlperf-inference-igbh/customize.py +@@ -27,18 +27,18 @@ def preprocess(i): + x_sep = " && " + + # download the model +- if env['CM_IGBH_DATASET_TYPE'] == "debug": ++ if env['CM_DATASET_IGBH_TYPE'] == "debug": + run_cmd += x_sep + env['CM_PYTHON_BIN_WITH_PATH'] + \ + f" tools/download_igbh_test.py --target-path {download_loc} " + + # split seeds + run_cmd += x_sep + \ +- f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {env['CM_IGBH_DATASET_SIZE']}" ++ f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']}" + + # compress graph(for glt implementation) + if env.get('CM_IGBH_GRAPH_COMPRESS', '') == "yes": + run_cmd += x_sep + \ +- f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/compress_graph.py --path {download_loc} --dataset_size {env['CM_IGBH_DATASET_SIZE']} --layout {env['CM_IGBH_GRAPH_COMPRESS_LAYOUT']}" ++ f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/compress_graph.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} --layout {env['CM_IGBH_GRAPH_COMPRESS_LAYOUT']}" + + env['CM_RUN_CMD'] = run_cmd + +@@ -49,10 +49,10 @@ def postprocess(i): + + env = i['env'] + +- env['CM_IGBH_DATASET_PATH'] = env.get( ++ env['CM_DATASET_IGBH_PATH'] = env.get( + 'CM_IGBH_DATASET_OUT_PATH', os.getcwd()) + + print( +- f"Path to the IGBH dataset: {os.path.join(env['CM_IGBH_DATASET_PATH'], env['CM_IGBH_DATASET_SIZE'])}") ++ f"Path to the IGBH dataset: {os.path.join(env['CM_DATASET_IGBH_PATH'], env['CM_DATASET_IGBH_SIZE'])}") + + return {'return': 0} +diff --git a/script/get-ml-model-rgat/_cm.yaml b/script/get-ml-model-rgat/_cm.yaml +index 0bc4b1eab..644bf688a 100644 +--- a/script/get-ml-model-rgat/_cm.yaml ++++ b/script/get-ml-model-rgat/_cm.yaml +@@ -12,7 +12,7 @@ input_mapping: + to: CM_DOWNLOAD_PATH + new_env_keys: + - CM_ML_MODEL_* +-- RGAT_CHECKPOINT_PATH ++- CM_ML_MODEL_RGAT_CHECKPOINT_PATH + prehook_deps: + - enable_if_env: + CM_DOWNLOAD_TOOL: +diff --git a/script/get-ml-model-rgat/customize.py b/script/get-ml-model-rgat/customize.py +index 2fc39c59d..ac8feaad7 100644 +--- a/script/get-ml-model-rgat/customize.py ++++ b/script/get-ml-model-rgat/customize.py +@@ -19,12 +19,12 @@ def postprocess(i): + + env = i['env'] + +- if env.get('RGAT_CHECKPOINT_PATH', '') == '': +- env['RGAT_CHECKPOINT_PATH'] = os.path.join( ++ if env.get('CM_ML_MODEL_RGAT_CHECKPOINT_PATH', '') == '': ++ env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] = os.path.join( + env['CM_ML_MODEL_PATH'], "RGAT.pt") + elif env.get('CM_ML_MODEL_PATH', '') == '': +- env['CM_ML_MODEL_PATH'] = env['RGAT_CHECKPOINT_PATH'] ++ env['CM_ML_MODEL_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] + +- env['CM_GET_DEPENDENT_CACHED_PATH'] = env['RGAT_CHECKPOINT_PATH'] ++ env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] + + return {'return': 0} +diff --git a/script/get-mlperf-inference-src/_cm.yaml b/script/get-mlperf-inference-src/_cm.yaml +index 0eb7cf4ff..afadfa9bc 100644 +--- a/script/get-mlperf-inference-src/_cm.yaml ++++ b/script/get-mlperf-inference-src/_cm.yaml +@@ -168,6 +168,12 @@ versions: + env: + CM_MLPERF_LAST_RELEASE: v3.1 + CM_TMP_GIT_CHECKOUT: '' ++ r4.0: ++ env: ++ CM_MLPERF_LAST_RELEASE: v4.0 ++ r4.1: ++ env: ++ CM_MLPERF_LAST_RELEASE: v4.1 + tvm: + env: + CM_MLPERF_LAST_RELEASE: v3.1 +diff --git a/script/get-mlperf-inference-src/customize.py b/script/get-mlperf-inference-src/customize.py +index c9aad1ee1..16669e2d5 100644 +--- a/script/get-mlperf-inference-src/customize.py ++++ b/script/get-mlperf-inference-src/customize.py +@@ -54,7 +54,7 @@ def preprocess(i): + env["CM_GIT_URL"] = "https://github.com/mlcommons/inference" + + if env.get("CM_MLPERF_LAST_RELEASE", '') == '': +- env["CM_MLPERF_LAST_RELEASE"] = "v4.1" ++ env["CM_MLPERF_LAST_RELEASE"] = "v5.0" + + if 'CM_GIT_DEPTH' not in env: + env['CM_GIT_DEPTH'] = '' +diff --git a/script/process-mlperf-accuracy/_cm.yaml b/script/process-mlperf-accuracy/_cm.yaml +index f6d9acd5e..59544fd3a 100644 +--- a/script/process-mlperf-accuracy/_cm.yaml ++++ b/script/process-mlperf-accuracy/_cm.yaml +@@ -261,3 +261,7 @@ variations: + env: + CM_DATASET: terabyte + group: dataset ++ igbh: ++ env: ++ CM_DATASET: igbh ++ group: dataset +diff --git a/script/process-mlperf-accuracy/customize.py b/script/process-mlperf-accuracy/customize.py +index 381b1cdcd..f1d8b7874 100644 +--- a/script/process-mlperf-accuracy/customize.py ++++ b/script/process-mlperf-accuracy/customize.py +@@ -171,6 +171,16 @@ def preprocess(i): + " --dtype " + env.get('CM_ACCURACY_DTYPE', + "float32") + " > '" + out_file + "'" + ++ elif dataset == "igbh": ++ if env.get('CM_DATASET_IGBH_SIZE', '') == '': ++ if env.get('CM_MLPERF_SUBMISSION_GENERATION_STYLE', ++ '') == "full": ++ env['CM_DATASET_IGBH_SIZE'] = "full" ++ else: ++ env['CM_DATASET_IGBH_SIZE'] = "tiny" ++ CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "graph", "R-GAT", "tools", "accuracy_igbh.py") + "' --mlperf-accuracy-file '" + os.path.join( ++ result_dir, "mlperf_log_accuracy.json") + "' --dataset-path '" + env['CM_DATASET_IGBH_PATH'] + "' --dataset-size '" + env['CM_DATASET_IGBH_SIZE'] + "' > '" + out_file + "'" ++ + else: + return {'return': 1, 'error': 'Unsupported dataset'} + +diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml +index 5b9d4b151..05ae0d476 100644 +--- a/script/run-mlperf-inference-app/_cm.yaml ++++ b/script/run-mlperf-inference-app/_cm.yaml +@@ -360,6 +360,19 @@ variations: + mlperf-inference-nvidia-scratch-space: + tags: _version.r4_1 + group: benchmark-version ++ ++ r5.0-dev: ++ env: ++ CM_MLPERF_INFERENCE_VERSION: '5.0-dev' ++ CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r5.0-dev_default ++ group: benchmark-version ++ adr: ++ get-mlperf-inference-results-dir: ++ tags: _version.r5.0-dev ++ get-mlperf-inference-submission-dir: ++ tags: _version.r5.0-dev ++ mlperf-inference-nvidia-scratch-space: ++ tags: _version.r5.0-dev + + short: + add_deps_recursive: diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 85fddc989e..8fa3df206a 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -482,13 +482,12 @@ deps: ## RGAT - tags: get,ml-model,rgat names: - - ml-model - rgat-model enable_if_env: CM_MODEL: - rgat skip_if_env: - RGAT_CHECKPOINT_PATH: + CM_ML_MODEL_RGAT_CHECKPOINT_PATH: - 'on' ######################################################################## @@ -620,6 +619,9 @@ deps: enable_if_env: CM_MODEL: - rgat + skip_if_env: + CM_DATASET_IGBH_PATH: + - "on" ######################################################################## # Install MLPerf inference dependencies @@ -1224,27 +1226,45 @@ variations: group: models env: CM_MODEL: rgat + adr: + pytorch: + version: 2.1.0 deps: - tags: get,generic-python-lib,_package.colorama - tags: get,generic-python-lib,_package.tqdm - tags: get,generic-python-lib,_package.requests - tags: get,generic-python-lib,_package.torchdata - - tags: get,generic-python-lib,_package.torch-geometric - - tags: get,generic-python-lib,_package.torch-scatter - - tags: get,generic-python-lib,_package.torch-sparse + version: 0.7.0 + - tags: get,generic-python-lib,_package.torchvision + version: 0.16.0 - tags: get,generic-python-lib,_package.pybind11 - tags: get,generic-python-lib,_package.PyYAML + - tags: get,generic-python-lib,_package.numpy + version: 1.26.4 - tags: get,generic-python-lib,_package.pydantic - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/IllinoisGraphBenchmark/IGB-Datasets.git - - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/repo.html - enable_if_env: - CM_MLPERF_DEVICE: - - cpu + + rgat,cuda: + deps: - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/cu121/repo.html - enable_if_env: - CM_MLPERF_DEVICE: - - gpu - + - tags: get,generic-python-lib,_package.torch-scatter + - tags: get,generic-python-lib,_package.torch-sparse + - tags: get,generic-python-lib,_package.torch-geometric + env: + CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>.html" + + rgat,cpu: + deps: + - tags: get,generic-python-lib,_package.torch-geometric + env: + CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" + - tags: get,generic-python-lib,_package.torch-scatter + env: + CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" + - tags: get,generic-python-lib,_package.torch-sparse + env: + CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" + - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/repo.html # Target devices cpu: diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index 8cd17e7de5..dcffa5672d 100644 --- a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -115,10 +115,12 @@ def preprocess(i): scenario_extra_options = '' NUM_THREADS = env['CM_NUM_THREADS'] - if int(NUM_THREADS) > 2 and env['CM_MLPERF_DEVICE'] == "gpu": + if int( + NUM_THREADS) > 2 and env['CM_MLPERF_DEVICE'] == "gpu" and env['CM_MODEL'] != "rgat": NUM_THREADS = "2" # Don't use more than 2 threads when run on GPU - if env['CM_MODEL'] in ['resnet50', 'retinanet', 'stable-diffusion-xl']: + if env['CM_MODEL'] in ['resnet50', 'retinanet', + 'stable-diffusion-xl', 'rgat']: scenario_extra_options += " --threads " + NUM_THREADS ml_model_name = env['CM_MODEL'] @@ -485,15 +487,16 @@ def get_run_cmd_reference( # have to add the condition for running in debug mode or real run mode cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " main.py " \ " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + \ - " --dataset-path " + env['CM_IGBH_DATASET_PATH'] + \ - " --device " + device.replace("cuda", "cuda:0") + \ + " --dataset-path " + env['CM_DATASET_IGBH_PATH'] + \ + " --device " + device.replace("cuda", "gpu") + \ env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ scenario_extra_options + mode_extra_options + \ " --output " + env['CM_MLPERF_OUTPUT_DIR'] + \ ' --dtype ' + dtype_rgat + \ - " --model-path " + env['RGAT_CHECKPOINT_PATH'] + \ - " --mlperf_conf " + \ - os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "mlperf.conf") + " --model-path " + env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] + + if env.get('CM_ACTIVATE_RGAT_IN_MEMORY', '') == "yes": + cmd += " --in-memory " if env.get('CM_NETWORK_LOADGEN', '') in ["lon", "sut"]: cmd = cmd + " " + "--network " + env['CM_NETWORK_LOADGEN'] diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index ffb4a26b86..4c368346eb 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -767,6 +767,20 @@ variations: env: CM_MODEL: rgat + posthook_deps: + - enable_if_env: + CM_MLPERF_LOADGEN_MODE: + - accuracy + - all + CM_MLPERF_ACCURACY_RESULTS_DIR: + - 'on' + skip_if_env: + CM_MLPERF_IMPLEMENTATION: + - nvidia + names: + - mlperf-accuracy-script + - 3d-unet-accuracy-script + tags: run,accuracy,mlperf,_igbh sdxl: group: @@ -1645,6 +1659,25 @@ variations: CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' CM_MLPERF_INFERENCE_VERSION: '4.1' + r5.0-dev_default: + group: + reproducibility + add_deps_recursive: + nvidia-inference-common-code: + version: r4.1 + tags: _mlcommons + nvidia-inference-server: + version: r4.1 + tags: _mlcommons + intel-harness: + tags: _v4.1 + default_env: + CM_SKIP_SYS_UTILS: 'yes' + CM_REGENERATE_MEASURE_FILES: 'yes' + env: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' + + invalid_variation_combinations: - - retinanet @@ -1768,6 +1801,8 @@ docker: - "${{ CM_NVIDIA_LLAMA_DATASET_FILE_PATH }}:${{ CM_NVIDIA_LLAMA_DATASET_FILE_PATH }}" - "${{ SDXL_CHECKPOINT_PATH }}:${{ SDXL_CHECKPOINT_PATH }}" - "${{ CM_DATASET_KITS19_PREPROCESSED_PATH }}:${{ CM_DATASET_KITS19_PREPROCESSED_PATH }}" + - "${{ CM_DATASET_IGBH_PATH }}:${{ CM_DATASET_IGBH_PATH }}" + - "${{ CM_ML_MODEL_RGAT_CHECKPOINT_PATH }}:${{ CM_ML_MODEL_RGAT_CHECKPOINT_PATH }}" skip_run_cmd: 'no' shm_size: '32gb' interactive: True diff --git a/script/get-cudnn/_cm.yaml b/script/get-cudnn/_cm.yaml index b01506f6dc..fa5ccd2c77 100644 --- a/script/get-cudnn/_cm.yaml +++ b/script/get-cudnn/_cm.yaml @@ -19,6 +19,7 @@ default_env: deps: - tags: detect,os +- tags: detect,sudo - names: - cuda skip_if_env: diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml index c3e78b4640..4750f3ff51 100644 --- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml +++ b/script/get-dataset-mlperf-inference-igbh/_cm.yaml @@ -11,7 +11,8 @@ tags: - inference uid: 824e61316c074253 new_env_keys: - - CM_IGBH_DATASET_PATH + - CM_DATASET_IGBH_PATH + - CM_DATASET_IGBH_SIZE input_mapping: out_path: CM_IGBH_DATASET_OUT_PATH deps: @@ -21,6 +22,9 @@ deps: - tags: get,python names: - get-python + - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/anandhu-eng/IGB-Datasets.git + - tags: get,generic-python-lib,_package.colorama + - tags: get,generic-python-lib,_package.tqdm prehook_deps: #paper - env: @@ -359,13 +363,13 @@ variations: default: true group: dataset-type env: - CM_IGBH_DATASET_TYPE: debug - CM_IGBH_DATASET_SIZE: tiny + CM_DATASET_IGBH_TYPE: debug + CM_DATASET_IGBH_SIZE: tiny full: group: dataset-type env: - CM_IGBH_DATASET_TYPE: full - CM_IGBH_DATASET_SIZE: full + CM_DATASET_IGBH_TYPE: debug + CM_DATASET_IGBH_SIZE: tiny glt: env: CM_IGBH_GRAPH_COMPRESS: yes diff --git a/script/get-dataset-mlperf-inference-igbh/customize.py b/script/get-dataset-mlperf-inference-igbh/customize.py index 9d4240209a..a0e6f24a64 100644 --- a/script/get-dataset-mlperf-inference-igbh/customize.py +++ b/script/get-dataset-mlperf-inference-igbh/customize.py @@ -27,18 +27,18 @@ def preprocess(i): x_sep = " && " # download the model - if env['CM_IGBH_DATASET_TYPE'] == "debug": + if env['CM_DATASET_IGBH_TYPE'] == "debug": run_cmd += x_sep + env['CM_PYTHON_BIN_WITH_PATH'] + \ f" tools/download_igbh_test.py --target-path {download_loc} " # split seeds run_cmd += x_sep + \ - f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {env['CM_IGBH_DATASET_SIZE']}" + f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']}" # compress graph(for glt implementation) if env.get('CM_IGBH_GRAPH_COMPRESS', '') == "yes": run_cmd += x_sep + \ - f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/compress_graph.py --path {download_loc} --dataset_size {env['CM_IGBH_DATASET_SIZE']} --layout {env['CM_IGBH_GRAPH_COMPRESS_LAYOUT']}" + f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/compress_graph.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} --layout {env['CM_IGBH_GRAPH_COMPRESS_LAYOUT']}" env['CM_RUN_CMD'] = run_cmd @@ -49,10 +49,10 @@ def postprocess(i): env = i['env'] - env['CM_IGBH_DATASET_PATH'] = env.get( + env['CM_DATASET_IGBH_PATH'] = env.get( 'CM_IGBH_DATASET_OUT_PATH', os.getcwd()) print( - f"Path to the IGBH dataset: {os.path.join(env['CM_IGBH_DATASET_PATH'], env['CM_IGBH_DATASET_SIZE'])}") + f"Path to the IGBH dataset: {os.path.join(env['CM_DATASET_IGBH_PATH'], env['CM_DATASET_IGBH_SIZE'])}") return {'return': 0} diff --git a/script/get-ml-model-rgat/_cm.yaml b/script/get-ml-model-rgat/_cm.yaml index 0bc4b1eab1..644bf688a3 100644 --- a/script/get-ml-model-rgat/_cm.yaml +++ b/script/get-ml-model-rgat/_cm.yaml @@ -12,7 +12,7 @@ input_mapping: to: CM_DOWNLOAD_PATH new_env_keys: - CM_ML_MODEL_* -- RGAT_CHECKPOINT_PATH +- CM_ML_MODEL_RGAT_CHECKPOINT_PATH prehook_deps: - enable_if_env: CM_DOWNLOAD_TOOL: diff --git a/script/get-ml-model-rgat/customize.py b/script/get-ml-model-rgat/customize.py index 2fc39c59d2..ac8feaad7a 100644 --- a/script/get-ml-model-rgat/customize.py +++ b/script/get-ml-model-rgat/customize.py @@ -19,12 +19,12 @@ def postprocess(i): env = i['env'] - if env.get('RGAT_CHECKPOINT_PATH', '') == '': - env['RGAT_CHECKPOINT_PATH'] = os.path.join( + if env.get('CM_ML_MODEL_RGAT_CHECKPOINT_PATH', '') == '': + env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] = os.path.join( env['CM_ML_MODEL_PATH'], "RGAT.pt") elif env.get('CM_ML_MODEL_PATH', '') == '': - env['CM_ML_MODEL_PATH'] = env['RGAT_CHECKPOINT_PATH'] + env['CM_ML_MODEL_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] - env['CM_GET_DEPENDENT_CACHED_PATH'] = env['RGAT_CHECKPOINT_PATH'] + env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] return {'return': 0} diff --git a/script/get-mlperf-inference-src/_cm.yaml b/script/get-mlperf-inference-src/_cm.yaml index 0eb7cf4ffa..afadfa9bca 100644 --- a/script/get-mlperf-inference-src/_cm.yaml +++ b/script/get-mlperf-inference-src/_cm.yaml @@ -168,6 +168,12 @@ versions: env: CM_MLPERF_LAST_RELEASE: v3.1 CM_TMP_GIT_CHECKOUT: '' + r4.0: + env: + CM_MLPERF_LAST_RELEASE: v4.0 + r4.1: + env: + CM_MLPERF_LAST_RELEASE: v4.1 tvm: env: CM_MLPERF_LAST_RELEASE: v3.1 diff --git a/script/get-mlperf-inference-src/customize.py b/script/get-mlperf-inference-src/customize.py index c9aad1ee14..16669e2d55 100644 --- a/script/get-mlperf-inference-src/customize.py +++ b/script/get-mlperf-inference-src/customize.py @@ -54,7 +54,7 @@ def preprocess(i): env["CM_GIT_URL"] = "https://github.com/mlcommons/inference" if env.get("CM_MLPERF_LAST_RELEASE", '') == '': - env["CM_MLPERF_LAST_RELEASE"] = "v4.1" + env["CM_MLPERF_LAST_RELEASE"] = "v5.0" if 'CM_GIT_DEPTH' not in env: env['CM_GIT_DEPTH'] = '' diff --git a/script/process-mlperf-accuracy/_cm.yaml b/script/process-mlperf-accuracy/_cm.yaml index f6d9acd5e1..59544fd3ab 100644 --- a/script/process-mlperf-accuracy/_cm.yaml +++ b/script/process-mlperf-accuracy/_cm.yaml @@ -261,3 +261,7 @@ variations: env: CM_DATASET: terabyte group: dataset + igbh: + env: + CM_DATASET: igbh + group: dataset diff --git a/script/process-mlperf-accuracy/customize.py b/script/process-mlperf-accuracy/customize.py index 381b1cdcd1..f1d8b78747 100644 --- a/script/process-mlperf-accuracy/customize.py +++ b/script/process-mlperf-accuracy/customize.py @@ -171,6 +171,16 @@ def preprocess(i): " --dtype " + env.get('CM_ACCURACY_DTYPE', "float32") + " > '" + out_file + "'" + elif dataset == "igbh": + if env.get('CM_DATASET_IGBH_SIZE', '') == '': + if env.get('CM_MLPERF_SUBMISSION_GENERATION_STYLE', + '') == "full": + env['CM_DATASET_IGBH_SIZE'] = "full" + else: + env['CM_DATASET_IGBH_SIZE'] = "tiny" + CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "graph", "R-GAT", "tools", "accuracy_igbh.py") + "' --mlperf-accuracy-file '" + os.path.join( + result_dir, "mlperf_log_accuracy.json") + "' --dataset-path '" + env['CM_DATASET_IGBH_PATH'] + "' --dataset-size '" + env['CM_DATASET_IGBH_SIZE'] + "' > '" + out_file + "'" + else: return {'return': 1, 'error': 'Unsupported dataset'} diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 5b9d4b1512..05ae0d476a 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -360,6 +360,19 @@ variations: mlperf-inference-nvidia-scratch-space: tags: _version.r4_1 group: benchmark-version + + r5.0-dev: + env: + CM_MLPERF_INFERENCE_VERSION: '5.0-dev' + CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r5.0-dev_default + group: benchmark-version + adr: + get-mlperf-inference-results-dir: + tags: _version.r5.0-dev + get-mlperf-inference-submission-dir: + tags: _version.r5.0-dev + mlperf-inference-nvidia-scratch-space: + tags: _version.r5.0-dev short: add_deps_recursive: From b57924302b7472c506b5deb5573b04013bf9c2fd Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 9 Dec 2024 19:20:56 +0000 Subject: [PATCH 4/8] Update test_tutorial_retinanet.py | Not use dashboard --- script/test-cm-core/src/tutorials/test_tutorial_retinanet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/test-cm-core/src/tutorials/test_tutorial_retinanet.py b/script/test-cm-core/src/tutorials/test_tutorial_retinanet.py index 0b96f17f5a..bc8d22f783 100644 --- a/script/test-cm-core/src/tutorials/test_tutorial_retinanet.py +++ b/script/test-cm-core/src/tutorials/test_tutorial_retinanet.py @@ -30,7 +30,7 @@ 'name': 'mlperf'}) checks.check_return(r) -r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short,_dashboard', 'adr': +r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr': {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'compiler': {'tags': "gcc"}, 'openimages-preprocessed': {'tags': '_50'}}, 'submitter': 'Community', 'implementation': 'cpp', 'hw_name': 'default', 'model': 'retinanet', 'backend': 'onnxruntime', 'device': 'cpu', 'scenario': 'Offline', 'test_query_count': '10', 'clean': 'true', 'quiet': 'yes'}) From 08cb787419798d914531469893c99d0405ad7888 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 9 Dec 2024 19:22:43 +0000 Subject: [PATCH 5/8] Update test_tutorial_tvm_pip_ge.py | Dont use dashboard --- script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_ge.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_ge.py b/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_ge.py index 692ddeb830..4e17d572d4 100644 --- a/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_ge.py +++ b/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_ge.py @@ -18,7 +18,7 @@ 'device': 'cpu', 'scenario': 'Offline', 'mode': 'accuracy', 'test_query_count': '5', 'clean': 'true', 'quiet': 'yes'}) checks.check_return(r) -r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short,_dashboard', 'adr': +r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr': {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': { 'tags': '_pip-install'}, 'tvm-model': {'tags': '_graph_executor'}}, 'submitter': 'Community', 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', From a32fdeb4ae3adf7d2fcc56dc30159e6516a1e135 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 9 Dec 2024 19:23:07 +0000 Subject: [PATCH 6/8] Update test_tutorial_tvm_pip_vm.py | Dont use dashboard --- script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_vm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_vm.py b/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_vm.py index 5758ad08f2..28bc0132bf 100644 --- a/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_vm.py +++ b/script/test-cm-core/src/tutorials/test_tutorial_tvm_pip_vm.py @@ -20,7 +20,7 @@ 'mode': 'accuracy', 'test_query_count': '5', 'clean': 'true', 'quiet': 'yes'}) checks.check_return(r) -r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short,_dashboard', 'adr': +r = cm.access({'action': 'run', 'automation': 'script', 'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short', 'adr': {'python': {'name': 'mlperf', 'version_min': '3.8'}, 'tvm': {'tags': '_pip-install'}}, 'submitter': 'Community', 'implementation': 'python', 'hw_name': 'default', 'model': 'resnet50', 'backend': 'tvm-onnx', 'device': 'cpu', 'scenario': 'Offline', 'test_query_count': '500', 'clean': 'true', 'quiet': 'yes'}) From 5fc6ef1ec449e6e362feb7bc66cc23febc5e7898 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 9 Dec 2024 19:30:11 +0000 Subject: [PATCH 7/8] Delete rgat.patch --- rgat.patch | 401 ----------------------------------------------------- 1 file changed, 401 deletions(-) delete mode 100644 rgat.patch diff --git a/rgat.patch b/rgat.patch deleted file mode 100644 index 7d8d956281..0000000000 --- a/rgat.patch +++ /dev/null @@ -1,401 +0,0 @@ -diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml -index 85fddc989..8fa3df206 100644 ---- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml -+++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml -@@ -482,13 +482,12 @@ deps: - ## RGAT - - tags: get,ml-model,rgat - names: -- - ml-model - - rgat-model - enable_if_env: - CM_MODEL: - - rgat - skip_if_env: -- RGAT_CHECKPOINT_PATH: -+ CM_ML_MODEL_RGAT_CHECKPOINT_PATH: - - 'on' - - ######################################################################## -@@ -620,6 +619,9 @@ deps: - enable_if_env: - CM_MODEL: - - rgat -+ skip_if_env: -+ CM_DATASET_IGBH_PATH: -+ - "on" - - ######################################################################## - # Install MLPerf inference dependencies -@@ -1224,27 +1226,45 @@ variations: - group: models - env: - CM_MODEL: rgat -+ adr: -+ pytorch: -+ version: 2.1.0 - deps: - - tags: get,generic-python-lib,_package.colorama - - tags: get,generic-python-lib,_package.tqdm - - tags: get,generic-python-lib,_package.requests - - tags: get,generic-python-lib,_package.torchdata -- - tags: get,generic-python-lib,_package.torch-geometric -- - tags: get,generic-python-lib,_package.torch-scatter -- - tags: get,generic-python-lib,_package.torch-sparse -+ version: 0.7.0 -+ - tags: get,generic-python-lib,_package.torchvision -+ version: 0.16.0 - - tags: get,generic-python-lib,_package.pybind11 - - tags: get,generic-python-lib,_package.PyYAML -+ - tags: get,generic-python-lib,_package.numpy -+ version: 1.26.4 - - tags: get,generic-python-lib,_package.pydantic - - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/IllinoisGraphBenchmark/IGB-Datasets.git -- - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/repo.html -- enable_if_env: -- CM_MLPERF_DEVICE: -- - cpu -+ -+ rgat,cuda: -+ deps: - - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/cu121/repo.html -- enable_if_env: -- CM_MLPERF_DEVICE: -- - gpu -- -+ - tags: get,generic-python-lib,_package.torch-scatter -+ - tags: get,generic-python-lib,_package.torch-sparse -+ - tags: get,generic-python-lib,_package.torch-geometric -+ env: -+ CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>.html" -+ -+ rgat,cpu: -+ deps: -+ - tags: get,generic-python-lib,_package.torch-geometric -+ env: -+ CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" -+ - tags: get,generic-python-lib,_package.torch-scatter -+ env: -+ CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" -+ - tags: get,generic-python-lib,_package.torch-sparse -+ env: -+ CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" -+ - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/repo.html - - # Target devices - cpu: -diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py -index 8cd17e7de..dcffa5672 100644 ---- a/script/app-mlperf-inference-mlcommons-python/customize.py -+++ b/script/app-mlperf-inference-mlcommons-python/customize.py -@@ -115,10 +115,12 @@ def preprocess(i): - scenario_extra_options = '' - - NUM_THREADS = env['CM_NUM_THREADS'] -- if int(NUM_THREADS) > 2 and env['CM_MLPERF_DEVICE'] == "gpu": -+ if int( -+ NUM_THREADS) > 2 and env['CM_MLPERF_DEVICE'] == "gpu" and env['CM_MODEL'] != "rgat": - NUM_THREADS = "2" # Don't use more than 2 threads when run on GPU - -- if env['CM_MODEL'] in ['resnet50', 'retinanet', 'stable-diffusion-xl']: -+ if env['CM_MODEL'] in ['resnet50', 'retinanet', -+ 'stable-diffusion-xl', 'rgat']: - scenario_extra_options += " --threads " + NUM_THREADS - - ml_model_name = env['CM_MODEL'] -@@ -485,15 +487,16 @@ def get_run_cmd_reference( - # have to add the condition for running in debug mode or real run mode - cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " main.py " \ - " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + \ -- " --dataset-path " + env['CM_IGBH_DATASET_PATH'] + \ -- " --device " + device.replace("cuda", "cuda:0") + \ -+ " --dataset-path " + env['CM_DATASET_IGBH_PATH'] + \ -+ " --device " + device.replace("cuda", "gpu") + \ - env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ - scenario_extra_options + mode_extra_options + \ - " --output " + env['CM_MLPERF_OUTPUT_DIR'] + \ - ' --dtype ' + dtype_rgat + \ -- " --model-path " + env['RGAT_CHECKPOINT_PATH'] + \ -- " --mlperf_conf " + \ -- os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "mlperf.conf") -+ " --model-path " + env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] -+ -+ if env.get('CM_ACTIVATE_RGAT_IN_MEMORY', '') == "yes": -+ cmd += " --in-memory " - - if env.get('CM_NETWORK_LOADGEN', '') in ["lon", "sut"]: - cmd = cmd + " " + "--network " + env['CM_NETWORK_LOADGEN'] -diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml -index ffb4a26b8..4c368346e 100644 ---- a/script/app-mlperf-inference/_cm.yaml -+++ b/script/app-mlperf-inference/_cm.yaml -@@ -767,6 +767,20 @@ variations: - env: - CM_MODEL: - rgat -+ posthook_deps: -+ - enable_if_env: -+ CM_MLPERF_LOADGEN_MODE: -+ - accuracy -+ - all -+ CM_MLPERF_ACCURACY_RESULTS_DIR: -+ - 'on' -+ skip_if_env: -+ CM_MLPERF_IMPLEMENTATION: -+ - nvidia -+ names: -+ - mlperf-accuracy-script -+ - 3d-unet-accuracy-script -+ tags: run,accuracy,mlperf,_igbh - - sdxl: - group: -@@ -1645,6 +1659,25 @@ variations: - CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' - CM_MLPERF_INFERENCE_VERSION: '4.1' - -+ r5.0-dev_default: -+ group: -+ reproducibility -+ add_deps_recursive: -+ nvidia-inference-common-code: -+ version: r4.1 -+ tags: _mlcommons -+ nvidia-inference-server: -+ version: r4.1 -+ tags: _mlcommons -+ intel-harness: -+ tags: _v4.1 -+ default_env: -+ CM_SKIP_SYS_UTILS: 'yes' -+ CM_REGENERATE_MEASURE_FILES: 'yes' -+ env: -+ CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' -+ -+ - invalid_variation_combinations: - - - - retinanet -@@ -1768,6 +1801,8 @@ docker: - - "${{ CM_NVIDIA_LLAMA_DATASET_FILE_PATH }}:${{ CM_NVIDIA_LLAMA_DATASET_FILE_PATH }}" - - "${{ SDXL_CHECKPOINT_PATH }}:${{ SDXL_CHECKPOINT_PATH }}" - - "${{ CM_DATASET_KITS19_PREPROCESSED_PATH }}:${{ CM_DATASET_KITS19_PREPROCESSED_PATH }}" -+ - "${{ CM_DATASET_IGBH_PATH }}:${{ CM_DATASET_IGBH_PATH }}" -+ - "${{ CM_ML_MODEL_RGAT_CHECKPOINT_PATH }}:${{ CM_ML_MODEL_RGAT_CHECKPOINT_PATH }}" - skip_run_cmd: 'no' - shm_size: '32gb' - interactive: True -diff --git a/script/get-cudnn/_cm.yaml b/script/get-cudnn/_cm.yaml -index b01506f6d..fa5ccd2c7 100644 ---- a/script/get-cudnn/_cm.yaml -+++ b/script/get-cudnn/_cm.yaml -@@ -19,6 +19,7 @@ default_env: - - deps: - - tags: detect,os -+- tags: detect,sudo - - names: - - cuda - skip_if_env: -diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml -index c3e78b464..4750f3ff5 100644 ---- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml -+++ b/script/get-dataset-mlperf-inference-igbh/_cm.yaml -@@ -11,7 +11,8 @@ tags: - - inference - uid: 824e61316c074253 - new_env_keys: -- - CM_IGBH_DATASET_PATH -+ - CM_DATASET_IGBH_PATH -+ - CM_DATASET_IGBH_SIZE - input_mapping: - out_path: CM_IGBH_DATASET_OUT_PATH - deps: -@@ -21,6 +22,9 @@ deps: - - tags: get,python - names: - - get-python -+ - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/anandhu-eng/IGB-Datasets.git -+ - tags: get,generic-python-lib,_package.colorama -+ - tags: get,generic-python-lib,_package.tqdm - prehook_deps: - #paper - - env: -@@ -359,13 +363,13 @@ variations: - default: true - group: dataset-type - env: -- CM_IGBH_DATASET_TYPE: debug -- CM_IGBH_DATASET_SIZE: tiny -+ CM_DATASET_IGBH_TYPE: debug -+ CM_DATASET_IGBH_SIZE: tiny - full: - group: dataset-type - env: -- CM_IGBH_DATASET_TYPE: full -- CM_IGBH_DATASET_SIZE: full -+ CM_DATASET_IGBH_TYPE: debug -+ CM_DATASET_IGBH_SIZE: tiny - glt: - env: - CM_IGBH_GRAPH_COMPRESS: yes -diff --git a/script/get-dataset-mlperf-inference-igbh/customize.py b/script/get-dataset-mlperf-inference-igbh/customize.py -index 9d4240209..a0e6f24a6 100644 ---- a/script/get-dataset-mlperf-inference-igbh/customize.py -+++ b/script/get-dataset-mlperf-inference-igbh/customize.py -@@ -27,18 +27,18 @@ def preprocess(i): - x_sep = " && " - - # download the model -- if env['CM_IGBH_DATASET_TYPE'] == "debug": -+ if env['CM_DATASET_IGBH_TYPE'] == "debug": - run_cmd += x_sep + env['CM_PYTHON_BIN_WITH_PATH'] + \ - f" tools/download_igbh_test.py --target-path {download_loc} " - - # split seeds - run_cmd += x_sep + \ -- f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {env['CM_IGBH_DATASET_SIZE']}" -+ f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']}" - - # compress graph(for glt implementation) - if env.get('CM_IGBH_GRAPH_COMPRESS', '') == "yes": - run_cmd += x_sep + \ -- f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/compress_graph.py --path {download_loc} --dataset_size {env['CM_IGBH_DATASET_SIZE']} --layout {env['CM_IGBH_GRAPH_COMPRESS_LAYOUT']}" -+ f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/compress_graph.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} --layout {env['CM_IGBH_GRAPH_COMPRESS_LAYOUT']}" - - env['CM_RUN_CMD'] = run_cmd - -@@ -49,10 +49,10 @@ def postprocess(i): - - env = i['env'] - -- env['CM_IGBH_DATASET_PATH'] = env.get( -+ env['CM_DATASET_IGBH_PATH'] = env.get( - 'CM_IGBH_DATASET_OUT_PATH', os.getcwd()) - - print( -- f"Path to the IGBH dataset: {os.path.join(env['CM_IGBH_DATASET_PATH'], env['CM_IGBH_DATASET_SIZE'])}") -+ f"Path to the IGBH dataset: {os.path.join(env['CM_DATASET_IGBH_PATH'], env['CM_DATASET_IGBH_SIZE'])}") - - return {'return': 0} -diff --git a/script/get-ml-model-rgat/_cm.yaml b/script/get-ml-model-rgat/_cm.yaml -index 0bc4b1eab..644bf688a 100644 ---- a/script/get-ml-model-rgat/_cm.yaml -+++ b/script/get-ml-model-rgat/_cm.yaml -@@ -12,7 +12,7 @@ input_mapping: - to: CM_DOWNLOAD_PATH - new_env_keys: - - CM_ML_MODEL_* --- RGAT_CHECKPOINT_PATH -+- CM_ML_MODEL_RGAT_CHECKPOINT_PATH - prehook_deps: - - enable_if_env: - CM_DOWNLOAD_TOOL: -diff --git a/script/get-ml-model-rgat/customize.py b/script/get-ml-model-rgat/customize.py -index 2fc39c59d..ac8feaad7 100644 ---- a/script/get-ml-model-rgat/customize.py -+++ b/script/get-ml-model-rgat/customize.py -@@ -19,12 +19,12 @@ def postprocess(i): - - env = i['env'] - -- if env.get('RGAT_CHECKPOINT_PATH', '') == '': -- env['RGAT_CHECKPOINT_PATH'] = os.path.join( -+ if env.get('CM_ML_MODEL_RGAT_CHECKPOINT_PATH', '') == '': -+ env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] = os.path.join( - env['CM_ML_MODEL_PATH'], "RGAT.pt") - elif env.get('CM_ML_MODEL_PATH', '') == '': -- env['CM_ML_MODEL_PATH'] = env['RGAT_CHECKPOINT_PATH'] -+ env['CM_ML_MODEL_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] - -- env['CM_GET_DEPENDENT_CACHED_PATH'] = env['RGAT_CHECKPOINT_PATH'] -+ env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] - - return {'return': 0} -diff --git a/script/get-mlperf-inference-src/_cm.yaml b/script/get-mlperf-inference-src/_cm.yaml -index 0eb7cf4ff..afadfa9bc 100644 ---- a/script/get-mlperf-inference-src/_cm.yaml -+++ b/script/get-mlperf-inference-src/_cm.yaml -@@ -168,6 +168,12 @@ versions: - env: - CM_MLPERF_LAST_RELEASE: v3.1 - CM_TMP_GIT_CHECKOUT: '' -+ r4.0: -+ env: -+ CM_MLPERF_LAST_RELEASE: v4.0 -+ r4.1: -+ env: -+ CM_MLPERF_LAST_RELEASE: v4.1 - tvm: - env: - CM_MLPERF_LAST_RELEASE: v3.1 -diff --git a/script/get-mlperf-inference-src/customize.py b/script/get-mlperf-inference-src/customize.py -index c9aad1ee1..16669e2d5 100644 ---- a/script/get-mlperf-inference-src/customize.py -+++ b/script/get-mlperf-inference-src/customize.py -@@ -54,7 +54,7 @@ def preprocess(i): - env["CM_GIT_URL"] = "https://github.com/mlcommons/inference" - - if env.get("CM_MLPERF_LAST_RELEASE", '') == '': -- env["CM_MLPERF_LAST_RELEASE"] = "v4.1" -+ env["CM_MLPERF_LAST_RELEASE"] = "v5.0" - - if 'CM_GIT_DEPTH' not in env: - env['CM_GIT_DEPTH'] = '' -diff --git a/script/process-mlperf-accuracy/_cm.yaml b/script/process-mlperf-accuracy/_cm.yaml -index f6d9acd5e..59544fd3a 100644 ---- a/script/process-mlperf-accuracy/_cm.yaml -+++ b/script/process-mlperf-accuracy/_cm.yaml -@@ -261,3 +261,7 @@ variations: - env: - CM_DATASET: terabyte - group: dataset -+ igbh: -+ env: -+ CM_DATASET: igbh -+ group: dataset -diff --git a/script/process-mlperf-accuracy/customize.py b/script/process-mlperf-accuracy/customize.py -index 381b1cdcd..f1d8b7874 100644 ---- a/script/process-mlperf-accuracy/customize.py -+++ b/script/process-mlperf-accuracy/customize.py -@@ -171,6 +171,16 @@ def preprocess(i): - " --dtype " + env.get('CM_ACCURACY_DTYPE', - "float32") + " > '" + out_file + "'" - -+ elif dataset == "igbh": -+ if env.get('CM_DATASET_IGBH_SIZE', '') == '': -+ if env.get('CM_MLPERF_SUBMISSION_GENERATION_STYLE', -+ '') == "full": -+ env['CM_DATASET_IGBH_SIZE'] = "full" -+ else: -+ env['CM_DATASET_IGBH_SIZE'] = "tiny" -+ CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "graph", "R-GAT", "tools", "accuracy_igbh.py") + "' --mlperf-accuracy-file '" + os.path.join( -+ result_dir, "mlperf_log_accuracy.json") + "' --dataset-path '" + env['CM_DATASET_IGBH_PATH'] + "' --dataset-size '" + env['CM_DATASET_IGBH_SIZE'] + "' > '" + out_file + "'" -+ - else: - return {'return': 1, 'error': 'Unsupported dataset'} - -diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml -index 5b9d4b151..05ae0d476 100644 ---- a/script/run-mlperf-inference-app/_cm.yaml -+++ b/script/run-mlperf-inference-app/_cm.yaml -@@ -360,6 +360,19 @@ variations: - mlperf-inference-nvidia-scratch-space: - tags: _version.r4_1 - group: benchmark-version -+ -+ r5.0-dev: -+ env: -+ CM_MLPERF_INFERENCE_VERSION: '5.0-dev' -+ CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r5.0-dev_default -+ group: benchmark-version -+ adr: -+ get-mlperf-inference-results-dir: -+ tags: _version.r5.0-dev -+ get-mlperf-inference-submission-dir: -+ tags: _version.r5.0-dev -+ mlperf-inference-nvidia-scratch-space: -+ tags: _version.r5.0-dev - - short: - add_deps_recursive: From d96064708e04ac66519ac4e72636883c65477ba1 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 10 Dec 2024 04:16:07 +0000 Subject: [PATCH 8/8] Update _cm.yaml --- script/get-mlperf-inference-src/_cm.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/script/get-mlperf-inference-src/_cm.yaml b/script/get-mlperf-inference-src/_cm.yaml index 6a657e32d0..c5e195a889 100644 --- a/script/get-mlperf-inference-src/_cm.yaml +++ b/script/get-mlperf-inference-src/_cm.yaml @@ -167,9 +167,15 @@ versions: CM_MLPERF_LAST_RELEASE: v3.1 CM_TMP_GIT_CHECKOUT: '' r4.0: + adr: + inference-git-repo: + tags: _tag.v4.0 env: CM_MLPERF_LAST_RELEASE: v4.0 r4.1: + adr: + inference-git-repo: + tags: _tag.v4.1 env: CM_MLPERF_LAST_RELEASE: v4.1 tvm: