From 4897ededd5fe085127035a09da0a74c752f6a745 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Fri, 13 Dec 2024 07:21:09 +0000 Subject: [PATCH 01/34] [Automated Commit] Format Codebase --- script/get-docker/customize.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/script/get-docker/customize.py b/script/get-docker/customize.py index 30ccf627d..fbf0a5bbf 100644 --- a/script/get-docker/customize.py +++ b/script/get-docker/customize.py @@ -52,9 +52,8 @@ def detect_version(i): if "podman" in r['string'].lower(): tool = "podman" - print(i['recursion_spaces'] + ' Detected version: {}'.format(version)) - return {'return': 0, 'version': version, "tool":tool} + return {'return': 0, 'version': version, "tool": tool} def postprocess(i): @@ -76,7 +75,7 @@ def postprocess(i): env['CM_DOCKER_CACHE_TAGS'] = 'version-' + version env['CM_DOCKER_VERSION'] = version - + env['CM_CONTAINER_TOOL'] = tool return {'return': 0, 'version': version} From 51d4fdf78a0cad333fc7e7dd25176369b8d7ccf1 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 09:07:47 +0000 Subject: [PATCH 02/34] Fixes for rgat submission generation --- script/get-ml-model-rgat/_cm.yaml | 1 + script/process-mlperf-accuracy/customize.py | 24 ++++++++++++++------- script/run-mlperf-inference-app/_cm.yaml | 2 ++ 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/script/get-ml-model-rgat/_cm.yaml b/script/get-ml-model-rgat/_cm.yaml index d7615acd2..c4ecc56e0 100644 --- a/script/get-ml-model-rgat/_cm.yaml +++ b/script/get-ml-model-rgat/_cm.yaml @@ -63,4 +63,5 @@ variations: group: download-tool rclone,fp32: env: + CM_ML_MODEL_STARTING_WEIGHTS_FILENAME: https://github.com/mlcommons/inference/tree/master/graph/R-GAT#download-model-using-rclone CM_DOWNLOAD_URL: mlc-inference:mlcommons-inference-wg-public/R-GAT/RGAT.pt diff --git a/script/process-mlperf-accuracy/customize.py b/script/process-mlperf-accuracy/customize.py index f1d8b7874..254c51772 100644 --- a/script/process-mlperf-accuracy/customize.py +++ b/script/process-mlperf-accuracy/customize.py @@ -118,18 +118,24 @@ def preprocess(i): extra_options = "" if env.get('CM_SDXL_STATISTICS_FILE_PATH', '') != '': - extra_options += f" --statistics-path '{env['CM_SDXL_STATISTICS_FILE_PATH']}' " + extra_options += f" --statistics-path '{ + env['CM_SDXL_STATISTICS_FILE_PATH']}' " if env.get('CM_SDXL_COMPLIANCE_IMAGES_PATH', '') != '': - extra_options += f" --compliance-images-path '{env['CM_SDXL_COMPLIANCE_IMAGES_PATH']}' " + extra_options += f" --compliance-images-path '{ + env['CM_SDXL_COMPLIANCE_IMAGES_PATH']}' " else: - extra_options += f""" --compliance-images-path '{os.path.join(result_dir, "images")}' """ + extra_options += f""" --compliance-images-path '{ + os.path.join( + result_dir, "images")}' """ if env.get('CM_COCO2014_SAMPLE_ID_PATH', '') != '': - extra_options += f" --ids-path '{env['CM_COCO2014_SAMPLE_ID_PATH']}' " + extra_options += f" --ids-path '{ + env['CM_COCO2014_SAMPLE_ID_PATH']}' " if env.get('CM_SDXL_ACCURACY_RUN_DEVICE', '') != '': - extra_options += f" --device '{env['CM_SDXL_ACCURACY_RUN_DEVICE']}' " + extra_options += f" --device '{ + env['CM_SDXL_ACCURACY_RUN_DEVICE']}' " # env['DATASET_ANNOTATIONS_FILE_PATH'] = env['CM_DATASET_ANNOTATIONS_FILE_PATH'] CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "text_to_image", "tools", @@ -162,9 +168,11 @@ def preprocess(i): elif dataset == "terabyte": extra_options = "" if env.get('CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH', '') != '': - extra_options += f" --aggregation-trace-file '{env['CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH']}' " + extra_options += f" --aggregation-trace-file '{ + env['CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH']}' " if env.get('CM_DLRM_V2_DAY23_FILE_PATH', '') != '': - extra_options += f" --day-23-file '{env['CM_DLRM_V2_DAY23_FILE_PATH']}' " + extra_options += f" --day-23-file '{ + env['CM_DLRM_V2_DAY23_FILE_PATH']}' " CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_DLRM_V2_PATH'], "pytorch", "tools", "accuracy-dlrm.py") + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + "'" + extra_options + \ @@ -179,7 +187,7 @@ def preprocess(i): else: env['CM_DATASET_IGBH_SIZE'] = "tiny" CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "graph", "R-GAT", "tools", "accuracy_igbh.py") + "' --mlperf-accuracy-file '" + os.path.join( - result_dir, "mlperf_log_accuracy.json") + "' --dataset-path '" + env['CM_DATASET_IGBH_PATH'] + "' --dataset-size '" + env['CM_DATASET_IGBH_SIZE'] + "' > '" + out_file + "'" + result_dir, "mlperf_log_accuracy.json") + "' --dataset-path '" + env['CM_DATASET_IGBH_PATH'] + "' --dataset-size '" + env['CM_DATASET_IGBH_SIZE'] + "' --output-file '" + out_file + "'" else: return {'return': 1, 'error': 'Unsupported dataset'} diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 689aaabc3..12b57a6f7 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -344,6 +344,7 @@ variations: env: CM_MLPERF_INFERENCE_VERSION: '4.1' CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.1_default + CM_MLPERF_SUBMISSION_CHECKER_VERSION: v4.1 adr: get-mlperf-inference-results-dir: tags: _version.r4_1 @@ -357,6 +358,7 @@ variations: env: CM_MLPERF_INFERENCE_VERSION: '5.0-dev' CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r5.0-dev_default + CM_MLPERF_SUBMISSION_CHECKER_VERSION: v5.0 group: benchmark-version adr: get-mlperf-inference-results-dir: From 7ecb64ae71ad276d7e30614229d0efbd57b23b82 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Fri, 13 Dec 2024 09:08:05 +0000 Subject: [PATCH 03/34] [Automated Commit] Format Codebase --- script/process-mlperf-accuracy/customize.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/script/process-mlperf-accuracy/customize.py b/script/process-mlperf-accuracy/customize.py index 254c51772..579b1eedd 100644 --- a/script/process-mlperf-accuracy/customize.py +++ b/script/process-mlperf-accuracy/customize.py @@ -118,11 +118,11 @@ def preprocess(i): extra_options = "" if env.get('CM_SDXL_STATISTICS_FILE_PATH', '') != '': - extra_options += f" --statistics-path '{ + extra_options += f" - -statistics - path '{ env['CM_SDXL_STATISTICS_FILE_PATH']}' " if env.get('CM_SDXL_COMPLIANCE_IMAGES_PATH', '') != '': - extra_options += f" --compliance-images-path '{ + extra_options += f" - -compliance - images - path '{ env['CM_SDXL_COMPLIANCE_IMAGES_PATH']}' " else: extra_options += f""" --compliance-images-path '{ @@ -130,11 +130,11 @@ def preprocess(i): result_dir, "images")}' """ if env.get('CM_COCO2014_SAMPLE_ID_PATH', '') != '': - extra_options += f" --ids-path '{ + extra_options += f" - -ids - path '{ env['CM_COCO2014_SAMPLE_ID_PATH']}' " if env.get('CM_SDXL_ACCURACY_RUN_DEVICE', '') != '': - extra_options += f" --device '{ + extra_options += f" - -device '{ env['CM_SDXL_ACCURACY_RUN_DEVICE']}' " # env['DATASET_ANNOTATIONS_FILE_PATH'] = env['CM_DATASET_ANNOTATIONS_FILE_PATH'] @@ -168,10 +168,10 @@ def preprocess(i): elif dataset == "terabyte": extra_options = "" if env.get('CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH', '') != '': - extra_options += f" --aggregation-trace-file '{ + extra_options += f" - -aggregation - trace - file '{ env['CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH']}' " if env.get('CM_DLRM_V2_DAY23_FILE_PATH', '') != '': - extra_options += f" --day-23-file '{ + extra_options += f" - -day - 23 - file '{ env['CM_DLRM_V2_DAY23_FILE_PATH']}' " CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_DLRM_V2_PATH'], "pytorch", "tools", "accuracy-dlrm.py") + "' --mlperf-accuracy-file '" + os.path.join(result_dir, From 77da71d08c6ba14460029f741d4bf8ce304811da Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 09:19:34 +0000 Subject: [PATCH 04/34] Fix issue with autoformat --- script/process-mlperf-accuracy/customize.py | 34 +++++++++++++-------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/script/process-mlperf-accuracy/customize.py b/script/process-mlperf-accuracy/customize.py index 579b1eedd..0926f332f 100644 --- a/script/process-mlperf-accuracy/customize.py +++ b/script/process-mlperf-accuracy/customize.py @@ -118,24 +118,30 @@ def preprocess(i): extra_options = "" if env.get('CM_SDXL_STATISTICS_FILE_PATH', '') != '': - extra_options += f" - -statistics - path '{ - env['CM_SDXL_STATISTICS_FILE_PATH']}' " + extra_options += ( + f""" --statistics-path '{ + env['CM_SDXL_STATISTICS_FILE_PATH']}'""" + ) if env.get('CM_SDXL_COMPLIANCE_IMAGES_PATH', '') != '': - extra_options += f" - -compliance - images - path '{ - env['CM_SDXL_COMPLIANCE_IMAGES_PATH']}' " + extra_options += ( + f""" --compliance-images-path '{ + env['CM_SDXL_COMPLIANCE_IMAGES_PATH']}' """ + ) else: extra_options += f""" --compliance-images-path '{ os.path.join( result_dir, "images")}' """ if env.get('CM_COCO2014_SAMPLE_ID_PATH', '') != '': - extra_options += f" - -ids - path '{ - env['CM_COCO2014_SAMPLE_ID_PATH']}' " + extra_options += ( + f" --ids-path '{env['CM_COCO2014_SAMPLE_ID_PATH']}' " + ) if env.get('CM_SDXL_ACCURACY_RUN_DEVICE', '') != '': - extra_options += f" - -device '{ - env['CM_SDXL_ACCURACY_RUN_DEVICE']}' " + extra_options += ( + f" --device '{env['CM_SDXL_ACCURACY_RUN_DEVICE']}' " + ) # env['DATASET_ANNOTATIONS_FILE_PATH'] = env['CM_DATASET_ANNOTATIONS_FILE_PATH'] CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "text_to_image", "tools", @@ -168,11 +174,15 @@ def preprocess(i): elif dataset == "terabyte": extra_options = "" if env.get('CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH', '') != '': - extra_options += f" - -aggregation - trace - file '{ - env['CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH']}' " + extra_options += ( + f" --aggregation-trace-file '{ + env['CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH']}' " + ) if env.get('CM_DLRM_V2_DAY23_FILE_PATH', '') != '': - extra_options += f" - -day - 23 - file '{ - env['CM_DLRM_V2_DAY23_FILE_PATH']}' " + extra_options += ( + f" --day-23-file '{ + env['CM_DLRM_V2_DAY23_FILE_PATH']}' " + ) CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_DLRM_V2_PATH'], "pytorch", "tools", "accuracy-dlrm.py") + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + "'" + extra_options + \ From c6ba03982103a5ded10b4cef5ebf4d361bfb9ec0 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Fri, 13 Dec 2024 09:19:54 +0000 Subject: [PATCH 05/34] [Automated Commit] Format Codebase --- script/process-mlperf-accuracy/customize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/script/process-mlperf-accuracy/customize.py b/script/process-mlperf-accuracy/customize.py index 0926f332f..698227d86 100644 --- a/script/process-mlperf-accuracy/customize.py +++ b/script/process-mlperf-accuracy/customize.py @@ -175,12 +175,12 @@ def preprocess(i): extra_options = "" if env.get('CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH', '') != '': extra_options += ( - f" --aggregation-trace-file '{ + f" - -aggregation - trace - file '{ env['CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH']}' " ) if env.get('CM_DLRM_V2_DAY23_FILE_PATH', '') != '': extra_options += ( - f" --day-23-file '{ + f" - -day - 23 - file '{ env['CM_DLRM_V2_DAY23_FILE_PATH']}' " ) CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_DLRM_V2_PATH'], "pytorch", "tools", From 85b975a6d4f1831b4d5b9966931df5f5ebe1a0a2 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 09:22:55 +0000 Subject: [PATCH 06/34] Fix issue with autoformat --- script/process-mlperf-accuracy/customize.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/script/process-mlperf-accuracy/customize.py b/script/process-mlperf-accuracy/customize.py index 698227d86..21569fd57 100644 --- a/script/process-mlperf-accuracy/customize.py +++ b/script/process-mlperf-accuracy/customize.py @@ -175,13 +175,13 @@ def preprocess(i): extra_options = "" if env.get('CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH', '') != '': extra_options += ( - f" - -aggregation - trace - file '{ - env['CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH']}' " + f""" --aggregation-trace-file '{ + env['CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH']}' """ ) if env.get('CM_DLRM_V2_DAY23_FILE_PATH', '') != '': extra_options += ( - f" - -day - 23 - file '{ - env['CM_DLRM_V2_DAY23_FILE_PATH']}' " + f""" --day-23-file '{ + env['CM_DLRM_V2_DAY23_FILE_PATH']}' """ ) CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_DLRM_V2_PATH'], "pytorch", "tools", "accuracy-dlrm.py") + "' --mlperf-accuracy-file '" + os.path.join(result_dir, From 541b27c6e09e33afa2bc5e3c8cdb5e88e3ff9a85 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 09:24:18 +0000 Subject: [PATCH 07/34] Update test-mlperf-inference-rgat.yml --- .github/workflows/test-mlperf-inference-rgat.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml index de5b0fbb6..9f2f5a307 100644 --- a/.github/workflows/test-mlperf-inference-rgat.yml +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -31,7 +31,7 @@ jobs: cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - name: Test MLPerf Inference R-GAT using ${{ matrix.backend }} on ${{ matrix.os }} run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --quiet -v --target_qps=1 + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 - name: Push Results if: github.repository_owner == 'gateoverflow' env: From deeaa9df4ff237640305085e36e158b116dc8cfb Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 11:09:56 +0000 Subject: [PATCH 08/34] Update test-mlperf-inference-rgat.yml --- .github/workflows/test-mlperf-inference-rgat.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml index 9f2f5a307..478e666c6 100644 --- a/.github/workflows/test-mlperf-inference-rgat.yml +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -31,7 +31,7 @@ jobs: cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - name: Test MLPerf Inference R-GAT using ${{ matrix.backend }} on ${{ matrix.os }} run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --adr.inference-src.tags=_branch.fix_submission_generation --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 - name: Push Results if: github.repository_owner == 'gateoverflow' env: From 18cacd453896cb131637e784864d84dc8f1a0513 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 12:17:13 +0000 Subject: [PATCH 09/34] Update _cm.yaml --- script/get-dataset-mlperf-inference-igbh/_cm.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml index 796d5674e..eac7179b7 100644 --- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml +++ b/script/get-dataset-mlperf-inference-igbh/_cm.yaml @@ -28,6 +28,7 @@ deps: - tags: get,generic-python-lib,_package.colorama - tags: get,generic-python-lib,_package.tqdm + prehook_deps: #paper - env: From c1dd2078b1a2200be5ccba7bef0426b5cc57bf4d Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 14:08:57 +0000 Subject: [PATCH 10/34] Make r5.0-dev the default version for mlperf-inference --- script/run-mlperf-inference-app/_cm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 12b57a6f7..8fe9b88d1 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -327,7 +327,6 @@ variations: tags: _version.r4_0-dev r4.1-dev: - default: true env: CM_MLPERF_INFERENCE_VERSION: '4.1-dev' CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.1-dev_default @@ -355,6 +354,7 @@ variations: group: benchmark-version r5.0-dev: + default: true env: CM_MLPERF_INFERENCE_VERSION: '5.0-dev' CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r5.0-dev_default From ed22615425428a6659e10a12e3a96fac8e28a901 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 14:26:26 +0000 Subject: [PATCH 11/34] Update _cm.yaml --- script/app-image-classification-onnx-py/_cm.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/script/app-image-classification-onnx-py/_cm.yaml b/script/app-image-classification-onnx-py/_cm.yaml index 740a8a18a..e53b91ec2 100644 --- a/script/app-image-classification-onnx-py/_cm.yaml +++ b/script/app-image-classification-onnx-py/_cm.yaml @@ -22,7 +22,6 @@ default_env: deps: - tags: detect,os -#- tags: get,sys-utils-cm - names: - python - python3 From f0326b235843f7f77ba39a1eb0fdf5b129a2be16 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 14:41:17 +0000 Subject: [PATCH 12/34] Update CM_MLPERF_LAST_RELEASE for get-mlperf-inference-src --- script/get-mlperf-inference-src/_cm.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/script/get-mlperf-inference-src/_cm.yaml b/script/get-mlperf-inference-src/_cm.yaml index c100e32e8..b8bd39092 100644 --- a/script/get-mlperf-inference-src/_cm.yaml +++ b/script/get-mlperf-inference-src/_cm.yaml @@ -134,10 +134,10 @@ variations: versions: custom: env: - CM_MLPERF_LAST_RELEASE: v4.1 + CM_MLPERF_LAST_RELEASE: v5.0 deepsparse: env: - CM_MLPERF_LAST_RELEASE: v4.1 + CM_MLPERF_LAST_RELEASE: v5.0 CM_TMP_GIT_CHECKOUT: deepsparse CM_TMP_GIT_URL: https://github.com/neuralmagic/inference main: From 6c4d3a4e536ead50363f9a75d3d26c7200aa9bc0 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 14:42:07 +0000 Subject: [PATCH 13/34] Update test-mlperf-inference-rgat.yml --- .github/workflows/test-mlperf-inference-rgat.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml index 87a9314ca..1902eff54 100644 --- a/.github/workflows/test-mlperf-inference-rgat.yml +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -1,7 +1,7 @@ name: MLPerf inference rgat on: - pull_request_target: + pull_request: branches: [ "main", "dev" ] paths: - '.github/workflows/test-mlperf-inference-rgat.yml' From 5ee6846a8cd73ce58a48fa21c4551c5c8ca66023 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 15:00:16 +0000 Subject: [PATCH 14/34] Pre-create results and measurements dirs for mlperf-inference submission generation --- .../customize.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/script/generate-mlperf-inference-submission/customize.py b/script/generate-mlperf-inference-submission/customize.py index 040e7cb09..8d4010a18 100644 --- a/script/generate-mlperf-inference-submission/customize.py +++ b/script/generate-mlperf-inference-submission/customize.py @@ -160,11 +160,15 @@ def generate_submission(env, state, inp, submission_division): print('* MLPerf inference submitter: {}'.format(submitter)) if env.get('CM_MLPERF_SUT_SW_NOTES_EXTRA', '') != '': - sw_notes = f"{system_meta_tmp['sw_notes']} {env['CM_MLPERF_SUT_SW_NOTES_EXTRA']}" + sw_notes = f"{ + system_meta_tmp['sw_notes']} { + env['CM_MLPERF_SUT_SW_NOTES_EXTRA']}" system_meta_tmp['sw_notes'] = sw_notes if env.get('CM_MLPERF_SUT_HW_NOTES_EXTRA', '') != '': - hw_notes = f"{system_meta_tmp['hw_notes']} {env['CM_MLPERF_SUT_HW_NOTES_EXTRA']}" + hw_notes = f"{ + system_meta_tmp['hw_notes']} { + env['CM_MLPERF_SUT_HW_NOTES_EXTRA']}" system_meta_tmp['hw_notes'] = hw_notes path_submission = os.path.join(path_submission_division, submitter) @@ -297,6 +301,10 @@ def generate_submission(env, state, inp, submission_division): system_path = os.path.join(path_submission, "systems") submission_system_path = system_path + if not os.path.isdir(submission_path): + os.makedirs(submission_path) + if not os.path.isdir(measurement_path): + os.makedirs(measurement_path) if not os.path.isdir(submission_system_path): os.makedirs(submission_system_path) system_file = os.path.join(submission_system_path, sub_res + ".json") From 06b31af62ba4f80e6ac0a23001cbc992b4b7d549 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 15:30:08 +0000 Subject: [PATCH 15/34] Pre-create results and measurements dirs for mlperf-inference submission generation --- script/generate-mlperf-inference-submission/customize.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/script/generate-mlperf-inference-submission/customize.py b/script/generate-mlperf-inference-submission/customize.py index 8d4010a18..a27086237 100644 --- a/script/generate-mlperf-inference-submission/customize.py +++ b/script/generate-mlperf-inference-submission/customize.py @@ -160,15 +160,15 @@ def generate_submission(env, state, inp, submission_division): print('* MLPerf inference submitter: {}'.format(submitter)) if env.get('CM_MLPERF_SUT_SW_NOTES_EXTRA', '') != '': - sw_notes = f"{ + sw_notes = f"""{ system_meta_tmp['sw_notes']} { - env['CM_MLPERF_SUT_SW_NOTES_EXTRA']}" + env['CM_MLPERF_SUT_SW_NOTES_EXTRA']}""" system_meta_tmp['sw_notes'] = sw_notes if env.get('CM_MLPERF_SUT_HW_NOTES_EXTRA', '') != '': - hw_notes = f"{ + hw_notes = f"""{ system_meta_tmp['hw_notes']} { - env['CM_MLPERF_SUT_HW_NOTES_EXTRA']}" + env['CM_MLPERF_SUT_HW_NOTES_EXTRA']}""" system_meta_tmp['hw_notes'] = hw_notes path_submission = os.path.join(path_submission_division, submitter) From 4b9a98881dd06a9e60e999dec74f45616982d74c Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 15:31:46 +0000 Subject: [PATCH 16/34] Use master branch of inference-src for rgat gh action --- .github/workflows/test-mlperf-inference-rgat.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml index 1902eff54..03941023e 100644 --- a/.github/workflows/test-mlperf-inference-rgat.yml +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -31,7 +31,7 @@ jobs: cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - name: Test MLPerf Inference R-GAT using ${{ matrix.backend }} on ${{ matrix.os }} run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --adr.inference-src.tags=_branch.fix_submission_generation --adr.mlperf-implementation.tags=_branch.fix_submission_generation --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 - name: Push Results if: github.repository_owner == 'gateoverflow' env: From 2861fa9e6f7faf33a24bf6975df97e011359b17c Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 14 Dec 2024 01:20:03 +0000 Subject: [PATCH 17/34] pytorch version_max change to 2.4.0 from 2.4.1 --- script/app-mlperf-inference-mlcommons-python/_cm.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 89646244b..45401431f 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -1239,8 +1239,8 @@ variations: CM_MODEL: rgat add_deps_recursive: pytorch: - version_max: "2.4.1" - version_max_usable: "2.4.1" + version_max: "2.4.0" + version_max_usable: "2.4.0" deps: - tags: get,generic-python-lib,_package.colorama - tags: get,generic-python-lib,_package.tqdm From 8cf179637ce78a9d310496a49b766df102d65645 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 14 Dec 2024 15:51:04 +0000 Subject: [PATCH 18/34] Fix typo in igbh download --- script/get-dataset-mlperf-inference-igbh/_cm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml index eac7179b7..eacd5be5c 100644 --- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml +++ b/script/get-dataset-mlperf-inference-igbh/_cm.yaml @@ -34,7 +34,7 @@ prehook_deps: - env: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_feat.npy CM_DOWNLOAD_CHECKSUM: 71058b9ac8011bafa1c5467504452d13 - CM_DOWNLOAD_FILENAME: node_feet.npy + CM_DOWNLOAD_FILENAME: node_feat.npy CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,node_feat force_cache: true From 5307dc42bf1b0e0f263995f55219df15c143dc29 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 15 Dec 2024 12:05:23 +0530 Subject: [PATCH 19/34] Improvements to gh-actions-runner --- script/get-gh-actions-runner/_cm.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/script/get-gh-actions-runner/_cm.yaml b/script/get-gh-actions-runner/_cm.yaml index 3008f6365..287ee254a 100644 --- a/script/get-gh-actions-runner/_cm.yaml +++ b/script/get-gh-actions-runner/_cm.yaml @@ -6,6 +6,7 @@ can_force_cache: true tags: - get - gh +- github - actions-runner - runner-code - runner @@ -29,21 +30,27 @@ deps: variations: config: + group: command + default: true env: CM_GH_ACTIONS_RUNNER_COMMAND: config remove: + group: command env: CM_GH_ACTIONS_RUNNER_COMMAND: remove install: + group: command deps: - tags: get,gh,actions-runner,_config force_cache: yes env: CM_GH_ACTIONS_RUNNER_COMMAND: install uninstall: + group: command env: CM_GH_ACTIONS_RUNNER_COMMAND: uninstall start: + group: command deps: - tags: get,gh,actions-runner,_install force_cache: yes From 311cc3fc067a99c6ec2afd900b64034160655923 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 15 Dec 2024 09:52:56 +0000 Subject: [PATCH 20/34] Update test-nvidia-mlperf-inference-implementations.yml --- .../workflows/test-nvidia-mlperf-inference-implementations.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml index 2974e651c..449fd033b 100644 --- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml +++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - system: [ "GO-spr", "phoenix", "i9" ] + system: [ "GO-spr", "phoenix-Amd-Am5", "GO-i9" ] python-version: [ "3.12" ] model: [ "resnet50", "retinanet", "bert-99", "bert-99.9", "gptj-99.9", "3d-unet-99.9", "sdxl" ] exclude: From 3f1788937b62697ab4453ebf9074e8f20ae8a8e3 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 15 Dec 2024 17:32:17 +0000 Subject: [PATCH 21/34] Update test-mlperf-inference-sdxl.yaml --- .github/workflows/test-mlperf-inference-sdxl.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-mlperf-inference-sdxl.yaml b/.github/workflows/test-mlperf-inference-sdxl.yaml index aea41cee2..59d555449 100644 --- a/.github/workflows/test-mlperf-inference-sdxl.yaml +++ b/.github/workflows/test-mlperf-inference-sdxl.yaml @@ -21,5 +21,5 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --adr.mlperf-implementation.tags=_branch.dev --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --adr.mlperf-implementation.tags=_branch.dev --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions From 39b29e59f35b34302111eb3b6403dc320203300a Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 16 Dec 2024 09:45:09 +0000 Subject: [PATCH 22/34] Update test-mlperf-inference-gptj.yml --- .github/workflows/test-mlperf-inference-gptj.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-mlperf-inference-gptj.yml b/.github/workflows/test-mlperf-inference-gptj.yml index 0562b9176..5416a3012 100644 --- a/.github/workflows/test-mlperf-inference-gptj.yml +++ b/.github/workflows/test-mlperf-inference-gptj.yml @@ -26,6 +26,6 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --get_platform_details=yes --implementation=reference --clean + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --get_platform_details=yes --implementation=reference --clean cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions From f7a95ef9da3925b082d8ad1c2ba5d4554e5db155 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 16 Dec 2024 09:48:45 +0000 Subject: [PATCH 23/34] Update test-amd-mlperf-inference-implementations.yml --- .github/workflows/test-amd-mlperf-inference-implementations.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-amd-mlperf-inference-implementations.yml b/.github/workflows/test-amd-mlperf-inference-implementations.yml index b635d266e..785035aa1 100644 --- a/.github/workflows/test-amd-mlperf-inference-implementations.yml +++ b/.github/workflows/test-amd-mlperf-inference-implementations.yml @@ -22,5 +22,5 @@ jobs: export CM_REPOS=$HOME/GH_CM pip install --upgrade cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_all-scenarios,_full,_r4.1-dev --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=amd --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=rocm --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet --docker_skip_run_cmd=yes + cm run script --tags=run-mlperf,inference,_all-scenarios,_full,_r4.1-dev --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=amd --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --device=rocm --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet --docker_skip_run_cmd=yes # cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c From 294491c15c7f7e033f4883b34a317167af4aeddc Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 16 Dec 2024 09:49:47 +0000 Subject: [PATCH 24/34] Update test-mlperf-inference-llama2.yml --- .github/workflows/test-mlperf-inference-llama2.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-mlperf-inference-llama2.yml b/.github/workflows/test-mlperf-inference-llama2.yml index ab1c9bb48..e25155236 100644 --- a/.github/workflows/test-mlperf-inference-llama2.yml +++ b/.github/workflows/test-mlperf-inference-llama2.yml @@ -31,5 +31,5 @@ jobs: pip install "huggingface_hub[cli]" git config --global credential.helper store huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=llama2-70b-99 --implementation=reference --backend=${{ matrix.backend }} --precision=${{ matrix.precision }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=0.001 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=llama2-70b-99 --implementation=reference --backend=${{ matrix.backend }} --precision=${{ matrix.precision }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=0.001 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions" --quiet --submission_dir=$HOME/gh_action_submissions From 5d0c53dde7a1e25941e92b1fa60da00c66035111 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 16 Dec 2024 09:51:10 +0000 Subject: [PATCH 25/34] Update test-mlperf-inference-mixtral.yml --- .github/workflows/test-mlperf-inference-mixtral.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-mlperf-inference-mixtral.yml b/.github/workflows/test-mlperf-inference-mixtral.yml index 0a6a37708..e6800acb0 100644 --- a/.github/workflows/test-mlperf-inference-mixtral.yml +++ b/.github/workflows/test-mlperf-inference-mixtral.yml @@ -30,5 +30,5 @@ jobs: git config --global credential.helper store huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1 + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1 cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - GO-phoenix" --quiet --submission_dir=$HOME/gh_action_submissions From 65f86e69f921532f06cac19c3e871c95f9354e29 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 16 Dec 2024 12:59:18 +0000 Subject: [PATCH 26/34] Update test-mlperf-inference-dlrm.yml --- .github/workflows/test-mlperf-inference-dlrm.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-mlperf-inference-dlrm.yml b/.github/workflows/test-mlperf-inference-dlrm.yml index 6440d0448..4d7727457 100644 --- a/.github/workflows/test-mlperf-inference-dlrm.yml +++ b/.github/workflows/test-mlperf-inference-dlrm.yml @@ -25,7 +25,7 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_performance-only --adr.mlperf-implementation.tags=_branch.dev --adr.mlperf-implementation.version=custom --submitter="MLCommons" --model=dlrm-v2-99 --implementation=reference --backend=pytorch --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --clean + cm run script --tags=run-mlperf,inference,_performance-only --adr.mlperf-implementation.tags=_branch.dev --adr.mlperf-implementation.version=custom --submitter="MLCommons" --model=dlrm-v2-99 --implementation=reference --backend=pytorch --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --clean build_intel: if: github.repository_owner == 'gateoverflow_off' @@ -45,4 +45,4 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=dlrm-v2-99 --implementation=intel --batch_size=1 --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=dlrm-v2-99 --implementation=intel --batch_size=1 --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean From 973e319b65e2ea7ebdddbf385b8bbf8b1a404a5f Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 16 Dec 2024 13:07:08 +0000 Subject: [PATCH 27/34] Update test-scc24-sdxl.yaml --- .github/workflows/test-scc24-sdxl.yaml | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml index 673dba804..012f0a99a 100644 --- a/.github/workflows/test-scc24-sdxl.yaml +++ b/.github/workflows/test-scc24-sdxl.yaml @@ -9,7 +9,7 @@ jobs: if: github.repository_owner == 'gateoverflow' runs-on: [ self-hosted, linux, x64, GO-spr ] env: - CM_REPOS: $HOME/GH_CM + CM_DOCKER_REPO: mlperf-automations strategy: fail-fast: false matrix: @@ -27,14 +27,16 @@ jobs: pip install --upgrade cm4mlops pip install tabulate cm pull repo - cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean - cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=generate,inference,submission --clean --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions build_nvidia: if: github.repository_owner == 'gateoverflow' runs-on: [ self-hosted, linux, x64, GO-spr] + env: + CM_DOCKER_REPO: mlperf-automations strategy: fail-fast: false matrix: @@ -52,7 +54,7 @@ jobs: pip install --upgrade cm4mlops pip install tabulate cm pull repo - cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --pull_changes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --custom_system_nvidia=yes --clean - cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --pull_changes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --custom_system_nvidia=yes --clean + cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=generate,inference,submission --clean --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions From 01b4d9c16212c01f2e88d486e1afa08566095c1e Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 16 Dec 2024 13:12:36 +0000 Subject: [PATCH 28/34] Update test-scc24-sdxl.yaml --- .github/workflows/test-scc24-sdxl.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml index 012f0a99a..fec9d7dbe 100644 --- a/.github/workflows/test-scc24-sdxl.yaml +++ b/.github/workflows/test-scc24-sdxl.yaml @@ -27,8 +27,8 @@ jobs: pip install --upgrade cm4mlops pip install tabulate cm pull repo - cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean - cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=generate,inference,submission --clean --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions @@ -55,6 +55,6 @@ jobs: pip install tabulate cm pull repo cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --pull_changes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --custom_system_nvidia=yes --clean - cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=generate,inference,submission --clean --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions From 0b97dc4c2d95b2e99b2037fd5920cb0f3d2d03ee Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Wed, 18 Dec 2024 19:43:39 +0530 Subject: [PATCH 29/34] Use dev branch and not fork for mlperf inference test runs --- .../workflows/test-amd-mlperf-inference-implementations.yml | 2 +- ...erf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml | 2 +- .github/workflows/test-mlperf-inference-gptj.yml | 2 +- .github/workflows/test-mlperf-inference-llama2.yml | 6 +++--- .github/workflows/test-mlperf-inference-mixtral.yml | 2 +- .../test-mlperf-inference-mlcommons-cpp-resnet50.yml | 2 +- .github/workflows/test-mlperf-inference-resnet50.yml | 2 +- .github/workflows/test-mlperf-inference-retinanet.yml | 2 +- .github/workflows/test-mlperf-inference-rgat.yml | 2 +- .github/workflows/test-mlperf-inference-sdxl.yaml | 2 +- 10 files changed, 12 insertions(+), 12 deletions(-) diff --git a/.github/workflows/test-amd-mlperf-inference-implementations.yml b/.github/workflows/test-amd-mlperf-inference-implementations.yml index 785035aa1..2e140c32e 100644 --- a/.github/workflows/test-amd-mlperf-inference-implementations.yml +++ b/.github/workflows/test-amd-mlperf-inference-implementations.yml @@ -23,4 +23,4 @@ jobs: pip install --upgrade cm4mlops cm pull repo cm run script --tags=run-mlperf,inference,_all-scenarios,_full,_r4.1-dev --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=amd --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --device=rocm --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet --docker_skip_run_cmd=yes - # cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c + # cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=dev --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c diff --git a/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml b/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml index 3594aaf86..9aa9b8293 100644 --- a/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml +++ b/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml @@ -60,4 +60,4 @@ jobs: git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from Bert GH action on ${{ matrix.os }}" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from Bert GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-gptj.yml b/.github/workflows/test-mlperf-inference-gptj.yml index 5416a3012..db0ed5923 100644 --- a/.github/workflows/test-mlperf-inference-gptj.yml +++ b/.github/workflows/test-mlperf-inference-gptj.yml @@ -27,5 +27,5 @@ jobs: python3 -m pip install cm4mlops cm pull repo cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --get_platform_details=yes --implementation=reference --clean - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-llama2.yml b/.github/workflows/test-mlperf-inference-llama2.yml index e25155236..6a8c1adb8 100644 --- a/.github/workflows/test-mlperf-inference-llama2.yml +++ b/.github/workflows/test-mlperf-inference-llama2.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: MLPerf inference LLAMA 2 70B +name: MLPerf inference LLAMA2-70B on: schedule: @@ -20,7 +20,7 @@ jobs: precision: [ "bfloat16" ] steps: - - name: Test MLPerf Inference LLAMA 2 70B reference implementation + - name: Test MLPerf Inference LLAMA2-70B reference implementation run: | source gh_action/bin/deactivate || python3 -m venv gh_action source gh_action/bin/activate @@ -32,4 +32,4 @@ jobs: git config --global credential.helper store huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=llama2-70b-99 --implementation=reference --backend=${{ matrix.backend }} --precision=${{ matrix.precision }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=0.001 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST=yes --clean - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions" --quiet --submission_dir=$HOME/gh_action_submissions + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-mixtral.yml b/.github/workflows/test-mlperf-inference-mixtral.yml index e6800acb0..b29341119 100644 --- a/.github/workflows/test-mlperf-inference-mixtral.yml +++ b/.github/workflows/test-mlperf-inference-mixtral.yml @@ -31,4 +31,4 @@ jobs: huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential cm pull repo cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1 - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - GO-phoenix" --quiet --submission_dir=$HOME/gh_action_submissions + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - GO-phoenix" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml b/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml index ff856ad54..72b0d1fe3 100644 --- a/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml @@ -59,4 +59,4 @@ jobs: git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from MLCommons C++ ResNet50 GH action on ${{ matrix.os }}" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from MLCommons C++ ResNet50 GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-resnet50.yml b/.github/workflows/test-mlperf-inference-resnet50.yml index 4388e4bb9..54cb7c91c 100644 --- a/.github/workflows/test-mlperf-inference-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-resnet50.yml @@ -64,4 +64,4 @@ jobs: git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from R50 GH action on ${{ matrix.os }}" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from R50 GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-retinanet.yml b/.github/workflows/test-mlperf-inference-retinanet.yml index eac9346fe..a319f6772 100644 --- a/.github/workflows/test-mlperf-inference-retinanet.yml +++ b/.github/workflows/test-mlperf-inference-retinanet.yml @@ -64,4 +64,4 @@ jobs: git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from Retinanet GH action on ${{ matrix.os }}" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from Retinanet GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml index 03941023e..259edab72 100644 --- a/.github/workflows/test-mlperf-inference-rgat.yml +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -45,4 +45,4 @@ jobs: git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from R-GAT GH action on ${{ matrix.os }}" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from R-GAT GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-sdxl.yaml b/.github/workflows/test-mlperf-inference-sdxl.yaml index 59d555449..03ae46d4e 100644 --- a/.github/workflows/test-mlperf-inference-sdxl.yaml +++ b/.github/workflows/test-mlperf-inference-sdxl.yaml @@ -22,4 +22,4 @@ jobs: python3 -m pip install cm4mlops cm pull repo cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --adr.mlperf-implementation.tags=_branch.dev --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions From aa5182e4aa57ab5b9f5465ee5a63253756641bb2 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 19 Dec 2024 11:01:53 +0000 Subject: [PATCH 30/34] Update setup.py | Use default branch in setup.py --- setup.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index a6d5a73f7..8cc2aec00 100644 --- a/setup.py +++ b/setup.py @@ -145,11 +145,12 @@ def custom_function(self): 'artifact': 'mlcommons@cm4mlops', 'force': True, 'all': True}) + branch = os.environ.get('CM_MLOPS_REPO_BRANCH', 'dev') r = cmind.access({'action': 'pull', 'automation': 'repo', 'artifact': 'mlcommons@mlperf-automations', - 'checkout': commit_hash}) - # r = cmind.access({'action':'pull', 'automation':'repo', 'artifact':'mlcommons@mlperf-automations', 'checkout': commit_hash}) + 'checkout': commit_hash, + 'branch': branch}) print(r) if r['return'] > 0: return r['return'] From 597435f3922d1ef43002ecd0bfec490514ce2fee Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 20 Dec 2024 01:14:45 +0530 Subject: [PATCH 31/34] Support nvmitten for aarch64 --- script/app-mlperf-inference/_cm.yaml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index c57003c10..f3ef84523 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -338,12 +338,16 @@ variations: - x86_64 docker: base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public + env: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' - skip_if_env: CM_HOST_PLATFORM_FLAVOR: - x86_64 docker: base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.1-cuda12.4-pytorch24.04-ubuntu22.04-aarch64-GraceHopper-release + env: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp310-cp310-linux_aarch64.whl' @@ -1627,7 +1631,7 @@ variations: CM_SKIP_SYS_UTILS: 'yes' CM_REGENERATE_MEASURE_FILES: 'yes' env: - CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3-cp38-cp38-linux_x86_64.whl' + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' #uses public code for inference v4.1 @@ -1646,8 +1650,6 @@ variations: default_env: CM_SKIP_SYS_UTILS: 'yes' CM_REGENERATE_MEASURE_FILES: 'yes' - env: - CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' r4.1_default: group: From f07bf30d13b6146c183ad2020b446789e30625e4 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 19 Dec 2024 19:50:21 +0000 Subject: [PATCH 32/34] Update VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 592e815ea..e196726d2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.12 +0.6.13 From 2018c6f65fb7e8925bc3f26c9aab83b17e693c9a Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 20 Dec 2024 09:05:13 +0000 Subject: [PATCH 33/34] Copy bert model for nvidia mlperf inference implementation instead of softlink --- script/app-mlperf-inference-nvidia/customize.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py index a09fd9715..36324cd0c 100644 --- a/script/app-mlperf-inference-nvidia/customize.py +++ b/script/app-mlperf-inference-nvidia/customize.py @@ -87,13 +87,13 @@ def preprocess(i): if not os.path.exists(fp32_model_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_BERT_LARGE_FP32_PATH']} {fp32_model_path}") + f"cp -r {env['CM_ML_MODEL_BERT_LARGE_FP32_PATH']} {fp32_model_path}") if not os.path.exists(int8_model_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_BERT_LARGE_INT8_PATH']} {int8_model_path}") + f"cp -r {env['CM_ML_MODEL_BERT_LARGE_INT8_PATH']} {int8_model_path}") if not os.path.exists(vocab_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_BERT_VOCAB_FILE_WITH_PATH']} {vocab_path}") + f"cp -r {env['CM_ML_MODEL_BERT_VOCAB_FILE_WITH_PATH']} {vocab_path}") model_name = "bert" model_path = fp32_model_path From 85af5f8d784b3408fe76d8c25c2c6f166c686b26 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 20 Dec 2024 09:06:58 +0000 Subject: [PATCH 34/34] Update VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index e196726d2..fcbaa8478 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.13 +0.6.14