From 9526dacf3adfca2ecd8e4995e4cd717b73de7500 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 08:43:47 +0000 Subject: [PATCH 01/63] Fixes for igbh dataset download --- .../_cm.yaml | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml index 4750f3ff5..5af7233d7 100644 --- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml +++ b/script/get-dataset-mlperf-inference-igbh/_cm.yaml @@ -24,7 +24,7 @@ deps: - get-python - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/anandhu-eng/IGB-Datasets.git - tags: get,generic-python-lib,_package.colorama - - tags: get,generic-python-lib,_package.tqdm + prehook_deps: #paper - env: @@ -39,6 +39,7 @@ prehook_deps: - 'full' names: - dae + - paper-node-feat tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -55,6 +56,7 @@ prehook_deps: - 'full' names: - dae + - paper-node-label19 tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -71,6 +73,7 @@ prehook_deps: - 'full' names: - dae + - paper-node-label2k tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -87,6 +90,7 @@ prehook_deps: - 'full' names: - dae + - paper-id-index-mapping tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -104,6 +108,7 @@ prehook_deps: - 'full' names: - dae + - edge-index tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -121,6 +126,7 @@ prehook_deps: - 'full' names: - dae + - author-id-index-mapping tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -137,6 +143,7 @@ prehook_deps: - 'full' names: - dae + - author-node-feat tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -154,6 +161,7 @@ prehook_deps: - 'full' names: - dae + - conference-id-index-mapping tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -170,6 +178,7 @@ prehook_deps: - 'full' names: - dae + - conference-node-feat tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -187,6 +196,7 @@ prehook_deps: - 'full' names: - dae + - institute-id-index-mapping tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -203,6 +213,7 @@ prehook_deps: - 'full' names: - dae + - institute-node-feat tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -220,6 +231,7 @@ prehook_deps: - 'full' names: - dae + - journal-id-index-mapping tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -236,6 +248,7 @@ prehook_deps: - 'full' names: - dae + - journal-node-feat tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -253,6 +266,7 @@ prehook_deps: - 'full' names: - dae + - fos-id-index-mapping tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -269,6 +283,7 @@ prehook_deps: - 'full' names: - dae + - fos-node-feat tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -286,6 +301,7 @@ prehook_deps: - 'full' names: - dae + - author-to-institute-edge-index tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -303,6 +319,7 @@ prehook_deps: - 'full' names: - dae + - paper-published-journal-edge-index tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -320,6 +337,7 @@ prehook_deps: - 'full' names: - dae + - paper-topic-fos-edge-index tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -337,6 +355,7 @@ prehook_deps: - 'full' names: - dae + - paper-venue-conference-edge-index tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -354,6 +373,7 @@ prehook_deps: - 'full' names: - dae + - paper-written-by-author-edge-index tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -363,13 +383,13 @@ variations: default: true group: dataset-type env: - CM_DATASET_IGBH_TYPE: debug - CM_DATASET_IGBH_SIZE: tiny + CM_IGBH_DATASET_TYPE: debug + CM_IGBH_DATASET_SIZE: tiny full: group: dataset-type env: - CM_DATASET_IGBH_TYPE: debug - CM_DATASET_IGBH_SIZE: tiny + CM_IGBH_DATASET_TYPE: full + CM_IGBH_DATASET_SIZE: full glt: env: CM_IGBH_GRAPH_COMPRESS: yes From 663d6be109920510f91bea60a5538074931f56cb Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 10:50:00 +0000 Subject: [PATCH 02/63] fixes for rgat reference implementation --- .../_cm.yaml | 16 ++-- .../customize.py | 2 +- .../_cm.yaml | 90 +++++++++---------- .../customize.py | 8 +- script/get-ml-model-rgat/_cm.yaml | 1 + script/get-ml-model-rgat/customize.py | 1 + script/get-mlperf-inference-src/_cm.yaml | 4 +- 7 files changed, 61 insertions(+), 61 deletions(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 8fa3df206..cd0e9fa6d 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -487,7 +487,7 @@ deps: CM_MODEL: - rgat skip_if_env: - CM_ML_MODEL_RGAT_CHECKPOINT_PATH: + RGAT_CHECKPOINT_PATH: - 'on' ######################################################################## @@ -1226,27 +1226,23 @@ variations: group: models env: CM_MODEL: rgat - adr: - pytorch: - version: 2.1.0 deps: - tags: get,generic-python-lib,_package.colorama - tags: get,generic-python-lib,_package.tqdm - tags: get,generic-python-lib,_package.requests - tags: get,generic-python-lib,_package.torchdata - version: 0.7.0 - tags: get,generic-python-lib,_package.torchvision - version: 0.16.0 - tags: get,generic-python-lib,_package.pybind11 - tags: get,generic-python-lib,_package.PyYAML - tags: get,generic-python-lib,_package.numpy - version: 1.26.4 - tags: get,generic-python-lib,_package.pydantic - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/IllinoisGraphBenchmark/IGB-Datasets.git rgat,cuda: deps: - - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/cu121/repo.html + - tags: get,generic-python-lib,_package.dgl + env: + CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" - tags: get,generic-python-lib,_package.torch-scatter - tags: get,generic-python-lib,_package.torch-sparse - tags: get,generic-python-lib,_package.torch-geometric @@ -1264,7 +1260,9 @@ variations: - tags: get,generic-python-lib,_package.torch-sparse env: CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" - - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/repo.html + - tags: get,generic-python-lib,_package.dgl + env: + CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" # Target devices cpu: diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index dcffa5672..93b62873b 100644 --- a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -493,7 +493,7 @@ def get_run_cmd_reference( scenario_extra_options + mode_extra_options + \ " --output " + env['CM_MLPERF_OUTPUT_DIR'] + \ ' --dtype ' + dtype_rgat + \ - " --model-path " + env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] + " --model-path " + env['RGAT_CHECKPOINT_PATH'] if env.get('CM_ACTIVATE_RGAT_IN_MEMORY', '') == "yes": cmd += " --in-memory " diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml index 5af7233d7..4c905ce93 100644 --- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml +++ b/script/get-dataset-mlperf-inference-igbh/_cm.yaml @@ -14,7 +14,7 @@ new_env_keys: - CM_DATASET_IGBH_PATH - CM_DATASET_IGBH_SIZE input_mapping: - out_path: CM_IGBH_DATASET_OUT_PATH + out_path: CM_DATASET_IGBH_OUT_PATH deps: - tags: mlperf,inference,source names: @@ -31,11 +31,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_feat.npy CM_DOWNLOAD_CHECKSUM: 71058b9ac8011bafa1c5467504452d13 CM_DOWNLOAD_FILENAME: node_feet.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,node_feat force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -48,11 +48,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_label_19.npy CM_DOWNLOAD_CHECKSUM: be6fda45566e679bdb05ebea98ad16d4 CM_DOWNLOAD_FILENAME: node_label_19.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,node_label_19 force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -65,11 +65,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_label_2K.npy CM_DOWNLOAD_CHECKSUM: 6eccab9a14f92f42be5b367c39002031 CM_DOWNLOAD_FILENAME: node_label_2K.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,node_label_2K force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -82,11 +82,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/paper_id_index_mapping.npy CM_DOWNLOAD_CHECKSUM: f70dd642a4f7e41d926c91c8c054fc4c CM_DOWNLOAD_FILENAME: paper_id_index_mapping.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,paper_id_index_mapping force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -100,11 +100,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__cites__paper/edge_index.npy CM_DOWNLOAD_CHECKSUM: f4897f53636c04a9c66f6063ec635c16 CM_DOWNLOAD_FILENAME: edge_index.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/paper__cites__paper/ + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__cites__paper/ extra_cache_tags: dataset,igbh,paper_cites_paper,edge_index force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -118,11 +118,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/author/author_id_index_mapping.npy CM_DOWNLOAD_CHECKSUM: 58c15aab7dae03bbd57e6a4ac5e61bd9 CM_DOWNLOAD_FILENAME: author_id_index_mapping.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/author/ + CM_DOWNLOAD_PATH: <<>>/full/processed/author/ extra_cache_tags: dataset,igbh,author,author_id_index_mapping force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -135,11 +135,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/author/node_feat.npy CM_DOWNLOAD_CHECKSUM: 2ec2512b554088381c04ec013e893c8d CM_DOWNLOAD_FILENAME: node_feat.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/author/ + CM_DOWNLOAD_PATH: <<>>/full/processed/author/ extra_cache_tags: dataset,igbh,author,node_feat force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -153,11 +153,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/conference/conference_id_index_mapping.npy CM_DOWNLOAD_CHECKSUM: 0bf7c555d8c697b31b6af6c4cb6b6612 CM_DOWNLOAD_FILENAME: conference_id_index_mapping.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/conference/ + CM_DOWNLOAD_PATH: <<>>/full/processed/conference/ extra_cache_tags: dataset,igbh,conference,conference_id_index_mapping force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -170,11 +170,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/conference/node_feat.npy CM_DOWNLOAD_CHECKSUM: 898ff529b8cf972261fedd50df6377f8 CM_DOWNLOAD_FILENAME: node_feat.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/conference/ + CM_DOWNLOAD_PATH: <<>>/full/processed/conference/ extra_cache_tags: dataset,igbh,conference,node_feat force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -188,11 +188,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/institute/institute_id_index_mapping.npy CM_DOWNLOAD_CHECKSUM: 03fb45eafb7bd35875ef4c7cd2a299a9 CM_DOWNLOAD_FILENAME: institute_id_index_mapping.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/institute/ + CM_DOWNLOAD_PATH: <<>>/full/processed/institute/ extra_cache_tags: dataset,igbh,institute,institute_id_index_mapping force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -205,11 +205,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/institute/node_feat.npy CM_DOWNLOAD_CHECKSUM: 12eaeced22d17b4e97d4b4742331c819 CM_DOWNLOAD_FILENAME: node_feat.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/institute/ + CM_DOWNLOAD_PATH: <<>>/full/processed/institute/ extra_cache_tags: dataset,igbh,institute,node_feat force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -223,11 +223,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/journal/journal_id_index_mapping.npy CM_DOWNLOAD_CHECKSUM: b630c20852b76d17a5c9c37b39176f69 CM_DOWNLOAD_FILENAME: journal_id_index_mapping.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/journal/ + CM_DOWNLOAD_PATH: <<>>/full/processed/journal/ extra_cache_tags: dataset,igbh,journal,journal_id_index_mapping force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -240,11 +240,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/journal/node_feat.npy CM_DOWNLOAD_CHECKSUM: 49d51b554b3004f10bee19d1c7f9b416 CM_DOWNLOAD_FILENAME: node_feat.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/journal/ + CM_DOWNLOAD_PATH: <<>>/full/processed/journal/ extra_cache_tags: dataset,igbh,journal,node_feat force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -258,11 +258,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/fos/fos_id_index_mapping.npy CM_DOWNLOAD_CHECKSUM: 0f0cfde619361cde35d3be9f201d081a CM_DOWNLOAD_FILENAME: fos_id_index_mapping.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/fos/ + CM_DOWNLOAD_PATH: <<>>/full/processed/fos/ extra_cache_tags: dataset,igbh,fos,fos_id_index_mapping force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -275,11 +275,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/fos/node_feat.npy CM_DOWNLOAD_CHECKSUM: 3ef3df19e2475c387fec10bac82773df CM_DOWNLOAD_FILENAME: node_feat.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/fos/ + CM_DOWNLOAD_PATH: <<>>/full/processed/fos/ extra_cache_tags: dataset,igbh,fos,node_feat force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -293,11 +293,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/author__affiliated_to__institute/edge_index.npy CM_DOWNLOAD_CHECKSUM: e35dba208f81e0987207f78787c75711 CM_DOWNLOAD_FILENAME: edge_index.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/author__affiliated_to__institute/ + CM_DOWNLOAD_PATH: <<>>/full/processed/author__affiliated_to__institute/ extra_cache_tags: dataset,igbh,author_affiliated_to_institute,edge_index force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -311,11 +311,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__published__journal/edge_index.npy CM_DOWNLOAD_CHECKSUM: 38505e83bde8e5cf94ae0a85afa60e13 CM_DOWNLOAD_FILENAME: edge_index.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/paper__published__journal/ + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__published__journal/ extra_cache_tags: dataset,igbh,paper_published_journal,edge_index force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -329,11 +329,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__topic__fos/edge_index.npy CM_DOWNLOAD_CHECKSUM: 427fb350a248ee6eaa8c21cde942fda4 CM_DOWNLOAD_FILENAME: edge_index.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/paper__topic__fos/ + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__topic__fos/ extra_cache_tags: dataset,igbh,paper_topic_fos,edge_index force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -347,11 +347,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__venue__conference/edge_index.npy CM_DOWNLOAD_CHECKSUM: 541b8d43cd93579305cfb71961e10a7d CM_DOWNLOAD_FILENAME: edge_index.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/paper__venue__conference/ + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__venue__conference/ extra_cache_tags: dataset,igbh,paper_venue_conference,edge_index force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -365,11 +365,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__written_by__author/edge_index.npy CM_DOWNLOAD_CHECKSUM: df39fe44bbcec93a640400e6d81ffcb5 CM_DOWNLOAD_FILENAME: edge_index.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/paper__written_by__author/ + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__written_by__author/ extra_cache_tags: dataset,igbh,paper_written_by_author,edge_index force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -383,13 +383,13 @@ variations: default: true group: dataset-type env: - CM_IGBH_DATASET_TYPE: debug - CM_IGBH_DATASET_SIZE: tiny + CM_DATASET_IGBH_TYPE: debug + CM_DATASET_IGBH_SIZE: tiny full: group: dataset-type env: - CM_IGBH_DATASET_TYPE: full - CM_IGBH_DATASET_SIZE: full + CM_DATASET_IGBH_TYPE: full + CM_DATASET_IGBH_SIZE: full glt: env: CM_IGBH_GRAPH_COMPRESS: yes diff --git a/script/get-dataset-mlperf-inference-igbh/customize.py b/script/get-dataset-mlperf-inference-igbh/customize.py index a0e6f24a6..8f789bcad 100644 --- a/script/get-dataset-mlperf-inference-igbh/customize.py +++ b/script/get-dataset-mlperf-inference-igbh/customize.py @@ -19,9 +19,9 @@ def preprocess(i): graph_folder = os.path.join( env['CM_MLPERF_INFERENCE_SOURCE'], 'graph', 'R-GAT') - download_loc = env.get('CM_IGBH_DATASET_OUT_PATH', os.getcwd()) + download_loc = env.get('CM_DATASET_IGBH_OUT_PATH', os.getcwd()) - env['CM_IGBH_DATASET_DOWNLOAD_LOCATION'] = download_loc + env['CM_DATASET_IGBH_DOWNLOAD_LOCATION'] = download_loc run_cmd += f"cd {graph_folder} " x_sep = " && " @@ -33,7 +33,7 @@ def preprocess(i): # split seeds run_cmd += x_sep + \ - f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']}" + f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} " # compress graph(for glt implementation) if env.get('CM_IGBH_GRAPH_COMPRESS', '') == "yes": @@ -50,7 +50,7 @@ def postprocess(i): env = i['env'] env['CM_DATASET_IGBH_PATH'] = env.get( - 'CM_IGBH_DATASET_OUT_PATH', os.getcwd()) + 'CM_DATASET_IGBH_OUT_PATH', os.getcwd()) print( f"Path to the IGBH dataset: {os.path.join(env['CM_DATASET_IGBH_PATH'], env['CM_DATASET_IGBH_SIZE'])}") diff --git a/script/get-ml-model-rgat/_cm.yaml b/script/get-ml-model-rgat/_cm.yaml index 644bf688a..d7615acd2 100644 --- a/script/get-ml-model-rgat/_cm.yaml +++ b/script/get-ml-model-rgat/_cm.yaml @@ -13,6 +13,7 @@ input_mapping: new_env_keys: - CM_ML_MODEL_* - CM_ML_MODEL_RGAT_CHECKPOINT_PATH +- RGAT_CHECKPOINT_PATH prehook_deps: - enable_if_env: CM_DOWNLOAD_TOOL: diff --git a/script/get-ml-model-rgat/customize.py b/script/get-ml-model-rgat/customize.py index ac8feaad7..df810a5ab 100644 --- a/script/get-ml-model-rgat/customize.py +++ b/script/get-ml-model-rgat/customize.py @@ -25,6 +25,7 @@ def postprocess(i): elif env.get('CM_ML_MODEL_PATH', '') == '': env['CM_ML_MODEL_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] + env['RGAT_CHECKPOINT_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] return {'return': 0} diff --git a/script/get-mlperf-inference-src/_cm.yaml b/script/get-mlperf-inference-src/_cm.yaml index c5e195a88..c100e32e8 100644 --- a/script/get-mlperf-inference-src/_cm.yaml +++ b/script/get-mlperf-inference-src/_cm.yaml @@ -142,11 +142,11 @@ versions: CM_TMP_GIT_URL: https://github.com/neuralmagic/inference main: env: - CM_MLPERF_LAST_RELEASE: v4.1 + CM_MLPERF_LAST_RELEASE: v5.0 CM_TMP_GIT_CHECKOUT: main master: env: - CM_MLPERF_LAST_RELEASE: v4.1 + CM_MLPERF_LAST_RELEASE: v5.0 CM_TMP_GIT_CHECKOUT: master r2.1: env: From e0b6ded653a16e08c199222f9f5bff9f4726b615 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 11:28:27 +0000 Subject: [PATCH 03/63] Added tqdm deps for get-dataset-igbh --- script/get-dataset-mlperf-inference-igbh/_cm.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml index 4c905ce93..83cfe457e 100644 --- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml +++ b/script/get-dataset-mlperf-inference-igbh/_cm.yaml @@ -24,6 +24,7 @@ deps: - get-python - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/anandhu-eng/IGB-Datasets.git - tags: get,generic-python-lib,_package.colorama + - tags: get,generic-python-lib,_package.tqdm prehook_deps: #paper From 45a08cb6f144807384d3601ec947d30f8f6acb43 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 17:11:24 +0530 Subject: [PATCH 04/63] Fix old repo name usage --- automation/script/module_misc.py | 6 +++--- script/app-mlperf-inference/_cm.yaml | 4 ++-- script/app-mlperf-inference/customize.py | 2 +- script/build-dockerfile/customize.py | 4 ++-- script/generate-mlperf-inference-submission/_cm.yaml | 4 ++-- script/test-cm-core/src/script/test_docker.py | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/automation/script/module_misc.py b/automation/script/module_misc.py index 9cae72b07..5de7f0402 100644 --- a/automation/script/module_misc.py +++ b/automation/script/module_misc.py @@ -1634,12 +1634,12 @@ def dockerfile(i): 'docker_cm_repo', docker_settings.get( 'cm_repo', - 'mlcommons@cm4mlops')) + 'mlcommons@mlperf-automations')) cm_repo_branch = i.get( 'docker_cm_repo_branch', docker_settings.get( 'cm_repo_branch', - 'mlperf-inference')) + 'main')) cm_repo_flags = i.get( 'docker_cm_repo_flags', @@ -2295,7 +2295,7 @@ def docker(i): 'docker_cm_repo', docker_settings.get( 'cm_repo', - 'mlcommons@cm4mlops')) + 'mlcommons@mlperf-automations')) docker_path = i.get('docker_path', '').strip() if docker_path == '': diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index 4c368346e..eb536e8e5 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -1808,8 +1808,8 @@ docker: interactive: True extra_run_args: ' --dns 8.8.8.8 --dns 8.8.4.4 --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' os: ubuntu - cm_repo: mlcommons@cm4mlops - cm_repo_branch: mlperf-inference + cm_repo: mlcommons@mlperf-automations + cm_repo_branch: dev real_run: False os_version: '22.04' docker_input_mapping: diff --git a/script/app-mlperf-inference/customize.py b/script/app-mlperf-inference/customize.py index 41fd8570b..f62ae947d 100644 --- a/script/app-mlperf-inference/customize.py +++ b/script/app-mlperf-inference/customize.py @@ -356,7 +356,7 @@ def postprocess(i): host_info['system_name'] = env['CM_HOST_SYSTEM_NAME'] # Check CM automation repository - repo_name = 'mlcommons@cm4mlops' + repo_name = 'mlcommons@mlperf-automations' repo_hash = '' r = cm.access({'action': 'find', 'automation': 'repo', 'artifact': 'mlcommons@cm4mlops,9e97bb72b0474657'}) diff --git a/script/build-dockerfile/customize.py b/script/build-dockerfile/customize.py index f5cd06204..9c6012aa4 100644 --- a/script/build-dockerfile/customize.py +++ b/script/build-dockerfile/customize.py @@ -131,7 +131,7 @@ def preprocess(i): print( f"Converted repo format from {env['CM_MLOPS_REPO']} to {cm_mlops_repo}") else: - cm_mlops_repo = "mlcommons@cm4mlops" + cm_mlops_repo = "mlcommons@mlperf-automations" cm_mlops_repo_branch_string = f" --branch={env['CM_MLOPS_REPO_BRANCH']}" @@ -299,7 +299,7 @@ def preprocess(i): f.write(EOL + '# Download CM repo for scripts' + EOL) if use_copy_repo: - docker_repo_dest = "/home/cmuser/CM/repos/mlcommons@cm4mlops" + docker_repo_dest = "/home/cmuser/CM/repos/mlcommons@mlperf-automations" f.write( f'COPY --chown=cmuser:cm {relative_repo_path} {docker_repo_dest}' + EOL) diff --git a/script/generate-mlperf-inference-submission/_cm.yaml b/script/generate-mlperf-inference-submission/_cm.yaml index 32003a1b3..064cbc307 100644 --- a/script/generate-mlperf-inference-submission/_cm.yaml +++ b/script/generate-mlperf-inference-submission/_cm.yaml @@ -31,8 +31,8 @@ deps: - 'on' tags: get,mlperf,submission,dir docker: - cm_repo: mlcommons@cm4mlops - cm_repo_branch: mlperf-inference + cm_repo: mlcommons@mlperf-automations + cm_repo_branch: dev deps: - names: get-mlperf-inference-results-dir skip_if_env: diff --git a/script/test-cm-core/src/script/test_docker.py b/script/test-cm-core/src/script/test_docker.py index ad867a2a1..1b63631c6 100644 --- a/script/test-cm-core/src/script/test_docker.py +++ b/script/test-cm-core/src/script/test_docker.py @@ -10,7 +10,7 @@ 'add_deps_recursive': { 'compiler': {'tags': "gcc"} }, - 'docker_cm_repo': 'mlcommons@cm4mlops', + 'docker_cm_repo': 'mlcommons@mlperf-automations', 'image_name': 'cm-script-app-image-classification-onnx-py', 'env': { 'CM_DOCKER_RUN_SCRIPT_TAGS': 'app,image-classification,onnx,python', @@ -27,7 +27,7 @@ 'add_deps_recursive': { 'compiler': {'tags': "gcc"} }, - 'docker_cm_repo': 'mlcommons@cm4mlops', + 'docker_cm_repo': 'mlcommons@mlperf-automations', 'image_name': 'cm-script-app-image-classification-onnx-py', 'env': { 'CM_DOCKER_RUN_SCRIPT_TAGS': 'app,image-classification,onnx,python', From 0b5bcfeca40bb9f7c0636c10e5765d9c5db87aff Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 11:53:45 +0000 Subject: [PATCH 05/63] Fix for avoiding user prompt in download-igbh --- script/get-dataset-mlperf-inference-igbh/_cm.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml index 83cfe457e..796d5674e 100644 --- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml +++ b/script/get-dataset-mlperf-inference-igbh/_cm.yaml @@ -15,6 +15,8 @@ new_env_keys: - CM_DATASET_IGBH_SIZE input_mapping: out_path: CM_DATASET_IGBH_OUT_PATH +env: + SKIP_USER_PROMPT: yes deps: - tags: mlperf,inference,source names: @@ -22,7 +24,7 @@ deps: - tags: get,python names: - get-python - - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/anandhu-eng/IGB-Datasets.git + - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/gateoverflow/IGB-Datasets.git - tags: get,generic-python-lib,_package.colorama - tags: get,generic-python-lib,_package.tqdm From 25d903be1648a615bc108d864d6709661b325f27 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 17:26:51 +0530 Subject: [PATCH 06/63] Remove deprecated gui usage --- script/run-mlperf-inference-app/_cm.yaml | 9 - script/run-mlperf-inference-app/customize.py | 584 ------------------- 2 files changed, 593 deletions(-) diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 05ae0d476..689aaabc3 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -8,9 +8,6 @@ category: Modular MLPerf inference benchmark pipeline developers: "[Arjun Suresh](https://www.linkedin.com/in/arjunsuresh), [Grigori Fursin](https://cKnowledge.org/gfursin)" -gui: - title: CM GUI to run MLPerf inference benchmarks and prepare submissions - clean_output_files: - open.tar.gz - summary.csv @@ -202,11 +199,6 @@ variations: env: CM_MLPERF_LOADGEN_COMPLIANCE: 'yes' - dashboard: - default_gui: false - env: - CM_MLPERF_DASHBOARD: 'on' - find-performance: env: CM_MLPERF_FIND_PERFORMANCE_MODE: 'yes' @@ -396,7 +388,6 @@ variations: submission: base: - all-modes - default_gui: true default_variations: submission-generation-style: full env: diff --git a/script/run-mlperf-inference-app/customize.py b/script/run-mlperf-inference-app/customize.py index 9b5f350d4..480beda5a 100644 --- a/script/run-mlperf-inference-app/customize.py +++ b/script/run-mlperf-inference-app/customize.py @@ -433,587 +433,3 @@ def get_url(url, path, path2, name, text): url_online = '[{}]({})'.format(text, urlx) return {'return': 0, 'url_online': url_online} - -########################################################################## - - -def gui(i): - - params = i['params'] - st = i['st'] - - script_meta = i['meta'] - - misc = i['misc_module'] - - script_path = i['script_path'] - script_url = i.get('script_url', '') - script_tags = i.get('script_tags', '') - - compute_meta = i.get('compute_meta', {}) - compute_tags = compute_meta.get('tags', []) - bench_meta = i.get('bench_meta', {}) - - compute_uid = compute_meta.get('uid', '') - bench_uid = bench_meta.get('uid', '') - - st_inputs_custom = {} - - bench_input = bench_meta.get('bench_input', {}) - - end_html = '' - - extra = {} - add_to_st_inputs = {} - - inp = script_meta['input_description'] - - # Here we can update params - v = compute_meta.get('mlperf_inference_device') - if v is not None and v != '': - inp['device']['force'] = v - - if v in ['tpu', 'gaudi']: - st.markdown('----') - st.markdown( - '**WARNING: unified CM workflow support for this hardware is pending - please [feel free to help](https://discord.gg/JjWNWXKxwT)!**') - return {'return': 0, 'skip': True, 'end_html': end_html} - - elif 'orin' in compute_tags: - st.markdown('----') - st.markdown( - '**WARNING: we need to encode CM knowledge from [this Orin setp](https://github.com/mlcommons/ck/blob/master/docs/mlperf/setup/setup-nvidia-jetson-orin.md) to this GUI!**') - return {'return': 0, 'skip': True, 'end_html': end_html} - - st.markdown('---') - st.markdown('**How would you like to run the MLPerf inference benchmark?**') - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_device', - 'desc': inp['device']}) - device = r.get('value2') - inp['device']['force'] = device - - if device == 'cpu': - inp['implementation']['choices'] = ['mlcommons-python', - 'mlcommons-cpp', 'intel', 'ctuning-cpp-tflite'] - if 'intel' in compute_tags: - inp['implementation']['default'] = 'intel' - else: - inp['implementation']['default'] = 'mlcommons-python' - inp['backend']['choices'] = [ - 'onnxruntime', 'deepsparse', 'pytorch', 'tf', 'tvm-onnx'] - inp['backend']['default'] = 'onnxruntime' - elif device == 'rocm': - inp['implementation']['force'] = 'mlcommons-python' - inp['precision']['force'] = '' - inp['backend']['force'] = 'onnxruntime' - st.markdown( - '*WARNING: CM-MLPerf inference workflow was not tested thoroughly for AMD GPU - please feel free to test and improve!*') - elif device == 'qaic': - inp['implementation']['force'] = 'qualcomm' - inp['precision']['force'] = '' - inp['backend']['force'] = 'glow' - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_division', - 'desc': inp['division']}) - division = r.get('value2') - inp['division']['force'] = division - - y = 'compliance' - if division == 'closed': - inp[y]['default'] = 'yes' - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_compliance', - 'desc': inp[y]}) - compliance = r.get('value2') - inp[y]['force'] = compliance - - if compliance == 'yes': - st.markdown( - '*:red[See [online table with required compliance tests](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#5132-inference)].*') - - else: - inp[y]['force'] = 'no' - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_category', - 'desc': inp['category']}) - category = r.get('value2') - inp['category']['force'] = category - - ########################################################################## - # Implementation - v = bench_input.get('mlperf_inference_implementation') - if v is not None and v != '': - inp['implementation']['force'] = v - else: - if device == 'cuda': - inp['implementation']['choices'] = [ - 'nvidia', 'mlcommons-python', 'mlcommons-cpp'] - inp['implementation']['default'] = 'nvidia' - inp['backend']['choices'] = ['tensorrt', 'onnxruntime', 'pytorch'] - inp['backend']['default'] = 'tensorrt' - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_implementation', - 'desc': inp['implementation']}) - implementation = r.get('value2') - inp['implementation']['force'] = implementation - - implementation_setup = '' - r = load_md(script_path, 'setup', 'i-' + implementation) - if r['return'] == 0: - implementation_setup = r['string'] - - url_faq_implementation = '' - r = get_url(script_url, script_path, 'faq', implementation, 'FAQ online') - if r['return'] == 0: - url_faq_implementation = r['url_online'] - - can_have_docker_flag = False - - if implementation == 'mlcommons-cpp': - # inp['backend']['choices'] = ['onnxruntime'] - inp['precision']['force'] = 'float32' - inp['backend']['force'] = 'onnxruntime' - inp['model']['choices'] = ['resnet50', 'retinanet'] - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-mlcommons-cpp)]*') - elif implementation == 'mlcommons-python': - inp['precision']['force'] = 'float32' - if device == 'cuda': - inp['backend']['choices'] = ['onnxruntime', 'pytorch', 'tf'] - inp['backend']['default'] = 'onnxruntime' - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-mlcommons-python)]*') - elif implementation == 'ctuning-cpp-tflite': - inp['precision']['force'] = 'float32' - inp['model']['force'] = 'resnet50' - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-ctuning-cpp-tflite)]*') - elif implementation == 'nvidia': - inp['backend']['force'] = 'tensorrt' - extra['skip_script_docker_func'] = True - can_have_docker_flag = True - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-nvidia)]*') - elif implementation == 'intel': - inp['model']['choices'] = ['bert-99', 'gptj-99'] - inp['model']['default'] = 'bert-99' - inp['precision']['choices'] = ['int8', 'int4'] - inp['precision']['default'] = 'int8' - inp['category']['force'] = 'datacenter' - inp['backend']['force'] = 'pytorch' - inp['sut']['default'] = 'sapphire-rapids.112c' - can_have_docker_flag = True - extra['skip_script_docker_func'] = True -# st.markdown('*:red[Note: Intel implementation require extra CM command to build and run Docker container - you will run CM commands to run MLPerf benchmarks there!]*') - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-mlperf-inference-intel)]*') - elif implementation == 'qualcomm': - inp['model']['choices'] = ['resnet50', 'retinanet', 'bert-99'] - inp['model']['default'] = 'bert-99' - inp['precision']['default'] = 'float16' - extra['skip_script_docker_func'] = True - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-mlperf-inference-qualcomm)]*') - - ########################################################################## - # Backend - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_backend', - 'desc': inp['backend']}) - backend = r.get('value2') - inp['backend']['force'] = backend - - backend_setup = '' - r = load_md(script_path, 'setup', 'b-' + backend) - if r['return'] == 0: - backend_setup = r['string'] - - if backend == 'deepsparse': - inp['model']['choices'] = [ - 'resnet50', 'retinanet', 'bert-99', 'bert-99.9'] - inp['model']['default'] = 'bert-99' - inp['precision']['choices'] = ['float32', 'int8'] - inp['precision']['default'] = 'int8' - if 'force' in inp['precision']: - del (inp['precision']['force']) - - ########################################################################## - # Model - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_model', - 'desc': inp['model']}) - model = r.get('value2') - inp['model']['force'] = model - - github_doc_model = '' - - if model == 'retinanet': - x = '50' - if implementation == 'mlcommons-python': - x = '200' - st.markdown( - ':red[This model requires ~{}GB of free disk space for preprocessed dataset in a full/submission run!]\n'.format(x)) - - elif model.startswith('bert-'): - github_doc_model = 'bert' - - elif model.startswith('3d-unet-'): - github_doc_model = '3d-unet' - - elif model == 'rnnt': - github_doc_model = 'rnnt' - - elif model.startswith('dlrm-v2-'): - github_doc_model = 'dlrm_v2' - - elif model.startswith('gptj-'): - github_doc_model = 'gpt-j' - - elif model == 'sdxl': - github_doc_model = 'stable-diffusion-xl' - - elif model.startswith('llama2-'): - github_doc_model = 'llama2-70b' - - elif model.startswith('mixtral-'): - github_doc_model = 'mixtral-8x7b' - - if github_doc_model == '': - github_doc_model = model - - model_cm_url = 'https://github.com/mlcommons/ck/tree/master/docs/mlperf/inference/{}'.format( - github_doc_model) - extra_notes_online = '[Extra notes online]({})\n'.format(model_cm_url) - - st.markdown( - '*[CM-MLPerf GitHub docs for this model]({})*'.format(model_cm_url)) - - ########################################################################## - # Precision - if implementation == 'intel': - if model == 'bert-99': - inp['precision']['force'] = 'int8' - elif model == 'gptj-99': - inp['precision']['force'] = 'int4' - elif implementation == 'qualcomm': - if model == 'resnet50': - inp['precision']['print'] = 'int8' - elif model == 'retinanet': - inp['precision']['print'] = 'int8' - elif model == 'bert-99': - inp['precision']['print'] = 'int8/float16' - - if inp['precision'].get('force', '') == '': - x = inp['precision'].get('print', '') - if x != '': - st.markdown('**{}**: {}'.format(inp['precision']['desc'], x)) - else: - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_precision', - 'desc': inp['precision']}) - precision = r.get('value2') - inp['precision']['force'] = precision - - ########################################################################## - # Benchmark version - - script_meta_variations = script_meta['variations'] - - choices = [''] + [ - k for k in script_meta_variations if script_meta_variations[k].get( - 'group', '') == 'benchmark-version'] - desc = { - 'choices': choices, - 'default': choices[0], - 'desc': 'Force specific benchmark version?'} - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_version', - 'desc': desc}) - benchmark_version = r.get('value2') - - if benchmark_version != '': - params['~~benchmark-version'] = [benchmark_version] - - ########################################################################## - # Run via Docker container - if can_have_docker_flag: - - default_choice = 'yes - run in container' - - choices = [default_choice, 'no - run natively'] - desc = { - 'choices': choices, - 'default': choices[0], - 'desc': 'Should CM script prepare and run Docker container in interactive mode to run MLPerf? You can then copy/paste CM commands generated by this GUI to benchmark different models.'} - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_docker', - 'desc': desc}) - benchmark_docker = r.get('value2') - - if benchmark_docker == 'yes - run in container': - add_to_st_inputs['@docker'] = True - add_to_st_inputs['@docker_cache'] = 'no' - - ########################################################################## - # Prepare submission - st.markdown('---') - - submission = st.toggle( - 'Would you like to prepare official submission?', - value=False) - if submission: - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_hw_name', - 'desc': inp['hw_name']}) - inp['hw_name']['force'] = r.get('value2') - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_submitter', - 'desc': inp['submitter']}) - submitter = r.get('value2') - inp['submitter']['force'] = submitter - - params['~~submission-generation'] = ['submission'] - params['~all-scenarios'] = ['true'] - inp['scenario']['force'] = '' - inp['clean']['default'] = False - inp['repro']['force'] = True - - x = '*:red[Use the following command to find local directory with the submission tree and results:]*\n```bash\ncm find cache --tags=submission,dir\n```\n' - - x += '*:red[You will also find results in `mlperf-inference-submission.tar.gz` file that you can submit to MLPerf!]*\n\n' - - x += '*:red[Note that if some results are INVALID due to too short run, you can rerun the same CM command and it should increase the length of the benchmark until you get valid result!]*\n' - - st.markdown(x) - - st.markdown('---') - - else: - inp['submitter']['force'] = '' - inp['clean']['default'] = True - params['~submission'] = ['false'] - - choices = [ - 'Performance', - 'Accuracy', - 'Find Performance from a short run', - 'Performance and Accuracy'] - desc = { - 'choices': choices, - 'default': choices[0], - 'desc': 'What to measure?'} - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_measure', - 'desc': desc}) - measure = r.get('value2') - - x = '' - if measure == 'Performance': - x = 'performance-only' - elif measure == 'Accuracy': - x = 'accuracy-only' - elif measure == 'Find Performance from a short run': - x = 'find-performance' - elif measure == 'Performance and Accuracy': - x = 'submission' - - params['~~submission-generation'] = [x] - - ####################################################################### - # Prepare scenario - - xall = 'All applicable' - choices = ['Offline', 'Server', 'SingleStream', 'MultiStream', xall] - desc = { - 'choices': choices, - 'default': choices[0], - 'desc': 'Which scenario(s)?'} - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_scenario', - 'desc': desc}) - scenario = r.get('value2') - - if scenario == xall: - params['~all-scenarios'] = ['true'] - inp['scenario']['force'] = '' - else: - inp['scenario']['force'] = scenario - - ########################################################################## - # Short or full run - - x = ['Full run', 'Short run'] - if submission: - choices = [x[0], x[1]] - else: - choices = [x[1], x[0]] - - desc = { - 'choices': choices, - 'default': choices[0], - 'desc': 'Short (test) or full (valid) run?'} - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_how', - 'desc': desc}) - how = r.get('value2') - - if how == x[0]: - params['~~submission-generation-style'] = ['full'] - inp['execution_mode']['force'] = 'valid' - else: - params['~~submission-generation-style'] = ['short'] - inp['execution_mode']['force'] = 'test' - - ########################################################################## - # Power - -# desc = {'boolean':True, 'default':False, 'desc':'Measure power?'} -# r = misc.make_selector({'st':st, 'st_inputs':st_inputs_custom, 'params':params, 'key': 'mlperf_inference_power', 'desc':desc}) -# power = r.get('value2', False) - - power = st.toggle('Measure power consumption?', value=False) - - if power: - inp['power']['force'] = 'yes' - - y = 'adr.mlperf-power-client.power_server' - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_power_server', - 'desc': inp[y]}) - inp[y]['force'] = r.get('value2') - - y = 'adr.mlperf-power-client.port' - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_power_port', - 'desc': inp[y]}) - inp[y]['force'] = r.get('value2') - - st.markdown( - '*:red[See [online notes](https://github.com/mlcommons/ck/blob/master/docs/tutorials/mlperf-inference-power-measurement.md)] to setup power meter and server.*') - - else: - inp['power']['force'] = 'no' - inp['adr.mlperf-power-client.power_server']['force'] = '' - inp['adr.mlperf-power-client.port']['force'] = '' - - ########################################################################## - # Dashboard - -# desc = {'boolean':True, 'default':False, 'desc':'Output results to W&B dashboard?'} -# r = misc.make_selector({'st':st, 'st_inputs':st_inputs_custom, 'params':params, 'key': 'mlperf_inference_dashboard', 'desc':desc}) -# dashboard = r.get('value2', False) - - dashboard = st.toggle('Output results to W&B dashboard?', value=False) - - if dashboard: - params['~dashboard'] = ['true'] - - y = 'dashboard_wb_project' - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_power_wb_project', - 'desc': inp[y]}) - inp[y]['force'] = r.get('value2') - - y = 'dashboard_wb_user' - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_power_wb_user', - 'desc': inp[y]}) - inp[y]['force'] = r.get('value2') - - else: - params['~dashboard'] = ['false'] - inp['dashboard_wb_project']['force'] = '' - inp['dashboard_wb_user']['force'] = '' - - # Hide customization by default - params['hide_script_customization'] = True - - x = implementation_setup - if backend_setup != '': - if x != '': - x += '\n\n' - x += backend_setup - - extra['extra_notes_online'] = extra_notes_online - extra['extra_faq_online'] = url_faq_implementation - extra['extra_setup'] = x - - ########################################################################## - value_reproduce = inp.get('repro', {}).get('force', False) - reproduce = st.toggle( - 'Record extra info for reproducibility?', - value=value_reproduce) - - explore = st.toggle( - 'Explore/tune benchmark (batch size, threads, etc)?', - value=False) - - if reproduce or explore: - add_to_st_inputs.update({ - "@repro_extra.run-mlperf-inference-app.bench_uid": bench_uid, - "@repro_extra.run-mlperf-inference-app.compute_uid": compute_uid, - '@results_dir': '{{CM_EXPERIMENT_PATH3}}', - '@submission_dir': '{{CM_EXPERIMENT_PATH3}}' - }) - - inp['repro']['force'] = True - extra['use_experiment'] = True - - if explore: - add_to_st_inputs['@batch_size'] = '{{CM_EXPLORE_BATCH_SIZE{[1,2,4,8]}}}' - - ########################################################################## - debug = st.toggle( - 'Debug and run MLPerf benchmark natively from command line after CM auto-generates CMD?', - value=False) - if debug: - inp['debug']['force'] = True - - extra['add_to_st_inputs'] = add_to_st_inputs - - return {'return': 0, 'end_html': end_html, 'extra': extra} From ccb1cefaa86c76cf328dc36bb166bfd12975b36e Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Thu, 12 Dec 2024 17:38:34 +0530 Subject: [PATCH 07/63] run on pull request --- .github/workflows/check-broken-links.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/check-broken-links.yml b/.github/workflows/check-broken-links.yml index af257ebf4..5b763f57e 100644 --- a/.github/workflows/check-broken-links.yml +++ b/.github/workflows/check-broken-links.yml @@ -1,9 +1,9 @@ name: "Check .md README files for broken links" on: - push: + pull_request: branches: - - master + - dev jobs: markdown-link-check: From 036b4e97caaf2b00f232bedae6894b81c6ce1f08 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Thu, 12 Dec 2024 18:05:03 +0530 Subject: [PATCH 08/63] change base branch to dev --- .github/workflows/check-broken-links.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/check-broken-links.yml b/.github/workflows/check-broken-links.yml index 5b763f57e..aaaf83d14 100644 --- a/.github/workflows/check-broken-links.yml +++ b/.github/workflows/check-broken-links.yml @@ -18,3 +18,4 @@ jobs: with: use-quiet-mode: 'yes' check-modified-files-only: 'yes' + base-branch: dev From 006f23fcf006ae3b4a2acf2d26ee105d1c2d674e Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 16:36:08 +0000 Subject: [PATCH 09/63] Cleanup for mlperf-inference-rgat --- .../_cm.yaml | 43 +++++++++---------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index cd0e9fa6d..b13153227 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -1237,32 +1237,31 @@ variations: - tags: get,generic-python-lib,_package.numpy - tags: get,generic-python-lib,_package.pydantic - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/IllinoisGraphBenchmark/IGB-Datasets.git - - rgat,cuda: - deps: - - tags: get,generic-python-lib,_package.dgl - env: - CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" + - tags: get,generic-python-lib,_package.torch-geometric + update_tags_from_env_with_prefix: + _find_links_url.: + - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL - tags: get,generic-python-lib,_package.torch-scatter + update_tags_from_env_with_prefix: + _find_links_url.: + - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL - tags: get,generic-python-lib,_package.torch-sparse - - tags: get,generic-python-lib,_package.torch-geometric - env: - CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>.html" + update_tags_from_env_with_prefix: + _find_links_url.: + - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL + - tags: get,generic-python-lib,_package.dgl #,_url.git+https://github.com/dmlc/dgl.git + update_tags_from_env_with_prefix: + _find_links_url.: + - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL + + rgat,cuda: + env: + CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>.html" + CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL: "https://data.dgl.ai/wheels/torch-<<>>/cu121/repo.html" rgat,cpu: - deps: - - tags: get,generic-python-lib,_package.torch-geometric - env: - CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" - - tags: get,generic-python-lib,_package.torch-scatter - env: - CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" - - tags: get,generic-python-lib,_package.torch-sparse - env: - CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" - - tags: get,generic-python-lib,_package.dgl - env: - CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" + env: + CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" # Target devices cpu: From 8e896ed6822c3b54e197db2c395ae41348dc5538 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 16:49:37 +0000 Subject: [PATCH 10/63] Fix torch and numpy version for mlperf-inference-rgat --- script/app-mlperf-inference-mlcommons-python/_cm.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index b13153227..b61015505 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -1226,6 +1226,9 @@ variations: group: models env: CM_MODEL: rgat + adr: + pytorch: + version: "2.1.0" deps: - tags: get,generic-python-lib,_package.colorama - tags: get,generic-python-lib,_package.tqdm @@ -1235,6 +1238,8 @@ variations: - tags: get,generic-python-lib,_package.pybind11 - tags: get,generic-python-lib,_package.PyYAML - tags: get,generic-python-lib,_package.numpy + version_max: "1.26.4" + version_max_usable: "1.26.4" - tags: get,generic-python-lib,_package.pydantic - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/IllinoisGraphBenchmark/IGB-Datasets.git - tags: get,generic-python-lib,_package.torch-geometric From 1717a569bc42a01a900683db37970fa346583a45 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 17:39:38 +0000 Subject: [PATCH 11/63] Support pytorch 2.4 for app-mlperf-inference-rgat --- script/app-mlperf-inference-mlcommons-python/_cm.yaml | 10 ++++++---- .../app-mlperf-inference-mlcommons-python/customize.py | 5 ++++- script/get-generic-python-lib/customize.py | 4 ++++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index b61015505..15ade7a55 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -1226,9 +1226,10 @@ variations: group: models env: CM_MODEL: rgat - adr: + add_deps_recursive: pytorch: - version: "2.1.0" + version_max: "2.4.1" + version_max_usable: "2.4.1" deps: - tags: get,generic-python-lib,_package.colorama - tags: get,generic-python-lib,_package.tqdm @@ -1254,7 +1255,7 @@ variations: update_tags_from_env_with_prefix: _find_links_url.: - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL - - tags: get,generic-python-lib,_package.dgl #,_url.git+https://github.com/dmlc/dgl.git + - tags: get,generic-python-lib,_package.dgl update_tags_from_env_with_prefix: _find_links_url.: - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL @@ -1262,11 +1263,12 @@ variations: rgat,cuda: env: CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>.html" - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL: "https://data.dgl.ai/wheels/torch-<<>>/cu121/repo.html" + CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL: "https://data.dgl.ai/wheels/torch-<<>>/cu121/repo.html" rgat,cpu: env: CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" + CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL: "https://data.dgl.ai/wheels/torch-<<>>/repo.html" # Target devices cpu: diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index 93b62873b..196bf38dc 100644 --- a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -388,7 +388,10 @@ def get_run_cmd_reference( env['CM_VLLM_SERVER_MODEL_NAME'] = env.get( "CM_VLLM_SERVER_MODEL_NAME") or "NousResearch/Meta-Llama-3-8B-Instruct" # env['CM_MLPERF_INFERENCE_API_SERVER'] = "http://localhost:8000" - cmd += f" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} --model-path {env['CM_VLLM_SERVER_MODEL_NAME']} --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm " + cmd += f" --api-server { + env['CM_MLPERF_INFERENCE_API_SERVER']} --model-path { + env['CM_VLLM_SERVER_MODEL_NAME']} --api-model-name { + env['CM_VLLM_SERVER_MODEL_NAME']} --vllm " else: cmd += f" --model-path {env['LLAMA2_CHECKPOINT_PATH']}" diff --git a/script/get-generic-python-lib/customize.py b/script/get-generic-python-lib/customize.py index 6421a22a1..57f25127f 100644 --- a/script/get-generic-python-lib/customize.py +++ b/script/get-generic-python-lib/customize.py @@ -189,4 +189,8 @@ def postprocess(i): if pip_version and len(pip_version) > 1 and int(pip_version[0]) >= 23: env['CM_PYTHON_PIP_COMMON_EXTRA'] = " --break-system-packages" + if version.count('.') > 1: + env[f"{env_version_key}_MAJOR_MINOR"] = ".".join( + version.split(".")[:2]) + return {'return': 0, 'version': version} From 17d7c0898fcf1eea4a9ad94aedb579999f85004a Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 17:55:05 +0000 Subject: [PATCH 12/63] Support igbh dataset from host --- script/app-mlperf-inference/_cm.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index eb536e8e5..c57003c10 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -781,6 +781,15 @@ variations: - mlperf-accuracy-script - 3d-unet-accuracy-script tags: run,accuracy,mlperf,_igbh + docker: + deps: + - tags: get,dataset,igbh + enable_if_env: + CM_USE_DATASET_FROM_HOST: + - 'yes' + names: + - igbh-original + - igbh-dataset sdxl: group: From 58b3bfb38654d315697b9d6d424188087650fd3d Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 18:22:00 +0000 Subject: [PATCH 13/63] Fix fstring formatting in app-mlperf-inference-mlcommons-python --- script/app-mlperf-inference-mlcommons-python/customize.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index 196bf38dc..1a2d3b023 100644 --- a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -388,10 +388,9 @@ def get_run_cmd_reference( env['CM_VLLM_SERVER_MODEL_NAME'] = env.get( "CM_VLLM_SERVER_MODEL_NAME") or "NousResearch/Meta-Llama-3-8B-Instruct" # env['CM_MLPERF_INFERENCE_API_SERVER'] = "http://localhost:8000" - cmd += f" --api-server { - env['CM_MLPERF_INFERENCE_API_SERVER']} --model-path { - env['CM_VLLM_SERVER_MODEL_NAME']} --api-model-name { - env['CM_VLLM_SERVER_MODEL_NAME']} --vllm " + cmd += f""" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} \ + --model-path {env['CM_VLLM_SERVER_MODEL_NAME']} \ + --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm """ else: cmd += f" --model-path {env['LLAMA2_CHECKPOINT_PATH']}" From c93b9c26c0b601742e7b675b6d5f76dc70d949fd Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 18:39:42 +0000 Subject: [PATCH 14/63] Fix use_dataset_from_host for igbh --- script/app-mlperf-inference-mlcommons-python/_cm.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 15ade7a55..abcb1f293 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -619,9 +619,14 @@ deps: enable_if_env: CM_MODEL: - rgat - skip_if_env: + skip_if_any_env: CM_DATASET_IGBH_PATH: - "on" + skip_if_env: + CM_RUN_STATE_DOCKER: + - 'yes' + CM_USE_DATASET_FROM_HOST: + - 'yes' ######################################################################## # Install MLPerf inference dependencies From 64f69e6429229f81a50313b4eca12e794f7c34c7 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 18:43:37 +0000 Subject: [PATCH 15/63] Remove torchvision deps for mlperf-inference-rgat --- script/app-mlperf-inference-mlcommons-python/_cm.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index abcb1f293..76f534641 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -1240,7 +1240,6 @@ variations: - tags: get,generic-python-lib,_package.tqdm - tags: get,generic-python-lib,_package.requests - tags: get,generic-python-lib,_package.torchdata - - tags: get,generic-python-lib,_package.torchvision - tags: get,generic-python-lib,_package.pybind11 - tags: get,generic-python-lib,_package.PyYAML - tags: get,generic-python-lib,_package.numpy From c1e00cc7beccb509e46a7f0c274eaa4147c2af98 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 19:07:10 +0000 Subject: [PATCH 16/63] Remove torchvision deps for mlperf inference rgat cuda --- script/app-mlperf-inference-mlcommons-python/_cm.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 76f534641..89646244b 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -222,6 +222,7 @@ deps: CM_MODEL: - dlrm-v2-99 - dlrm-v2-99.9 + - rgat enable_if_env: CM_MLPERF_BACKEND: - pytorch @@ -234,6 +235,11 @@ deps: names: - ml-engine-torchvision - torchvision + skip_if_env: + CM_MODEL: + - dlrm-v2-99 + - dlrm-v2-99.9 + - rgat enable_if_env: CM_MLPERF_BACKEND: - pytorch From 4ef87fab0d33303ca0824027215f1789eaf3496c Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 19:31:24 +0000 Subject: [PATCH 17/63] Create test-mlperf-inference-rgat.yml --- .../workflows/test-mlperf-inference-rgat.yml | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 .github/workflows/test-mlperf-inference-rgat.yml diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml new file mode 100644 index 000000000..de5b0fbb6 --- /dev/null +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -0,0 +1,48 @@ +name: MLPerf inference rgat + +on: + pull_request_target: + branches: [ "main", "dev" ] + paths: + - '.github/workflows/test-mlperf-inference-rgat.yml' + - '**' + - '!**.md' + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + python-version: [ "3.12" ] + backend: [ "pytorch" ] + implementation: [ "python" ] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python3 -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" + cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} + - name: Test MLPerf Inference R-GAT using ${{ matrix.backend }} on ${{ matrix.os }} + run: | + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --quiet -v --target_qps=1 + - name: Push Results + if: github.repository_owner == 'gateoverflow' + env: + USER: "GitHub Action" + EMAIL: "admin@gateoverflow.com" + GITHUB_TOKEN: ${{ secrets.TEST_RESULTS_GITHUB_TOKEN }} + run: | + git config --global user.name "${{ env.USER }}" + git config --global user.email "${{ env.EMAIL }}" + git config --global credential.https://github.com.helper "" + git config --global credential.https://github.com.helper "!gh auth git-credential" + git config --global credential.https://gist.github.com.helper "" + git config --global credential.https://gist.github.com.helper "!gh auth git-credential" + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from R-GAT GH action on ${{ matrix.os }}" --quiet From 31c3143f795edcde98f83f08ace2bac04ad5e350 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 19:49:17 +0000 Subject: [PATCH 18/63] Fix default cm-repo-branch for build-dockerfile --- script/build-dockerfile/_cm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/build-dockerfile/_cm.yaml b/script/build-dockerfile/_cm.yaml index 7535311ea..f54d3a216 100644 --- a/script/build-dockerfile/_cm.yaml +++ b/script/build-dockerfile/_cm.yaml @@ -19,7 +19,7 @@ default_env: ' CM_DOCKER_OS: ubuntu CM_DOCKER_NOT_PULL_UPDATE: False - CM_MLOPS_REPO_BRANCH: mlperf-inference + CM_MLOPS_REPO_BRANCH: dev input_mapping: build: CM_BUILD_DOCKER_IMAGE From b899c2019a6fe4b17d81d6cf0b54cd6403358874 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Fri, 13 Dec 2024 12:07:41 +0530 Subject: [PATCH 19/63] capture docker tool --- script/get-docker/_cm.yaml | 3 ++- script/get-docker/customize.py | 11 ++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/script/get-docker/_cm.yaml b/script/get-docker/_cm.yaml index e2f33e875..5e652cbe4 100644 --- a/script/get-docker/_cm.yaml +++ b/script/get-docker/_cm.yaml @@ -9,7 +9,8 @@ docker_input_mapping: {} input_description: {} input_mapping: {} new_env_keys: [ - "CM_DOCKER_VERSION" + "CM_DOCKER_VERSION", + "CM_DOCKER_TOOL" ] new_state_keys: [] post_deps: [] diff --git a/script/get-docker/customize.py b/script/get-docker/customize.py index 0d41346b4..ca21e7a7a 100644 --- a/script/get-docker/customize.py +++ b/script/get-docker/customize.py @@ -47,8 +47,14 @@ def detect_version(i): version = r['version'] + tool = "docker" + + if "podman" in r['string'].lower(): + tool = "podman" + + print(i['recursion_spaces'] + ' Detected version: {}'.format(version)) - return {'return': 0, 'version': version} + return {'return': 0, 'version': version, "tool":tool} def postprocess(i): @@ -60,6 +66,7 @@ def postprocess(i): return r version = r['version'] + tool = r['tool'] found_file_path = env['CM_DOCKER_BIN_WITH_PATH'] found_path = os.path.dirname(found_file_path) @@ -69,5 +76,7 @@ def postprocess(i): env['CM_DOCKER_CACHE_TAGS'] = 'version-' + version env['CM_DOCKER_VERSION'] = version + + env['CM_DOCKER_TOOL'] = tool return {'return': 0, 'version': version} From 71fd59a0a9e6f2ed5bd030a8ed9b08a92cbf7825 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Fri, 13 Dec 2024 12:10:09 +0530 Subject: [PATCH 20/63] docker tool -> container tool --- script/get-docker/_cm.yaml | 2 +- script/get-docker/customize.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/script/get-docker/_cm.yaml b/script/get-docker/_cm.yaml index 5e652cbe4..881039852 100644 --- a/script/get-docker/_cm.yaml +++ b/script/get-docker/_cm.yaml @@ -10,7 +10,7 @@ input_description: {} input_mapping: {} new_env_keys: [ "CM_DOCKER_VERSION", - "CM_DOCKER_TOOL" + "CM_CONTAINER_TOOL" ] new_state_keys: [] post_deps: [] diff --git a/script/get-docker/customize.py b/script/get-docker/customize.py index ca21e7a7a..30ccf627d 100644 --- a/script/get-docker/customize.py +++ b/script/get-docker/customize.py @@ -77,6 +77,6 @@ def postprocess(i): env['CM_DOCKER_VERSION'] = version - env['CM_DOCKER_TOOL'] = tool + env['CM_CONTAINER_TOOL'] = tool return {'return': 0, 'version': version} From 216081dcd43ded15583f6bb70d366170ec936e3b Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 17:41:48 +0530 Subject: [PATCH 21/63] [Automated Commit] Format Codebase (#51) * Fixes for rgat submission generation --- .../workflows/test-mlperf-inference-rgat.yml | 2 +- script/get-docker/customize.py | 5 ++- script/get-ml-model-rgat/_cm.yaml | 1 + script/process-mlperf-accuracy/customize.py | 34 ++++++++++++++----- script/run-mlperf-inference-app/_cm.yaml | 2 ++ 5 files changed, 32 insertions(+), 12 deletions(-) diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml index de5b0fbb6..478e666c6 100644 --- a/.github/workflows/test-mlperf-inference-rgat.yml +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -31,7 +31,7 @@ jobs: cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - name: Test MLPerf Inference R-GAT using ${{ matrix.backend }} on ${{ matrix.os }} run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --quiet -v --target_qps=1 + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --adr.inference-src.tags=_branch.fix_submission_generation --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 - name: Push Results if: github.repository_owner == 'gateoverflow' env: diff --git a/script/get-docker/customize.py b/script/get-docker/customize.py index 30ccf627d..fbf0a5bbf 100644 --- a/script/get-docker/customize.py +++ b/script/get-docker/customize.py @@ -52,9 +52,8 @@ def detect_version(i): if "podman" in r['string'].lower(): tool = "podman" - print(i['recursion_spaces'] + ' Detected version: {}'.format(version)) - return {'return': 0, 'version': version, "tool":tool} + return {'return': 0, 'version': version, "tool": tool} def postprocess(i): @@ -76,7 +75,7 @@ def postprocess(i): env['CM_DOCKER_CACHE_TAGS'] = 'version-' + version env['CM_DOCKER_VERSION'] = version - + env['CM_CONTAINER_TOOL'] = tool return {'return': 0, 'version': version} diff --git a/script/get-ml-model-rgat/_cm.yaml b/script/get-ml-model-rgat/_cm.yaml index d7615acd2..c4ecc56e0 100644 --- a/script/get-ml-model-rgat/_cm.yaml +++ b/script/get-ml-model-rgat/_cm.yaml @@ -63,4 +63,5 @@ variations: group: download-tool rclone,fp32: env: + CM_ML_MODEL_STARTING_WEIGHTS_FILENAME: https://github.com/mlcommons/inference/tree/master/graph/R-GAT#download-model-using-rclone CM_DOWNLOAD_URL: mlc-inference:mlcommons-inference-wg-public/R-GAT/RGAT.pt diff --git a/script/process-mlperf-accuracy/customize.py b/script/process-mlperf-accuracy/customize.py index f1d8b7874..21569fd57 100644 --- a/script/process-mlperf-accuracy/customize.py +++ b/script/process-mlperf-accuracy/customize.py @@ -118,18 +118,30 @@ def preprocess(i): extra_options = "" if env.get('CM_SDXL_STATISTICS_FILE_PATH', '') != '': - extra_options += f" --statistics-path '{env['CM_SDXL_STATISTICS_FILE_PATH']}' " + extra_options += ( + f""" --statistics-path '{ + env['CM_SDXL_STATISTICS_FILE_PATH']}'""" + ) if env.get('CM_SDXL_COMPLIANCE_IMAGES_PATH', '') != '': - extra_options += f" --compliance-images-path '{env['CM_SDXL_COMPLIANCE_IMAGES_PATH']}' " + extra_options += ( + f""" --compliance-images-path '{ + env['CM_SDXL_COMPLIANCE_IMAGES_PATH']}' """ + ) else: - extra_options += f""" --compliance-images-path '{os.path.join(result_dir, "images")}' """ + extra_options += f""" --compliance-images-path '{ + os.path.join( + result_dir, "images")}' """ if env.get('CM_COCO2014_SAMPLE_ID_PATH', '') != '': - extra_options += f" --ids-path '{env['CM_COCO2014_SAMPLE_ID_PATH']}' " + extra_options += ( + f" --ids-path '{env['CM_COCO2014_SAMPLE_ID_PATH']}' " + ) if env.get('CM_SDXL_ACCURACY_RUN_DEVICE', '') != '': - extra_options += f" --device '{env['CM_SDXL_ACCURACY_RUN_DEVICE']}' " + extra_options += ( + f" --device '{env['CM_SDXL_ACCURACY_RUN_DEVICE']}' " + ) # env['DATASET_ANNOTATIONS_FILE_PATH'] = env['CM_DATASET_ANNOTATIONS_FILE_PATH'] CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "text_to_image", "tools", @@ -162,9 +174,15 @@ def preprocess(i): elif dataset == "terabyte": extra_options = "" if env.get('CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH', '') != '': - extra_options += f" --aggregation-trace-file '{env['CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH']}' " + extra_options += ( + f""" --aggregation-trace-file '{ + env['CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH']}' """ + ) if env.get('CM_DLRM_V2_DAY23_FILE_PATH', '') != '': - extra_options += f" --day-23-file '{env['CM_DLRM_V2_DAY23_FILE_PATH']}' " + extra_options += ( + f""" --day-23-file '{ + env['CM_DLRM_V2_DAY23_FILE_PATH']}' """ + ) CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_DLRM_V2_PATH'], "pytorch", "tools", "accuracy-dlrm.py") + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + "'" + extra_options + \ @@ -179,7 +197,7 @@ def preprocess(i): else: env['CM_DATASET_IGBH_SIZE'] = "tiny" CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "graph", "R-GAT", "tools", "accuracy_igbh.py") + "' --mlperf-accuracy-file '" + os.path.join( - result_dir, "mlperf_log_accuracy.json") + "' --dataset-path '" + env['CM_DATASET_IGBH_PATH'] + "' --dataset-size '" + env['CM_DATASET_IGBH_SIZE'] + "' > '" + out_file + "'" + result_dir, "mlperf_log_accuracy.json") + "' --dataset-path '" + env['CM_DATASET_IGBH_PATH'] + "' --dataset-size '" + env['CM_DATASET_IGBH_SIZE'] + "' --output-file '" + out_file + "'" else: return {'return': 1, 'error': 'Unsupported dataset'} diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 689aaabc3..12b57a6f7 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -344,6 +344,7 @@ variations: env: CM_MLPERF_INFERENCE_VERSION: '4.1' CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.1_default + CM_MLPERF_SUBMISSION_CHECKER_VERSION: v4.1 adr: get-mlperf-inference-results-dir: tags: _version.r4_1 @@ -357,6 +358,7 @@ variations: env: CM_MLPERF_INFERENCE_VERSION: '5.0-dev' CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r5.0-dev_default + CM_MLPERF_SUBMISSION_CHECKER_VERSION: v5.0 group: benchmark-version adr: get-mlperf-inference-results-dir: From 9136723ebe0ab559d4cf53bb0a624ccb3f3bf5e3 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 13:00:46 +0000 Subject: [PATCH 22/63] Update test-mlperf-inference-rgat.yml --- .github/workflows/test-mlperf-inference-rgat.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml index 478e666c6..87a9314ca 100644 --- a/.github/workflows/test-mlperf-inference-rgat.yml +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -31,7 +31,7 @@ jobs: cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - name: Test MLPerf Inference R-GAT using ${{ matrix.backend }} on ${{ matrix.os }} run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --adr.inference-src.tags=_branch.fix_submission_generation --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --adr.inference-src.tags=_branch.fix_submission_generation --adr.mlperf-implementation.tags=_branch.fix_submission_generation --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 - name: Push Results if: github.repository_owner == 'gateoverflow' env: From edcf36c05690e92eec0187028c4fa079f4b64f9c Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 19:48:23 +0530 Subject: [PATCH 23/63] Test (#52) * Make r5.0-dev the default version for mlperf-inference --- script/get-dataset-mlperf-inference-igbh/_cm.yaml | 1 + script/run-mlperf-inference-app/_cm.yaml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml index 796d5674e..eac7179b7 100644 --- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml +++ b/script/get-dataset-mlperf-inference-igbh/_cm.yaml @@ -28,6 +28,7 @@ deps: - tags: get,generic-python-lib,_package.colorama - tags: get,generic-python-lib,_package.tqdm + prehook_deps: #paper - env: diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 12b57a6f7..8fe9b88d1 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -327,7 +327,6 @@ variations: tags: _version.r4_0-dev r4.1-dev: - default: true env: CM_MLPERF_INFERENCE_VERSION: '4.1-dev' CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.1-dev_default @@ -355,6 +354,7 @@ variations: group: benchmark-version r5.0-dev: + default: true env: CM_MLPERF_INFERENCE_VERSION: '5.0-dev' CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r5.0-dev_default From a1b8a48ea7316643dd8bce02503477756a193655 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 14 Dec 2024 06:58:59 +0530 Subject: [PATCH 24/63] Test (#53) * Fixes for rgat submission generation --- .github/workflows/test-mlperf-inference-rgat.yml | 4 ++-- script/app-image-classification-onnx-py/_cm.yaml | 1 - .../app-mlperf-inference-mlcommons-python/_cm.yaml | 4 ++-- .../customize.py | 12 ++++++++++-- script/get-mlperf-inference-src/_cm.yaml | 4 ++-- 5 files changed, 16 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml index 87a9314ca..03941023e 100644 --- a/.github/workflows/test-mlperf-inference-rgat.yml +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -1,7 +1,7 @@ name: MLPerf inference rgat on: - pull_request_target: + pull_request: branches: [ "main", "dev" ] paths: - '.github/workflows/test-mlperf-inference-rgat.yml' @@ -31,7 +31,7 @@ jobs: cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - name: Test MLPerf Inference R-GAT using ${{ matrix.backend }} on ${{ matrix.os }} run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --adr.inference-src.tags=_branch.fix_submission_generation --adr.mlperf-implementation.tags=_branch.fix_submission_generation --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 - name: Push Results if: github.repository_owner == 'gateoverflow' env: diff --git a/script/app-image-classification-onnx-py/_cm.yaml b/script/app-image-classification-onnx-py/_cm.yaml index 740a8a18a..e53b91ec2 100644 --- a/script/app-image-classification-onnx-py/_cm.yaml +++ b/script/app-image-classification-onnx-py/_cm.yaml @@ -22,7 +22,6 @@ default_env: deps: - tags: detect,os -#- tags: get,sys-utils-cm - names: - python - python3 diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 89646244b..45401431f 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -1239,8 +1239,8 @@ variations: CM_MODEL: rgat add_deps_recursive: pytorch: - version_max: "2.4.1" - version_max_usable: "2.4.1" + version_max: "2.4.0" + version_max_usable: "2.4.0" deps: - tags: get,generic-python-lib,_package.colorama - tags: get,generic-python-lib,_package.tqdm diff --git a/script/generate-mlperf-inference-submission/customize.py b/script/generate-mlperf-inference-submission/customize.py index 040e7cb09..a27086237 100644 --- a/script/generate-mlperf-inference-submission/customize.py +++ b/script/generate-mlperf-inference-submission/customize.py @@ -160,11 +160,15 @@ def generate_submission(env, state, inp, submission_division): print('* MLPerf inference submitter: {}'.format(submitter)) if env.get('CM_MLPERF_SUT_SW_NOTES_EXTRA', '') != '': - sw_notes = f"{system_meta_tmp['sw_notes']} {env['CM_MLPERF_SUT_SW_NOTES_EXTRA']}" + sw_notes = f"""{ + system_meta_tmp['sw_notes']} { + env['CM_MLPERF_SUT_SW_NOTES_EXTRA']}""" system_meta_tmp['sw_notes'] = sw_notes if env.get('CM_MLPERF_SUT_HW_NOTES_EXTRA', '') != '': - hw_notes = f"{system_meta_tmp['hw_notes']} {env['CM_MLPERF_SUT_HW_NOTES_EXTRA']}" + hw_notes = f"""{ + system_meta_tmp['hw_notes']} { + env['CM_MLPERF_SUT_HW_NOTES_EXTRA']}""" system_meta_tmp['hw_notes'] = hw_notes path_submission = os.path.join(path_submission_division, submitter) @@ -297,6 +301,10 @@ def generate_submission(env, state, inp, submission_division): system_path = os.path.join(path_submission, "systems") submission_system_path = system_path + if not os.path.isdir(submission_path): + os.makedirs(submission_path) + if not os.path.isdir(measurement_path): + os.makedirs(measurement_path) if not os.path.isdir(submission_system_path): os.makedirs(submission_system_path) system_file = os.path.join(submission_system_path, sub_res + ".json") diff --git a/script/get-mlperf-inference-src/_cm.yaml b/script/get-mlperf-inference-src/_cm.yaml index c100e32e8..b8bd39092 100644 --- a/script/get-mlperf-inference-src/_cm.yaml +++ b/script/get-mlperf-inference-src/_cm.yaml @@ -134,10 +134,10 @@ variations: versions: custom: env: - CM_MLPERF_LAST_RELEASE: v4.1 + CM_MLPERF_LAST_RELEASE: v5.0 deepsparse: env: - CM_MLPERF_LAST_RELEASE: v4.1 + CM_MLPERF_LAST_RELEASE: v5.0 CM_TMP_GIT_CHECKOUT: deepsparse CM_TMP_GIT_URL: https://github.com/neuralmagic/inference main: From 48f7a91172035ec5ccf6fc0986e9009bc075b2d6 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 14 Dec 2024 01:45:13 +0000 Subject: [PATCH 25/63] Update VERSION | rgat-fixes --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 45a346dba..592e815ea 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.11 +0.6.12 From 3d9715f2d7e3cbe111ab1f64900c01997ff63f08 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Sat, 14 Dec 2024 01:45:25 +0000 Subject: [PATCH 26/63] Updated git_commit_hash.txt --- git_commit_hash.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_commit_hash.txt b/git_commit_hash.txt index 252b8a36b..89f5a3e18 100644 --- a/git_commit_hash.txt +++ b/git_commit_hash.txt @@ -1 +1 @@ -f5e04069c8d7395be34f94fa8a94edc6c317b58e +48f7a91172035ec5ccf6fc0986e9009bc075b2d6 From 90a4412f05a3c29a536dad41e9c1ee43da0f9514 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 19 Dec 2024 10:34:46 +0000 Subject: [PATCH 27/63] Update MLPerf automation repo in github actions (#54) --- .../test-amd-mlperf-inference-implementations.yml | 4 ++-- ...erence-bert-deepsparse-tf-onnxruntime-pytorch.yml | 2 +- .github/workflows/test-mlperf-inference-dlrm.yml | 4 ++-- .github/workflows/test-mlperf-inference-gptj.yml | 4 ++-- .github/workflows/test-mlperf-inference-llama2.yml | 8 ++++---- .github/workflows/test-mlperf-inference-mixtral.yml | 4 ++-- .../test-mlperf-inference-mlcommons-cpp-resnet50.yml | 2 +- .github/workflows/test-mlperf-inference-resnet50.yml | 2 +- .../workflows/test-mlperf-inference-retinanet.yml | 2 +- .github/workflows/test-mlperf-inference-rgat.yml | 2 +- .github/workflows/test-mlperf-inference-sdxl.yaml | 4 ++-- .../test-nvidia-mlperf-inference-implementations.yml | 2 +- .github/workflows/test-scc24-sdxl.yaml | 12 +++++++----- script/get-dataset-mlperf-inference-igbh/_cm.yaml | 2 +- script/get-gh-actions-runner/_cm.yaml | 7 +++++++ 15 files changed, 35 insertions(+), 26 deletions(-) diff --git a/.github/workflows/test-amd-mlperf-inference-implementations.yml b/.github/workflows/test-amd-mlperf-inference-implementations.yml index b635d266e..2e140c32e 100644 --- a/.github/workflows/test-amd-mlperf-inference-implementations.yml +++ b/.github/workflows/test-amd-mlperf-inference-implementations.yml @@ -22,5 +22,5 @@ jobs: export CM_REPOS=$HOME/GH_CM pip install --upgrade cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_all-scenarios,_full,_r4.1-dev --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=amd --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=rocm --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet --docker_skip_run_cmd=yes - # cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c + cm run script --tags=run-mlperf,inference,_all-scenarios,_full,_r4.1-dev --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=amd --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --device=rocm --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet --docker_skip_run_cmd=yes + # cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=dev --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c diff --git a/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml b/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml index 3594aaf86..9aa9b8293 100644 --- a/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml +++ b/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml @@ -60,4 +60,4 @@ jobs: git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from Bert GH action on ${{ matrix.os }}" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from Bert GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-dlrm.yml b/.github/workflows/test-mlperf-inference-dlrm.yml index 6440d0448..4d7727457 100644 --- a/.github/workflows/test-mlperf-inference-dlrm.yml +++ b/.github/workflows/test-mlperf-inference-dlrm.yml @@ -25,7 +25,7 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_performance-only --adr.mlperf-implementation.tags=_branch.dev --adr.mlperf-implementation.version=custom --submitter="MLCommons" --model=dlrm-v2-99 --implementation=reference --backend=pytorch --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --clean + cm run script --tags=run-mlperf,inference,_performance-only --adr.mlperf-implementation.tags=_branch.dev --adr.mlperf-implementation.version=custom --submitter="MLCommons" --model=dlrm-v2-99 --implementation=reference --backend=pytorch --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --clean build_intel: if: github.repository_owner == 'gateoverflow_off' @@ -45,4 +45,4 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=dlrm-v2-99 --implementation=intel --batch_size=1 --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=dlrm-v2-99 --implementation=intel --batch_size=1 --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean diff --git a/.github/workflows/test-mlperf-inference-gptj.yml b/.github/workflows/test-mlperf-inference-gptj.yml index 0562b9176..db0ed5923 100644 --- a/.github/workflows/test-mlperf-inference-gptj.yml +++ b/.github/workflows/test-mlperf-inference-gptj.yml @@ -26,6 +26,6 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --get_platform_details=yes --implementation=reference --clean - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --get_platform_details=yes --implementation=reference --clean + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-llama2.yml b/.github/workflows/test-mlperf-inference-llama2.yml index ab1c9bb48..6a8c1adb8 100644 --- a/.github/workflows/test-mlperf-inference-llama2.yml +++ b/.github/workflows/test-mlperf-inference-llama2.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: MLPerf inference LLAMA 2 70B +name: MLPerf inference LLAMA2-70B on: schedule: @@ -20,7 +20,7 @@ jobs: precision: [ "bfloat16" ] steps: - - name: Test MLPerf Inference LLAMA 2 70B reference implementation + - name: Test MLPerf Inference LLAMA2-70B reference implementation run: | source gh_action/bin/deactivate || python3 -m venv gh_action source gh_action/bin/activate @@ -31,5 +31,5 @@ jobs: pip install "huggingface_hub[cli]" git config --global credential.helper store huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=llama2-70b-99 --implementation=reference --backend=${{ matrix.backend }} --precision=${{ matrix.precision }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=0.001 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST=yes --clean - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions" --quiet --submission_dir=$HOME/gh_action_submissions + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=llama2-70b-99 --implementation=reference --backend=${{ matrix.backend }} --precision=${{ matrix.precision }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=0.001 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-mixtral.yml b/.github/workflows/test-mlperf-inference-mixtral.yml index 0a6a37708..b29341119 100644 --- a/.github/workflows/test-mlperf-inference-mixtral.yml +++ b/.github/workflows/test-mlperf-inference-mixtral.yml @@ -30,5 +30,5 @@ jobs: git config --global credential.helper store huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1 - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - GO-phoenix" --quiet --submission_dir=$HOME/gh_action_submissions + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1 + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - GO-phoenix" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml b/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml index ff856ad54..72b0d1fe3 100644 --- a/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml @@ -59,4 +59,4 @@ jobs: git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from MLCommons C++ ResNet50 GH action on ${{ matrix.os }}" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from MLCommons C++ ResNet50 GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-resnet50.yml b/.github/workflows/test-mlperf-inference-resnet50.yml index 4388e4bb9..54cb7c91c 100644 --- a/.github/workflows/test-mlperf-inference-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-resnet50.yml @@ -64,4 +64,4 @@ jobs: git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from R50 GH action on ${{ matrix.os }}" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from R50 GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-retinanet.yml b/.github/workflows/test-mlperf-inference-retinanet.yml index eac9346fe..a319f6772 100644 --- a/.github/workflows/test-mlperf-inference-retinanet.yml +++ b/.github/workflows/test-mlperf-inference-retinanet.yml @@ -64,4 +64,4 @@ jobs: git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from Retinanet GH action on ${{ matrix.os }}" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from Retinanet GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml index 03941023e..259edab72 100644 --- a/.github/workflows/test-mlperf-inference-rgat.yml +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -45,4 +45,4 @@ jobs: git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from R-GAT GH action on ${{ matrix.os }}" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from R-GAT GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-sdxl.yaml b/.github/workflows/test-mlperf-inference-sdxl.yaml index aea41cee2..03ae46d4e 100644 --- a/.github/workflows/test-mlperf-inference-sdxl.yaml +++ b/.github/workflows/test-mlperf-inference-sdxl.yaml @@ -21,5 +21,5 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --adr.mlperf-implementation.tags=_branch.dev --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --adr.mlperf-implementation.tags=_branch.dev --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml index 2974e651c..449fd033b 100644 --- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml +++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - system: [ "GO-spr", "phoenix", "i9" ] + system: [ "GO-spr", "phoenix-Amd-Am5", "GO-i9" ] python-version: [ "3.12" ] model: [ "resnet50", "retinanet", "bert-99", "bert-99.9", "gptj-99.9", "3d-unet-99.9", "sdxl" ] exclude: diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml index 673dba804..fec9d7dbe 100644 --- a/.github/workflows/test-scc24-sdxl.yaml +++ b/.github/workflows/test-scc24-sdxl.yaml @@ -9,7 +9,7 @@ jobs: if: github.repository_owner == 'gateoverflow' runs-on: [ self-hosted, linux, x64, GO-spr ] env: - CM_REPOS: $HOME/GH_CM + CM_DOCKER_REPO: mlperf-automations strategy: fail-fast: false matrix: @@ -27,14 +27,16 @@ jobs: pip install --upgrade cm4mlops pip install tabulate cm pull repo - cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean - cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=generate,inference,submission --clean --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions build_nvidia: if: github.repository_owner == 'gateoverflow' runs-on: [ self-hosted, linux, x64, GO-spr] + env: + CM_DOCKER_REPO: mlperf-automations strategy: fail-fast: false matrix: @@ -52,7 +54,7 @@ jobs: pip install --upgrade cm4mlops pip install tabulate cm pull repo - cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --pull_changes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --custom_system_nvidia=yes --clean - cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --pull_changes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --custom_system_nvidia=yes --clean + cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=generate,inference,submission --clean --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml index eac7179b7..eacd5be5c 100644 --- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml +++ b/script/get-dataset-mlperf-inference-igbh/_cm.yaml @@ -34,7 +34,7 @@ prehook_deps: - env: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_feat.npy CM_DOWNLOAD_CHECKSUM: 71058b9ac8011bafa1c5467504452d13 - CM_DOWNLOAD_FILENAME: node_feet.npy + CM_DOWNLOAD_FILENAME: node_feat.npy CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,node_feat force_cache: true diff --git a/script/get-gh-actions-runner/_cm.yaml b/script/get-gh-actions-runner/_cm.yaml index 3008f6365..287ee254a 100644 --- a/script/get-gh-actions-runner/_cm.yaml +++ b/script/get-gh-actions-runner/_cm.yaml @@ -6,6 +6,7 @@ can_force_cache: true tags: - get - gh +- github - actions-runner - runner-code - runner @@ -29,21 +30,27 @@ deps: variations: config: + group: command + default: true env: CM_GH_ACTIONS_RUNNER_COMMAND: config remove: + group: command env: CM_GH_ACTIONS_RUNNER_COMMAND: remove install: + group: command deps: - tags: get,gh,actions-runner,_config force_cache: yes env: CM_GH_ACTIONS_RUNNER_COMMAND: install uninstall: + group: command env: CM_GH_ACTIONS_RUNNER_COMMAND: uninstall start: + group: command deps: - tags: get,gh,actions-runner,_install force_cache: yes From af15e72948fb4677118a4ae562bc4f37e6e33909 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 19 Dec 2024 19:49:55 +0000 Subject: [PATCH 28/63] Support nvmitten for aarch64 (#55) * Support nvmitten for aarch64 --- script/app-mlperf-inference/_cm.yaml | 8 +++++--- setup.py | 5 +++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index c57003c10..f3ef84523 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -338,12 +338,16 @@ variations: - x86_64 docker: base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public + env: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' - skip_if_env: CM_HOST_PLATFORM_FLAVOR: - x86_64 docker: base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.1-cuda12.4-pytorch24.04-ubuntu22.04-aarch64-GraceHopper-release + env: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp310-cp310-linux_aarch64.whl' @@ -1627,7 +1631,7 @@ variations: CM_SKIP_SYS_UTILS: 'yes' CM_REGENERATE_MEASURE_FILES: 'yes' env: - CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3-cp38-cp38-linux_x86_64.whl' + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' #uses public code for inference v4.1 @@ -1646,8 +1650,6 @@ variations: default_env: CM_SKIP_SYS_UTILS: 'yes' CM_REGENERATE_MEASURE_FILES: 'yes' - env: - CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' r4.1_default: group: diff --git a/setup.py b/setup.py index a6d5a73f7..8cc2aec00 100644 --- a/setup.py +++ b/setup.py @@ -145,11 +145,12 @@ def custom_function(self): 'artifact': 'mlcommons@cm4mlops', 'force': True, 'all': True}) + branch = os.environ.get('CM_MLOPS_REPO_BRANCH', 'dev') r = cmind.access({'action': 'pull', 'automation': 'repo', 'artifact': 'mlcommons@mlperf-automations', - 'checkout': commit_hash}) - # r = cmind.access({'action':'pull', 'automation':'repo', 'artifact':'mlcommons@mlperf-automations', 'checkout': commit_hash}) + 'checkout': commit_hash, + 'branch': branch}) print(r) if r['return'] > 0: return r['return'] From 8b92713befedfe333570533bc8ca2e3a7bce4085 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Thu, 19 Dec 2024 19:50:13 +0000 Subject: [PATCH 29/63] Increment version to 0.6.13 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 592e815ea..e196726d2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.12 +0.6.13 From b3a34ec085e391351d70a6f663dd0c95b0423e45 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Thu, 19 Dec 2024 19:50:16 +0000 Subject: [PATCH 30/63] Updated git_commit_hash.txt --- git_commit_hash.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_commit_hash.txt b/git_commit_hash.txt index 89f5a3e18..de1c6db94 100644 --- a/git_commit_hash.txt +++ b/git_commit_hash.txt @@ -1 +1 @@ -48f7a91172035ec5ccf6fc0986e9009bc075b2d6 +8b92713befedfe333570533bc8ca2e3a7bce4085 From 3f25d3c211771d6f557e67cfc4c8d9fdcb2c0106 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 20 Dec 2024 09:06:24 +0000 Subject: [PATCH 31/63] Copy bert model for nvidia-mlperf-inference implementation instead of softlink (#56) * Copy bert model for nvidia mlperf inference implementation instead of softlink --- script/app-mlperf-inference-nvidia/customize.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py index a09fd9715..36324cd0c 100644 --- a/script/app-mlperf-inference-nvidia/customize.py +++ b/script/app-mlperf-inference-nvidia/customize.py @@ -87,13 +87,13 @@ def preprocess(i): if not os.path.exists(fp32_model_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_BERT_LARGE_FP32_PATH']} {fp32_model_path}") + f"cp -r {env['CM_ML_MODEL_BERT_LARGE_FP32_PATH']} {fp32_model_path}") if not os.path.exists(int8_model_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_BERT_LARGE_INT8_PATH']} {int8_model_path}") + f"cp -r {env['CM_ML_MODEL_BERT_LARGE_INT8_PATH']} {int8_model_path}") if not os.path.exists(vocab_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_BERT_VOCAB_FILE_WITH_PATH']} {vocab_path}") + f"cp -r {env['CM_ML_MODEL_BERT_VOCAB_FILE_WITH_PATH']} {vocab_path}") model_name = "bert" model_path = fp32_model_path From a09686d660338aaecb77b78881a7e4b25ff5a5ca Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 20 Dec 2024 09:07:40 +0000 Subject: [PATCH 32/63] Update version (#57) * Update VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index e196726d2..fcbaa8478 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.13 +0.6.14 From e6ad511b0535786251b4f30c71c4fb1e5511dcf9 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Fri, 20 Dec 2024 09:07:56 +0000 Subject: [PATCH 33/63] Updated git_commit_hash.txt --- git_commit_hash.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_commit_hash.txt b/git_commit_hash.txt index de1c6db94..2d3c8a628 100644 --- a/git_commit_hash.txt +++ b/git_commit_hash.txt @@ -1 +1 @@ -8b92713befedfe333570533bc8ca2e3a7bce4085 +a09686d660338aaecb77b78881a7e4b25ff5a5ca From f399c2cd7db8406740274e9b7d2898f8ddb229e9 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 20 Dec 2024 19:28:55 +0000 Subject: [PATCH 34/63] Update github actions - use master branch of inference repository (#58) * Use master branch of inference repo in github action --- .github/workflows/test-cm-based-submission-generation.yml | 2 +- .github/workflows/test-mlperf-inference-dlrm.yml | 2 +- .github/workflows/test-mlperf-inference-gptj.yml | 2 +- .github/workflows/test-mlperf-inference-llama2.yml | 2 +- .github/workflows/test-mlperf-inference-mixtral.yml | 2 +- .github/workflows/test-mlperf-inference-resnet50.yml | 4 ++-- .github/workflows/test-mlperf-inference-retinanet.yml | 4 ++-- .github/workflows/test-mlperf-inference-rgat.yml | 2 +- .github/workflows/test-mlperf-inference-sdxl.yaml | 2 +- .../test-nvidia-mlperf-inference-implementations.yml | 2 +- .github/workflows/test-scc24-sdxl.yaml | 2 +- 11 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test-cm-based-submission-generation.yml b/.github/workflows/test-cm-based-submission-generation.yml index d62f8e59b..22f8914d6 100644 --- a/.github/workflows/test-cm-based-submission-generation.yml +++ b/.github/workflows/test-cm-based-submission-generation.yml @@ -80,7 +80,7 @@ jobs: fi # Dynamically set the log group to simulate a dynamic step name echo "::group::$description" - cm ${{ matrix.action }} script --tags=generate,inference,submission --adr.submission-checker-src.tags=_branch.dev --clean --preprocess_submission=yes --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=${{ matrix.division }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet $extra_run_args + cm ${{ matrix.action }} script --tags=generate,inference,submission --clean --preprocess_submission=yes --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=${{ matrix.division }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet $extra_run_args exit_status=$? echo "Exit status for the job ${description} ${exit_status}" if [[ "${{ matrix.case }}" == "case-5" || "${{ matrix.case }}" == "case-6" ]]; then diff --git a/.github/workflows/test-mlperf-inference-dlrm.yml b/.github/workflows/test-mlperf-inference-dlrm.yml index 4d7727457..3ed51759d 100644 --- a/.github/workflows/test-mlperf-inference-dlrm.yml +++ b/.github/workflows/test-mlperf-inference-dlrm.yml @@ -25,7 +25,7 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_performance-only --adr.mlperf-implementation.tags=_branch.dev --adr.mlperf-implementation.version=custom --submitter="MLCommons" --model=dlrm-v2-99 --implementation=reference --backend=pytorch --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --clean + cm run script --tags=run-mlperf,inference,_performance-only --pull_changes=yes --pull_inference_changes=yes --submitter="MLCommons" --model=dlrm-v2-99 --implementation=reference --backend=pytorch --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --clean build_intel: if: github.repository_owner == 'gateoverflow_off' diff --git a/.github/workflows/test-mlperf-inference-gptj.yml b/.github/workflows/test-mlperf-inference-gptj.yml index db0ed5923..6a1152893 100644 --- a/.github/workflows/test-mlperf-inference-gptj.yml +++ b/.github/workflows/test-mlperf-inference-gptj.yml @@ -26,6 +26,6 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --get_platform_details=yes --implementation=reference --clean + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --pull_changes=yes --pull_inference_changes=yes --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --get_platform_details=yes --implementation=reference --clean cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-llama2.yml b/.github/workflows/test-mlperf-inference-llama2.yml index 6a8c1adb8..184940330 100644 --- a/.github/workflows/test-mlperf-inference-llama2.yml +++ b/.github/workflows/test-mlperf-inference-llama2.yml @@ -31,5 +31,5 @@ jobs: pip install "huggingface_hub[cli]" git config --global credential.helper store huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=llama2-70b-99 --implementation=reference --backend=${{ matrix.backend }} --precision=${{ matrix.precision }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=0.001 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --model=llama2-70b-99 --implementation=reference --backend=${{ matrix.backend }} --precision=${{ matrix.precision }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=0.001 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-mixtral.yml b/.github/workflows/test-mlperf-inference-mixtral.yml index b29341119..597a4cdbc 100644 --- a/.github/workflows/test-mlperf-inference-mixtral.yml +++ b/.github/workflows/test-mlperf-inference-mixtral.yml @@ -30,5 +30,5 @@ jobs: git config --global credential.helper store huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1 + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1 cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - GO-phoenix" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-resnet50.yml b/.github/workflows/test-mlperf-inference-resnet50.yml index 54cb7c91c..f4ed3f4f1 100644 --- a/.github/workflows/test-mlperf-inference-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-resnet50.yml @@ -46,11 +46,11 @@ jobs: - name: Test MLPerf Inference ResNet50 (Windows) if: matrix.os == 'windows-latest' run: | - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=resnet50 --adr.loadgen.tags=_from-pip --pip_loadgen=yes --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --target_qps=1 -v --quiet + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --hw_name=gh_${{ matrix.os }}_x86 --model=resnet50 --adr.loadgen.tags=_from-pip --pip_loadgen=yes --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --target_qps=1 -v --quiet - name: Test MLPerf Inference ResNet50 (Linux/macOS) if: matrix.os != 'windows-latest' run: | - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=resnet50 --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --target_qps=1 -v --quiet + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --hw_name=gh_${{ matrix.os }}_x86 --model=resnet50 --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --target_qps=1 -v --quiet - name: Push Results if: github.repository_owner == 'gateoverflow' env: diff --git a/.github/workflows/test-mlperf-inference-retinanet.yml b/.github/workflows/test-mlperf-inference-retinanet.yml index a319f6772..f392d2d7d 100644 --- a/.github/workflows/test-mlperf-inference-retinanet.yml +++ b/.github/workflows/test-mlperf-inference-retinanet.yml @@ -46,11 +46,11 @@ jobs: - name: Test MLPerf Inference Retinanet using ${{ matrix.backend }} on ${{ matrix.os }} if: matrix.os == 'windows-latest' run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }} --model=retinanet --adr.loadgen.tags=_from-pip --pip_loadgen=yes --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --quiet -v --target_qps=1 + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --hw_name=gh_${{ matrix.os }} --model=retinanet --adr.loadgen.tags=_from-pip --pip_loadgen=yes --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --quiet -v --target_qps=1 - name: Test MLPerf Inference Retinanet using ${{ matrix.backend }} on ${{ matrix.os }} if: matrix.os != 'windows-latest' run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=retinanet --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --adr.compiler.tags=gcc --quiet -v --target_qps=1 + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --hw_name=gh_${{ matrix.os }}_x86 --model=retinanet --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --adr.compiler.tags=gcc --quiet -v --target_qps=1 - name: Push Results if: github.repository_owner == 'gateoverflow' env: diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml index 259edab72..d2edb5855 100644 --- a/.github/workflows/test-mlperf-inference-rgat.yml +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -31,7 +31,7 @@ jobs: cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - name: Test MLPerf Inference R-GAT using ${{ matrix.backend }} on ${{ matrix.os }} run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --pull_changes=yes --pull_inference_changes=yes --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 - name: Push Results if: github.repository_owner == 'gateoverflow' env: diff --git a/.github/workflows/test-mlperf-inference-sdxl.yaml b/.github/workflows/test-mlperf-inference-sdxl.yaml index 03ae46d4e..d1029e80d 100644 --- a/.github/workflows/test-mlperf-inference-sdxl.yaml +++ b/.github/workflows/test-mlperf-inference-sdxl.yaml @@ -21,5 +21,5 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --adr.mlperf-implementation.tags=_branch.dev --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml index 449fd033b..370f1a74d 100644 --- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml +++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml @@ -39,6 +39,6 @@ jobs: pip install --upgrade cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --adr.submission-checker-src.tags=_branch.dev --execution_mode=valid --gpu_name=rtx_4090 --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet + cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=rtx_4090 --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml index fec9d7dbe..6620c4913 100644 --- a/.github/workflows/test-scc24-sdxl.yaml +++ b/.github/workflows/test-scc24-sdxl.yaml @@ -27,7 +27,7 @@ jobs: pip install --upgrade cm4mlops pip install tabulate cm pull repo - cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --pull_inference_changes=yes --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=generate,inference,submission --clean --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions From d2db3b4da03b8d3ea413c9e1a1bd3a3963a6f911 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 21 Dec 2024 10:01:39 +0000 Subject: [PATCH 35/63] Migrate MLPerf inference unofficial results repo to MLCommons (#59) * Use mlcommons repo for uploading unofficial results of nvidia/intel github actions * Fix format * Fix rgat download path, added libbz2 deps for draw-graph-from-json * Support windows for pull-git-repo * Fix libbz2-dev detect * Added separate installation options for libbz2-dev and bzip2 * Restrict libbz2-dev install only for ubuntu (install-python-src) * Update VERSION --- ...intel-mlperf-inference-implementations.yml | 4 +- ...vidia-mlperf-inference-implementations.yml | 4 +- VERSION | 2 +- automation/script/module.py | 18 ++++++-- .../_cm.yaml | 42 ++++++++++++++++++- .../customize.py | 0 .../run.sh | 0 script/get-generic-sys-util/_cm.yaml | 20 +++++++-- script/get-ml-model-rgat/_cm.yaml | 13 +++--- script/get-ml-model-rgat/customize.py | 7 ++-- script/install-python-src/_cm.yaml | 3 ++ script/pull-git-repo/customize.py | 3 -- script/pull-git-repo/run.bat | 26 ++++++++++++ 13 files changed, 117 insertions(+), 25 deletions(-) rename script/{get-dataset-mlperf-inference-igbh => get-dataset-igbh}/_cm.yaml (94%) rename script/{get-dataset-mlperf-inference-igbh => get-dataset-igbh}/customize.py (100%) rename script/{get-dataset-mlperf-inference-igbh => get-dataset-igbh}/run.sh (100%) create mode 100644 script/pull-git-repo/run.bat diff --git a/.github/workflows/test-intel-mlperf-inference-implementations.yml b/.github/workflows/test-intel-mlperf-inference-implementations.yml index 9063af791..166a1a77c 100644 --- a/.github/workflows/test-intel-mlperf-inference-implementations.yml +++ b/.github/workflows/test-intel-mlperf-inference-implementations.yml @@ -22,5 +22,5 @@ jobs: export CM_REPOS=$HOME/GH_CM pip install --upgrade cm4mlops pip install tabulate - cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=intel --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=cpu --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c + cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=intel --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=mlcommons@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --device=cpu --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml index 370f1a74d..83e357613 100644 --- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml +++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml @@ -39,6 +39,6 @@ jobs: pip install --upgrade cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=rtx_4090 --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet + cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=rtx_4090 --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt=yes --docker_it=no --docker_cm_repo=mlcommons@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from GH action on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name diff --git a/VERSION b/VERSION index fcbaa8478..6769f67e2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.14 +0.6.15 diff --git a/automation/script/module.py b/automation/script/module.py index f0347bbeb..fe0582648 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -1798,9 +1798,16 @@ def _run(self, i): tmp_curdir = os.getcwd() if env.get('CM_OUTDIRNAME', '') != '': - if not os.path.exists(env['CM_OUTDIRNAME']): - os.makedirs(env['CM_OUTDIRNAME']) - os.chdir(env['CM_OUTDIRNAME']) + if os.path.isabs(env['CM_OUTDIRNAME']) or recursion: + c_outdirname = env['CM_OUTDIRNAME'] + else: + c_outdirname = os.path.join( + env['CM_TMP_CURRENT_PATH'], env['CM_OUTDIRNAME']) + env['CM_OUTDIRNAME'] = c_outdirname + + if not os.path.exists(c_outdirname): + os.makedirs(c_outdirname) + os.chdir(c_outdirname) # Check if pre-process and detect if 'preprocess' in dir(customize_code) and not fake_run: @@ -5860,7 +5867,10 @@ def convert_env_to_script(env, os_info, start_script=None): key = key[1:] # Append the existing environment variable to the new value - env_value = f"{env_separator.join(env_value)}{env_separator}{os_info['env_var'].replace('env_var', key)}" + env_value = f"""{ + env_separator.join(env_value)}{env_separator}{ + os_info['env_var'].replace( + 'env_var', key)}""" # Replace placeholders in the platform-specific environment command env_command = os_info['set_env'].replace( diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-igbh/_cm.yaml similarity index 94% rename from script/get-dataset-mlperf-inference-igbh/_cm.yaml rename to script/get-dataset-igbh/_cm.yaml index eacd5be5c..1b7c86ae7 100644 --- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml +++ b/script/get-dataset-igbh/_cm.yaml @@ -1,4 +1,4 @@ -alias: get-dataset-mlperf-inference-igbh +alias: get-dataset-igbh automation_alias: script automation_uid: 5b4e0237da074764 cache: true @@ -37,6 +37,8 @@ prehook_deps: CM_DOWNLOAD_FILENAME: node_feat.npy CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,node_feat + force_env_keys: + - CM_OUTDIRNAME force_cache: true enable_if_env: CM_DATASET_IGBH_TYPE: @@ -54,6 +56,8 @@ prehook_deps: CM_DOWNLOAD_FILENAME: node_label_19.npy CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,node_label_19 + force_env_keys: + - CM_OUTDIRNAME force_cache: true enable_if_env: CM_DATASET_IGBH_TYPE: @@ -72,6 +76,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,node_label_2K force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -89,6 +95,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,paper_id_index_mapping force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -107,6 +115,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/paper__cites__paper/ extra_cache_tags: dataset,igbh,paper_cites_paper,edge_index force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -125,6 +135,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/author/ extra_cache_tags: dataset,igbh,author,author_id_index_mapping force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -142,6 +154,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/author/ extra_cache_tags: dataset,igbh,author,node_feat force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -160,6 +174,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/conference/ extra_cache_tags: dataset,igbh,conference,conference_id_index_mapping force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -177,6 +193,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/conference/ extra_cache_tags: dataset,igbh,conference,node_feat force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -195,6 +213,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/institute/ extra_cache_tags: dataset,igbh,institute,institute_id_index_mapping force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -212,6 +232,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/institute/ extra_cache_tags: dataset,igbh,institute,node_feat force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -230,6 +252,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/journal/ extra_cache_tags: dataset,igbh,journal,journal_id_index_mapping force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -247,6 +271,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/journal/ extra_cache_tags: dataset,igbh,journal,node_feat force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -265,6 +291,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/fos/ extra_cache_tags: dataset,igbh,fos,fos_id_index_mapping force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -282,6 +310,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/fos/ extra_cache_tags: dataset,igbh,fos,node_feat force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -300,6 +330,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/author__affiliated_to__institute/ extra_cache_tags: dataset,igbh,author_affiliated_to_institute,edge_index force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -318,6 +350,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/paper__published__journal/ extra_cache_tags: dataset,igbh,paper_published_journal,edge_index force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -336,6 +370,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/paper__topic__fos/ extra_cache_tags: dataset,igbh,paper_topic_fos,edge_index force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -354,6 +390,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/paper__venue__conference/ extra_cache_tags: dataset,igbh,paper_venue_conference,edge_index force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -372,6 +410,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/paper__written_by__author/ extra_cache_tags: dataset,igbh,paper_written_by_author,edge_index force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' diff --git a/script/get-dataset-mlperf-inference-igbh/customize.py b/script/get-dataset-igbh/customize.py similarity index 100% rename from script/get-dataset-mlperf-inference-igbh/customize.py rename to script/get-dataset-igbh/customize.py diff --git a/script/get-dataset-mlperf-inference-igbh/run.sh b/script/get-dataset-igbh/run.sh similarity index 100% rename from script/get-dataset-mlperf-inference-igbh/run.sh rename to script/get-dataset-igbh/run.sh diff --git a/script/get-generic-sys-util/_cm.yaml b/script/get-generic-sys-util/_cm.yaml index 1d45c2c28..b75e24bbc 100644 --- a/script/get-generic-sys-util/_cm.yaml +++ b/script/get-generic-sys-util/_cm.yaml @@ -212,21 +212,33 @@ variations: brew: '' dnf: boost-devel yum: boost-devel - libbz2-dev: + bzip2: env: - CM_SYS_UTIL_NAME: libbz2_dev + CM_SYS_UTIL_NAME: bzip2 CM_SYS_UTIL_VERSION_CMD_OVERRIDE: bzcat --version 2>&1 | grep bzip > tmp-ver.out CM_SYS_UTIL_VERSION_RE: ([0-9]+(\.[0-9]+)+) CM_TMP_VERSION_DETECT_GROUP_NUMBER: 1 new_env_keys: + - CM_BZIP2_VERSION + state: + bzip2: + apt: bzip2 + brew: bzip2 + dnf: bzip2 + yum: bzip2 + libbz2-dev: + env: + CM_SYS_UTIL_NAME: libbz2_dev + CM_SYS_UTIL_VERSION_CMD: dpkg -s libbz2-dev | grep 'Version' + CM_SYS_UTIL_VERSION_RE: ([0-9]+(\.[0-9]+)+) + CM_TMP_VERSION_DETECT_GROUP_NUMBER: 0 + new_env_keys: - CM_LIBBZ2_DEV_VERSION state: libbz2_dev: apt: libbz2-dev - brew: bzip2 dnf: libbzip2-devel yum: libbzip2-devel - zlib-devel: libbz2-devel libev-dev: env: CM_SYS_UTIL_NAME: libev_dev diff --git a/script/get-ml-model-rgat/_cm.yaml b/script/get-ml-model-rgat/_cm.yaml index c4ecc56e0..bcec6df1c 100644 --- a/script/get-ml-model-rgat/_cm.yaml +++ b/script/get-ml-model-rgat/_cm.yaml @@ -21,12 +21,14 @@ prehook_deps: CM_TMP_REQUIRE_DOWNLOAD: - 'yes' env: - CM_DOWNLOAD_FINAL_ENV_NAME: CM_ML_MODEL_PATH - extra_cache_tags: rgat,gnn,model + CM_DOWNLOAD_FINAL_ENV_NAME: RGAT_DIR_PATH + extra_cache_tags: rgat,gnn,model,ml-model force_cache: true names: - - dae - tags: download-and-extract + - download-file + tags: download,file + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_DOWNLOAD_URL @@ -55,7 +57,7 @@ variations: group: download-source rclone: adr: - dae: + download-file: tags: _rclone env: CM_DOWNLOAD_TOOL: rclone @@ -65,3 +67,4 @@ variations: env: CM_ML_MODEL_STARTING_WEIGHTS_FILENAME: https://github.com/mlcommons/inference/tree/master/graph/R-GAT#download-model-using-rclone CM_DOWNLOAD_URL: mlc-inference:mlcommons-inference-wg-public/R-GAT/RGAT.pt + CM_DOWNLOAD_FILENAME: RGAT diff --git a/script/get-ml-model-rgat/customize.py b/script/get-ml-model-rgat/customize.py index df810a5ab..d920a8711 100644 --- a/script/get-ml-model-rgat/customize.py +++ b/script/get-ml-model-rgat/customize.py @@ -20,9 +20,10 @@ def postprocess(i): env = i['env'] if env.get('CM_ML_MODEL_RGAT_CHECKPOINT_PATH', '') == '': - env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] = os.path.join( - env['CM_ML_MODEL_PATH'], "RGAT.pt") - elif env.get('CM_ML_MODEL_PATH', '') == '': + env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] = env.get( + 'RGAT_CHECKPOINT_PATH', os.path.join(env['RGAT_DIR_PATH'], "RGAT.pt")) + + if env.get('CM_ML_MODEL_PATH', '') == '': env['CM_ML_MODEL_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] env['RGAT_CHECKPOINT_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] diff --git a/script/install-python-src/_cm.yaml b/script/install-python-src/_cm.yaml index c0a618346..5aeed2a6b 100644 --- a/script/install-python-src/_cm.yaml +++ b/script/install-python-src/_cm.yaml @@ -16,6 +16,9 @@ deps: - tags: detect,cpu - tags: get,generic-sys-util,_libffi-dev - tags: get,generic-sys-util,_libbz2-dev + enable_if_env: + CM_HOST_OS_FLAVOR: + - ubuntu - tags: get,generic-sys-util,_libssl-dev - enable_if_env: CM_HOST_OS_FLAVOR: diff --git a/script/pull-git-repo/customize.py b/script/pull-git-repo/customize.py index 55a581bb5..3dfd21e26 100644 --- a/script/pull-git-repo/customize.py +++ b/script/pull-git-repo/customize.py @@ -7,9 +7,6 @@ def preprocess(i): os_info = i['os_info'] - if os_info['platform'] == 'windows': - return {'return': 1, 'error': 'Windows is not supported in this script yet'} - env = i['env'] meta = i['meta'] diff --git a/script/pull-git-repo/run.bat b/script/pull-git-repo/run.bat new file mode 100644 index 000000000..8642fce0e --- /dev/null +++ b/script/pull-git-repo/run.bat @@ -0,0 +1,26 @@ +@echo off +setlocal enabledelayedexpansion + +REM Save the current directory +set "CUR_DIR=%CD%" +set "SCRIPT_DIR=%CM_TMP_CURRENT_SCRIPT_PATH%" + +REM Change to the specified path +set "path=%CM_GIT_CHECKOUT_PATH%" +echo cd %path% + +cd /d "%path%" +if errorlevel 1 ( + echo Failed to change directory to %path% + exit /b %errorlevel% +) + +REM Execute the Git pull command +echo %CM_GIT_PULL_CMD% +call %CM_GIT_PULL_CMD% +REM Don't fail if there are local changes +REM if errorlevel 1 exit /b %errorlevel% + +REM Return to the original directory +cd /d "%CUR_DIR%" +endlocal From 2b1e23c9adc298d5bb6eeacdc567bda04ed14155 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Sat, 21 Dec 2024 10:01:53 +0000 Subject: [PATCH 36/63] Updated git_commit_hash.txt --- git_commit_hash.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_commit_hash.txt b/git_commit_hash.txt index 2d3c8a628..8db05eabf 100644 --- a/git_commit_hash.txt +++ b/git_commit_hash.txt @@ -1 +1 @@ -a09686d660338aaecb77b78881a7e4b25ff5a5ca +d2db3b4da03b8d3ea413c9e1a1bd3a3963a6f911 From 3439a72cdc9aaec2bdbaf81683eb4eeb63a0cdee Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 21 Dec 2024 10:15:53 +0000 Subject: [PATCH 37/63] Create reset-fork.yml --- .github/workflows/reset-fork.yml | 42 ++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .github/workflows/reset-fork.yml diff --git a/.github/workflows/reset-fork.yml b/.github/workflows/reset-fork.yml new file mode 100644 index 000000000..396c56f48 --- /dev/null +++ b/.github/workflows/reset-fork.yml @@ -0,0 +1,42 @@ +name: Reset Current Branch to Upstream After Squash Merge + +on: + workflow_dispatch: + inputs: + branch: + description: 'Branch to reset (leave blank for current branch)' + required: false + default: '' + +jobs: + reset-branch: + runs-on: ubuntu-latest + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Detect Current Branch + if: ${{ inputs.branch == '' }} + run: echo "branch=$(git rev-parse --abbrev-ref HEAD)" >> $GITHUB_ENV + + - name: Use Input Branch + if: ${{ inputs.branch != '' }} + run: echo "branch=${{ inputs.branch }}" >> $GITHUB_ENV + + - name: Add Upstream Remote + run: | + git remote add upstream https://github.com/mlcommons/mlperf-automations.git + git fetch upstream + - name: Reset Branch to Upstream + run: | + git checkout ${{ env.branch }} + git reset --hard upstream/${{ env.branch }} + if: success() + + - name: Force Push to Origin + run: | + git push origin ${{ env.branch }} --force-with-lease + if: success() From 5ddfc95490ded9d680e2d70fa2617a34b4f68e3c Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 21 Dec 2024 12:15:54 +0000 Subject: [PATCH 38/63] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e55cac00a..18cdea502 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,2 @@ [build-system] -requires = ["setuptools>=60", "wheel", "cmind @ git+https://git@github.com/mlcommons/ck.git@084d4d6171a6e7ae9582a63777fbd19afa19947a#egg=cmind&subdirectory=cm"] +requires = ["setuptools>=60", "wheel", "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#egg=cmind&subdirectory=cm"] From f5eb712934119192e9b562cf6d06987c7ec8106a Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 21 Dec 2024 12:16:54 +0000 Subject: [PATCH 39/63] Update VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 6769f67e2..c4c2d2b11 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.15 +0.6.16 From 17833dfe8f1c751ca25fa2d97e55bfc1b8c12366 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Sat, 21 Dec 2024 12:17:06 +0000 Subject: [PATCH 40/63] Updated git_commit_hash.txt --- git_commit_hash.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_commit_hash.txt b/git_commit_hash.txt index 8db05eabf..39ee5566a 100644 --- a/git_commit_hash.txt +++ b/git_commit_hash.txt @@ -1 +1 @@ -d2db3b4da03b8d3ea413c9e1a1bd3a3963a6f911 +f5eb712934119192e9b562cf6d06987c7ec8106a From cfd76e1ca790263392fe4ee524bf987c880b685c Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 21 Dec 2024 12:48:12 +0000 Subject: [PATCH 41/63] Fix scc24 github action (#61) * Update test-scc24-sdxl.yaml * Fix scc24 github action --- .github/workflows/test-scc24-sdxl.yaml | 16 +++++++++------- script/app-mlperf-inference/_cm.yaml | 1 - 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml index 6620c4913..1dd73836d 100644 --- a/.github/workflows/test-scc24-sdxl.yaml +++ b/.github/workflows/test-scc24-sdxl.yaml @@ -2,14 +2,15 @@ name: MLPerf inference SDXL (SCC) on: schedule: - - cron: "20 01 * * *" + - cron: "40 12 * * *" jobs: build_reference: if: github.repository_owner == 'gateoverflow' runs-on: [ self-hosted, linux, x64, GO-spr ] env: - CM_DOCKER_REPO: mlperf-automations + CM_DOCKER_REPO: mlcommons@mlperf-automations + CM_DOCKER_REPO_BRANCH: dev strategy: fail-fast: false matrix: @@ -27,8 +28,8 @@ jobs: pip install --upgrade cm4mlops pip install tabulate cm pull repo - cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --pull_inference_changes=yes --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean - cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --pull_inference_changes=yes --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_cm_repo_branch=$CM_DOCKER_REPO_BRANCH --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_cm_repo_branch=$CM_DOCKER_REPO_BRANCH --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=generate,inference,submission --clean --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions @@ -36,7 +37,8 @@ jobs: if: github.repository_owner == 'gateoverflow' runs-on: [ self-hosted, linux, x64, GO-spr] env: - CM_DOCKER_REPO: mlperf-automations + CM_DOCKER_REPO: mlcommons@mlperf-automations + CM_DOCKER_REPO_BRANCH: dev strategy: fail-fast: false matrix: @@ -54,7 +56,7 @@ jobs: pip install --upgrade cm4mlops pip install tabulate cm pull repo - cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --pull_changes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --custom_system_nvidia=yes --clean - cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_cm_repo_branch=$CM_DOCKER_REPO_BRANCH --docker_dt=yes --pull_changes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --custom_system_nvidia=yes --clean + cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_cm_repo_branch=$CM_DOCKER_REPO_BRANCH --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=generate,inference,submission --clean --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index f3ef84523..9ba86635c 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -1797,7 +1797,6 @@ docker: pre_run_cmds: #- cm pull repo && cm run script --tags=get,git,repo,_repo.https://github.com/GATEOverflow/inference_results_v4.0.git --update - cm pull repo - - cm rm cache --tags=inference,src -f mounts: - "${{ CM_DATASET_IMAGENET_PATH }}:${{ CM_DATASET_IMAGENET_PATH }}" - "${{ CM_DATASET_OPENIMAGES_PATH }}:${{ CM_DATASET_OPENIMAGES_PATH }}" From d0c6c3eb97402c8f97b1c7bf70eaa0fb86902951 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 21 Dec 2024 22:00:17 +0000 Subject: [PATCH 42/63] Fix dangling softlink issue with nvidia-mlperf-inference-bert (#64) * Remove destination for dangling symbolic links (mlperf-inference-nvidia-bert) * Fix format nvidia-mlperf-inference code --- ...vidia-mlperf-inference-implementations.yml | 2 +- .github/workflows/test-scc24-sdxl.yaml | 2 +- .../app-mlperf-inference-nvidia/customize.py | 30 +++++++++++-------- .../get-mlperf-inference-utils/customize.py | 2 +- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml index 83e357613..986fc2569 100644 --- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml +++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml @@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations on: schedule: - - cron: "15 02 * * *" #to be adjusted + - cron: "08 13 * * *" #to be adjusted jobs: run_nvidia: diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml index 1dd73836d..7f402ce7d 100644 --- a/.github/workflows/test-scc24-sdxl.yaml +++ b/.github/workflows/test-scc24-sdxl.yaml @@ -2,7 +2,7 @@ name: MLPerf inference SDXL (SCC) on: schedule: - - cron: "40 12 * * *" + - cron: "56 12 * * *" jobs: build_reference: diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py index 36324cd0c..f9eb06822 100644 --- a/script/app-mlperf-inference-nvidia/customize.py +++ b/script/app-mlperf-inference-nvidia/customize.py @@ -37,14 +37,14 @@ def preprocess(i): make_command = env['MLPERF_NVIDIA_RUN_COMMAND'] if make_command == "prebuild": - cmds.append(f"make prebuild NETWORK_NODE=SUT") + cmds.append(f"""make prebuild NETWORK_NODE=SUT""") if env['CM_MODEL'] == "resnet50": target_data_path = os.path.join( env['MLPERF_SCRATCH_PATH'], 'data', 'imagenet') if not os.path.exists(target_data_path): cmds.append( - f"ln -sf {env['CM_DATASET_IMAGENET_PATH']} {target_data_path}") + f"""ln -sf {env['CM_DATASET_IMAGENET_PATH']} {target_data_path}""") model_path = os.path.join( env['MLPERF_SCRATCH_PATH'], @@ -53,11 +53,11 @@ def preprocess(i): 'resnet50_v1.onnx') if not os.path.exists(os.path.dirname(model_path)): - cmds.append(f"mkdir -p {os.path.dirname(model_path)}") + cmds.append(f"""mkdir -p {os.path.dirname(model_path)}""") if not os.path.exists(model_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_FILE_WITH_PATH']} {model_path}") + f"""ln -sf {env['CM_ML_MODEL_FILE_WITH_PATH']} {model_path}""") model_name = "resnet50" elif "bert" in env['CM_MODEL']: @@ -83,17 +83,17 @@ def preprocess(i): 'vocab.txt') if not os.path.exists(os.path.dirname(fp32_model_path)): - cmds.append(f"mkdir -p {os.path.dirname(fp32_model_path)}") + cmds.append(f"""mkdir -p {os.path.dirname(fp32_model_path)}""") if not os.path.exists(fp32_model_path): cmds.append( - f"cp -r {env['CM_ML_MODEL_BERT_LARGE_FP32_PATH']} {fp32_model_path}") + f"""cp -r --remove-destination {env['CM_ML_MODEL_BERT_LARGE_FP32_PATH']} {fp32_model_path}""") if not os.path.exists(int8_model_path): cmds.append( - f"cp -r {env['CM_ML_MODEL_BERT_LARGE_INT8_PATH']} {int8_model_path}") + f"""cp -r --remove-destination {env['CM_ML_MODEL_BERT_LARGE_INT8_PATH']} {int8_model_path}""") if not os.path.exists(vocab_path): cmds.append( - f"cp -r {env['CM_ML_MODEL_BERT_VOCAB_FILE_WITH_PATH']} {vocab_path}") + f"""cp -r --remove-destination {env['CM_ML_MODEL_BERT_VOCAB_FILE_WITH_PATH']} {vocab_path}""") model_name = "bert" model_path = fp32_model_path @@ -112,9 +112,9 @@ def preprocess(i): # cmds.append("make download_data BENCHMARKS='stable-diffusion-xl'") env['CM_REQUIRE_COCO2014_DOWNLOAD'] = 'yes' cmds.append( - f"cp -r \\$CM_DATASET_PATH_ROOT/captions/captions.tsv {target_data_path}/captions_5k_final.tsv") + f"""cp -r \\$CM_DATASET_PATH_ROOT/captions/captions.tsv {target_data_path}/captions_5k_final.tsv""") cmds.append( - f"cp -r \\$CM_DATASET_PATH_ROOT/latents/latents.pt {target_data_path}/latents.pt") + f"""cp -r \\$CM_DATASET_PATH_ROOT/latents/latents.pt {target_data_path}/latents.pt""") fp16_model_path = os.path.join( env['MLPERF_SCRATCH_PATH'], 'models', @@ -124,7 +124,7 @@ def preprocess(i): 'stable_diffusion_fp16') if not os.path.exists(os.path.dirname(fp16_model_path)): - cmds.append(f"mkdir -p {os.path.dirname(fp16_model_path)}") + cmds.append(f"""mkdir -p {os.path.dirname(fp16_model_path)}""") if not os.path.exists(fp16_model_path): if os.path.islink(fp16_model_path): @@ -687,11 +687,15 @@ def preprocess(i): '') # will be ignored during build engine if "stable-diffusion" in env["CM_MODEL"]: - extra_build_engine_options_string += f" --model_path {os.path.join(env['MLPERF_SCRATCH_PATH'], 'models', 'SDXL/')}" + extra_build_engine_options_string += f""" --model_path { + os.path.join( + env['MLPERF_SCRATCH_PATH'], + 'models', + 'SDXL/')}""" run_config += " --no_audit_verify" - cmds.append(f"make {make_command} RUN_ARGS=' --benchmarks={model_name} --scenarios={scenario} {test_mode_string} {run_config} {extra_build_engine_options_string} {extra_run_options_string}'") + cmds.append(f"""make {make_command} RUN_ARGS=' --benchmarks={model_name} --scenarios={scenario} {test_mode_string} {run_config} {extra_build_engine_options_string} {extra_run_options_string}'""") run_cmd = " && ".join(cmds) env['CM_MLPERF_RUN_CMD'] = run_cmd diff --git a/script/get-mlperf-inference-utils/customize.py b/script/get-mlperf-inference-utils/customize.py index 6f7f0a49b..179342fc7 100644 --- a/script/get-mlperf-inference-utils/customize.py +++ b/script/get-mlperf-inference-utils/customize.py @@ -15,7 +15,7 @@ def preprocess(i): quiet = (env.get('CM_QUIET', False) == 'yes') - utils_path = i['run_script_input']['path'] + utils_path = env['CM_TMP_CURRENT_SCRIPT_PATH'] env['+PYTHONPATH'] = [utils_path] From 188708bd8e944a9c05175db7fd34dee6d7c1d5fc Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 21 Dec 2024 22:00:55 +0000 Subject: [PATCH 43/63] Update VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index c4c2d2b11..fa209468a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.16 +0.6.17 From 26cf83375a3f55b8c7aaf9bebb7cf5b7508614f8 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Sat, 21 Dec 2024 22:01:06 +0000 Subject: [PATCH 44/63] Updated git_commit_hash.txt --- git_commit_hash.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_commit_hash.txt b/git_commit_hash.txt index 39ee5566a..d4ddba84d 100644 --- a/git_commit_hash.txt +++ b/git_commit_hash.txt @@ -1 +1 @@ -f5eb712934119192e9b562cf6d06987c7ec8106a +188708bd8e944a9c05175db7fd34dee6d7c1d5fc From 7f48c88e72c6036b555bb80d01f853bc468e8b06 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 21 Dec 2024 22:11:16 +0000 Subject: [PATCH 45/63] Support pull_inference_changes in run-mlperf-inference-app (#65) * Update test-scc24-sdxl.yaml * Update test-nvidia-mlperf-inference-implementations.yml * Added pull_inference_changes support to run-mlperf-inference-app --- .../test-nvidia-mlperf-inference-implementations.yml | 2 +- .github/workflows/test-scc24-sdxl.yaml | 2 +- script/run-mlperf-inference-app/_cm.yaml | 6 ++++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml index 986fc2569..f25dab3d9 100644 --- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml +++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml @@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations on: schedule: - - cron: "08 13 * * *" #to be adjusted + - cron: "08 23 * * *" #to be adjusted jobs: run_nvidia: diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml index 7f402ce7d..b637b0226 100644 --- a/.github/workflows/test-scc24-sdxl.yaml +++ b/.github/workflows/test-scc24-sdxl.yaml @@ -2,7 +2,7 @@ name: MLPerf inference SDXL (SCC) on: schedule: - - cron: "56 12 * * *" + - cron: "56 22 * * *" jobs: build_reference: diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 8fe9b88d1..202da39d4 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -137,6 +137,12 @@ deps: - names: - inference-src tags: get,mlcommons,inference,src +- tags: pull,git,repo + env: + CM_GIT_CHECKOUT_PATH: '<<>>' + enable_if_env: + CM_MLPERF_INFERENCE_PULL_SRC_CHANGES: + - 'yes' - tags: get,sut,description skip_if_env: CM_MLPERF_USE_DOCKER: [ on ] From b051bb1858fc61de02fc68765fc11155fe457b2a Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 22 Dec 2024 03:45:09 +0530 Subject: [PATCH 46/63] Added pull_inference_changes support to run-mlperf-inference-app --- script/run-mlperf-inference-app/_cm.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 202da39d4..a2e3688e7 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -138,11 +138,11 @@ deps: - inference-src tags: get,mlcommons,inference,src - tags: pull,git,repo - env: - CM_GIT_CHECKOUT_PATH: '<<>>' - enable_if_env: - CM_MLPERF_INFERENCE_PULL_SRC_CHANGES: - - 'yes' + env: + CM_GIT_CHECKOUT_PATH: '<<>>' + enable_if_env: + CM_MLPERF_INFERENCE_PULL_SRC_CHANGES: + - 'yes' - tags: get,sut,description skip_if_env: CM_MLPERF_USE_DOCKER: [ on ] From 7bc5f0d8059435374d8e789879754640707ae557 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 22 Dec 2024 22:42:29 +0000 Subject: [PATCH 47/63] Fix github action failures (#68) * Update test-nvidia-mlperf-inference-implementations.yml * Update test-mlperf-inference-mixtral.yml * Fix submission generation github action * Set predeps:False for mlperf-inference-submission-generation * Added version support for submission generation --- .../test-cm-based-submission-generation.yml | 15 +++++++++++---- .../workflows/test-mlperf-inference-mixtral.yml | 1 + ...st-nvidia-mlperf-inference-implementations.yml | 3 ++- automation/script/module.py | 2 +- .../generate-mlperf-inference-submission/_cm.yaml | 2 ++ 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-cm-based-submission-generation.yml b/.github/workflows/test-cm-based-submission-generation.yml index 22f8914d6..b0e91421a 100644 --- a/.github/workflows/test-cm-based-submission-generation.yml +++ b/.github/workflows/test-cm-based-submission-generation.yml @@ -80,19 +80,26 @@ jobs: fi # Dynamically set the log group to simulate a dynamic step name echo "::group::$description" - cm ${{ matrix.action }} script --tags=generate,inference,submission --clean --preprocess_submission=yes --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=${{ matrix.division }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet $extra_run_args + cm ${{ matrix.action }} script --tags=generate,inference,submission --version=v4.1 --clean --preprocess_submission=yes --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=${{ matrix.division }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet $extra_run_args exit_status=$? + exit $? || echo "STEP_FAILED=true" >> $GITHUB_ENV echo "Exit status for the job ${description} ${exit_status}" if [[ "${{ matrix.case }}" == "case-5" || "${{ matrix.case }}" == "case-6" ]]; then # For cases 5 and 6, exit status should be 0 if cm command fails, 1 if it succeeds if [[ ${exit_status} -ne 0 ]]; then - exit 0 + echo "STEP_FAILED=false" >> $GITHUB_ENV else - exit ${exit_status} + echo "STEP_FAILED=true" >> $GITHUB_ENV fi else # For other cases, exit with the original status - test ${exit_status} -eq 0 || exit ${exit_status} + test ${exit_status} -eq 0 || echo "STEP_FAILED=true" >> $GITHUB_ENV fi echo "::endgroup::" + - name: Fail if Step Failed + if: env.STEP_FAILED == 'true' + continue-on-error: false + run: | + echo "Manually failing the workflow because the step failed." + exit 1 diff --git a/.github/workflows/test-mlperf-inference-mixtral.yml b/.github/workflows/test-mlperf-inference-mixtral.yml index 597a4cdbc..a12f58c5f 100644 --- a/.github/workflows/test-mlperf-inference-mixtral.yml +++ b/.github/workflows/test-mlperf-inference-mixtral.yml @@ -10,6 +10,7 @@ on: jobs: build_reference: if: github.repository_owner == 'gateoverflow' + timeout-minutes: 1440 runs-on: [ self-hosted, phoenix, linux, x64 ] strategy: fail-fast: false diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml index f25dab3d9..fec9fd549 100644 --- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml +++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml @@ -2,11 +2,12 @@ name: MLPerf Inference Nvidia implementations on: schedule: - - cron: "08 23 * * *" #to be adjusted + - cron: "08 01 * * *" #to be adjusted jobs: run_nvidia: if: github.repository_owner == 'gateoverflow' + timeout-minutes: 1440 runs-on: - self-hosted - linux diff --git a/automation/script/module.py b/automation/script/module.py index fe0582648..eda2920c7 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -1635,7 +1635,7 @@ def _run(self, i): 'self': self } - # Check if pre-process and detect + # Check and run predeps in customize.py if str(meta.get('predeps', 'True')).lower() not in ["0", "false", "no"] and os.path.isfile( path_to_customize_py): # possible duplicate execution - needs fix r = utils.load_python_module( diff --git a/script/generate-mlperf-inference-submission/_cm.yaml b/script/generate-mlperf-inference-submission/_cm.yaml index 064cbc307..5e43dd787 100644 --- a/script/generate-mlperf-inference-submission/_cm.yaml +++ b/script/generate-mlperf-inference-submission/_cm.yaml @@ -7,6 +7,7 @@ default_env: CM_MLPERF_RUN_STYLE: valid CM_MLPERF_SUBMISSION_DIR_SHARED: 'yes' CM_RUN_MLPERF_ACCURACY: 'on' +predeps: False deps: - names: - python @@ -84,6 +85,7 @@ input_mapping: sw_notes_extra: CM_MLPERF_SUT_SW_NOTES_EXTRA tar: CM_TAR_SUBMISSION_DIR get_platform_details: CM_GET_PLATFORM_DETAILS + version: CM_MLPERF_SUBMISSION_CHECKER_VERSION post_deps: - enable_if_env: CM_RUN_MLPERF_ACCURACY: From 225220c7d9bb7e66e5b9a1e1ebfc3e0180fbd094 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 22 Dec 2024 22:52:05 +0000 Subject: [PATCH 48/63] Update test-cm4mlops-wheel-ubuntu.yml --- .github/workflows/test-cm4mlops-wheel-ubuntu.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test-cm4mlops-wheel-ubuntu.yml b/.github/workflows/test-cm4mlops-wheel-ubuntu.yml index 4a37a5290..6ec9905d6 100644 --- a/.github/workflows/test-cm4mlops-wheel-ubuntu.yml +++ b/.github/workflows/test-cm4mlops-wheel-ubuntu.yml @@ -5,7 +5,6 @@ on: branches: - main - dev - - mlperf-inference paths: - '.github/workflows/test-cm4mlops-wheel-ubuntu.yml' - 'setup.py' @@ -16,7 +15,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, ubuntu-20.04] - python-version: ['3.7', '3.8', '3.11', '3.12'] + python-version: ['3.8', '3.11', '3.12'] exclude: - os: ubuntu-latest python-version: "3.8" From bb79019139303a8b103c741526cc7afa59bd8370 Mon Sep 17 00:00:00 2001 From: sahilavaran <139779393+sahilavaran@users.noreply.github.com> Date: Mon, 23 Dec 2024 12:47:00 +0000 Subject: [PATCH 49/63] support --outdirname for ml models, partially fixed #63 (#71) --- script/get-ml-model-3d-unet-kits19/_cm.yaml | 8 ++++++++ script/get-ml-model-bert-large-squad/_cm.yaml | 4 ++++ script/get-ml-model-dlrm-terabyte/_cm.yaml | 2 ++ script/get-ml-model-gptj/_cm.yaml | 2 ++ script/get-ml-model-huggingface-zoo/_cm.yaml | 2 ++ script/get-ml-model-llama2/_cm.yaml | 2 ++ script/get-ml-model-mixtral/_cm.yaml | 2 ++ script/get-ml-model-retinanet-nvidia/_cm.yaml | 2 ++ script/get-ml-model-retinanet/_cm.yaml | 4 ++++ script/get-ml-model-stable-diffusion/_cm.yaml | 2 ++ script/get-ml-model-tiny-resnet/_cm.yaml | 2 ++ 11 files changed, 32 insertions(+) diff --git a/script/get-ml-model-3d-unet-kits19/_cm.yaml b/script/get-ml-model-3d-unet-kits19/_cm.yaml index 7dc7f5b06..658f306a7 100644 --- a/script/get-ml-model-3d-unet-kits19/_cm.yaml +++ b/script/get-ml-model-3d-unet-kits19/_cm.yaml @@ -43,6 +43,8 @@ variations: extra_cache_tags: 3d-unet,medical-imaging force-cache: true tags: download,file,download-file,_wget + force_env_keys: + - CM_OUTDIRNAME env: CM_ML_MODEL_ACCURACY: '0.86170' CM_ML_MODEL_FILE: 3dunet_kits19_128x128x128_dynbatch.onnx @@ -60,6 +62,8 @@ variations: extra_cache_tags: 3d-unet,medical-imaging force-cache: true tags: download,file,download-file,_wget + force_env_keys: + - CM_OUTDIRNAME env: CM_ML_MODEL_ACCURACY: '0.86170' CM_ML_MODEL_FILE: 3dunet_kits19_pytorch.ptc @@ -73,6 +77,8 @@ variations: extra_cache_tags: 3d-unet,medical-imaging force-cache: true tags: download-and-extract,_wget,_extract + force_env_keys: + - CM_OUTDIRNAME env: CM_ML_MODEL_ACCURACY: '0.86170' CM_ML_MODEL_FILE: 3dunet_kits19_pytorch_checkpoint.pth @@ -92,6 +98,8 @@ variations: extra_cache_tags: 3d-unet,medical-imaging force-cache: true tags: download-and-extract,_wget,_extract + force_env_keys: + - CM_OUTDIRNAME env: CM_ML_MODEL_ACCURACY: '0.86170' CM_ML_MODEL_FILE: 3dunet_kits19_128x128x128.tf diff --git a/script/get-ml-model-bert-large-squad/_cm.yaml b/script/get-ml-model-bert-large-squad/_cm.yaml index e5b4d11bb..e81819279 100644 --- a/script/get-ml-model-bert-large-squad/_cm.yaml +++ b/script/get-ml-model-bert-large-squad/_cm.yaml @@ -13,6 +13,8 @@ new_env_keys: - CM_ML_MODEL* post_deps: - tags: get,dataset-aux,squad-vocab + force_env_keys: + - CM_OUTDIRNAME prehook_deps: - env: CM_DOWNLOAD_FINAL_ENV_NAME: CM_ML_MODEL_FILE_WITH_PATH @@ -25,6 +27,8 @@ prehook_deps: CM_ML_MODEL_BERT_PACKED: - 'yes' tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/script/get-ml-model-dlrm-terabyte/_cm.yaml b/script/get-ml-model-dlrm-terabyte/_cm.yaml index 6227a9a17..4d5c93f1b 100644 --- a/script/get-ml-model-dlrm-terabyte/_cm.yaml +++ b/script/get-ml-model-dlrm-terabyte/_cm.yaml @@ -25,6 +25,8 @@ prehook_deps: names: - dae tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/script/get-ml-model-gptj/_cm.yaml b/script/get-ml-model-gptj/_cm.yaml index 25e8deca4..7d645976a 100644 --- a/script/get-ml-model-gptj/_cm.yaml +++ b/script/get-ml-model-gptj/_cm.yaml @@ -29,6 +29,8 @@ prehook_deps: names: - dae tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_DOWNLOAD_URL diff --git a/script/get-ml-model-huggingface-zoo/_cm.yaml b/script/get-ml-model-huggingface-zoo/_cm.yaml index d10c3f448..154675bc1 100644 --- a/script/get-ml-model-huggingface-zoo/_cm.yaml +++ b/script/get-ml-model-huggingface-zoo/_cm.yaml @@ -38,6 +38,8 @@ variations: - env: CM_GIT_CHECKOUT_PATH_ENV_NAME: CM_ML_MODEL_PATH tags: get,git,repo,_lfs + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _repo.https://huggingface.co/: - CM_MODEL_ZOO_STUB diff --git a/script/get-ml-model-llama2/_cm.yaml b/script/get-ml-model-llama2/_cm.yaml index 2ff45866b..fe082718e 100644 --- a/script/get-ml-model-llama2/_cm.yaml +++ b/script/get-ml-model-llama2/_cm.yaml @@ -26,6 +26,8 @@ prehook_deps: names: - hf-zoo tags: get,ml-model,huggingface,zoo,_clone-repo + force_env_keys: + - CM_OUTDIRNAME print_env_at_the_end: LLAMA2_CHECKPOINT_PATH: LLAMA2 checkpoint path tags: diff --git a/script/get-ml-model-mixtral/_cm.yaml b/script/get-ml-model-mixtral/_cm.yaml index 2542d4dc7..5b61ef9ca 100644 --- a/script/get-ml-model-mixtral/_cm.yaml +++ b/script/get-ml-model-mixtral/_cm.yaml @@ -22,6 +22,8 @@ prehook_deps: names: - hf-zoo tags: get,ml-model,huggingface,zoo,_clone-repo + force_env_keys: + - CM_OUTDIRNAME print_env_at_the_end: MIXTRAL_CHECKPOINT_PATH: MIXTRAL checkpoint path tags: diff --git a/script/get-ml-model-retinanet-nvidia/_cm.yaml b/script/get-ml-model-retinanet-nvidia/_cm.yaml index 4e114e43d..7f6880322 100644 --- a/script/get-ml-model-retinanet-nvidia/_cm.yaml +++ b/script/get-ml-model-retinanet-nvidia/_cm.yaml @@ -14,6 +14,8 @@ deps: - tags: get,mlperf,training,src,_nvidia-retinanet - tags: get,mlperf,inference,src - tags: get,ml-model,retinanet,_pytorch,_fp32,_weights + force_env_keys: + - CM_OUTDIRNAME - enable_if_env: CM_TORCH_DEVICE: cpu tags: get,generic-python-lib,_torch diff --git a/script/get-ml-model-retinanet/_cm.yaml b/script/get-ml-model-retinanet/_cm.yaml index 90e937000..8da05da0e 100644 --- a/script/get-ml-model-retinanet/_cm.yaml +++ b/script/get-ml-model-retinanet/_cm.yaml @@ -28,6 +28,8 @@ prehook_deps: CM_TMP_ML_MODEL_RETINANET_NO_NMS: - 'yes' tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL @@ -80,6 +82,8 @@ variations: extra_cache_tags: retinanet,training,patch,file force_cache: true tags: download,file,_url.https://raw.githubusercontent.com/arjunsuresh/ck-qaic/main/package/model-onnx-mlperf-retinanet-no-nms/remove-nms-and-extract-priors.patch + force_env_keys: + - CM_OUTDIRNAME - env: CM_GIT_CHECKOUT_PATH_ENV_NAME: CM_MLPERF_TRAINING_REPO_PATCHED_PATH CM_GIT_PATCH_FILEPATHS: <<>> diff --git a/script/get-ml-model-stable-diffusion/_cm.yaml b/script/get-ml-model-stable-diffusion/_cm.yaml index b2326daff..ae9ee2757 100644 --- a/script/get-ml-model-stable-diffusion/_cm.yaml +++ b/script/get-ml-model-stable-diffusion/_cm.yaml @@ -28,6 +28,8 @@ prehook_deps: names: - hf-zoo tags: get,ml-model,huggingface,zoo,_clone-repo,_model-stub.stabilityai/stable-diffusion-xl-base-1.0 + force_env_keys: + - CM_OUTDIRNAME - enable_if_env: CM_DOWNLOAD_TOOL: - rclone diff --git a/script/get-ml-model-tiny-resnet/_cm.yaml b/script/get-ml-model-tiny-resnet/_cm.yaml index 4f8406e29..791ecccee 100644 --- a/script/get-ml-model-tiny-resnet/_cm.yaml +++ b/script/get-ml-model-tiny-resnet/_cm.yaml @@ -22,6 +22,8 @@ prehook_deps: env: CM_EXTRACT_EXTRACTED_FILENAME: <<>> tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL From a9e8329cf5f036aea3c491f0a375cce2d89b5cd1 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 23 Dec 2024 13:05:24 +0000 Subject: [PATCH 50/63] Update test-cm-based-submission-generation.yml (#73) --- .github/workflows/test-cm-based-submission-generation.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-cm-based-submission-generation.yml b/.github/workflows/test-cm-based-submission-generation.yml index b0e91421a..65262dea6 100644 --- a/.github/workflows/test-cm-based-submission-generation.yml +++ b/.github/workflows/test-cm-based-submission-generation.yml @@ -82,7 +82,6 @@ jobs: echo "::group::$description" cm ${{ matrix.action }} script --tags=generate,inference,submission --version=v4.1 --clean --preprocess_submission=yes --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=${{ matrix.division }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet $extra_run_args exit_status=$? - exit $? || echo "STEP_FAILED=true" >> $GITHUB_ENV echo "Exit status for the job ${description} ${exit_status}" if [[ "${{ matrix.case }}" == "case-5" || "${{ matrix.case }}" == "case-6" ]]; then # For cases 5 and 6, exit status should be 0 if cm command fails, 1 if it succeeds @@ -93,7 +92,11 @@ jobs: fi else # For other cases, exit with the original status - test ${exit_status} -eq 0 || echo "STEP_FAILED=true" >> $GITHUB_ENV + if [[ ${exit_status} -eq 0 ]]; then + echo "STEP_FAILED=false" >> $GITHUB_ENV + else + echo "STEP_FAILED=true" >> $GITHUB_ENV + fi fi echo "::endgroup::" - name: Fail if Step Failed From 7dcef66c48436c29b6faae8f6b00ee4f81265617 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 23 Dec 2024 14:03:09 +0000 Subject: [PATCH 51/63] Fix exit code for docker run failures (#74) * Update test-cm-based-submission-generation.yml * Force exit code = 1, for docker failures with exit code=512 --- script/run-docker-container/customize.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/script/run-docker-container/customize.py b/script/run-docker-container/customize.py index 9703080d0..869386f91 100644 --- a/script/run-docker-container/customize.py +++ b/script/run-docker-container/customize.py @@ -314,6 +314,8 @@ def postprocess(i): print('') docker_out = os.system(CMD) if docker_out != 0: + if docker_out % 256 == 0: + docker_out = 1 return {'return': docker_out, 'error': 'docker run failed'} return {'return': 0} From d28df7ea880337a024f9c3854d3fdd14224b8e8e Mon Sep 17 00:00:00 2001 From: sahilavaran <139779393+sahilavaran@users.noreply.github.com> Date: Mon, 23 Dec 2024 15:07:11 +0000 Subject: [PATCH 52/63] Support --outdirname for datasets fixes #63 (#75) * support --outdirname for ml models, partially fixed #63 * support --outdirname for ml datasets fixes #63 --- script/get-dataset-imagenet-aux/_cm.yaml | 2 ++ script/get-dataset-imagenet-calibration/_cm.yaml | 2 ++ script/get-dataset-imagenet-val/_cm.yaml | 2 ++ script/get-dataset-mlperf-inference-mixtral/_cm.yaml | 4 +++- script/get-dataset-openimages-annotations/_cm.yaml | 2 ++ script/get-dataset-openimages-calibration/_cm.yaml | 2 ++ script/get-dataset-openorca/_cm.yaml | 2 ++ script/get-dataset-squad-vocab/_cm.yaml | 2 ++ script/get-dataset-squad/_cm.yaml | 2 ++ 9 files changed, 19 insertions(+), 1 deletion(-) diff --git a/script/get-dataset-imagenet-aux/_cm.yaml b/script/get-dataset-imagenet-aux/_cm.yaml index 242b53abc..c5944aedf 100644 --- a/script/get-dataset-imagenet-aux/_cm.yaml +++ b/script/get-dataset-imagenet-aux/_cm.yaml @@ -14,6 +14,8 @@ prehook_deps: extra_cache_tags: imagenet-aux,dataset-aux force_cache: true tags: download-and-extract,_extract,_wget + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/script/get-dataset-imagenet-calibration/_cm.yaml b/script/get-dataset-imagenet-calibration/_cm.yaml index 741d7e205..7e499146a 100644 --- a/script/get-dataset-imagenet-calibration/_cm.yaml +++ b/script/get-dataset-imagenet-calibration/_cm.yaml @@ -10,6 +10,8 @@ category: "AI/ML datasets" deps: - tags: download,file + force_env_keys: + - CM_OUTDIRNAME force_cache: true extra_cache_tags: imagenet-calibration,imagenet,calibration names: diff --git a/script/get-dataset-imagenet-val/_cm.yaml b/script/get-dataset-imagenet-val/_cm.yaml index 0b9923927..0a23afac1 100644 --- a/script/get-dataset-imagenet-val/_cm.yaml +++ b/script/get-dataset-imagenet-val/_cm.yaml @@ -35,6 +35,8 @@ prehook_deps: env: CM_EXTRACT_TO_FOLDER: imagenet-2012-val tags: download-and-extract,file,_extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env: - CM_DAE_EXTRA_TAGS update_tags_from_env_with_prefix: diff --git a/script/get-dataset-mlperf-inference-mixtral/_cm.yaml b/script/get-dataset-mlperf-inference-mixtral/_cm.yaml index f8684eef5..566f7bb05 100644 --- a/script/get-dataset-mlperf-inference-mixtral/_cm.yaml +++ b/script/get-dataset-mlperf-inference-mixtral/_cm.yaml @@ -8,9 +8,11 @@ new_env_keys: prehook_deps: - env: CM_DOWNLOAD_FINAL_ENV_NAME: CM_DATASET_PREPROCESSED_PATH - extra_cache_tags: mixtral,get-mixtral-dataset + extra_cache_tags: mixtral,get-mixtral-dataset force_cache: true tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/script/get-dataset-openimages-annotations/_cm.yaml b/script/get-dataset-openimages-annotations/_cm.yaml index a96e7f58e..16158cef6 100644 --- a/script/get-dataset-openimages-annotations/_cm.yaml +++ b/script/get-dataset-openimages-annotations/_cm.yaml @@ -12,6 +12,8 @@ prehook_deps: extra_cache_tags: retinanet,get,dataset-openimages-annotations force_cache: true tags: download-and-extract,_wget,_extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/script/get-dataset-openimages-calibration/_cm.yaml b/script/get-dataset-openimages-calibration/_cm.yaml index b8bd73e12..6edd3716c 100644 --- a/script/get-dataset-openimages-calibration/_cm.yaml +++ b/script/get-dataset-openimages-calibration/_cm.yaml @@ -10,6 +10,8 @@ category: "AI/ML datasets" deps: - tags: download,file + force_env_keys: + - CM_OUTDIRNAME force_cache: true extra_cache_tags: openimages-calibration,openimages,calibration names: diff --git a/script/get-dataset-openorca/_cm.yaml b/script/get-dataset-openorca/_cm.yaml index 861c49575..c860b0213 100644 --- a/script/get-dataset-openorca/_cm.yaml +++ b/script/get-dataset-openorca/_cm.yaml @@ -15,6 +15,8 @@ deps: names: - openorca-src tags: get,git,repo,_lfs,_repo.https://huggingface.co/datasets/Open-Orca/OpenOrca + force_env_keys: + - CM_OUTDIRNAME env: CM_DATASET: OPENORCA new_env_keys: diff --git a/script/get-dataset-squad-vocab/_cm.yaml b/script/get-dataset-squad-vocab/_cm.yaml index a6ec2e902..aa1bad21c 100644 --- a/script/get-dataset-squad-vocab/_cm.yaml +++ b/script/get-dataset-squad-vocab/_cm.yaml @@ -12,6 +12,8 @@ prehook_deps: extra_cache_tags: bert,get,dataset-squad-vocab force_cache: true tags: download-and-extract,_wget + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/script/get-dataset-squad/_cm.yaml b/script/get-dataset-squad/_cm.yaml index a9dd6ed94..d47fc9ce3 100644 --- a/script/get-dataset-squad/_cm.yaml +++ b/script/get-dataset-squad/_cm.yaml @@ -16,6 +16,8 @@ prehook_deps: extra_cache_tags: bert,get,dataset-squad force_cache: true tags: download-and-extract,_wget + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL From cf575d03d1856bb36cb31934416b30fd1afed3ed Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 23 Dec 2024 22:05:15 +0000 Subject: [PATCH 53/63] Support version in preprocess-submission, cleanups for coco2014 script (#76) * Dont use 'install' sub directory for coco2014 * Update test-scc24-sdxl.yaml * Update test-mlperf-inference-mixtral.yml * Support version in preprocess-mlperf-inference-submission --- .github/workflows/test-mlperf-inference-mixtral.yml | 2 +- .../test-nvidia-mlperf-inference-implementations.yml | 2 +- .github/workflows/test-scc24-sdxl.yaml | 2 +- script/get-dataset-coco2014/customize.py | 8 ++++---- script/get-dataset-coco2014/run.sh | 3 +-- script/preprocess-mlperf-inference-submission/_cm.yaml | 1 + .../preprocess-mlperf-inference-submission/customize.py | 5 ++++- 7 files changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test-mlperf-inference-mixtral.yml b/.github/workflows/test-mlperf-inference-mixtral.yml index a12f58c5f..04a944fd4 100644 --- a/.github/workflows/test-mlperf-inference-mixtral.yml +++ b/.github/workflows/test-mlperf-inference-mixtral.yml @@ -5,7 +5,7 @@ name: MLPerf inference MIXTRAL-8x7B on: schedule: - - cron: "08 23 * * *" # 30th minute and 20th hour => 20:30 UTC => 2 AM IST + - cron: "59 19 * * *" # 30th minute and 20th hour => 20:30 UTC => 2 AM IST jobs: build_reference: diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml index fec9fd549..2bbccae6b 100644 --- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml +++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml @@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations on: schedule: - - cron: "08 01 * * *" #to be adjusted + - cron: "08 01 * * */3" #to be adjusted jobs: run_nvidia: diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml index b637b0226..65e2e7510 100644 --- a/.github/workflows/test-scc24-sdxl.yaml +++ b/.github/workflows/test-scc24-sdxl.yaml @@ -2,7 +2,7 @@ name: MLPerf inference SDXL (SCC) on: schedule: - - cron: "56 22 * * *" + - cron: "34 19 * * *" jobs: build_reference: diff --git a/script/get-dataset-coco2014/customize.py b/script/get-dataset-coco2014/customize.py index e231126a5..b03e75049 100644 --- a/script/get-dataset-coco2014/customize.py +++ b/script/get-dataset-coco2014/customize.py @@ -27,14 +27,14 @@ def postprocess(i): os.getcwd(), 'install', 'sample_ids.txt') print(env['CM_COCO2014_SAMPLE_ID_PATH']) if env.get('CM_DATASET_CALIBRATION', '') == "no": - env['CM_DATASET_PATH_ROOT'] = os.path.join(os.getcwd(), 'install') + env['CM_DATASET_PATH_ROOT'] = os.getcwd() # env['CM_DATASET_PATH'] = os.path.join(os.getcwd(), 'install', 'validation', 'data') env['CM_DATASET_CAPTIONS_DIR_PATH'] = os.path.join( - os.getcwd(), 'install', 'captions') + os.getcwd(), 'captions') env['CM_DATASET_LATENTS_DIR_PATH'] = os.path.join( - os.getcwd(), 'install', 'latents') + os.getcwd(), 'latents') else: env['CM_CALIBRATION_DATASET_PATH'] = os.path.join( - os.getcwd(), 'install', 'calibration', 'data') + os.getcwd(), 'calibration', 'data') return {'return': 0} diff --git a/script/get-dataset-coco2014/run.sh b/script/get-dataset-coco2014/run.sh index 61b9ffe52..3685b161c 100644 --- a/script/get-dataset-coco2014/run.sh +++ b/script/get-dataset-coco2014/run.sh @@ -5,8 +5,7 @@ python3() { export -f python3 CUR=${PWD} -mkdir -p install -INSTALL_DIR=${CUR}/install +INSTALL_DIR=${CUR} cd ${CM_RUN_DIR} diff --git a/script/preprocess-mlperf-inference-submission/_cm.yaml b/script/preprocess-mlperf-inference-submission/_cm.yaml index 40ff0c669..eb5f959b4 100644 --- a/script/preprocess-mlperf-inference-submission/_cm.yaml +++ b/script/preprocess-mlperf-inference-submission/_cm.yaml @@ -22,6 +22,7 @@ deps: input_mapping: input: CM_MLPERF_INFERENCE_SUBMISSION_DIR submission_dir: CM_MLPERF_INFERENCE_SUBMISSION_DIR + version: CM_MLPERF_SUBMISSION_CHECKER_VERSION submitter: CM_MLPERF_SUBMITTER tags: - run diff --git a/script/preprocess-mlperf-inference-submission/customize.py b/script/preprocess-mlperf-inference-submission/customize.py index fc91b08a8..f4ea74c8d 100644 --- a/script/preprocess-mlperf-inference-submission/customize.py +++ b/script/preprocess-mlperf-inference-submission/customize.py @@ -29,8 +29,11 @@ def preprocess(i): print(f"Cleaning {submission_processed}") shutil.rmtree(submission_processed) + version = env.get('CM_MLPERF_SUBMISSION_CHECKER_VERSION', '') + x_version = ' --version ' + version + ' ' if version != '' else '' + CMD = env['CM_PYTHON_BIN'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "tools", "submission", - "preprocess_submission.py") + "' --input '" + submission_dir + "' --submitter '" + submitter + "' --output '" + submission_processed + "'" + "preprocess_submission.py") + "' --input '" + submission_dir + "' --submitter '" + submitter + "' --output '" + submission_processed + "'" + x_version env['CM_RUN_CMD'] = CMD return {'return': 0} From 1fc32abb06bbad6dc5dc6593f50495d353f2496b Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 24 Dec 2024 14:56:55 +0530 Subject: [PATCH 54/63] Fixed stable-diffusion-xl name in SUT configs --- .../pytorch-framework/default-config.yaml | 38 ------------------- .../tensorrt-framework/default-config.yaml | 30 --------------- .../default-config.yaml | 5 +-- .../tensorrt-framework/default-config.yaml | 38 ------------------- .../default-config.yaml | 2 +- 5 files changed, 3 insertions(+), 110 deletions(-) delete mode 100644 script/get-mlperf-inference-sut-configs/configs/IntelSPR.24c/intel-implementation/cpu-device/pytorch-framework/default-config.yaml delete mode 100644 script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml delete mode 100644 script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml diff --git a/script/get-mlperf-inference-sut-configs/configs/IntelSPR.24c/intel-implementation/cpu-device/pytorch-framework/default-config.yaml b/script/get-mlperf-inference-sut-configs/configs/IntelSPR.24c/intel-implementation/cpu-device/pytorch-framework/default-config.yaml deleted file mode 100644 index 040ccbdcc..000000000 --- a/script/get-mlperf-inference-sut-configs/configs/IntelSPR.24c/intel-implementation/cpu-device/pytorch-framework/default-config.yaml +++ /dev/null @@ -1,38 +0,0 @@ ---- - resnet50: - Offline: - target_qps: 1000.0 - Server: - target_qps: 500.0 - retinanet: - Offline: - target_qps: 50.0 - Server: - target_qps: 30 - bert-99: - Offline: - target_qps: 100 - bert-99.9: - Offline: - target_qps: 100 - 3d-unet-99: - Offline: - target_qps: 1.0 - 3d-unet-99.9: - Offline: - target_qps: 1.0 - gptj-99.9: - Offline: - target_qps: 0.5 - Server: - target_qps: 0.3 - gptj-99: - Offline: - target_qps: 0.5 - Server: - target_qps: 0.3 - sdxl: - Offline: - target_qps: 0.1 - Server: - target_qps: 0.1 diff --git a/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml b/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml deleted file mode 100644 index f7a8477a8..000000000 --- a/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml +++ /dev/null @@ -1,30 +0,0 @@ -3d-unet-99: - Offline: - target_qps: 4.0 - SingleStream: - target_latency: 400 -3d-unet-99.9: - Offline: - target_qps: 4.0 - SingleStream: - target_latency: 400 -bert-99: - Offline: - target_qps: 4000.0 - Server: - target_qps: 3800.0 -bert-99.9: - Offline: - target_qps: 2000.0 - Server: - target_qps: 1600.0 -resnet50: - Offline: - target_qps: '42959.4' - Server: - target_qps: 35000.0 -retinanet: - Offline: - target_qps: 850.0 - Server: - target_qps: 630.0 diff --git a/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml b/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml index a9ad05a50..d764ab24d 100644 --- a/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml +++ b/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml @@ -28,9 +28,8 @@ retinanet: target_qps: 850.0 Server: target_qps: 630.0 -sdxl: +stable-diffusion-xl: Offline: target_qps: 0.7 Server: - target_qps: 0.3 - + target_qps: 0.3 diff --git a/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml b/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml deleted file mode 100644 index 7b24138ac..000000000 --- a/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml +++ /dev/null @@ -1,38 +0,0 @@ ---- - resnet50: - Offline: - target_qps: 43000.0 - Server: - target_qps: 37000.0 - retinanet: - Offline: - target_qps: 650.0 - Server: - target_qps: 600 - bert-99: - Offline: - target_qps: 4000 - bert-99.9: - Offline: - target_qps: 4000 - 3d-unet-99: - Offline: - target_qps: 2.0 - 3d-unet-99.9: - Offline: - target_qps: 2.0 - gptj-99.9: - Offline: - target_qps: 4 - Server: - target_qps: 3.5 - gptj-99: - Offline: - target_qps: 4 - Server: - target_qps: 3.5 - sdxl: - Offline: - target_qps: 2 - Server: - target_qps: 1 diff --git a/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml b/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml index 4820e8b52..294b2eda7 100644 --- a/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml +++ b/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml @@ -35,7 +35,7 @@ target_qps: 8 Server: target_qps: 7 - sdxl: + stable-diffusion-xl: Offline: target_qps: 1.3 Server: From 79fb471a5e685a4a7cfb1835a249ddf51a9bc005 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 24 Dec 2024 13:19:04 +0000 Subject: [PATCH 55/63] Fix tensorrt detect on aarch64 --- script/get-tensorrt/customize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/get-tensorrt/customize.py b/script/get-tensorrt/customize.py index addc7322c..263c645e8 100644 --- a/script/get-tensorrt/customize.py +++ b/script/get-tensorrt/customize.py @@ -13,7 +13,7 @@ def preprocess(i): # Not enforcing dev requirement for now if env.get('CM_TENSORRT_TAR_FILE_PATH', '') == '' and env.get( - 'CM_TENSORRT_REQUIRE_DEV1', '') != 'yes' and env.get('CM_HOST_PLATFORM_FLAVOR', '') != 'aarch64': + 'CM_TENSORRT_REQUIRE_DEV1', '') != 'yes' and env.get('CM_HOST_PLATFORM_FLAVOR_', '') != 'aarch64': if os_info['platform'] == 'windows': extra_pre = '' From 51896966a62a0e11a3b5e7421d4a90929364e2f3 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 24 Dec 2024 20:38:22 +0530 Subject: [PATCH 56/63] Added torch deps for get-ml-model-gptj-nvidia --- script/get-ml-model-gptj/_cm.yaml | 4 ++++ script/get-ml-model-gptj/run-nvidia.sh | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/script/get-ml-model-gptj/_cm.yaml b/script/get-ml-model-gptj/_cm.yaml index 7d645976a..25b2ef981 100644 --- a/script/get-ml-model-gptj/_cm.yaml +++ b/script/get-ml-model-gptj/_cm.yaml @@ -161,6 +161,10 @@ variations: - python3 tags: get,python3 - tags: get,generic-python-lib,_package.safetensors + - tags: get,generic-python-lib,_torch + names: + - torch + - pytorch rclone: add_deps_recursive: dae: diff --git a/script/get-ml-model-gptj/run-nvidia.sh b/script/get-ml-model-gptj/run-nvidia.sh index 27e5a675c..b16ee45da 100644 --- a/script/get-ml-model-gptj/run-nvidia.sh +++ b/script/get-ml-model-gptj/run-nvidia.sh @@ -17,5 +17,5 @@ export DOCKER_RUN_ARGS=" -v ${CM_NVIDIA_MLPERF_SCRATCH_PATH}:/mnt" make -C docker run LOCAL_USER=1 test $? -eq 0 || exit $? -${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_INFERENCE_NVIDIA_CODE_PATH}/code/gptj/tensorrt/onnx_tune.py --fp8-scalers-path=${CM_NVIDIA_MLPERF_SCRATCH_PATH}/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized/rank0.safetensors --scaler 1.005 --index 15 +PYTHONPATH='' ${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_INFERENCE_NVIDIA_CODE_PATH}/code/gptj/tensorrt/onnx_tune.py --fp8-scalers-path=${CM_NVIDIA_MLPERF_SCRATCH_PATH}/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized/rank0.safetensors --scaler 1.005 --index 15 test $? -eq 0 || exit $? From 76796b4c3966b04011c3cb6118412516c90ba50b Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 24 Dec 2024 15:15:53 +0000 Subject: [PATCH 57/63] Update VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index fa209468a..724e8d94e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.17 +0.6.18 From a90475d2de72bf0622cebe8d5ca8eb8c9d872fbd Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Tue, 24 Dec 2024 15:16:10 +0000 Subject: [PATCH 58/63] Updated git_commit_hash.txt --- git_commit_hash.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_commit_hash.txt b/git_commit_hash.txt index d4ddba84d..b21cfb83e 100644 --- a/git_commit_hash.txt +++ b/git_commit_hash.txt @@ -1 +1 @@ -188708bd8e944a9c05175db7fd34dee6d7c1d5fc +76796b4c3966b04011c3cb6118412516c90ba50b From 3551660b68ffcff303ae7539ae9a62d34b19bc7e Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Wed, 25 Dec 2024 12:15:26 +0000 Subject: [PATCH 59/63] Fix coco2014 sample ids path --- script/get-dataset-coco2014/customize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/get-dataset-coco2014/customize.py b/script/get-dataset-coco2014/customize.py index b03e75049..3f80e76f0 100644 --- a/script/get-dataset-coco2014/customize.py +++ b/script/get-dataset-coco2014/customize.py @@ -24,7 +24,7 @@ def postprocess(i): env = i['env'] if env.get('CM_GENERATE_SAMPLE_ID', '') == "yes": env['CM_COCO2014_SAMPLE_ID_PATH'] = os.path.join( - os.getcwd(), 'install', 'sample_ids.txt') + os.getcwd(), 'sample_ids.txt') print(env['CM_COCO2014_SAMPLE_ID_PATH']) if env.get('CM_DATASET_CALIBRATION', '') == "no": env['CM_DATASET_PATH_ROOT'] = os.getcwd() From c4653788262678cd87478aaa8d5ae5a12f01978e Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 27 Dec 2024 12:06:26 +0000 Subject: [PATCH 60/63] Fixes for podman support (#79) * Fix coco2014 sample ids path * Fixes for podman, #33 * [Automated Commit] Format Codebase * Fixed rgat checkpoint path * Fixed rgat checkpoint path * [Automated Commit] Format Codebase --- automation/script/module.py | 51 ++++++++++++++---------- automation/script/module_misc.py | 11 ++++- script/get-ml-model-rgat/_cm.yaml | 4 +- script/get-ml-model-rgat/customize.py | 16 ++++++-- script/run-docker-container/_cm.yaml | 1 + script/run-docker-container/customize.py | 4 ++ 6 files changed, 59 insertions(+), 28 deletions(-) diff --git a/automation/script/module.py b/automation/script/module.py index eda2920c7..f37dc9830 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -507,28 +507,7 @@ def _run(self, i): if os.environ.get(key, '') != '' and env.get(key, '') == '': env[key] = os.environ[key] - # Check path/input/output in input and pass to env - for key in self.input_flags_converted_to_tmp_env: - value = i.get(key, '').strip() - if value != '': - env['CM_TMP_' + key.upper()] = value - - for key in self.input_flags_converted_to_env: - value = i.get( - key, - '').strip() if isinstance( - i.get( - key, - ''), - str) else i.get( - key, - '') - if value: - env[f"CM_{key.upper()}"] = value - - r = update_env_with_values(env) - if r['return'] > 0: - return r + r = self._update_env_from_input(env, i) ####################################################################### # Check if we want to skip cache (either by skip_cache or by fake_run) @@ -2294,6 +2273,34 @@ def _run(self, i): return rr + ########################################################################## + + def _update_env_from_input(self, env, i): + # Check path/input/output in input and pass to env + for key in self.input_flags_converted_to_tmp_env: + value = i.get(key, '').strip() + if value != '': + env['CM_TMP_' + key.upper()] = value + + for key in self.input_flags_converted_to_env: + value = i.get( + key, + '').strip() if isinstance( + i.get( + key, + ''), + str) else i.get( + key, + '') + if value: + env[f"CM_{key.upper()}"] = value + + r = update_env_with_values(env) + if r['return'] > 0: + return r + + return {'return': 0} + ########################################################################## def _fix_cache_paths(self, env): cm_repos_path = os.environ.get( diff --git a/automation/script/module_misc.py b/automation/script/module_misc.py index 5de7f0402..cf6e94597 100644 --- a/automation/script/module_misc.py +++ b/automation/script/module_misc.py @@ -2071,6 +2071,14 @@ def docker(i): continue ''' + r = script_automation._update_env_from_input(env, i) + if r['return'] > 0: + return r + + # mount outdirname path + if env.get('CM_OUTDIRNAME', '') != '': + mounts.append(f"""{env['CM_OUTDIRNAME']}:{env['CM_OUTDIRNAME']}""") + # Check if need to update/map/mount inputs and env r = process_inputs({'run_cmd_arc': i_run_cmd_arc, 'docker_settings': docker_settings, @@ -2409,7 +2417,8 @@ def docker(i): print(final_run_cmd) print('') - docker_recreate_image = 'yes' if not norecreate_docker_image else 'no' + docker_recreate_image = 'yes' if str(norecreate_docker_image).lower() not in [ + "yes", "true", "1"] else 'no' if i.get('docker_push_image', '') in ['True', True, 'yes']: env['CM_DOCKER_PUSH_IMAGE'] = 'yes' diff --git a/script/get-ml-model-rgat/_cm.yaml b/script/get-ml-model-rgat/_cm.yaml index bcec6df1c..54e5c119c 100644 --- a/script/get-ml-model-rgat/_cm.yaml +++ b/script/get-ml-model-rgat/_cm.yaml @@ -3,13 +3,13 @@ automation_alias: script automation_uid: 5b4e0237da074764 cache: true category: AI/ML models +docker: + fake_run_deps: True env: CM_ML_MODEL: RGAT CM_ML_MODEL_DATASET: ICBH input_mapping: checkpoint: RGAT_CHECKPOINT_PATH - download_path: CM_DOWNLOAD_PATH - to: CM_DOWNLOAD_PATH new_env_keys: - CM_ML_MODEL_* - CM_ML_MODEL_RGAT_CHECKPOINT_PATH diff --git a/script/get-ml-model-rgat/customize.py b/script/get-ml-model-rgat/customize.py index d920a8711..3cda5a7a8 100644 --- a/script/get-ml-model-rgat/customize.py +++ b/script/get-ml-model-rgat/customize.py @@ -7,10 +7,17 @@ def preprocess(i): os_info = i['os_info'] env = i['env'] + download_dir = env.get('CM_OUTDIRNAME', '') + path = env.get('RGAT_CHECKPOINT_PATH', '').strip() if path == '' or not os.path.exists(path): - env['CM_TMP_REQUIRE_DOWNLOAD'] = 'yes' + if download_dir != '' and os.path.exists( + os.path.join(download_dir, "RGAT", "RGAT.pt")): + env['RGAT_CHECKPOINT_PATH'] = os.path.join( + download_dir, "RGAT", "RGAT.pt") + else: + env['CM_TMP_REQUIRE_DOWNLOAD'] = 'yes' return {'return': 0} @@ -19,9 +26,12 @@ def postprocess(i): env = i['env'] + if env.get('RGAT_CHECKPOINT_PATH', '') == '': + env['RGAT_CHECKPOINT_PATH'] = os.path.join( + env['RGAT_DIR_PATH'], "RGAT.pt") + if env.get('CM_ML_MODEL_RGAT_CHECKPOINT_PATH', '') == '': - env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] = env.get( - 'RGAT_CHECKPOINT_PATH', os.path.join(env['RGAT_DIR_PATH'], "RGAT.pt")) + env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] = env['RGAT_CHECKPOINT_PATH'] if env.get('CM_ML_MODEL_PATH', '') == '': env['CM_ML_MODEL_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] diff --git a/script/run-docker-container/_cm.yaml b/script/run-docker-container/_cm.yaml index 5135070b9..8a04e5b43 100644 --- a/script/run-docker-container/_cm.yaml +++ b/script/run-docker-container/_cm.yaml @@ -68,3 +68,4 @@ prehook_deps: CM_DOCKER_CONTAINER_ID: - on tags: build,docker,image +- tags: get,docker diff --git a/script/run-docker-container/customize.py b/script/run-docker-container/customize.py index 869386f91..2157d595f 100644 --- a/script/run-docker-container/customize.py +++ b/script/run-docker-container/customize.py @@ -167,6 +167,10 @@ def postprocess(i): if env.get('CM_DOCKER_EXTRA_RUN_ARGS', '') != '': run_opts += env['CM_DOCKER_EXTRA_RUN_ARGS'] + if env.get('CM_CONTAINER_TOOL', '') == 'podman' and env.get( + 'CM_PODMAN_MAP_USER_ID', '').lower() not in ["no", "0", "false"]: + run_opts += " --userns=keep-id" + if env.get('CM_DOCKER_PORT_MAPS', []): for ports in env['CM_DOCKER_PORT_MAPS']: port_map_cmds.append(ports) From c3550d2d24ed509ac3f996555a22517f9c65109e Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 27 Dec 2024 17:00:43 +0000 Subject: [PATCH 61/63] Not use SHELL command in CM docker (#82) * Dont use SHELL command in dockerfile --- script/build-dockerfile/customize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/build-dockerfile/customize.py b/script/build-dockerfile/customize.py index 9c6012aa4..baf0860f8 100644 --- a/script/build-dockerfile/customize.py +++ b/script/build-dockerfile/customize.py @@ -172,7 +172,7 @@ def preprocess(i): shell = get_value(env, config, 'SHELL', 'CM_DOCKER_IMAGE_SHELL') if shell: - f.write('SHELL ' + shell + EOL) + # f.write('SHELL ' + shell + EOL) f.write(EOL) for arg in config['ARGS_DEFAULT']: From f79e2f3ff8d53fe9da78000a4e03c96ce653d421 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 27 Dec 2024 17:35:22 +0000 Subject: [PATCH 62/63] Support adding dependent CM script commands in CM dockerfile --- .../_cm.yaml | 3 --- script/build-dockerfile/customize.py | 20 +++++++++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 45401431f..1c1a89eee 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -625,9 +625,6 @@ deps: enable_if_env: CM_MODEL: - rgat - skip_if_any_env: - CM_DATASET_IGBH_PATH: - - "on" skip_if_env: CM_RUN_STATE_DOCKER: - 'yes' diff --git a/script/build-dockerfile/customize.py b/script/build-dockerfile/customize.py index baf0860f8..8f451ebb6 100644 --- a/script/build-dockerfile/customize.py +++ b/script/build-dockerfile/customize.py @@ -379,6 +379,26 @@ def preprocess(i): if run_cmd_extra != '': x += ' ' + run_cmd_extra + if env.get('CM_DOCKER_RUN_SCRIPT_TAGS', '') != '' and str(env.get( + 'CM_DOCKER_ADD_DEPENDENT_SCRIPTS_RUN_COMMANDS', '')).lower() in ["yes", "1", "true"]: + cm_input = {'action': 'run', + 'automation': 'script', + 'tags': f"""{env['CM_DOCKER_RUN_SCRIPT_TAGS']}""", + 'print_deps': True, + 'quiet': True, + 'silent': True, + 'fake_run': True, + 'fake_deps': True + } + r = self_module.cmind.access(cm_input) + if r['return'] > 0: + return r + print_deps = r['new_state']['print_deps'] + fake_run_str = " --fake_run" if env.get('CM_DOCKER_FAKE_DEPS') else "" + cmds = ["RUN " + dep for dep in print_deps] + for cmd in cmds: + f.write(cmd + fake_run_str + EOL) + f.write(x + EOL) # fake_run to install the dependent scripts and caching them From 6ba3117e564f526a2a3d5cb393d964c4f936f5ee Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 27 Dec 2024 23:23:22 +0000 Subject: [PATCH 63/63] Fixes for igbh dataset detection (#85) * Support adding dependent CM script commands in CM dockerfile * Fix dataset-igbh register in cache * [Automated Commit] Format Codebase * Fix dataset-igbh register in cache * Fix dataset-igbh register in cache --- script/get-dataset-igbh/_cm.yaml | 80 ++++++++++++++-------------- script/get-dataset-igbh/customize.py | 15 +++++- 2 files changed, 53 insertions(+), 42 deletions(-) diff --git a/script/get-dataset-igbh/_cm.yaml b/script/get-dataset-igbh/_cm.yaml index 1b7c86ae7..8e5c7b4cd 100644 --- a/script/get-dataset-igbh/_cm.yaml +++ b/script/get-dataset-igbh/_cm.yaml @@ -41,8 +41,8 @@ prehook_deps: - CM_OUTDIRNAME force_cache: true enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - paper-node-feat @@ -60,8 +60,8 @@ prehook_deps: - CM_OUTDIRNAME force_cache: true enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - paper-node-label19 @@ -79,8 +79,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - paper-node-label2k @@ -98,8 +98,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - paper-id-index-mapping @@ -118,8 +118,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - edge-index @@ -138,8 +138,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - author-id-index-mapping @@ -157,8 +157,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - author-node-feat @@ -177,8 +177,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - conference-id-index-mapping @@ -196,8 +196,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - conference-node-feat @@ -216,8 +216,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - institute-id-index-mapping @@ -235,8 +235,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - institute-node-feat @@ -255,8 +255,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - journal-id-index-mapping @@ -274,8 +274,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - journal-node-feat @@ -294,8 +294,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - fos-id-index-mapping @@ -313,8 +313,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - fos-node-feat @@ -333,8 +333,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - author-to-institute-edge-index @@ -353,8 +353,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - paper-published-journal-edge-index @@ -373,8 +373,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - paper-topic-fos-edge-index @@ -393,8 +393,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - paper-venue-conference-edge-index @@ -413,8 +413,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - paper-written-by-author-edge-index diff --git a/script/get-dataset-igbh/customize.py b/script/get-dataset-igbh/customize.py index 8f789bcad..de85bd900 100644 --- a/script/get-dataset-igbh/customize.py +++ b/script/get-dataset-igbh/customize.py @@ -19,6 +19,11 @@ def preprocess(i): graph_folder = os.path.join( env['CM_MLPERF_INFERENCE_SOURCE'], 'graph', 'R-GAT') + if env.get('CM_DATASET_IGBH_PATH', + '') != '': # skip download, just register in cache + env['CM_DATASET_IGBH_OUT_PATH'] = env['CM_DATASET_IGBH_PATH'] + return {'return': 0} + download_loc = env.get('CM_DATASET_IGBH_OUT_PATH', os.getcwd()) env['CM_DATASET_IGBH_DOWNLOAD_LOCATION'] = download_loc @@ -31,14 +36,20 @@ def preprocess(i): run_cmd += x_sep + env['CM_PYTHON_BIN_WITH_PATH'] + \ f" tools/download_igbh_test.py --target-path {download_loc} " + else: + env['CM_DATASET_IGBH_FULL_DOWNLOAD'] = 'yes' + # split seeds run_cmd += x_sep + \ - f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} " + f"""{ + env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size { + env['CM_DATASET_IGBH_SIZE']} """ # compress graph(for glt implementation) if env.get('CM_IGBH_GRAPH_COMPRESS', '') == "yes": run_cmd += x_sep + \ - f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/compress_graph.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} --layout {env['CM_IGBH_GRAPH_COMPRESS_LAYOUT']}" + f"""{env['CM_PYTHON_BIN_WITH_PATH']} tools/compress_graph.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} --layout {env['CM_IGBH_GRAPH_COMPRESS_LAYOUT']} + """ env['CM_RUN_CMD'] = run_cmd