From 9526dacf3adfca2ecd8e4995e4cd717b73de7500 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 08:43:47 +0000 Subject: [PATCH 01/80] Fixes for igbh dataset download --- .../_cm.yaml | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml index 4750f3ff5..5af7233d7 100644 --- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml +++ b/script/get-dataset-mlperf-inference-igbh/_cm.yaml @@ -24,7 +24,7 @@ deps: - get-python - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/anandhu-eng/IGB-Datasets.git - tags: get,generic-python-lib,_package.colorama - - tags: get,generic-python-lib,_package.tqdm + prehook_deps: #paper - env: @@ -39,6 +39,7 @@ prehook_deps: - 'full' names: - dae + - paper-node-feat tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -55,6 +56,7 @@ prehook_deps: - 'full' names: - dae + - paper-node-label19 tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -71,6 +73,7 @@ prehook_deps: - 'full' names: - dae + - paper-node-label2k tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -87,6 +90,7 @@ prehook_deps: - 'full' names: - dae + - paper-id-index-mapping tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -104,6 +108,7 @@ prehook_deps: - 'full' names: - dae + - edge-index tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -121,6 +126,7 @@ prehook_deps: - 'full' names: - dae + - author-id-index-mapping tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -137,6 +143,7 @@ prehook_deps: - 'full' names: - dae + - author-node-feat tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -154,6 +161,7 @@ prehook_deps: - 'full' names: - dae + - conference-id-index-mapping tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -170,6 +178,7 @@ prehook_deps: - 'full' names: - dae + - conference-node-feat tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -187,6 +196,7 @@ prehook_deps: - 'full' names: - dae + - institute-id-index-mapping tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -203,6 +213,7 @@ prehook_deps: - 'full' names: - dae + - institute-node-feat tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -220,6 +231,7 @@ prehook_deps: - 'full' names: - dae + - journal-id-index-mapping tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -236,6 +248,7 @@ prehook_deps: - 'full' names: - dae + - journal-node-feat tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -253,6 +266,7 @@ prehook_deps: - 'full' names: - dae + - fos-id-index-mapping tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -269,6 +283,7 @@ prehook_deps: - 'full' names: - dae + - fos-node-feat tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -286,6 +301,7 @@ prehook_deps: - 'full' names: - dae + - author-to-institute-edge-index tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -303,6 +319,7 @@ prehook_deps: - 'full' names: - dae + - paper-published-journal-edge-index tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -320,6 +337,7 @@ prehook_deps: - 'full' names: - dae + - paper-topic-fos-edge-index tags: 
download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -337,6 +355,7 @@ prehook_deps: - 'full' names: - dae + - paper-venue-conference-edge-index tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -354,6 +373,7 @@ prehook_deps: - 'full' names: - dae + - paper-written-by-author-edge-index tags: download-and-extract,_wget update_tags_from_env_with_prefix: _url.: @@ -363,13 +383,13 @@ variations: default: true group: dataset-type env: - CM_DATASET_IGBH_TYPE: debug - CM_DATASET_IGBH_SIZE: tiny + CM_IGBH_DATASET_TYPE: debug + CM_IGBH_DATASET_SIZE: tiny full: group: dataset-type env: - CM_DATASET_IGBH_TYPE: debug - CM_DATASET_IGBH_SIZE: tiny + CM_IGBH_DATASET_TYPE: full + CM_IGBH_DATASET_SIZE: full glt: env: CM_IGBH_GRAPH_COMPRESS: yes From 663d6be109920510f91bea60a5538074931f56cb Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 10:50:00 +0000 Subject: [PATCH 02/80] fixes for rgat reference implementation --- .../_cm.yaml | 16 ++-- .../customize.py | 2 +- .../_cm.yaml | 90 +++++++++---------- .../customize.py | 8 +- script/get-ml-model-rgat/_cm.yaml | 1 + script/get-ml-model-rgat/customize.py | 1 + script/get-mlperf-inference-src/_cm.yaml | 4 +- 7 files changed, 61 insertions(+), 61 deletions(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 8fa3df206..cd0e9fa6d 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -487,7 +487,7 @@ deps: CM_MODEL: - rgat skip_if_env: - CM_ML_MODEL_RGAT_CHECKPOINT_PATH: + RGAT_CHECKPOINT_PATH: - 'on' ######################################################################## @@ -1226,27 +1226,23 @@ variations: group: models env: CM_MODEL: rgat - adr: - pytorch: - version: 2.1.0 deps: - tags: get,generic-python-lib,_package.colorama - tags: get,generic-python-lib,_package.tqdm - tags: get,generic-python-lib,_package.requests - tags: get,generic-python-lib,_package.torchdata - version: 0.7.0 - tags: get,generic-python-lib,_package.torchvision - version: 0.16.0 - tags: get,generic-python-lib,_package.pybind11 - tags: get,generic-python-lib,_package.PyYAML - tags: get,generic-python-lib,_package.numpy - version: 1.26.4 - tags: get,generic-python-lib,_package.pydantic - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/IllinoisGraphBenchmark/IGB-Datasets.git rgat,cuda: deps: - - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/cu121/repo.html + - tags: get,generic-python-lib,_package.dgl + env: + CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" - tags: get,generic-python-lib,_package.torch-scatter - tags: get,generic-python-lib,_package.torch-sparse - tags: get,generic-python-lib,_package.torch-geometric @@ -1264,7 +1260,9 @@ variations: - tags: get,generic-python-lib,_package.torch-sparse env: CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" - - tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/repo.html + - tags: get,generic-python-lib,_package.dgl + env: + CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" # Target devices cpu: diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index dcffa5672..93b62873b 100644 --- 
a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -493,7 +493,7 @@ def get_run_cmd_reference( scenario_extra_options + mode_extra_options + \ " --output " + env['CM_MLPERF_OUTPUT_DIR'] + \ ' --dtype ' + dtype_rgat + \ - " --model-path " + env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] + " --model-path " + env['RGAT_CHECKPOINT_PATH'] if env.get('CM_ACTIVATE_RGAT_IN_MEMORY', '') == "yes": cmd += " --in-memory " diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml index 5af7233d7..4c905ce93 100644 --- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml +++ b/script/get-dataset-mlperf-inference-igbh/_cm.yaml @@ -14,7 +14,7 @@ new_env_keys: - CM_DATASET_IGBH_PATH - CM_DATASET_IGBH_SIZE input_mapping: - out_path: CM_IGBH_DATASET_OUT_PATH + out_path: CM_DATASET_IGBH_OUT_PATH deps: - tags: mlperf,inference,source names: @@ -31,11 +31,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_feat.npy CM_DOWNLOAD_CHECKSUM: 71058b9ac8011bafa1c5467504452d13 CM_DOWNLOAD_FILENAME: node_feet.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,node_feat force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -48,11 +48,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_label_19.npy CM_DOWNLOAD_CHECKSUM: be6fda45566e679bdb05ebea98ad16d4 CM_DOWNLOAD_FILENAME: node_label_19.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,node_label_19 force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -65,11 +65,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_label_2K.npy CM_DOWNLOAD_CHECKSUM: 6eccab9a14f92f42be5b367c39002031 CM_DOWNLOAD_FILENAME: node_label_2K.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,node_label_2K force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -82,11 +82,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/paper_id_index_mapping.npy CM_DOWNLOAD_CHECKSUM: f70dd642a4f7e41d926c91c8c054fc4c CM_DOWNLOAD_FILENAME: paper_id_index_mapping.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ + CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,paper_id_index_mapping force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -100,11 +100,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__cites__paper/edge_index.npy CM_DOWNLOAD_CHECKSUM: f4897f53636c04a9c66f6063ec635c16 CM_DOWNLOAD_FILENAME: edge_index.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/paper__cites__paper/ + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__cites__paper/ extra_cache_tags: dataset,igbh,paper_cites_paper,edge_index force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -118,11 +118,11 @@ prehook_deps: CM_PACKAGE_URL: 
https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/author/author_id_index_mapping.npy CM_DOWNLOAD_CHECKSUM: 58c15aab7dae03bbd57e6a4ac5e61bd9 CM_DOWNLOAD_FILENAME: author_id_index_mapping.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/author/ + CM_DOWNLOAD_PATH: <<>>/full/processed/author/ extra_cache_tags: dataset,igbh,author,author_id_index_mapping force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -135,11 +135,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/author/node_feat.npy CM_DOWNLOAD_CHECKSUM: 2ec2512b554088381c04ec013e893c8d CM_DOWNLOAD_FILENAME: node_feat.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/author/ + CM_DOWNLOAD_PATH: <<>>/full/processed/author/ extra_cache_tags: dataset,igbh,author,node_feat force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -153,11 +153,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/conference/conference_id_index_mapping.npy CM_DOWNLOAD_CHECKSUM: 0bf7c555d8c697b31b6af6c4cb6b6612 CM_DOWNLOAD_FILENAME: conference_id_index_mapping.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/conference/ + CM_DOWNLOAD_PATH: <<>>/full/processed/conference/ extra_cache_tags: dataset,igbh,conference,conference_id_index_mapping force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -170,11 +170,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/conference/node_feat.npy CM_DOWNLOAD_CHECKSUM: 898ff529b8cf972261fedd50df6377f8 CM_DOWNLOAD_FILENAME: node_feat.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/conference/ + CM_DOWNLOAD_PATH: <<>>/full/processed/conference/ extra_cache_tags: dataset,igbh,conference,node_feat force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -188,11 +188,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/institute/institute_id_index_mapping.npy CM_DOWNLOAD_CHECKSUM: 03fb45eafb7bd35875ef4c7cd2a299a9 CM_DOWNLOAD_FILENAME: institute_id_index_mapping.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/institute/ + CM_DOWNLOAD_PATH: <<>>/full/processed/institute/ extra_cache_tags: dataset,igbh,institute,institute_id_index_mapping force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -205,11 +205,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/institute/node_feat.npy CM_DOWNLOAD_CHECKSUM: 12eaeced22d17b4e97d4b4742331c819 CM_DOWNLOAD_FILENAME: node_feat.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/institute/ + CM_DOWNLOAD_PATH: <<>>/full/processed/institute/ extra_cache_tags: dataset,igbh,institute,node_feat force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -223,11 +223,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/journal/journal_id_index_mapping.npy CM_DOWNLOAD_CHECKSUM: b630c20852b76d17a5c9c37b39176f69 CM_DOWNLOAD_FILENAME: journal_id_index_mapping.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/journal/ + CM_DOWNLOAD_PATH: <<>>/full/processed/journal/ extra_cache_tags: dataset,igbh,journal,journal_id_index_mapping force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -240,11 +240,11 @@ prehook_deps: 
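Note: the CM_DOWNLOAD_CHECKSUM values in these download entries are MD5 digests. A minimal sketch of the check they imply, assuming a plain local file (the download-and-extract dependency performs its own, equivalent validation):

    import hashlib

    def md5_of(path, chunk_size=1 << 20):
        # Stream the file so multi-GB .npy artifacts need not fit in RAM
        digest = hashlib.md5()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(chunk_size), b""):
                digest.update(chunk)
        return digest.hexdigest()

    # e.g. for the journal id/index mapping entry below, relative to the
    # chosen download location:
    assert md5_of("full/processed/journal/journal_id_index_mapping.npy") \
        == "b630c20852b76d17a5c9c37b39176f69"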
CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/journal/node_feat.npy CM_DOWNLOAD_CHECKSUM: 49d51b554b3004f10bee19d1c7f9b416 CM_DOWNLOAD_FILENAME: node_feat.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/journal/ + CM_DOWNLOAD_PATH: <<>>/full/processed/journal/ extra_cache_tags: dataset,igbh,journal,node_feat force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -258,11 +258,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/fos/fos_id_index_mapping.npy CM_DOWNLOAD_CHECKSUM: 0f0cfde619361cde35d3be9f201d081a CM_DOWNLOAD_FILENAME: fos_id_index_mapping.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/fos/ + CM_DOWNLOAD_PATH: <<>>/full/processed/fos/ extra_cache_tags: dataset,igbh,fos,fos_id_index_mapping force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -275,11 +275,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/fos/node_feat.npy CM_DOWNLOAD_CHECKSUM: 3ef3df19e2475c387fec10bac82773df CM_DOWNLOAD_FILENAME: node_feat.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/fos/ + CM_DOWNLOAD_PATH: <<>>/full/processed/fos/ extra_cache_tags: dataset,igbh,fos,node_feat force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -293,11 +293,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/author__affiliated_to__institute/edge_index.npy CM_DOWNLOAD_CHECKSUM: e35dba208f81e0987207f78787c75711 CM_DOWNLOAD_FILENAME: edge_index.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/author__affiliated_to__institute/ + CM_DOWNLOAD_PATH: <<>>/full/processed/author__affiliated_to__institute/ extra_cache_tags: dataset,igbh,author_affiliated_to_institute,edge_index force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -311,11 +311,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__published__journal/edge_index.npy CM_DOWNLOAD_CHECKSUM: 38505e83bde8e5cf94ae0a85afa60e13 CM_DOWNLOAD_FILENAME: edge_index.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/paper__published__journal/ + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__published__journal/ extra_cache_tags: dataset,igbh,paper_published_journal,edge_index force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -329,11 +329,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__topic__fos/edge_index.npy CM_DOWNLOAD_CHECKSUM: 427fb350a248ee6eaa8c21cde942fda4 CM_DOWNLOAD_FILENAME: edge_index.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/paper__topic__fos/ + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__topic__fos/ extra_cache_tags: dataset,igbh,paper_topic_fos,edge_index force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -347,11 +347,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__venue__conference/edge_index.npy CM_DOWNLOAD_CHECKSUM: 541b8d43cd93579305cfb71961e10a7d CM_DOWNLOAD_FILENAME: edge_index.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/paper__venue__conference/ + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__venue__conference/ extra_cache_tags: dataset,igbh,paper_venue_conference,edge_index force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + 
CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -365,11 +365,11 @@ prehook_deps: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper__written_by__author/edge_index.npy CM_DOWNLOAD_CHECKSUM: df39fe44bbcec93a640400e6d81ffcb5 CM_DOWNLOAD_FILENAME: edge_index.npy - CM_DOWNLOAD_PATH: <<>>/full/processed/paper__written_by__author/ + CM_DOWNLOAD_PATH: <<>>/full/processed/paper__written_by__author/ extra_cache_tags: dataset,igbh,paper_written_by_author,edge_index force_cache: true enable_if_env: - CM_IGBH_DATASET_TYPE: + CM_DATASET_IGBH_TYPE: - 'full' names: - dae @@ -383,13 +383,13 @@ variations: default: true group: dataset-type env: - CM_IGBH_DATASET_TYPE: debug - CM_IGBH_DATASET_SIZE: tiny + CM_DATASET_IGBH_TYPE: debug + CM_DATASET_IGBH_SIZE: tiny full: group: dataset-type env: - CM_IGBH_DATASET_TYPE: full - CM_IGBH_DATASET_SIZE: full + CM_DATASET_IGBH_TYPE: full + CM_DATASET_IGBH_SIZE: full glt: env: CM_IGBH_GRAPH_COMPRESS: yes diff --git a/script/get-dataset-mlperf-inference-igbh/customize.py b/script/get-dataset-mlperf-inference-igbh/customize.py index a0e6f24a6..8f789bcad 100644 --- a/script/get-dataset-mlperf-inference-igbh/customize.py +++ b/script/get-dataset-mlperf-inference-igbh/customize.py @@ -19,9 +19,9 @@ def preprocess(i): graph_folder = os.path.join( env['CM_MLPERF_INFERENCE_SOURCE'], 'graph', 'R-GAT') - download_loc = env.get('CM_IGBH_DATASET_OUT_PATH', os.getcwd()) + download_loc = env.get('CM_DATASET_IGBH_OUT_PATH', os.getcwd()) - env['CM_IGBH_DATASET_DOWNLOAD_LOCATION'] = download_loc + env['CM_DATASET_IGBH_DOWNLOAD_LOCATION'] = download_loc run_cmd += f"cd {graph_folder} " x_sep = " && " @@ -33,7 +33,7 @@ def preprocess(i): # split seeds run_cmd += x_sep + \ - f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']}" + f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} " # compress graph(for glt implementation) if env.get('CM_IGBH_GRAPH_COMPRESS', '') == "yes": @@ -50,7 +50,7 @@ def postprocess(i): env = i['env'] env['CM_DATASET_IGBH_PATH'] = env.get( - 'CM_IGBH_DATASET_OUT_PATH', os.getcwd()) + 'CM_DATASET_IGBH_OUT_PATH', os.getcwd()) print( f"Path to the IGBH dataset: {os.path.join(env['CM_DATASET_IGBH_PATH'], env['CM_DATASET_IGBH_SIZE'])}") diff --git a/script/get-ml-model-rgat/_cm.yaml b/script/get-ml-model-rgat/_cm.yaml index 644bf688a..d7615acd2 100644 --- a/script/get-ml-model-rgat/_cm.yaml +++ b/script/get-ml-model-rgat/_cm.yaml @@ -13,6 +13,7 @@ input_mapping: new_env_keys: - CM_ML_MODEL_* - CM_ML_MODEL_RGAT_CHECKPOINT_PATH +- RGAT_CHECKPOINT_PATH prehook_deps: - enable_if_env: CM_DOWNLOAD_TOOL: diff --git a/script/get-ml-model-rgat/customize.py b/script/get-ml-model-rgat/customize.py index ac8feaad7..df810a5ab 100644 --- a/script/get-ml-model-rgat/customize.py +++ b/script/get-ml-model-rgat/customize.py @@ -25,6 +25,7 @@ def postprocess(i): elif env.get('CM_ML_MODEL_PATH', '') == '': env['CM_ML_MODEL_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] + env['RGAT_CHECKPOINT_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] return {'return': 0} diff --git a/script/get-mlperf-inference-src/_cm.yaml b/script/get-mlperf-inference-src/_cm.yaml index c5e195a88..c100e32e8 100644 --- a/script/get-mlperf-inference-src/_cm.yaml +++ b/script/get-mlperf-inference-src/_cm.yaml @@ -142,11 +142,11 @@ versions: CM_TMP_GIT_URL: 
https://github.com/neuralmagic/inference main: env: - CM_MLPERF_LAST_RELEASE: v4.1 + CM_MLPERF_LAST_RELEASE: v5.0 CM_TMP_GIT_CHECKOUT: main master: env: - CM_MLPERF_LAST_RELEASE: v4.1 + CM_MLPERF_LAST_RELEASE: v5.0 CM_TMP_GIT_CHECKOUT: master r2.1: env: From e0b6ded653a16e08c199222f9f5bff9f4726b615 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 11:28:27 +0000 Subject: [PATCH 03/80] Added tqdm deps for get-dataset-igbh --- script/get-dataset-mlperf-inference-igbh/_cm.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml index 4c905ce93..83cfe457e 100644 --- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml +++ b/script/get-dataset-mlperf-inference-igbh/_cm.yaml @@ -24,6 +24,7 @@ deps: - get-python - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/anandhu-eng/IGB-Datasets.git - tags: get,generic-python-lib,_package.colorama + - tags: get,generic-python-lib,_package.tqdm prehook_deps: #paper From 45a08cb6f144807384d3601ec947d30f8f6acb43 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 17:11:24 +0530 Subject: [PATCH 04/80] Fix old repo name usage --- automation/script/module_misc.py | 6 +++--- script/app-mlperf-inference/_cm.yaml | 4 ++-- script/app-mlperf-inference/customize.py | 2 +- script/build-dockerfile/customize.py | 4 ++-- script/generate-mlperf-inference-submission/_cm.yaml | 4 ++-- script/test-cm-core/src/script/test_docker.py | 4 ++-- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/automation/script/module_misc.py b/automation/script/module_misc.py index 9cae72b07..5de7f0402 100644 --- a/automation/script/module_misc.py +++ b/automation/script/module_misc.py @@ -1634,12 +1634,12 @@ def dockerfile(i): 'docker_cm_repo', docker_settings.get( 'cm_repo', - 'mlcommons@cm4mlops')) + 'mlcommons@mlperf-automations')) cm_repo_branch = i.get( 'docker_cm_repo_branch', docker_settings.get( 'cm_repo_branch', - 'mlperf-inference')) + 'main')) cm_repo_flags = i.get( 'docker_cm_repo_flags', @@ -2295,7 +2295,7 @@ def docker(i): 'docker_cm_repo', docker_settings.get( 'cm_repo', - 'mlcommons@cm4mlops')) + 'mlcommons@mlperf-automations')) docker_path = i.get('docker_path', '').strip() if docker_path == '': diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index 4c368346e..eb536e8e5 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -1808,8 +1808,8 @@ docker: interactive: True extra_run_args: ' --dns 8.8.8.8 --dns 8.8.4.4 --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' os: ubuntu - cm_repo: mlcommons@cm4mlops - cm_repo_branch: mlperf-inference + cm_repo: mlcommons@mlperf-automations + cm_repo_branch: dev real_run: False os_version: '22.04' docker_input_mapping: diff --git a/script/app-mlperf-inference/customize.py b/script/app-mlperf-inference/customize.py index 41fd8570b..f62ae947d 100644 --- a/script/app-mlperf-inference/customize.py +++ b/script/app-mlperf-inference/customize.py @@ -356,7 +356,7 @@ def postprocess(i): host_info['system_name'] = env['CM_HOST_SYSTEM_NAME'] # Check CM automation repository - repo_name = 'mlcommons@cm4mlops' + repo_name = 'mlcommons@mlperf-automations' repo_hash = '' r = cm.access({'action': 'find', 'automation': 'repo', 'artifact': 'mlcommons@cm4mlops,9e97bb72b0474657'}) diff --git a/script/build-dockerfile/customize.py 
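Note: PATCH 02 above adds RGAT_CHECKPOINT_PATH to new_env_keys in get-ml-model-rgat because a CM script exports to its caller only the environment keys matching that whitelist (exact names, or '*'-suffixed prefixes such as CM_ML_MODEL_*). A rough sketch of that filtering, with deliberately simplified semantics (not CM's actual implementation):

    def filter_new_env(env, new_env_keys):
        # Keep only keys the script's _cm.yaml explicitly exposes
        out = {}
        for key, value in env.items():
            for pattern in new_env_keys:
                if key == pattern or (pattern.endswith("*")
                                      and key.startswith(pattern[:-1])):
                    out[key] = value
        return out

    env = {"CM_ML_MODEL_RGAT_CHECKPOINT_PATH": "/models/RGAT.pt",
           "RGAT_CHECKPOINT_PATH": "/models/RGAT.pt",
           "CM_TMP_SCRATCH": "x"}  # toy values
    print(filter_new_env(env, ["CM_ML_MODEL_*", "RGAT_CHECKPOINT_PATH"]))

Without the whitelist entry, the RGAT_CHECKPOINT_PATH set in customize.py would be dropped before the benchmark script could read it.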
b/script/build-dockerfile/customize.py index f5cd06204..9c6012aa4 100644 --- a/script/build-dockerfile/customize.py +++ b/script/build-dockerfile/customize.py @@ -131,7 +131,7 @@ def preprocess(i): print( f"Converted repo format from {env['CM_MLOPS_REPO']} to {cm_mlops_repo}") else: - cm_mlops_repo = "mlcommons@cm4mlops" + cm_mlops_repo = "mlcommons@mlperf-automations" cm_mlops_repo_branch_string = f" --branch={env['CM_MLOPS_REPO_BRANCH']}" @@ -299,7 +299,7 @@ def preprocess(i): f.write(EOL + '# Download CM repo for scripts' + EOL) if use_copy_repo: - docker_repo_dest = "/home/cmuser/CM/repos/mlcommons@cm4mlops" + docker_repo_dest = "/home/cmuser/CM/repos/mlcommons@mlperf-automations" f.write( f'COPY --chown=cmuser:cm {relative_repo_path} {docker_repo_dest}' + EOL) diff --git a/script/generate-mlperf-inference-submission/_cm.yaml b/script/generate-mlperf-inference-submission/_cm.yaml index 32003a1b3..064cbc307 100644 --- a/script/generate-mlperf-inference-submission/_cm.yaml +++ b/script/generate-mlperf-inference-submission/_cm.yaml @@ -31,8 +31,8 @@ deps: - 'on' tags: get,mlperf,submission,dir docker: - cm_repo: mlcommons@cm4mlops - cm_repo_branch: mlperf-inference + cm_repo: mlcommons@mlperf-automations + cm_repo_branch: dev deps: - names: get-mlperf-inference-results-dir skip_if_env: diff --git a/script/test-cm-core/src/script/test_docker.py b/script/test-cm-core/src/script/test_docker.py index ad867a2a1..1b63631c6 100644 --- a/script/test-cm-core/src/script/test_docker.py +++ b/script/test-cm-core/src/script/test_docker.py @@ -10,7 +10,7 @@ 'add_deps_recursive': { 'compiler': {'tags': "gcc"} }, - 'docker_cm_repo': 'mlcommons@cm4mlops', + 'docker_cm_repo': 'mlcommons@mlperf-automations', 'image_name': 'cm-script-app-image-classification-onnx-py', 'env': { 'CM_DOCKER_RUN_SCRIPT_TAGS': 'app,image-classification,onnx,python', @@ -27,7 +27,7 @@ 'add_deps_recursive': { 'compiler': {'tags': "gcc"} }, - 'docker_cm_repo': 'mlcommons@cm4mlops', + 'docker_cm_repo': 'mlcommons@mlperf-automations', 'image_name': 'cm-script-app-image-classification-onnx-py', 'env': { 'CM_DOCKER_RUN_SCRIPT_TAGS': 'app,image-classification,onnx,python', From 0b5bcfeca40bb9f7c0636c10e5765d9c5db87aff Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 11:53:45 +0000 Subject: [PATCH 05/80] Fix for avoiding user prompt in download-igbh --- script/get-dataset-mlperf-inference-igbh/_cm.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml index 83cfe457e..796d5674e 100644 --- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml +++ b/script/get-dataset-mlperf-inference-igbh/_cm.yaml @@ -15,6 +15,8 @@ new_env_keys: - CM_DATASET_IGBH_SIZE input_mapping: out_path: CM_DATASET_IGBH_OUT_PATH +env: + SKIP_USER_PROMPT: yes deps: - tags: mlperf,inference,source names: @@ -22,7 +24,7 @@ deps: - tags: get,python names: - get-python - - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/anandhu-eng/IGB-Datasets.git + - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/gateoverflow/IGB-Datasets.git - tags: get,generic-python-lib,_package.colorama - tags: get,generic-python-lib,_package.tqdm From 25d903be1648a615bc108d864d6709661b325f27 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 17:26:51 +0530 Subject: [PATCH 06/80] Remove deprecated gui usage --- script/run-mlperf-inference-app/_cm.yaml | 9 - script/run-mlperf-inference-app/customize.py | 
584 ------------------- 2 files changed, 593 deletions(-) diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 05ae0d476..689aaabc3 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -8,9 +8,6 @@ category: Modular MLPerf inference benchmark pipeline developers: "[Arjun Suresh](https://www.linkedin.com/in/arjunsuresh), [Grigori Fursin](https://cKnowledge.org/gfursin)" -gui: - title: CM GUI to run MLPerf inference benchmarks and prepare submissions - clean_output_files: - open.tar.gz - summary.csv @@ -202,11 +199,6 @@ variations: env: CM_MLPERF_LOADGEN_COMPLIANCE: 'yes' - dashboard: - default_gui: false - env: - CM_MLPERF_DASHBOARD: 'on' - find-performance: env: CM_MLPERF_FIND_PERFORMANCE_MODE: 'yes' @@ -396,7 +388,6 @@ variations: submission: base: - all-modes - default_gui: true default_variations: submission-generation-style: full env: diff --git a/script/run-mlperf-inference-app/customize.py b/script/run-mlperf-inference-app/customize.py index 9b5f350d4..480beda5a 100644 --- a/script/run-mlperf-inference-app/customize.py +++ b/script/run-mlperf-inference-app/customize.py @@ -433,587 +433,3 @@ def get_url(url, path, path2, name, text): url_online = '[{}]({})'.format(text, urlx) return {'return': 0, 'url_online': url_online} - -########################################################################## - - -def gui(i): - - params = i['params'] - st = i['st'] - - script_meta = i['meta'] - - misc = i['misc_module'] - - script_path = i['script_path'] - script_url = i.get('script_url', '') - script_tags = i.get('script_tags', '') - - compute_meta = i.get('compute_meta', {}) - compute_tags = compute_meta.get('tags', []) - bench_meta = i.get('bench_meta', {}) - - compute_uid = compute_meta.get('uid', '') - bench_uid = bench_meta.get('uid', '') - - st_inputs_custom = {} - - bench_input = bench_meta.get('bench_input', {}) - - end_html = '' - - extra = {} - add_to_st_inputs = {} - - inp = script_meta['input_description'] - - # Here we can update params - v = compute_meta.get('mlperf_inference_device') - if v is not None and v != '': - inp['device']['force'] = v - - if v in ['tpu', 'gaudi']: - st.markdown('----') - st.markdown( - '**WARNING: unified CM workflow support for this hardware is pending - please [feel free to help](https://discord.gg/JjWNWXKxwT)!**') - return {'return': 0, 'skip': True, 'end_html': end_html} - - elif 'orin' in compute_tags: - st.markdown('----') - st.markdown( - '**WARNING: we need to encode CM knowledge from [this Orin setp](https://github.com/mlcommons/ck/blob/master/docs/mlperf/setup/setup-nvidia-jetson-orin.md) to this GUI!**') - return {'return': 0, 'skip': True, 'end_html': end_html} - - st.markdown('---') - st.markdown('**How would you like to run the MLPerf inference benchmark?**') - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_device', - 'desc': inp['device']}) - device = r.get('value2') - inp['device']['force'] = device - - if device == 'cpu': - inp['implementation']['choices'] = ['mlcommons-python', - 'mlcommons-cpp', 'intel', 'ctuning-cpp-tflite'] - if 'intel' in compute_tags: - inp['implementation']['default'] = 'intel' - else: - inp['implementation']['default'] = 'mlcommons-python' - inp['backend']['choices'] = [ - 'onnxruntime', 'deepsparse', 'pytorch', 'tf', 'tvm-onnx'] - inp['backend']['default'] = 'onnxruntime' - elif device == 'rocm': - inp['implementation']['force'] = 
'mlcommons-python' - inp['precision']['force'] = '' - inp['backend']['force'] = 'onnxruntime' - st.markdown( - '*WARNING: CM-MLPerf inference workflow was not tested thoroughly for AMD GPU - please feel free to test and improve!*') - elif device == 'qaic': - inp['implementation']['force'] = 'qualcomm' - inp['precision']['force'] = '' - inp['backend']['force'] = 'glow' - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_division', - 'desc': inp['division']}) - division = r.get('value2') - inp['division']['force'] = division - - y = 'compliance' - if division == 'closed': - inp[y]['default'] = 'yes' - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_compliance', - 'desc': inp[y]}) - compliance = r.get('value2') - inp[y]['force'] = compliance - - if compliance == 'yes': - st.markdown( - '*:red[See [online table with required compliance tests](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#5132-inference)].*') - - else: - inp[y]['force'] = 'no' - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_category', - 'desc': inp['category']}) - category = r.get('value2') - inp['category']['force'] = category - - ########################################################################## - # Implementation - v = bench_input.get('mlperf_inference_implementation') - if v is not None and v != '': - inp['implementation']['force'] = v - else: - if device == 'cuda': - inp['implementation']['choices'] = [ - 'nvidia', 'mlcommons-python', 'mlcommons-cpp'] - inp['implementation']['default'] = 'nvidia' - inp['backend']['choices'] = ['tensorrt', 'onnxruntime', 'pytorch'] - inp['backend']['default'] = 'tensorrt' - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_implementation', - 'desc': inp['implementation']}) - implementation = r.get('value2') - inp['implementation']['force'] = implementation - - implementation_setup = '' - r = load_md(script_path, 'setup', 'i-' + implementation) - if r['return'] == 0: - implementation_setup = r['string'] - - url_faq_implementation = '' - r = get_url(script_url, script_path, 'faq', implementation, 'FAQ online') - if r['return'] == 0: - url_faq_implementation = r['url_online'] - - can_have_docker_flag = False - - if implementation == 'mlcommons-cpp': - # inp['backend']['choices'] = ['onnxruntime'] - inp['precision']['force'] = 'float32' - inp['backend']['force'] = 'onnxruntime' - inp['model']['choices'] = ['resnet50', 'retinanet'] - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-mlcommons-cpp)]*') - elif implementation == 'mlcommons-python': - inp['precision']['force'] = 'float32' - if device == 'cuda': - inp['backend']['choices'] = ['onnxruntime', 'pytorch', 'tf'] - inp['backend']['default'] = 'onnxruntime' - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-mlcommons-python)]*') - elif implementation == 'ctuning-cpp-tflite': - inp['precision']['force'] = 'float32' - inp['model']['force'] = 'resnet50' - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-ctuning-cpp-tflite)]*') - elif implementation == 'nvidia': - 
inp['backend']['force'] = 'tensorrt' - extra['skip_script_docker_func'] = True - can_have_docker_flag = True - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/app-mlperf-inference-nvidia)]*') - elif implementation == 'intel': - inp['model']['choices'] = ['bert-99', 'gptj-99'] - inp['model']['default'] = 'bert-99' - inp['precision']['choices'] = ['int8', 'int4'] - inp['precision']['default'] = 'int8' - inp['category']['force'] = 'datacenter' - inp['backend']['force'] = 'pytorch' - inp['sut']['default'] = 'sapphire-rapids.112c' - can_have_docker_flag = True - extra['skip_script_docker_func'] = True -# st.markdown('*:red[Note: Intel implementation require extra CM command to build and run Docker container - you will run CM commands to run MLPerf benchmarks there!]*') - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-mlperf-inference-intel)]*') - elif implementation == 'qualcomm': - inp['model']['choices'] = ['resnet50', 'retinanet', 'bert-99'] - inp['model']['default'] = 'bert-99' - inp['precision']['default'] = 'float16' - extra['skip_script_docker_func'] = True - st.markdown( - '*:red[[CM automation recipe for this implementation](https://github.com/mlcommons/cm4mlops/tree/main/script/reproduce-mlperf-inference-qualcomm)]*') - - ########################################################################## - # Backend - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_backend', - 'desc': inp['backend']}) - backend = r.get('value2') - inp['backend']['force'] = backend - - backend_setup = '' - r = load_md(script_path, 'setup', 'b-' + backend) - if r['return'] == 0: - backend_setup = r['string'] - - if backend == 'deepsparse': - inp['model']['choices'] = [ - 'resnet50', 'retinanet', 'bert-99', 'bert-99.9'] - inp['model']['default'] = 'bert-99' - inp['precision']['choices'] = ['float32', 'int8'] - inp['precision']['default'] = 'int8' - if 'force' in inp['precision']: - del (inp['precision']['force']) - - ########################################################################## - # Model - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_model', - 'desc': inp['model']}) - model = r.get('value2') - inp['model']['force'] = model - - github_doc_model = '' - - if model == 'retinanet': - x = '50' - if implementation == 'mlcommons-python': - x = '200' - st.markdown( - ':red[This model requires ~{}GB of free disk space for preprocessed dataset in a full/submission run!]\n'.format(x)) - - elif model.startswith('bert-'): - github_doc_model = 'bert' - - elif model.startswith('3d-unet-'): - github_doc_model = '3d-unet' - - elif model == 'rnnt': - github_doc_model = 'rnnt' - - elif model.startswith('dlrm-v2-'): - github_doc_model = 'dlrm_v2' - - elif model.startswith('gptj-'): - github_doc_model = 'gpt-j' - - elif model == 'sdxl': - github_doc_model = 'stable-diffusion-xl' - - elif model.startswith('llama2-'): - github_doc_model = 'llama2-70b' - - elif model.startswith('mixtral-'): - github_doc_model = 'mixtral-8x7b' - - if github_doc_model == '': - github_doc_model = model - - model_cm_url = 'https://github.com/mlcommons/ck/tree/master/docs/mlperf/inference/{}'.format( - github_doc_model) - extra_notes_online = '[Extra notes online]({})\n'.format(model_cm_url) - - st.markdown( - '*[CM-MLPerf GitHub docs for this 
model]({})*'.format(model_cm_url)) - - ########################################################################## - # Precision - if implementation == 'intel': - if model == 'bert-99': - inp['precision']['force'] = 'int8' - elif model == 'gptj-99': - inp['precision']['force'] = 'int4' - elif implementation == 'qualcomm': - if model == 'resnet50': - inp['precision']['print'] = 'int8' - elif model == 'retinanet': - inp['precision']['print'] = 'int8' - elif model == 'bert-99': - inp['precision']['print'] = 'int8/float16' - - if inp['precision'].get('force', '') == '': - x = inp['precision'].get('print', '') - if x != '': - st.markdown('**{}**: {}'.format(inp['precision']['desc'], x)) - else: - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_precision', - 'desc': inp['precision']}) - precision = r.get('value2') - inp['precision']['force'] = precision - - ########################################################################## - # Benchmark version - - script_meta_variations = script_meta['variations'] - - choices = [''] + [ - k for k in script_meta_variations if script_meta_variations[k].get( - 'group', '') == 'benchmark-version'] - desc = { - 'choices': choices, - 'default': choices[0], - 'desc': 'Force specific benchmark version?'} - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_version', - 'desc': desc}) - benchmark_version = r.get('value2') - - if benchmark_version != '': - params['~~benchmark-version'] = [benchmark_version] - - ########################################################################## - # Run via Docker container - if can_have_docker_flag: - - default_choice = 'yes - run in container' - - choices = [default_choice, 'no - run natively'] - desc = { - 'choices': choices, - 'default': choices[0], - 'desc': 'Should CM script prepare and run Docker container in interactive mode to run MLPerf? 
You can then copy/paste CM commands generated by this GUI to benchmark different models.'} - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_docker', - 'desc': desc}) - benchmark_docker = r.get('value2') - - if benchmark_docker == 'yes - run in container': - add_to_st_inputs['@docker'] = True - add_to_st_inputs['@docker_cache'] = 'no' - - ########################################################################## - # Prepare submission - st.markdown('---') - - submission = st.toggle( - 'Would you like to prepare official submission?', - value=False) - if submission: - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_hw_name', - 'desc': inp['hw_name']}) - inp['hw_name']['force'] = r.get('value2') - - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_submitter', - 'desc': inp['submitter']}) - submitter = r.get('value2') - inp['submitter']['force'] = submitter - - params['~~submission-generation'] = ['submission'] - params['~all-scenarios'] = ['true'] - inp['scenario']['force'] = '' - inp['clean']['default'] = False - inp['repro']['force'] = True - - x = '*:red[Use the following command to find local directory with the submission tree and results:]*\n```bash\ncm find cache --tags=submission,dir\n```\n' - - x += '*:red[You will also find results in `mlperf-inference-submission.tar.gz` file that you can submit to MLPerf!]*\n\n' - - x += '*:red[Note that if some results are INVALID due to too short run, you can rerun the same CM command and it should increase the length of the benchmark until you get valid result!]*\n' - - st.markdown(x) - - st.markdown('---') - - else: - inp['submitter']['force'] = '' - inp['clean']['default'] = True - params['~submission'] = ['false'] - - choices = [ - 'Performance', - 'Accuracy', - 'Find Performance from a short run', - 'Performance and Accuracy'] - desc = { - 'choices': choices, - 'default': choices[0], - 'desc': 'What to measure?'} - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_measure', - 'desc': desc}) - measure = r.get('value2') - - x = '' - if measure == 'Performance': - x = 'performance-only' - elif measure == 'Accuracy': - x = 'accuracy-only' - elif measure == 'Find Performance from a short run': - x = 'find-performance' - elif measure == 'Performance and Accuracy': - x = 'submission' - - params['~~submission-generation'] = [x] - - ####################################################################### - # Prepare scenario - - xall = 'All applicable' - choices = ['Offline', 'Server', 'SingleStream', 'MultiStream', xall] - desc = { - 'choices': choices, - 'default': choices[0], - 'desc': 'Which scenario(s)?'} - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_scenario', - 'desc': desc}) - scenario = r.get('value2') - - if scenario == xall: - params['~all-scenarios'] = ['true'] - inp['scenario']['force'] = '' - else: - inp['scenario']['force'] = scenario - - ########################################################################## - # Short or full run - - x = ['Full run', 'Short run'] - if submission: - choices = [x[0], x[1]] - else: - choices = [x[1], x[0]] - - desc = { - 'choices': choices, - 'default': choices[0], - 'desc': 'Short (test) or full (valid) run?'} - r = misc.make_selector({'st': st, - 'st_inputs': 
st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_how', - 'desc': desc}) - how = r.get('value2') - - if how == x[0]: - params['~~submission-generation-style'] = ['full'] - inp['execution_mode']['force'] = 'valid' - else: - params['~~submission-generation-style'] = ['short'] - inp['execution_mode']['force'] = 'test' - - ########################################################################## - # Power - -# desc = {'boolean':True, 'default':False, 'desc':'Measure power?'} -# r = misc.make_selector({'st':st, 'st_inputs':st_inputs_custom, 'params':params, 'key': 'mlperf_inference_power', 'desc':desc}) -# power = r.get('value2', False) - - power = st.toggle('Measure power consumption?', value=False) - - if power: - inp['power']['force'] = 'yes' - - y = 'adr.mlperf-power-client.power_server' - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_power_server', - 'desc': inp[y]}) - inp[y]['force'] = r.get('value2') - - y = 'adr.mlperf-power-client.port' - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_power_port', - 'desc': inp[y]}) - inp[y]['force'] = r.get('value2') - - st.markdown( - '*:red[See [online notes](https://github.com/mlcommons/ck/blob/master/docs/tutorials/mlperf-inference-power-measurement.md)] to setup power meter and server.*') - - else: - inp['power']['force'] = 'no' - inp['adr.mlperf-power-client.power_server']['force'] = '' - inp['adr.mlperf-power-client.port']['force'] = '' - - ########################################################################## - # Dashboard - -# desc = {'boolean':True, 'default':False, 'desc':'Output results to W&B dashboard?'} -# r = misc.make_selector({'st':st, 'st_inputs':st_inputs_custom, 'params':params, 'key': 'mlperf_inference_dashboard', 'desc':desc}) -# dashboard = r.get('value2', False) - - dashboard = st.toggle('Output results to W&B dashboard?', value=False) - - if dashboard: - params['~dashboard'] = ['true'] - - y = 'dashboard_wb_project' - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_power_wb_project', - 'desc': inp[y]}) - inp[y]['force'] = r.get('value2') - - y = 'dashboard_wb_user' - r = misc.make_selector({'st': st, - 'st_inputs': st_inputs_custom, - 'params': params, - 'key': 'mlperf_inference_power_wb_user', - 'desc': inp[y]}) - inp[y]['force'] = r.get('value2') - - else: - params['~dashboard'] = ['false'] - inp['dashboard_wb_project']['force'] = '' - inp['dashboard_wb_user']['force'] = '' - - # Hide customization by default - params['hide_script_customization'] = True - - x = implementation_setup - if backend_setup != '': - if x != '': - x += '\n\n' - x += backend_setup - - extra['extra_notes_online'] = extra_notes_online - extra['extra_faq_online'] = url_faq_implementation - extra['extra_setup'] = x - - ########################################################################## - value_reproduce = inp.get('repro', {}).get('force', False) - reproduce = st.toggle( - 'Record extra info for reproducibility?', - value=value_reproduce) - - explore = st.toggle( - 'Explore/tune benchmark (batch size, threads, etc)?', - value=False) - - if reproduce or explore: - add_to_st_inputs.update({ - "@repro_extra.run-mlperf-inference-app.bench_uid": bench_uid, - "@repro_extra.run-mlperf-inference-app.compute_uid": compute_uid, - '@results_dir': '{{CM_EXPERIMENT_PATH3}}', - '@submission_dir': '{{CM_EXPERIMENT_PATH3}}' - }) - - 
inp['repro']['force'] = True - extra['use_experiment'] = True - - if explore: - add_to_st_inputs['@batch_size'] = '{{CM_EXPLORE_BATCH_SIZE{[1,2,4,8]}}}' - - ########################################################################## - debug = st.toggle( - 'Debug and run MLPerf benchmark natively from command line after CM auto-generates CMD?', - value=False) - if debug: - inp['debug']['force'] = True - - extra['add_to_st_inputs'] = add_to_st_inputs - - return {'return': 0, 'end_html': end_html, 'extra': extra} From ccb1cefaa86c76cf328dc36bb166bfd12975b36e Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Thu, 12 Dec 2024 17:38:34 +0530 Subject: [PATCH 07/80] run on pull request --- .github/workflows/check-broken-links.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/check-broken-links.yml b/.github/workflows/check-broken-links.yml index af257ebf4..5b763f57e 100644 --- a/.github/workflows/check-broken-links.yml +++ b/.github/workflows/check-broken-links.yml @@ -1,9 +1,9 @@ name: "Check .md README files for broken links" on: - push: + pull_request: branches: - - master + - dev jobs: markdown-link-check: From 036b4e97caaf2b00f232bedae6894b81c6ce1f08 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Thu, 12 Dec 2024 18:05:03 +0530 Subject: [PATCH 08/80] change base branch to dev --- .github/workflows/check-broken-links.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/check-broken-links.yml b/.github/workflows/check-broken-links.yml index 5b763f57e..aaaf83d14 100644 --- a/.github/workflows/check-broken-links.yml +++ b/.github/workflows/check-broken-links.yml @@ -18,3 +18,4 @@ jobs: with: use-quiet-mode: 'yes' check-modified-files-only: 'yes' + base-branch: dev From 006f23fcf006ae3b4a2acf2d26ee105d1c2d674e Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 16:36:08 +0000 Subject: [PATCH 09/80] Cleanup for mlperf-inference-rgat --- .../_cm.yaml | 43 +++++++++---------- 1 file changed, 21 insertions(+), 22 deletions(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index cd0e9fa6d..b13153227 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -1237,32 +1237,31 @@ variations: - tags: get,generic-python-lib,_package.numpy - tags: get,generic-python-lib,_package.pydantic - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/IllinoisGraphBenchmark/IGB-Datasets.git - - rgat,cuda: - deps: - - tags: get,generic-python-lib,_package.dgl - env: - CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" + - tags: get,generic-python-lib,_package.torch-geometric + update_tags_from_env_with_prefix: + _find_links_url.: + - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL - tags: get,generic-python-lib,_package.torch-scatter + update_tags_from_env_with_prefix: + _find_links_url.: + - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL - tags: get,generic-python-lib,_package.torch-sparse - - tags: get,generic-python-lib,_package.torch-geometric - env: - CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>.html" + update_tags_from_env_with_prefix: + _find_links_url.: + - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL + - tags: get,generic-python-lib,_package.dgl #,_url.git+https://github.com/dmlc/dgl.git + 
update_tags_from_env_with_prefix: + _find_links_url.: + - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL + + rgat,cuda: + env: + CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>.html" + CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL: "https://data.dgl.ai/wheels/torch-<<>>/cu121/repo.html" rgat,cpu: - deps: - - tags: get,generic-python-lib,_package.torch-geometric - env: - CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" - - tags: get,generic-python-lib,_package.torch-scatter - env: - CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" - - tags: get,generic-python-lib,_package.torch-sparse - env: - CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" - - tags: get,generic-python-lib,_package.dgl - env: - CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" + env: + CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" # Target devices cpu: From 8e896ed6822c3b54e197db2c395ae41348dc5538 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 16:49:37 +0000 Subject: [PATCH 10/80] Fix torch and numpy version for mlperf-inference-rgat --- script/app-mlperf-inference-mlcommons-python/_cm.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index b13153227..b61015505 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -1226,6 +1226,9 @@ variations: group: models env: CM_MODEL: rgat + adr: + pytorch: + version: "2.1.0" deps: - tags: get,generic-python-lib,_package.colorama - tags: get,generic-python-lib,_package.tqdm @@ -1235,6 +1238,8 @@ variations: - tags: get,generic-python-lib,_package.pybind11 - tags: get,generic-python-lib,_package.PyYAML - tags: get,generic-python-lib,_package.numpy + version_max: "1.26.4" + version_max_usable: "1.26.4" - tags: get,generic-python-lib,_package.pydantic - tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/IllinoisGraphBenchmark/IGB-Datasets.git - tags: get,generic-python-lib,_package.torch-geometric From 1717a569bc42a01a900683db37970fa346583a45 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 17:39:38 +0000 Subject: [PATCH 11/80] Support pytorch 2.4 for app-mlperf-inference-rgat --- script/app-mlperf-inference-mlcommons-python/_cm.yaml | 10 ++++++---- .../app-mlperf-inference-mlcommons-python/customize.py | 5 ++++- script/get-generic-python-lib/customize.py | 4 ++++ 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index b61015505..15ade7a55 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -1226,9 +1226,10 @@ variations: group: models env: CM_MODEL: rgat - adr: + add_deps_recursive: pytorch: - version: "2.1.0" + version_max: "2.4.1" + version_max_usable: "2.4.1" deps: - tags: get,generic-python-lib,_package.colorama - tags: get,generic-python-lib,_package.tqdm @@ -1254,7 +1255,7 @@ variations: update_tags_from_env_with_prefix: _find_links_url.: - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL - - tags: get,generic-python-lib,_package.dgl #,_url.git+https://github.com/dmlc/dgl.git + - 
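Note: PATCH 11 below pairs the torch-version-templated find-links URLs with a new *_MAJOR_MINOR key computed in get-generic-python-lib. A toy illustration of that derivation (values are examples only):

    version = "2.4.1"  # e.g. a detected torch version
    major_minor = ".".join(version.split(".")[:2])
    print(major_minor)  # -> "2.4"
    # so a dgl find-links template can resolve to, e.g.,
    # https://data.dgl.ai/wheels/torch-2.4/repo.html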
tags: get,generic-python-lib,_package.dgl update_tags_from_env_with_prefix: _find_links_url.: - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL @@ -1262,11 +1263,12 @@ variations: rgat,cuda: env: CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>.html" - CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL: "https://data.dgl.ai/wheels/torch-<<>>/cu121/repo.html" + CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL: "https://data.dgl.ai/wheels/torch-<<>>/cu121/repo.html" rgat,cpu: env: CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<>>+cpu.html" + CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL: "https://data.dgl.ai/wheels/torch-<<>>/repo.html" # Target devices cpu: diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index 93b62873b..196bf38dc 100644 --- a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -388,7 +388,10 @@ def get_run_cmd_reference( env['CM_VLLM_SERVER_MODEL_NAME'] = env.get( "CM_VLLM_SERVER_MODEL_NAME") or "NousResearch/Meta-Llama-3-8B-Instruct" # env['CM_MLPERF_INFERENCE_API_SERVER'] = "http://localhost:8000" - cmd += f" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} --model-path {env['CM_VLLM_SERVER_MODEL_NAME']} --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm " + cmd += f" --api-server { + env['CM_MLPERF_INFERENCE_API_SERVER']} --model-path { + env['CM_VLLM_SERVER_MODEL_NAME']} --api-model-name { + env['CM_VLLM_SERVER_MODEL_NAME']} --vllm " else: cmd += f" --model-path {env['LLAMA2_CHECKPOINT_PATH']}" diff --git a/script/get-generic-python-lib/customize.py b/script/get-generic-python-lib/customize.py index 6421a22a1..57f25127f 100644 --- a/script/get-generic-python-lib/customize.py +++ b/script/get-generic-python-lib/customize.py @@ -189,4 +189,8 @@ def postprocess(i): if pip_version and len(pip_version) > 1 and int(pip_version[0]) >= 23: env['CM_PYTHON_PIP_COMMON_EXTRA'] = " --break-system-packages" + if version.count('.') > 1: + env[f"{env_version_key}_MAJOR_MINOR"] = ".".join( + version.split(".")[:2]) + return {'return': 0, 'version': version} From 17d7c0898fcf1eea4a9ad94aedb579999f85004a Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 17:55:05 +0000 Subject: [PATCH 12/80] Support igbh dataset from host --- script/app-mlperf-inference/_cm.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index eb536e8e5..c57003c10 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -781,6 +781,15 @@ variations: - mlperf-accuracy-script - 3d-unet-accuracy-script tags: run,accuracy,mlperf,_igbh + docker: + deps: + - tags: get,dataset,igbh + enable_if_env: + CM_USE_DATASET_FROM_HOST: + - 'yes' + names: + - igbh-original + - igbh-dataset sdxl: group: From 58b3bfb38654d315697b9d6d424188087650fd3d Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 18:22:00 +0000 Subject: [PATCH 13/80] Fix fstring formatting in app-mlperf-inference-mlcommons-python --- script/app-mlperf-inference-mlcommons-python/customize.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/script/app-mlperf-inference-mlcommons-python/customize.py b/script/app-mlperf-inference-mlcommons-python/customize.py index 196bf38dc..1a2d3b023 100644 --- 
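Note: PATCH 13 below fixes an f-string that an automatic formatter had wrapped across physical lines. Before Python 3.12, a replacement field of a single- or double-quoted f-string cannot span lines (PEP 701 later relaxed this), so the wrapped form is a SyntaxError there; a triple-quoted f-string may always span lines. A toy illustration with an assumed server value:

    # Broken on Python < 3.12 (SyntaxError - the quoted f-string cannot
    # continue onto the next physical line):
    #   cmd = f" --api-server {
    #       env['CM_MLPERF_INFERENCE_API_SERVER']} --vllm "
    # Working triple-quoted form:
    env = {"CM_MLPERF_INFERENCE_API_SERVER": "http://localhost:8000"}  # toy value
    cmd = f""" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} --vllm """
    print(cmd)  # -> " --api-server http://localhost:8000 --vllm "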
a/script/app-mlperf-inference-mlcommons-python/customize.py +++ b/script/app-mlperf-inference-mlcommons-python/customize.py @@ -388,10 +388,9 @@ def get_run_cmd_reference( env['CM_VLLM_SERVER_MODEL_NAME'] = env.get( "CM_VLLM_SERVER_MODEL_NAME") or "NousResearch/Meta-Llama-3-8B-Instruct" # env['CM_MLPERF_INFERENCE_API_SERVER'] = "http://localhost:8000" - cmd += f" --api-server { - env['CM_MLPERF_INFERENCE_API_SERVER']} --model-path { - env['CM_VLLM_SERVER_MODEL_NAME']} --api-model-name { - env['CM_VLLM_SERVER_MODEL_NAME']} --vllm " + cmd += f""" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} \ + --model-path {env['CM_VLLM_SERVER_MODEL_NAME']} \ + --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm """ else: cmd += f" --model-path {env['LLAMA2_CHECKPOINT_PATH']}" From c93b9c26c0b601742e7b675b6d5f76dc70d949fd Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 18:39:42 +0000 Subject: [PATCH 14/80] Fix use_dataset_from_host for igbh --- script/app-mlperf-inference-mlcommons-python/_cm.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 15ade7a55..abcb1f293 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -619,9 +619,14 @@ deps: enable_if_env: CM_MODEL: - rgat - skip_if_env: + skip_if_any_env: CM_DATASET_IGBH_PATH: - "on" + skip_if_env: + CM_RUN_STATE_DOCKER: + - 'yes' + CM_USE_DATASET_FROM_HOST: + - 'yes' ######################################################################## # Install MLPerf inference dependencies From 64f69e6429229f81a50313b4eca12e794f7c34c7 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 18:43:37 +0000 Subject: [PATCH 15/80] Remove torchvision deps for mlperf-inference-rgat --- script/app-mlperf-inference-mlcommons-python/_cm.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index abcb1f293..76f534641 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -1240,7 +1240,6 @@ variations: - tags: get,generic-python-lib,_package.tqdm - tags: get,generic-python-lib,_package.requests - tags: get,generic-python-lib,_package.torchdata - - tags: get,generic-python-lib,_package.torchvision - tags: get,generic-python-lib,_package.pybind11 - tags: get,generic-python-lib,_package.PyYAML - tags: get,generic-python-lib,_package.numpy From c1e00cc7beccb509e46a7f0c274eaa4147c2af98 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 19:07:10 +0000 Subject: [PATCH 16/80] Remove torchvision deps for mlperf inference rgat cuda --- script/app-mlperf-inference-mlcommons-python/_cm.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 76f534641..89646244b 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -222,6 +222,7 @@ deps: CM_MODEL: - dlrm-v2-99 - dlrm-v2-99.9 + - rgat enable_if_env: CM_MLPERF_BACKEND: - pytorch @@ -234,6 +235,11 @@ deps: names: - ml-engine-torchvision - torchvision + skip_if_env: + CM_MODEL: + - dlrm-v2-99 + - dlrm-v2-99.9 + - rgat enable_if_env: CM_MLPERF_BACKEND: - pytorch From 
4ef87fab0d33303ca0824027215f1789eaf3496c Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 19:31:24 +0000 Subject: [PATCH 17/80] Create test-mlperf-inference-rgat.yml --- .../workflows/test-mlperf-inference-rgat.yml | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 .github/workflows/test-mlperf-inference-rgat.yml diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml new file mode 100644 index 000000000..de5b0fbb6 --- /dev/null +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -0,0 +1,48 @@ +name: MLPerf inference rgat + +on: + pull_request_target: + branches: [ "main", "dev" ] + paths: + - '.github/workflows/test-mlperf-inference-rgat.yml' + - '**' + - '!**.md' + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest] + python-version: [ "3.12" ] + backend: [ "pytorch" ] + implementation: [ "python" ] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python3 -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" + cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} + - name: Test MLPerf Inference R-GAT using ${{ matrix.backend }} on ${{ matrix.os }} + run: | + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --quiet -v --target_qps=1 + - name: Push Results + if: github.repository_owner == 'gateoverflow' + env: + USER: "GitHub Action" + EMAIL: "admin@gateoverflow.com" + GITHUB_TOKEN: ${{ secrets.TEST_RESULTS_GITHUB_TOKEN }} + run: | + git config --global user.name "${{ env.USER }}" + git config --global user.email "${{ env.EMAIL }}" + git config --global credential.https://github.com.helper "" + git config --global credential.https://github.com.helper "!gh auth git-credential" + git config --global credential.https://gist.github.com.helper "" + git config --global credential.https://gist.github.com.helper "!gh auth git-credential" + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from R-GAT GH action on ${{ matrix.os }}" --quiet From 31c3143f795edcde98f83f08ace2bac04ad5e350 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 19:49:17 +0000 Subject: [PATCH 18/80] Fix default cm-repo-branch for build-dockerfile --- script/build-dockerfile/_cm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/build-dockerfile/_cm.yaml b/script/build-dockerfile/_cm.yaml index 7535311ea..f54d3a216 100644 --- a/script/build-dockerfile/_cm.yaml +++ b/script/build-dockerfile/_cm.yaml @@ -19,7 +19,7 @@ default_env: ' CM_DOCKER_OS: ubuntu CM_DOCKER_NOT_PULL_UPDATE: False - CM_MLOPS_REPO_BRANCH: mlperf-inference + CM_MLOPS_REPO_BRANCH: dev input_mapping: build: CM_BUILD_DOCKER_IMAGE From b899c2019a6fe4b17d81d6cf0b54cd6403358874 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Fri, 13 Dec 2024 12:07:41 +0530 Subject: [PATCH 19/80] capture 
docker tool --- script/get-docker/_cm.yaml | 3 ++- script/get-docker/customize.py | 11 ++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/script/get-docker/_cm.yaml b/script/get-docker/_cm.yaml index e2f33e875..5e652cbe4 100644 --- a/script/get-docker/_cm.yaml +++ b/script/get-docker/_cm.yaml @@ -9,7 +9,8 @@ docker_input_mapping: {} input_description: {} input_mapping: {} new_env_keys: [ - "CM_DOCKER_VERSION" + "CM_DOCKER_VERSION", + "CM_DOCKER_TOOL" ] new_state_keys: [] post_deps: [] diff --git a/script/get-docker/customize.py b/script/get-docker/customize.py index 0d41346b4..ca21e7a7a 100644 --- a/script/get-docker/customize.py +++ b/script/get-docker/customize.py @@ -47,8 +47,14 @@ def detect_version(i): version = r['version'] + tool = "docker" + + if "podman" in r['string'].lower(): + tool = "podman" + + print(i['recursion_spaces'] + ' Detected version: {}'.format(version)) - return {'return': 0, 'version': version} + return {'return': 0, 'version': version, "tool":tool} def postprocess(i): @@ -60,6 +66,7 @@ def postprocess(i): return r version = r['version'] + tool = r['tool'] found_file_path = env['CM_DOCKER_BIN_WITH_PATH'] found_path = os.path.dirname(found_file_path) @@ -69,5 +76,7 @@ def postprocess(i): env['CM_DOCKER_CACHE_TAGS'] = 'version-' + version env['CM_DOCKER_VERSION'] = version + + env['CM_DOCKER_TOOL'] = tool return {'return': 0, 'version': version} From 71fd59a0a9e6f2ed5bd030a8ed9b08a92cbf7825 Mon Sep 17 00:00:00 2001 From: anandhu-eng Date: Fri, 13 Dec 2024 12:10:09 +0530 Subject: [PATCH 20/80] docker tool -> container tool --- script/get-docker/_cm.yaml | 2 +- script/get-docker/customize.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/script/get-docker/_cm.yaml b/script/get-docker/_cm.yaml index 5e652cbe4..881039852 100644 --- a/script/get-docker/_cm.yaml +++ b/script/get-docker/_cm.yaml @@ -10,7 +10,7 @@ input_description: {} input_mapping: {} new_env_keys: [ "CM_DOCKER_VERSION", - "CM_DOCKER_TOOL" + "CM_CONTAINER_TOOL" ] new_state_keys: [] post_deps: [] diff --git a/script/get-docker/customize.py b/script/get-docker/customize.py index ca21e7a7a..30ccf627d 100644 --- a/script/get-docker/customize.py +++ b/script/get-docker/customize.py @@ -77,6 +77,6 @@ def postprocess(i): env['CM_DOCKER_VERSION'] = version - env['CM_DOCKER_TOOL'] = tool + env['CM_CONTAINER_TOOL'] = tool return {'return': 0, 'version': version} From 216081dcd43ded15583f6bb70d366170ec936e3b Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 17:41:48 +0530 Subject: [PATCH 21/80] [Automated Commit] Format Codebase (#51) * Fixes for rgat submission generation --- .../workflows/test-mlperf-inference-rgat.yml | 2 +- script/get-docker/customize.py | 5 ++- script/get-ml-model-rgat/_cm.yaml | 1 + script/process-mlperf-accuracy/customize.py | 34 ++++++++++++++----- script/run-mlperf-inference-app/_cm.yaml | 2 ++ 5 files changed, 32 insertions(+), 12 deletions(-) diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml index de5b0fbb6..478e666c6 100644 --- a/.github/workflows/test-mlperf-inference-rgat.yml +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -31,7 +31,7 @@ jobs: cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - name: Test MLPerf Inference R-GAT using ${{ matrix.backend }} on ${{ matrix.os }} run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short 
--submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --quiet -v --target_qps=1 + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --adr.inference-src.tags=_branch.fix_submission_generation --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 - name: Push Results if: github.repository_owner == 'gateoverflow' env: diff --git a/script/get-docker/customize.py b/script/get-docker/customize.py index 30ccf627d..fbf0a5bbf 100644 --- a/script/get-docker/customize.py +++ b/script/get-docker/customize.py @@ -52,9 +52,8 @@ def detect_version(i): if "podman" in r['string'].lower(): tool = "podman" - print(i['recursion_spaces'] + ' Detected version: {}'.format(version)) - return {'return': 0, 'version': version, "tool":tool} + return {'return': 0, 'version': version, "tool": tool} def postprocess(i): @@ -76,7 +75,7 @@ def postprocess(i): env['CM_DOCKER_CACHE_TAGS'] = 'version-' + version env['CM_DOCKER_VERSION'] = version - + env['CM_CONTAINER_TOOL'] = tool return {'return': 0, 'version': version} diff --git a/script/get-ml-model-rgat/_cm.yaml b/script/get-ml-model-rgat/_cm.yaml index d7615acd2..c4ecc56e0 100644 --- a/script/get-ml-model-rgat/_cm.yaml +++ b/script/get-ml-model-rgat/_cm.yaml @@ -63,4 +63,5 @@ variations: group: download-tool rclone,fp32: env: + CM_ML_MODEL_STARTING_WEIGHTS_FILENAME: https://github.com/mlcommons/inference/tree/master/graph/R-GAT#download-model-using-rclone CM_DOWNLOAD_URL: mlc-inference:mlcommons-inference-wg-public/R-GAT/RGAT.pt diff --git a/script/process-mlperf-accuracy/customize.py b/script/process-mlperf-accuracy/customize.py index f1d8b7874..21569fd57 100644 --- a/script/process-mlperf-accuracy/customize.py +++ b/script/process-mlperf-accuracy/customize.py @@ -118,18 +118,30 @@ def preprocess(i): extra_options = "" if env.get('CM_SDXL_STATISTICS_FILE_PATH', '') != '': - extra_options += f" --statistics-path '{env['CM_SDXL_STATISTICS_FILE_PATH']}' " + extra_options += ( + f""" --statistics-path '{ + env['CM_SDXL_STATISTICS_FILE_PATH']}'""" + ) if env.get('CM_SDXL_COMPLIANCE_IMAGES_PATH', '') != '': - extra_options += f" --compliance-images-path '{env['CM_SDXL_COMPLIANCE_IMAGES_PATH']}' " + extra_options += ( + f""" --compliance-images-path '{ + env['CM_SDXL_COMPLIANCE_IMAGES_PATH']}' """ + ) else: - extra_options += f""" --compliance-images-path '{os.path.join(result_dir, "images")}' """ + extra_options += f""" --compliance-images-path '{ + os.path.join( + result_dir, "images")}' """ if env.get('CM_COCO2014_SAMPLE_ID_PATH', '') != '': - extra_options += f" --ids-path '{env['CM_COCO2014_SAMPLE_ID_PATH']}' " + extra_options += ( + f" --ids-path '{env['CM_COCO2014_SAMPLE_ID_PATH']}' " + ) if env.get('CM_SDXL_ACCURACY_RUN_DEVICE', '') != '': - extra_options += f" --device '{env['CM_SDXL_ACCURACY_RUN_DEVICE']}' " + extra_options += ( + f" --device '{env['CM_SDXL_ACCURACY_RUN_DEVICE']}' " + ) # env['DATASET_ANNOTATIONS_FILE_PATH'] = env['CM_DATASET_ANNOTATIONS_FILE_PATH'] CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "text_to_image", "tools", @@ -162,9 +174,15 @@ def preprocess(i): elif dataset == 
"terabyte": extra_options = "" if env.get('CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH', '') != '': - extra_options += f" --aggregation-trace-file '{env['CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH']}' " + extra_options += ( + f""" --aggregation-trace-file '{ + env['CM_DLRM_V2_AGGREGATION_TRACE_FILE_PATH']}' """ + ) if env.get('CM_DLRM_V2_DAY23_FILE_PATH', '') != '': - extra_options += f" --day-23-file '{env['CM_DLRM_V2_DAY23_FILE_PATH']}' " + extra_options += ( + f""" --day-23-file '{ + env['CM_DLRM_V2_DAY23_FILE_PATH']}' """ + ) CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_DLRM_V2_PATH'], "pytorch", "tools", "accuracy-dlrm.py") + "' --mlperf-accuracy-file '" + os.path.join(result_dir, "mlperf_log_accuracy.json") + "'" + extra_options + \ @@ -179,7 +197,7 @@ def preprocess(i): else: env['CM_DATASET_IGBH_SIZE'] = "tiny" CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "graph", "R-GAT", "tools", "accuracy_igbh.py") + "' --mlperf-accuracy-file '" + os.path.join( - result_dir, "mlperf_log_accuracy.json") + "' --dataset-path '" + env['CM_DATASET_IGBH_PATH'] + "' --dataset-size '" + env['CM_DATASET_IGBH_SIZE'] + "' > '" + out_file + "'" + result_dir, "mlperf_log_accuracy.json") + "' --dataset-path '" + env['CM_DATASET_IGBH_PATH'] + "' --dataset-size '" + env['CM_DATASET_IGBH_SIZE'] + "' --output-file '" + out_file + "'" else: return {'return': 1, 'error': 'Unsupported dataset'} diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 689aaabc3..12b57a6f7 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -344,6 +344,7 @@ variations: env: CM_MLPERF_INFERENCE_VERSION: '4.1' CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.1_default + CM_MLPERF_SUBMISSION_CHECKER_VERSION: v4.1 adr: get-mlperf-inference-results-dir: tags: _version.r4_1 @@ -357,6 +358,7 @@ variations: env: CM_MLPERF_INFERENCE_VERSION: '5.0-dev' CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r5.0-dev_default + CM_MLPERF_SUBMISSION_CHECKER_VERSION: v5.0 group: benchmark-version adr: get-mlperf-inference-results-dir: From 9136723ebe0ab559d4cf53bb0a624ccb3f3bf5e3 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 13:00:46 +0000 Subject: [PATCH 22/80] Update test-mlperf-inference-rgat.yml --- .github/workflows/test-mlperf-inference-rgat.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml index 478e666c6..87a9314ca 100644 --- a/.github/workflows/test-mlperf-inference-rgat.yml +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -31,7 +31,7 @@ jobs: cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - name: Test MLPerf Inference R-GAT using ${{ matrix.backend }} on ${{ matrix.os }} run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --adr.inference-src.tags=_branch.fix_submission_generation --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --adr.inference-src.tags=_branch.fix_submission_generation 
--adr.mlperf-implementation.tags=_branch.fix_submission_generation --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 - name: Push Results if: github.repository_owner == 'gateoverflow' env: From edcf36c05690e92eec0187028c4fa079f4b64f9c Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 19:48:23 +0530 Subject: [PATCH 23/80] Test (#52) * Make r5.0-dev the default version for mlperf-inference --- script/get-dataset-mlperf-inference-igbh/_cm.yaml | 1 + script/run-mlperf-inference-app/_cm.yaml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml index 796d5674e..eac7179b7 100644 --- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml +++ b/script/get-dataset-mlperf-inference-igbh/_cm.yaml @@ -28,6 +28,7 @@ deps: - tags: get,generic-python-lib,_package.colorama - tags: get,generic-python-lib,_package.tqdm + prehook_deps: #paper - env: diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 12b57a6f7..8fe9b88d1 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -327,7 +327,6 @@ variations: tags: _version.r4_0-dev r4.1-dev: - default: true env: CM_MLPERF_INFERENCE_VERSION: '4.1-dev' CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r4.1-dev_default @@ -355,6 +354,7 @@ variations: group: benchmark-version r5.0-dev: + default: true env: CM_MLPERF_INFERENCE_VERSION: '5.0-dev' CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: r5.0-dev_default From a1b8a48ea7316643dd8bce02503477756a193655 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 14 Dec 2024 06:58:59 +0530 Subject: [PATCH 24/80] Test (#53) * Fixes for rgat submission generation --- .github/workflows/test-mlperf-inference-rgat.yml | 4 ++-- script/app-image-classification-onnx-py/_cm.yaml | 1 - .../app-mlperf-inference-mlcommons-python/_cm.yaml | 4 ++-- .../customize.py | 12 ++++++++++-- script/get-mlperf-inference-src/_cm.yaml | 4 ++-- 5 files changed, 16 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml index 87a9314ca..03941023e 100644 --- a/.github/workflows/test-mlperf-inference-rgat.yml +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -1,7 +1,7 @@ name: MLPerf inference rgat on: - pull_request_target: + pull_request: branches: [ "main", "dev" ] paths: - '.github/workflows/test-mlperf-inference-rgat.yml' @@ -31,7 +31,7 @@ jobs: cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - name: Test MLPerf Inference R-GAT using ${{ matrix.backend }} on ${{ matrix.os }} run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --adr.inference-src.tags=_branch.fix_submission_generation --adr.mlperf-implementation.tags=_branch.fix_submission_generation --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short 
--submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 - name: Push Results if: github.repository_owner == 'gateoverflow' env: diff --git a/script/app-image-classification-onnx-py/_cm.yaml b/script/app-image-classification-onnx-py/_cm.yaml index 740a8a18a..e53b91ec2 100644 --- a/script/app-image-classification-onnx-py/_cm.yaml +++ b/script/app-image-classification-onnx-py/_cm.yaml @@ -22,7 +22,6 @@ default_env: deps: - tags: detect,os -#- tags: get,sys-utils-cm - names: - python - python3 diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 89646244b..45401431f 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -1239,8 +1239,8 @@ variations: CM_MODEL: rgat add_deps_recursive: pytorch: - version_max: "2.4.1" - version_max_usable: "2.4.1" + version_max: "2.4.0" + version_max_usable: "2.4.0" deps: - tags: get,generic-python-lib,_package.colorama - tags: get,generic-python-lib,_package.tqdm diff --git a/script/generate-mlperf-inference-submission/customize.py b/script/generate-mlperf-inference-submission/customize.py index 040e7cb09..a27086237 100644 --- a/script/generate-mlperf-inference-submission/customize.py +++ b/script/generate-mlperf-inference-submission/customize.py @@ -160,11 +160,15 @@ def generate_submission(env, state, inp, submission_division): print('* MLPerf inference submitter: {}'.format(submitter)) if env.get('CM_MLPERF_SUT_SW_NOTES_EXTRA', '') != '': - sw_notes = f"{system_meta_tmp['sw_notes']} {env['CM_MLPERF_SUT_SW_NOTES_EXTRA']}" + sw_notes = f"""{ + system_meta_tmp['sw_notes']} { + env['CM_MLPERF_SUT_SW_NOTES_EXTRA']}""" system_meta_tmp['sw_notes'] = sw_notes if env.get('CM_MLPERF_SUT_HW_NOTES_EXTRA', '') != '': - hw_notes = f"{system_meta_tmp['hw_notes']} {env['CM_MLPERF_SUT_HW_NOTES_EXTRA']}" + hw_notes = f"""{ + system_meta_tmp['hw_notes']} { + env['CM_MLPERF_SUT_HW_NOTES_EXTRA']}""" system_meta_tmp['hw_notes'] = hw_notes path_submission = os.path.join(path_submission_division, submitter) @@ -297,6 +301,10 @@ def generate_submission(env, state, inp, submission_division): system_path = os.path.join(path_submission, "systems") submission_system_path = system_path + if not os.path.isdir(submission_path): + os.makedirs(submission_path) + if not os.path.isdir(measurement_path): + os.makedirs(measurement_path) if not os.path.isdir(submission_system_path): os.makedirs(submission_system_path) system_file = os.path.join(submission_system_path, sub_res + ".json") diff --git a/script/get-mlperf-inference-src/_cm.yaml b/script/get-mlperf-inference-src/_cm.yaml index c100e32e8..b8bd39092 100644 --- a/script/get-mlperf-inference-src/_cm.yaml +++ b/script/get-mlperf-inference-src/_cm.yaml @@ -134,10 +134,10 @@ variations: versions: custom: env: - CM_MLPERF_LAST_RELEASE: v4.1 + CM_MLPERF_LAST_RELEASE: v5.0 deepsparse: env: - CM_MLPERF_LAST_RELEASE: v4.1 + CM_MLPERF_LAST_RELEASE: v5.0 CM_TMP_GIT_CHECKOUT: deepsparse CM_TMP_GIT_URL: https://github.com/neuralmagic/inference main: From 48f7a91172035ec5ccf6fc0986e9009bc075b2d6 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 14 Dec 2024 01:45:13 +0000 Subject: [PATCH 25/80] Update VERSION | rgat-fixes --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/VERSION b/VERSION index 45a346dba..592e815ea 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.11 +0.6.12 From 3d9715f2d7e3cbe111ab1f64900c01997ff63f08 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Sat, 14 Dec 2024 01:45:25 +0000 Subject: [PATCH 26/80] Updated git_commit_hash.txt --- git_commit_hash.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_commit_hash.txt b/git_commit_hash.txt index 252b8a36b..89f5a3e18 100644 --- a/git_commit_hash.txt +++ b/git_commit_hash.txt @@ -1 +1 @@ -f5e04069c8d7395be34f94fa8a94edc6c317b58e +48f7a91172035ec5ccf6fc0986e9009bc075b2d6 From 90a4412f05a3c29a536dad41e9c1ee43da0f9514 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 19 Dec 2024 10:34:46 +0000 Subject: [PATCH 27/80] Update MLPerf automation repo in github actions (#54) --- .../test-amd-mlperf-inference-implementations.yml | 4 ++-- ...erence-bert-deepsparse-tf-onnxruntime-pytorch.yml | 2 +- .github/workflows/test-mlperf-inference-dlrm.yml | 4 ++-- .github/workflows/test-mlperf-inference-gptj.yml | 4 ++-- .github/workflows/test-mlperf-inference-llama2.yml | 8 ++++---- .github/workflows/test-mlperf-inference-mixtral.yml | 4 ++-- .../test-mlperf-inference-mlcommons-cpp-resnet50.yml | 2 +- .github/workflows/test-mlperf-inference-resnet50.yml | 2 +- .../workflows/test-mlperf-inference-retinanet.yml | 2 +- .github/workflows/test-mlperf-inference-rgat.yml | 2 +- .github/workflows/test-mlperf-inference-sdxl.yaml | 4 ++-- .../test-nvidia-mlperf-inference-implementations.yml | 2 +- .github/workflows/test-scc24-sdxl.yaml | 12 +++++++----- script/get-dataset-mlperf-inference-igbh/_cm.yaml | 2 +- script/get-gh-actions-runner/_cm.yaml | 7 +++++++ 15 files changed, 35 insertions(+), 26 deletions(-) diff --git a/.github/workflows/test-amd-mlperf-inference-implementations.yml b/.github/workflows/test-amd-mlperf-inference-implementations.yml index b635d266e..2e140c32e 100644 --- a/.github/workflows/test-amd-mlperf-inference-implementations.yml +++ b/.github/workflows/test-amd-mlperf-inference-implementations.yml @@ -22,5 +22,5 @@ jobs: export CM_REPOS=$HOME/GH_CM pip install --upgrade cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_all-scenarios,_full,_r4.1-dev --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=amd --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=rocm --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet --docker_skip_run_cmd=yes - # cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c + cm run script --tags=run-mlperf,inference,_all-scenarios,_full,_r4.1-dev --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=amd --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --device=rocm 
--use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet --docker_skip_run_cmd=yes + # cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=dev --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c diff --git a/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml b/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml index 3594aaf86..9aa9b8293 100644 --- a/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml +++ b/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml @@ -60,4 +60,4 @@ jobs: git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from Bert GH action on ${{ matrix.os }}" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from Bert GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-dlrm.yml b/.github/workflows/test-mlperf-inference-dlrm.yml index 6440d0448..4d7727457 100644 --- a/.github/workflows/test-mlperf-inference-dlrm.yml +++ b/.github/workflows/test-mlperf-inference-dlrm.yml @@ -25,7 +25,7 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_performance-only --adr.mlperf-implementation.tags=_branch.dev --adr.mlperf-implementation.version=custom --submitter="MLCommons" --model=dlrm-v2-99 --implementation=reference --backend=pytorch --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --clean + cm run script --tags=run-mlperf,inference,_performance-only --adr.mlperf-implementation.tags=_branch.dev --adr.mlperf-implementation.version=custom --submitter="MLCommons" --model=dlrm-v2-99 --implementation=reference --backend=pytorch --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --clean build_intel: if: github.repository_owner == 'gateoverflow_off' @@ -45,4 +45,4 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=dlrm-v2-99 --implementation=intel --batch_size=1 --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no 
--docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=dlrm-v2-99 --implementation=intel --batch_size=1 --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean diff --git a/.github/workflows/test-mlperf-inference-gptj.yml b/.github/workflows/test-mlperf-inference-gptj.yml index 0562b9176..db0ed5923 100644 --- a/.github/workflows/test-mlperf-inference-gptj.yml +++ b/.github/workflows/test-mlperf-inference-gptj.yml @@ -26,6 +26,6 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --get_platform_details=yes --implementation=reference --clean - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --get_platform_details=yes --implementation=reference --clean + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-llama2.yml b/.github/workflows/test-mlperf-inference-llama2.yml index ab1c9bb48..6a8c1adb8 100644 --- a/.github/workflows/test-mlperf-inference-llama2.yml +++ b/.github/workflows/test-mlperf-inference-llama2.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: MLPerf inference LLAMA 2 70B +name: MLPerf inference LLAMA2-70B on: schedule: @@ -20,7 +20,7 @@ jobs: precision: [ "bfloat16" ] steps: - - name: Test MLPerf Inference LLAMA 2 70B reference implementation + - name: Test MLPerf Inference LLAMA2-70B reference implementation run: | 
source gh_action/bin/deactivate || python3 -m venv gh_action source gh_action/bin/activate @@ -31,5 +31,5 @@ jobs: pip install "huggingface_hub[cli]" git config --global credential.helper store huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=llama2-70b-99 --implementation=reference --backend=${{ matrix.backend }} --precision=${{ matrix.precision }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=0.001 --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST=yes --clean - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions" --quiet --submission_dir=$HOME/gh_action_submissions + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=llama2-70b-99 --implementation=reference --backend=${{ matrix.backend }} --precision=${{ matrix.precision }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=0.001 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-mixtral.yml b/.github/workflows/test-mlperf-inference-mixtral.yml index 0a6a37708..b29341119 100644 --- a/.github/workflows/test-mlperf-inference-mixtral.yml +++ b/.github/workflows/test-mlperf-inference-mixtral.yml @@ -30,5 +30,5 @@ jobs: git config --global credential.helper store huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1 - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - GO-phoenix" --quiet --submission_dir=$HOME/gh_action_submissions + cm run script 
--tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1 + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - GO-phoenix" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml b/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml index ff856ad54..72b0d1fe3 100644 --- a/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml @@ -59,4 +59,4 @@ jobs: git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from MLCommons C++ ResNet50 GH action on ${{ matrix.os }}" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from MLCommons C++ ResNet50 GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-resnet50.yml b/.github/workflows/test-mlperf-inference-resnet50.yml index 4388e4bb9..54cb7c91c 100644 --- a/.github/workflows/test-mlperf-inference-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-resnet50.yml @@ -64,4 +64,4 @@ jobs: git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from R50 GH action on ${{ matrix.os }}" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from R50 GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-retinanet.yml b/.github/workflows/test-mlperf-inference-retinanet.yml index eac9346fe..a319f6772 100644 --- a/.github/workflows/test-mlperf-inference-retinanet.yml +++ b/.github/workflows/test-mlperf-inference-retinanet.yml @@ -64,4 +64,4 @@ jobs: git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global 
credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from Retinanet GH action on ${{ matrix.os }}" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from Retinanet GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml index 03941023e..259edab72 100644 --- a/.github/workflows/test-mlperf-inference-rgat.yml +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -45,4 +45,4 @@ jobs: git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from R-GAT GH action on ${{ matrix.os }}" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from R-GAT GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-sdxl.yaml b/.github/workflows/test-mlperf-inference-sdxl.yaml index aea41cee2..03ae46d4e 100644 --- a/.github/workflows/test-mlperf-inference-sdxl.yaml +++ b/.github/workflows/test-mlperf-inference-sdxl.yaml @@ -21,5 +21,5 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --adr.mlperf-implementation.tags=_branch.dev --quiet --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --adr.mlperf-implementation.tags=_branch.dev --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github 
actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml index 2974e651c..449fd033b 100644 --- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml +++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - system: [ "GO-spr", "phoenix", "i9" ] + system: [ "GO-spr", "phoenix-Amd-Am5", "GO-i9" ] python-version: [ "3.12" ] model: [ "resnet50", "retinanet", "bert-99", "bert-99.9", "gptj-99.9", "3d-unet-99.9", "sdxl" ] exclude: diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml index 673dba804..fec9d7dbe 100644 --- a/.github/workflows/test-scc24-sdxl.yaml +++ b/.github/workflows/test-scc24-sdxl.yaml @@ -9,7 +9,7 @@ jobs: if: github.repository_owner == 'gateoverflow' runs-on: [ self-hosted, linux, x64, GO-spr ] env: - CM_REPOS: $HOME/GH_CM + CM_DOCKER_REPO: mlperf-automations strategy: fail-fast: false matrix: @@ -27,14 +27,16 @@ jobs: pip install --upgrade cm4mlops pip install tabulate cm pull repo - cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean - cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=generate,inference,submission --clean --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test 
--adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions build_nvidia: if: github.repository_owner == 'gateoverflow' runs-on: [ self-hosted, linux, x64, GO-spr] + env: + CM_DOCKER_REPO: mlperf-automations strategy: fail-fast: false matrix: @@ -52,7 +54,7 @@ jobs: pip install --upgrade cm4mlops pip install tabulate cm pull repo - cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --pull_changes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --custom_system_nvidia=yes --clean - cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --precision=float16 --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --pull_changes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --custom_system_nvidia=yes --clean + cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=generate,inference,submission --clean --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - 
NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-mlperf-inference-igbh/_cm.yaml index eac7179b7..eacd5be5c 100644 --- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml +++ b/script/get-dataset-mlperf-inference-igbh/_cm.yaml @@ -34,7 +34,7 @@ prehook_deps: - env: CM_PACKAGE_URL: https://igb-public.s3.us-east-2.amazonaws.com/IGBH/processed/paper/node_feat.npy CM_DOWNLOAD_CHECKSUM: 71058b9ac8011bafa1c5467504452d13 - CM_DOWNLOAD_FILENAME: node_feet.npy + CM_DOWNLOAD_FILENAME: node_feat.npy CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,node_feat force_cache: true diff --git a/script/get-gh-actions-runner/_cm.yaml b/script/get-gh-actions-runner/_cm.yaml index 3008f6365..287ee254a 100644 --- a/script/get-gh-actions-runner/_cm.yaml +++ b/script/get-gh-actions-runner/_cm.yaml @@ -6,6 +6,7 @@ can_force_cache: true tags: - get - gh +- github - actions-runner - runner-code - runner @@ -29,21 +30,27 @@ deps: variations: config: + group: command + default: true env: CM_GH_ACTIONS_RUNNER_COMMAND: config remove: + group: command env: CM_GH_ACTIONS_RUNNER_COMMAND: remove install: + group: command deps: - tags: get,gh,actions-runner,_config force_cache: yes env: CM_GH_ACTIONS_RUNNER_COMMAND: install uninstall: + group: command env: CM_GH_ACTIONS_RUNNER_COMMAND: uninstall start: + group: command deps: - tags: get,gh,actions-runner,_install force_cache: yes From af15e72948fb4677118a4ae562bc4f37e6e33909 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 19 Dec 2024 19:49:55 +0000 Subject: [PATCH 28/80] Support nvmitten for aarch64 (#55) * Support nvmitten for aarch64 --- script/app-mlperf-inference/_cm.yaml | 8 +++++--- setup.py | 5 +++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index c57003c10..f3ef84523 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -338,12 +338,16 @@ variations: - x86_64 docker: base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.0-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-public + env: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' - skip_if_env: CM_HOST_PLATFORM_FLAVOR: - x86_64 docker: base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v4.1-cuda12.4-pytorch24.04-ubuntu22.04-aarch64-GraceHopper-release + env: + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp310-cp310-linux_aarch64.whl' @@ -1627,7 +1631,7 @@ variations: CM_SKIP_SYS_UTILS: 'yes' CM_REGENERATE_MEASURE_FILES: 'yes' env: - CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3-cp38-cp38-linux_x86_64.whl' + CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' #uses public code for inference v4.1 @@ -1646,8 +1650,6 @@ variations: default_env: CM_SKIP_SYS_UTILS: 'yes' CM_REGENERATE_MEASURE_FILES: 'yes' - env: - CM_ENV_NVMITTEN_DOCKER_WHEEL_PATH: '/opt/nvmitten-0.1.3b0-cp38-cp38-linux_x86_64.whl' r4.1_default: group: diff --git a/setup.py b/setup.py index a6d5a73f7..8cc2aec00 100644 --- a/setup.py +++ b/setup.py @@ -145,11 +145,12 @@ def custom_function(self): 'artifact': 'mlcommons@cm4mlops', 'force': True, 'all': True}) + branch = os.environ.get('CM_MLOPS_REPO_BRANCH', 'dev') r = cmind.access({'action': 'pull', 'automation': 'repo', 'artifact': 'mlcommons@mlperf-automations', - 'checkout': commit_hash}) - # r = cmind.access({'action':'pull', 
'automation':'repo', 'artifact':'mlcommons@mlperf-automations', 'checkout': commit_hash}) + 'checkout': commit_hash, + 'branch': branch}) print(r) if r['return'] > 0: return r['return'] From 8b92713befedfe333570533bc8ca2e3a7bce4085 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Thu, 19 Dec 2024 19:50:13 +0000 Subject: [PATCH 29/80] Increment version to 0.6.13 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 592e815ea..e196726d2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.12 +0.6.13 From b3a34ec085e391351d70a6f663dd0c95b0423e45 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Thu, 19 Dec 2024 19:50:16 +0000 Subject: [PATCH 30/80] Updated git_commit_hash.txt --- git_commit_hash.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_commit_hash.txt b/git_commit_hash.txt index 89f5a3e18..de1c6db94 100644 --- a/git_commit_hash.txt +++ b/git_commit_hash.txt @@ -1 +1 @@ -48f7a91172035ec5ccf6fc0986e9009bc075b2d6 +8b92713befedfe333570533bc8ca2e3a7bce4085 From 3f25d3c211771d6f557e67cfc4c8d9fdcb2c0106 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 20 Dec 2024 09:06:24 +0000 Subject: [PATCH 31/80] Copy bert model for nvidia-mlperf-inference implementation instead of softlink (#56) * Copy bert model for nvidia mlperf inference implementation instead of softlink --- script/app-mlperf-inference-nvidia/customize.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py index a09fd9715..36324cd0c 100644 --- a/script/app-mlperf-inference-nvidia/customize.py +++ b/script/app-mlperf-inference-nvidia/customize.py @@ -87,13 +87,13 @@ def preprocess(i): if not os.path.exists(fp32_model_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_BERT_LARGE_FP32_PATH']} {fp32_model_path}") + f"cp -r {env['CM_ML_MODEL_BERT_LARGE_FP32_PATH']} {fp32_model_path}") if not os.path.exists(int8_model_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_BERT_LARGE_INT8_PATH']} {int8_model_path}") + f"cp -r {env['CM_ML_MODEL_BERT_LARGE_INT8_PATH']} {int8_model_path}") if not os.path.exists(vocab_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_BERT_VOCAB_FILE_WITH_PATH']} {vocab_path}") + f"cp -r {env['CM_ML_MODEL_BERT_VOCAB_FILE_WITH_PATH']} {vocab_path}") model_name = "bert" model_path = fp32_model_path From a09686d660338aaecb77b78881a7e4b25ff5a5ca Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 20 Dec 2024 09:07:40 +0000 Subject: [PATCH 32/80] Update version (#57) * Update VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index e196726d2..fcbaa8478 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.13 +0.6.14 From e6ad511b0535786251b4f30c71c4fb1e5511dcf9 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Fri, 20 Dec 2024 09:07:56 +0000 Subject: [PATCH 33/80] Updated git_commit_hash.txt --- git_commit_hash.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_commit_hash.txt b/git_commit_hash.txt index de1c6db94..2d3c8a628 100644 --- a/git_commit_hash.txt +++ b/git_commit_hash.txt @@ -1 +1 @@ -8b92713befedfe333570533bc8ca2e3a7bce4085 +a09686d660338aaecb77b78881a7e4b25ff5a5ca From f399c2cd7db8406740274e9b7d2898f8ddb229e9 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 20 Dec 2024 19:28:55 +0000 Subject: [PATCH 34/80] Update github actions - use master branch of inference repository (#58) * Use master branch of inference repo in github action --- 
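Note on this commit: the recurring change across the workflow hunks below is to stop pinning sources to ad-hoc feature branches (flags such as --adr.mlperf-implementation.tags=_branch.dev and --adr.submission-checker-src.tags=_branch.dev are removed) and instead fetch the latest inference code at run time via the pull flags. A minimal before/after sketch of the invocation pattern, using flag names taken verbatim from the hunks (the long tail of per-workflow options is elided here as "..."):

    # before: the workflow pinned the implementation to a feature branch
    cm run script --tags=run-mlperf,inference,_performance-only \
        --adr.mlperf-implementation.tags=_branch.dev \
        --adr.mlperf-implementation.version=custom ...

    # after: pull the latest changes of the inference repository at run time
    cm run script --tags=run-mlperf,inference,_performance-only \
        --pull_changes=yes --pull_inference_changes=yes ...
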
.github/workflows/test-cm-based-submission-generation.yml | 2 +- .github/workflows/test-mlperf-inference-dlrm.yml | 2 +- .github/workflows/test-mlperf-inference-gptj.yml | 2 +- .github/workflows/test-mlperf-inference-llama2.yml | 2 +- .github/workflows/test-mlperf-inference-mixtral.yml | 2 +- .github/workflows/test-mlperf-inference-resnet50.yml | 4 ++-- .github/workflows/test-mlperf-inference-retinanet.yml | 4 ++-- .github/workflows/test-mlperf-inference-rgat.yml | 2 +- .github/workflows/test-mlperf-inference-sdxl.yaml | 2 +- .../test-nvidia-mlperf-inference-implementations.yml | 2 +- .github/workflows/test-scc24-sdxl.yaml | 2 +- 11 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test-cm-based-submission-generation.yml b/.github/workflows/test-cm-based-submission-generation.yml index d62f8e59b..22f8914d6 100644 --- a/.github/workflows/test-cm-based-submission-generation.yml +++ b/.github/workflows/test-cm-based-submission-generation.yml @@ -80,7 +80,7 @@ jobs: fi # Dynamically set the log group to simulate a dynamic step name echo "::group::$description" - cm ${{ matrix.action }} script --tags=generate,inference,submission --adr.submission-checker-src.tags=_branch.dev --clean --preprocess_submission=yes --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=${{ matrix.division }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet $extra_run_args + cm ${{ matrix.action }} script --tags=generate,inference,submission --clean --preprocess_submission=yes --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=${{ matrix.division }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet $extra_run_args exit_status=$? 
echo "Exit status for the job ${description} ${exit_status}" if [[ "${{ matrix.case }}" == "case-5" || "${{ matrix.case }}" == "case-6" ]]; then diff --git a/.github/workflows/test-mlperf-inference-dlrm.yml b/.github/workflows/test-mlperf-inference-dlrm.yml index 4d7727457..3ed51759d 100644 --- a/.github/workflows/test-mlperf-inference-dlrm.yml +++ b/.github/workflows/test-mlperf-inference-dlrm.yml @@ -25,7 +25,7 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_performance-only --adr.mlperf-implementation.tags=_branch.dev --adr.mlperf-implementation.version=custom --submitter="MLCommons" --model=dlrm-v2-99 --implementation=reference --backend=pytorch --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --clean + cm run script --tags=run-mlperf,inference,_performance-only --pull_changes=yes --pull_inference_changes=yes --submitter="MLCommons" --model=dlrm-v2-99 --implementation=reference --backend=pytorch --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --clean build_intel: if: github.repository_owner == 'gateoverflow_off' diff --git a/.github/workflows/test-mlperf-inference-gptj.yml b/.github/workflows/test-mlperf-inference-gptj.yml index db0ed5923..6a1152893 100644 --- a/.github/workflows/test-mlperf-inference-gptj.yml +++ b/.github/workflows/test-mlperf-inference-gptj.yml @@ -26,6 +26,6 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --get_platform_details=yes --implementation=reference --clean + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --pull_changes=yes --pull_inference_changes=yes --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --get_platform_details=yes --implementation=reference --clean cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-llama2.yml 
b/.github/workflows/test-mlperf-inference-llama2.yml index 6a8c1adb8..184940330 100644 --- a/.github/workflows/test-mlperf-inference-llama2.yml +++ b/.github/workflows/test-mlperf-inference-llama2.yml @@ -31,5 +31,5 @@ jobs: pip install "huggingface_hub[cli]" git config --global credential.helper store huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=llama2-70b-99 --implementation=reference --backend=${{ matrix.backend }} --precision=${{ matrix.precision }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=0.001 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --model=llama2-70b-99 --implementation=reference --backend=${{ matrix.backend }} --precision=${{ matrix.precision }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=0.001 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-mixtral.yml b/.github/workflows/test-mlperf-inference-mixtral.yml index b29341119..597a4cdbc 100644 --- a/.github/workflows/test-mlperf-inference-mixtral.yml +++ b/.github/workflows/test-mlperf-inference-mixtral.yml @@ -30,5 +30,5 @@ jobs: git config --global credential.helper store huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1 + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations 
--adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1 cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - GO-phoenix" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-resnet50.yml b/.github/workflows/test-mlperf-inference-resnet50.yml index 54cb7c91c..f4ed3f4f1 100644 --- a/.github/workflows/test-mlperf-inference-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-resnet50.yml @@ -46,11 +46,11 @@ jobs: - name: Test MLPerf Inference ResNet50 (Windows) if: matrix.os == 'windows-latest' run: | - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=resnet50 --adr.loadgen.tags=_from-pip --pip_loadgen=yes --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --target_qps=1 -v --quiet + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --hw_name=gh_${{ matrix.os }}_x86 --model=resnet50 --adr.loadgen.tags=_from-pip --pip_loadgen=yes --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --target_qps=1 -v --quiet - name: Test MLPerf Inference ResNet50 (Linux/macOS) if: matrix.os != 'windows-latest' run: | - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=resnet50 --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --target_qps=1 -v --quiet + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --hw_name=gh_${{ matrix.os }}_x86 --model=resnet50 --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --target_qps=1 -v --quiet - name: Push Results if: github.repository_owner == 'gateoverflow' env: diff --git a/.github/workflows/test-mlperf-inference-retinanet.yml b/.github/workflows/test-mlperf-inference-retinanet.yml index a319f6772..f392d2d7d 100644 --- a/.github/workflows/test-mlperf-inference-retinanet.yml +++ b/.github/workflows/test-mlperf-inference-retinanet.yml @@ -46,11 +46,11 @@ jobs: - name: Test MLPerf Inference Retinanet using ${{ matrix.backend }} on ${{ matrix.os }} if: matrix.os == 'windows-latest' run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }} --model=retinanet --adr.loadgen.tags=_from-pip --pip_loadgen=yes --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --quiet -v --target_qps=1 + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes 
--hw_name=gh_${{ matrix.os }} --model=retinanet --adr.loadgen.tags=_from-pip --pip_loadgen=yes --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --quiet -v --target_qps=1 - name: Test MLPerf Inference Retinanet using ${{ matrix.backend }} on ${{ matrix.os }} if: matrix.os != 'windows-latest' run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=retinanet --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --adr.compiler.tags=gcc --quiet -v --target_qps=1 + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --hw_name=gh_${{ matrix.os }}_x86 --model=retinanet --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --adr.compiler.tags=gcc --quiet -v --target_qps=1 - name: Push Results if: github.repository_owner == 'gateoverflow' env: diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml index 259edab72..d2edb5855 100644 --- a/.github/workflows/test-mlperf-inference-rgat.yml +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -31,7 +31,7 @@ jobs: cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - name: Test MLPerf Inference R-GAT using ${{ matrix.backend }} on ${{ matrix.os }} run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --pull_changes=yes --pull_inference_changes=yes --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 - name: Push Results if: github.repository_owner == 'gateoverflow' env: diff --git a/.github/workflows/test-mlperf-inference-sdxl.yaml b/.github/workflows/test-mlperf-inference-sdxl.yaml index 03ae46d4e..d1029e80d 100644 --- a/.github/workflows/test-mlperf-inference-sdxl.yaml +++ b/.github/workflows/test-mlperf-inference-sdxl.yaml @@ -21,5 +21,5 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --adr.mlperf-implementation.tags=_branch.dev --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes 
--docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml index 449fd033b..370f1a74d 100644 --- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml +++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml @@ -39,6 +39,6 @@ jobs: pip install --upgrade cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --adr.submission-checker-src.tags=_branch.dev --execution_mode=valid --gpu_name=rtx_4090 --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet + cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=rtx_4090 --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml index fec9d7dbe..6620c4913 100644 --- a/.github/workflows/test-scc24-sdxl.yaml +++ b/.github/workflows/test-scc24-sdxl.yaml @@ -27,7 +27,7 @@ jobs: pip install --upgrade cm4mlops pip install tabulate cm pull repo - cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions 
--env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --pull_inference_changes=yes --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=generate,inference,submission --clean --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions From d2db3b4da03b8d3ea413c9e1a1bd3a3963a6f911 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 21 Dec 2024 10:01:39 +0000 Subject: [PATCH 35/80] Migrate MLPerf inference unofficial results repo to MLCommons (#59) * Use mlcommons repo for uploading unofficial results of nvidia/intel github actions * Fix format * Fix rgat download path, added libbz2 deps for draw-graph-from-json * Support windows for pull-git-repo * Fix libbz2-dev detect * Added separate installation options for libbz2-dev and bzip2 * Restrict libbz2-dev install only for ubuntu (install-python-src) * Update VERSION --- ...intel-mlperf-inference-implementations.yml | 4 +- ...vidia-mlperf-inference-implementations.yml | 4 +- VERSION | 2 +- automation/script/module.py | 18 ++++++-- .../_cm.yaml | 42 ++++++++++++++++++- .../customize.py | 0 .../run.sh | 0 script/get-generic-sys-util/_cm.yaml | 20 +++++++-- script/get-ml-model-rgat/_cm.yaml | 13 +++--- script/get-ml-model-rgat/customize.py | 7 ++-- script/install-python-src/_cm.yaml | 3 ++ script/pull-git-repo/customize.py | 3 -- script/pull-git-repo/run.bat | 26 ++++++++++++ 13 files changed, 117 insertions(+), 25 deletions(-) rename script/{get-dataset-mlperf-inference-igbh => get-dataset-igbh}/_cm.yaml (94%) rename script/{get-dataset-mlperf-inference-igbh => get-dataset-igbh}/customize.py (100%) rename script/{get-dataset-mlperf-inference-igbh => get-dataset-igbh}/run.sh (100%) create mode 100644 script/pull-git-repo/run.bat diff --git a/.github/workflows/test-intel-mlperf-inference-implementations.yml b/.github/workflows/test-intel-mlperf-inference-implementations.yml index 9063af791..166a1a77c 100644 --- a/.github/workflows/test-intel-mlperf-inference-implementations.yml +++ 
b/.github/workflows/test-intel-mlperf-inference-implementations.yml @@ -22,5 +22,5 @@ jobs: export CM_REPOS=$HOME/GH_CM pip install --upgrade cm4mlops pip install tabulate - cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=intel --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=cpu --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c + cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=intel --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_cm_repo=mlcommons@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --device=cpu --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml index 370f1a74d..83e357613 100644 --- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml +++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml @@ -39,6 +39,6 @@ jobs: pip install --upgrade cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=rtx_4090 --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt=yes --docker_it=no --docker_cm_repo=gateoverflow@cm4mlops --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet + cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=rtx_4090 --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt=yes --docker_it=no 
--docker_cm_repo=mlcommons@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from GH action on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name diff --git a/VERSION b/VERSION index fcbaa8478..6769f67e2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.14 +0.6.15 diff --git a/automation/script/module.py b/automation/script/module.py index f0347bbeb..fe0582648 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -1798,9 +1798,16 @@ def _run(self, i): tmp_curdir = os.getcwd() if env.get('CM_OUTDIRNAME', '') != '': - if not os.path.exists(env['CM_OUTDIRNAME']): - os.makedirs(env['CM_OUTDIRNAME']) - os.chdir(env['CM_OUTDIRNAME']) + if os.path.isabs(env['CM_OUTDIRNAME']) or recursion: + c_outdirname = env['CM_OUTDIRNAME'] + else: + c_outdirname = os.path.join( + env['CM_TMP_CURRENT_PATH'], env['CM_OUTDIRNAME']) + env['CM_OUTDIRNAME'] = c_outdirname + + if not os.path.exists(c_outdirname): + os.makedirs(c_outdirname) + os.chdir(c_outdirname) # Check if pre-process and detect if 'preprocess' in dir(customize_code) and not fake_run: @@ -5860,7 +5867,10 @@ def convert_env_to_script(env, os_info, start_script=None): key = key[1:] # Append the existing environment variable to the new value - env_value = f"{env_separator.join(env_value)}{env_separator}{os_info['env_var'].replace('env_var', key)}" + env_value = f"""{ + env_separator.join(env_value)}{env_separator}{ + os_info['env_var'].replace( + 'env_var', key)}""" # Replace placeholders in the platform-specific environment command env_command = os_info['set_env'].replace( diff --git a/script/get-dataset-mlperf-inference-igbh/_cm.yaml b/script/get-dataset-igbh/_cm.yaml similarity index 94% rename from script/get-dataset-mlperf-inference-igbh/_cm.yaml rename to script/get-dataset-igbh/_cm.yaml index eacd5be5c..1b7c86ae7 100644 --- a/script/get-dataset-mlperf-inference-igbh/_cm.yaml +++ b/script/get-dataset-igbh/_cm.yaml @@ -1,4 +1,4 @@ -alias: get-dataset-mlperf-inference-igbh +alias: get-dataset-igbh automation_alias: script automation_uid: 5b4e0237da074764 cache: true @@ -37,6 +37,8 @@ prehook_deps: CM_DOWNLOAD_FILENAME: node_feat.npy CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,node_feat + force_env_keys: + - CM_OUTDIRNAME force_cache: true enable_if_env: CM_DATASET_IGBH_TYPE: @@ -54,6 +56,8 @@ prehook_deps: CM_DOWNLOAD_FILENAME: node_label_19.npy CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,node_label_19 + force_env_keys: + - CM_OUTDIRNAME force_cache: true enable_if_env: CM_DATASET_IGBH_TYPE: @@ -72,6 +76,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,node_label_2K force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ 
-89,6 +95,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/paper/ extra_cache_tags: dataset,igbh,paper,paper_id_index_mapping force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -107,6 +115,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/paper__cites__paper/ extra_cache_tags: dataset,igbh,paper_cites_paper,edge_index force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -125,6 +135,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/author/ extra_cache_tags: dataset,igbh,author,author_id_index_mapping force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -142,6 +154,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/author/ extra_cache_tags: dataset,igbh,author,node_feat force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -160,6 +174,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/conference/ extra_cache_tags: dataset,igbh,conference,conference_id_index_mapping force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -177,6 +193,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/conference/ extra_cache_tags: dataset,igbh,conference,node_feat force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -195,6 +213,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/institute/ extra_cache_tags: dataset,igbh,institute,institute_id_index_mapping force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -212,6 +232,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/institute/ extra_cache_tags: dataset,igbh,institute,node_feat force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -230,6 +252,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/journal/ extra_cache_tags: dataset,igbh,journal,journal_id_index_mapping force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -247,6 +271,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/journal/ extra_cache_tags: dataset,igbh,journal,node_feat force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -265,6 +291,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/fos/ extra_cache_tags: dataset,igbh,fos,fos_id_index_mapping force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -282,6 +310,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/fos/ extra_cache_tags: dataset,igbh,fos,node_feat force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -300,6 +330,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/author__affiliated_to__institute/ extra_cache_tags: dataset,igbh,author_affiliated_to_institute,edge_index force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -318,6 +350,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/paper__published__journal/ extra_cache_tags: dataset,igbh,paper_published_journal,edge_index force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -336,6 +370,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/paper__topic__fos/ extra_cache_tags: dataset,igbh,paper_topic_fos,edge_index 
force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -354,6 +390,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/paper__venue__conference/ extra_cache_tags: dataset,igbh,paper_venue_conference,edge_index force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' @@ -372,6 +410,8 @@ prehook_deps: CM_DOWNLOAD_PATH: <<>>/full/processed/paper__written_by__author/ extra_cache_tags: dataset,igbh,paper_written_by_author,edge_index force_cache: true + force_env_keys: + - CM_OUTDIRNAME enable_if_env: CM_DATASET_IGBH_TYPE: - 'full' diff --git a/script/get-dataset-mlperf-inference-igbh/customize.py b/script/get-dataset-igbh/customize.py similarity index 100% rename from script/get-dataset-mlperf-inference-igbh/customize.py rename to script/get-dataset-igbh/customize.py diff --git a/script/get-dataset-mlperf-inference-igbh/run.sh b/script/get-dataset-igbh/run.sh similarity index 100% rename from script/get-dataset-mlperf-inference-igbh/run.sh rename to script/get-dataset-igbh/run.sh diff --git a/script/get-generic-sys-util/_cm.yaml b/script/get-generic-sys-util/_cm.yaml index 1d45c2c28..b75e24bbc 100644 --- a/script/get-generic-sys-util/_cm.yaml +++ b/script/get-generic-sys-util/_cm.yaml @@ -212,21 +212,33 @@ variations: brew: '' dnf: boost-devel yum: boost-devel - libbz2-dev: + bzip2: env: - CM_SYS_UTIL_NAME: libbz2_dev + CM_SYS_UTIL_NAME: bzip2 CM_SYS_UTIL_VERSION_CMD_OVERRIDE: bzcat --version 2>&1 | grep bzip > tmp-ver.out CM_SYS_UTIL_VERSION_RE: ([0-9]+(\.[0-9]+)+) CM_TMP_VERSION_DETECT_GROUP_NUMBER: 1 new_env_keys: + - CM_BZIP2_VERSION + state: + bzip2: + apt: bzip2 + brew: bzip2 + dnf: bzip2 + yum: bzip2 + libbz2-dev: + env: + CM_SYS_UTIL_NAME: libbz2_dev + CM_SYS_UTIL_VERSION_CMD: dpkg -s libbz2-dev | grep 'Version' + CM_SYS_UTIL_VERSION_RE: ([0-9]+(\.[0-9]+)+) + CM_TMP_VERSION_DETECT_GROUP_NUMBER: 0 + new_env_keys: - CM_LIBBZ2_DEV_VERSION state: libbz2_dev: apt: libbz2-dev - brew: bzip2 dnf: libbzip2-devel yum: libbzip2-devel - zlib-devel: libbz2-devel libev-dev: env: CM_SYS_UTIL_NAME: libev_dev diff --git a/script/get-ml-model-rgat/_cm.yaml b/script/get-ml-model-rgat/_cm.yaml index c4ecc56e0..bcec6df1c 100644 --- a/script/get-ml-model-rgat/_cm.yaml +++ b/script/get-ml-model-rgat/_cm.yaml @@ -21,12 +21,14 @@ prehook_deps: CM_TMP_REQUIRE_DOWNLOAD: - 'yes' env: - CM_DOWNLOAD_FINAL_ENV_NAME: CM_ML_MODEL_PATH - extra_cache_tags: rgat,gnn,model + CM_DOWNLOAD_FINAL_ENV_NAME: RGAT_DIR_PATH + extra_cache_tags: rgat,gnn,model,ml-model force_cache: true names: - - dae - tags: download-and-extract + - download-file + tags: download,file + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_DOWNLOAD_URL @@ -55,7 +57,7 @@ variations: group: download-source rclone: adr: - dae: + download-file: tags: _rclone env: CM_DOWNLOAD_TOOL: rclone @@ -65,3 +67,4 @@ variations: env: CM_ML_MODEL_STARTING_WEIGHTS_FILENAME: https://github.com/mlcommons/inference/tree/master/graph/R-GAT#download-model-using-rclone CM_DOWNLOAD_URL: mlc-inference:mlcommons-inference-wg-public/R-GAT/RGAT.pt + CM_DOWNLOAD_FILENAME: RGAT diff --git a/script/get-ml-model-rgat/customize.py b/script/get-ml-model-rgat/customize.py index df810a5ab..d920a8711 100644 --- a/script/get-ml-model-rgat/customize.py +++ b/script/get-ml-model-rgat/customize.py @@ -20,9 +20,10 @@ def postprocess(i): env = i['env'] if env.get('CM_ML_MODEL_RGAT_CHECKPOINT_PATH', '') == '': - env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] = os.path.join( - 
env['CM_ML_MODEL_PATH'], "RGAT.pt") - elif env.get('CM_ML_MODEL_PATH', '') == '': + env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] = env.get( + 'RGAT_CHECKPOINT_PATH', os.path.join(env['RGAT_DIR_PATH'], "RGAT.pt")) + + if env.get('CM_ML_MODEL_PATH', '') == '': env['CM_ML_MODEL_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] env['RGAT_CHECKPOINT_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] diff --git a/script/install-python-src/_cm.yaml b/script/install-python-src/_cm.yaml index c0a618346..5aeed2a6b 100644 --- a/script/install-python-src/_cm.yaml +++ b/script/install-python-src/_cm.yaml @@ -16,6 +16,9 @@ deps: - tags: detect,cpu - tags: get,generic-sys-util,_libffi-dev - tags: get,generic-sys-util,_libbz2-dev + enable_if_env: + CM_HOST_OS_FLAVOR: + - ubuntu - tags: get,generic-sys-util,_libssl-dev - enable_if_env: CM_HOST_OS_FLAVOR: diff --git a/script/pull-git-repo/customize.py b/script/pull-git-repo/customize.py index 55a581bb5..3dfd21e26 100644 --- a/script/pull-git-repo/customize.py +++ b/script/pull-git-repo/customize.py @@ -7,9 +7,6 @@ def preprocess(i): os_info = i['os_info'] - if os_info['platform'] == 'windows': - return {'return': 1, 'error': 'Windows is not supported in this script yet'} - env = i['env'] meta = i['meta'] diff --git a/script/pull-git-repo/run.bat b/script/pull-git-repo/run.bat new file mode 100644 index 000000000..8642fce0e --- /dev/null +++ b/script/pull-git-repo/run.bat @@ -0,0 +1,26 @@ +@echo off +setlocal enabledelayedexpansion + +REM Save the current directory +set "CUR_DIR=%CD%" +set "SCRIPT_DIR=%CM_TMP_CURRENT_SCRIPT_PATH%" + +REM Change to the specified path +set "path=%CM_GIT_CHECKOUT_PATH%" +echo cd %path% + +cd /d "%path%" +if errorlevel 1 ( + echo Failed to change directory to %path% + exit /b %errorlevel% +) + +REM Execute the Git pull command +echo %CM_GIT_PULL_CMD% +call %CM_GIT_PULL_CMD% +REM Don't fail if there are local changes +REM if errorlevel 1 exit /b %errorlevel% + +REM Return to the original directory +cd /d "%CUR_DIR%" +endlocal From 2b1e23c9adc298d5bb6eeacdc567bda04ed14155 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Sat, 21 Dec 2024 10:01:53 +0000 Subject: [PATCH 36/80] Updated git_commit_hash.txt --- git_commit_hash.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_commit_hash.txt b/git_commit_hash.txt index 2d3c8a628..8db05eabf 100644 --- a/git_commit_hash.txt +++ b/git_commit_hash.txt @@ -1 +1 @@ -a09686d660338aaecb77b78881a7e4b25ff5a5ca +d2db3b4da03b8d3ea413c9e1a1bd3a3963a6f911 From 3439a72cdc9aaec2bdbaf81683eb4eeb63a0cdee Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 21 Dec 2024 10:15:53 +0000 Subject: [PATCH 37/80] Create reset-fork.yml --- .github/workflows/reset-fork.yml | 42 ++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .github/workflows/reset-fork.yml diff --git a/.github/workflows/reset-fork.yml b/.github/workflows/reset-fork.yml new file mode 100644 index 000000000..396c56f48 --- /dev/null +++ b/.github/workflows/reset-fork.yml @@ -0,0 +1,42 @@ +name: Reset Current Branch to Upstream After Squash Merge + +on: + workflow_dispatch: + inputs: + branch: + description: 'Branch to reset (leave blank for current branch)' + required: false + default: '' + +jobs: + reset-branch: + runs-on: ubuntu-latest + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Detect Current Branch + if: ${{ inputs.branch == '' }} + run: echo "branch=$(git rev-parse --abbrev-ref HEAD)" >> $GITHUB_ENV + + - name: Use Input Branch 
+ if: ${{ inputs.branch != '' }} + run: echo "branch=${{ inputs.branch }}" >> $GITHUB_ENV + + - name: Add Upstream Remote + run: | + git remote add upstream https://github.com/mlcommons/mlperf-automations.git + git fetch upstream + - name: Reset Branch to Upstream + run: | + git checkout ${{ env.branch }} + git reset --hard upstream/${{ env.branch }} + if: success() + + - name: Force Push to Origin + run: | + git push origin ${{ env.branch }} --force-with-lease + if: success() From 5ddfc95490ded9d680e2d70fa2617a34b4f68e3c Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 21 Dec 2024 12:15:54 +0000 Subject: [PATCH 38/80] Update pyproject.toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e55cac00a..18cdea502 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,2 @@ [build-system] -requires = ["setuptools>=60", "wheel", "cmind @ git+https://git@github.com/mlcommons/ck.git@084d4d6171a6e7ae9582a63777fbd19afa19947a#egg=cmind&subdirectory=cm"] +requires = ["setuptools>=60", "wheel", "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#egg=cmind&subdirectory=cm"] From f5eb712934119192e9b562cf6d06987c7ec8106a Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 21 Dec 2024 12:16:54 +0000 Subject: [PATCH 39/80] Update VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 6769f67e2..c4c2d2b11 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.15 +0.6.16 From 17833dfe8f1c751ca25fa2d97e55bfc1b8c12366 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Sat, 21 Dec 2024 12:17:06 +0000 Subject: [PATCH 40/80] Updated git_commit_hash.txt --- git_commit_hash.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_commit_hash.txt b/git_commit_hash.txt index 8db05eabf..39ee5566a 100644 --- a/git_commit_hash.txt +++ b/git_commit_hash.txt @@ -1 +1 @@ -d2db3b4da03b8d3ea413c9e1a1bd3a3963a6f911 +f5eb712934119192e9b562cf6d06987c7ec8106a From cfd76e1ca790263392fe4ee524bf987c880b685c Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 21 Dec 2024 12:48:12 +0000 Subject: [PATCH 41/80] Fix scc24 github action (#61) * Update test-scc24-sdxl.yaml * Fix scc24 github action --- .github/workflows/test-scc24-sdxl.yaml | 16 +++++++++------- script/app-mlperf-inference/_cm.yaml | 1 - 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml index 6620c4913..1dd73836d 100644 --- a/.github/workflows/test-scc24-sdxl.yaml +++ b/.github/workflows/test-scc24-sdxl.yaml @@ -2,14 +2,15 @@ name: MLPerf inference SDXL (SCC) on: schedule: - - cron: "20 01 * * *" + - cron: "40 12 * * *" jobs: build_reference: if: github.repository_owner == 'gateoverflow' runs-on: [ self-hosted, linux, x64, GO-spr ] env: - CM_DOCKER_REPO: mlperf-automations + CM_DOCKER_REPO: mlcommons@mlperf-automations + CM_DOCKER_REPO_BRANCH: dev strategy: fail-fast: false matrix: @@ -27,8 +28,8 @@ jobs: pip install --upgrade cm4mlops pip install tabulate cm pull repo - cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --pull_inference_changes=yes --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet 
--results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean - cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --pull_inference_changes=yes --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_cm_repo_branch=$CM_DOCKER_REPO_BRANCH --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=reference --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_cm_repo_branch=$CM_DOCKER_REPO_BRANCH --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=generate,inference,submission --clean --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions @@ -36,7 +37,8 @@ jobs: if: github.repository_owner == 'gateoverflow' runs-on: [ self-hosted, linux, x64, GO-spr] env: - CM_DOCKER_REPO: mlperf-automations + CM_DOCKER_REPO: mlcommons@mlperf-automations + CM_DOCKER_REPO_BRANCH: dev strategy: fail-fast: false matrix: @@ -54,7 +56,7 @@ jobs: pip install --upgrade cm4mlops pip install tabulate cm pull repo - cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --pull_changes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --custom_system_nvidia=yes --clean - cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ 
matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean + cm run script --tags=run-mlperf,inference,_find-performance,_r4.1-dev,_short,_scc24-base --pull_changes=yes --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_cm_repo_branch=$CM_DOCKER_REPO_BRANCH --docker_dt=yes --pull_changes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --hw_name=go-spr --custom_system_nvidia=yes --clean + cm run script --tags=run-mlperf,inference,_r4.1-dev,_short,_scc24-base --model=sdxl --implementation=nvidia --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --precision=${{ matrix.precision }} --docker --docker_it=no --docker_cm_repo=$CM_DOCKER_REPO --docker_cm_repo_branch=$CM_DOCKER_REPO_BRANCH --docker_dt=yes --quiet --results_dir=$HOME/scc_gh_action_results --submission_dir=$HOME/scc_gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean cm run script --tags=generate,inference,submission --clean --run-checker --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=open --category=datacenter --run_style=test --adr.submission-checker.tags=_short-run --quiet --submitter=MLCommons --submission_dir=$HOME/scc_gh_action_submissions --results_dir=$HOME/scc_gh_action_results/test_results cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/cm4mlperf-inference --repo_branch=mlperf-inference-results-scc24 --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/scc_gh_action_submissions diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index f3ef84523..9ba86635c 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -1797,7 +1797,6 @@ docker: pre_run_cmds: #- cm pull repo && cm run script --tags=get,git,repo,_repo.https://github.com/GATEOverflow/inference_results_v4.0.git --update - cm pull repo - - cm rm cache --tags=inference,src -f mounts: - "${{ CM_DATASET_IMAGENET_PATH }}:${{ CM_DATASET_IMAGENET_PATH }}" - "${{ CM_DATASET_OPENIMAGES_PATH }}:${{ CM_DATASET_OPENIMAGES_PATH }}" From d0c6c3eb97402c8f97b1c7bf70eaa0fb86902951 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 21 Dec 2024 22:00:17 +0000 Subject: [PATCH 42/80] Fix dangling softlink issue with nvidia-mlperf-inference-bert (#64) * Remove destination for dangling symbolic links (mlperf-inference-nvidia-bert) * Fix format nvidia-mlperf-inference code --- ...vidia-mlperf-inference-implementations.yml | 2 +- .github/workflows/test-scc24-sdxl.yaml | 2 +- .../app-mlperf-inference-nvidia/customize.py | 30 +++++++++++-------- .../get-mlperf-inference-utils/customize.py | 2 +- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml 
b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml index 83e357613..986fc2569 100644 --- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml +++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml @@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations on: schedule: - - cron: "15 02 * * *" #to be adjusted + - cron: "08 13 * * *" #to be adjusted jobs: run_nvidia: diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml index 1dd73836d..7f402ce7d 100644 --- a/.github/workflows/test-scc24-sdxl.yaml +++ b/.github/workflows/test-scc24-sdxl.yaml @@ -2,7 +2,7 @@ name: MLPerf inference SDXL (SCC) on: schedule: - - cron: "40 12 * * *" + - cron: "56 12 * * *" jobs: build_reference: diff --git a/script/app-mlperf-inference-nvidia/customize.py b/script/app-mlperf-inference-nvidia/customize.py index 36324cd0c..f9eb06822 100644 --- a/script/app-mlperf-inference-nvidia/customize.py +++ b/script/app-mlperf-inference-nvidia/customize.py @@ -37,14 +37,14 @@ def preprocess(i): make_command = env['MLPERF_NVIDIA_RUN_COMMAND'] if make_command == "prebuild": - cmds.append(f"make prebuild NETWORK_NODE=SUT") + cmds.append(f"""make prebuild NETWORK_NODE=SUT""") if env['CM_MODEL'] == "resnet50": target_data_path = os.path.join( env['MLPERF_SCRATCH_PATH'], 'data', 'imagenet') if not os.path.exists(target_data_path): cmds.append( - f"ln -sf {env['CM_DATASET_IMAGENET_PATH']} {target_data_path}") + f"""ln -sf {env['CM_DATASET_IMAGENET_PATH']} {target_data_path}""") model_path = os.path.join( env['MLPERF_SCRATCH_PATH'], @@ -53,11 +53,11 @@ def preprocess(i): 'resnet50_v1.onnx') if not os.path.exists(os.path.dirname(model_path)): - cmds.append(f"mkdir -p {os.path.dirname(model_path)}") + cmds.append(f"""mkdir -p {os.path.dirname(model_path)}""") if not os.path.exists(model_path): cmds.append( - f"ln -sf {env['CM_ML_MODEL_FILE_WITH_PATH']} {model_path}") + f"""ln -sf {env['CM_ML_MODEL_FILE_WITH_PATH']} {model_path}""") model_name = "resnet50" elif "bert" in env['CM_MODEL']: @@ -83,17 +83,17 @@ def preprocess(i): 'vocab.txt') if not os.path.exists(os.path.dirname(fp32_model_path)): - cmds.append(f"mkdir -p {os.path.dirname(fp32_model_path)}") + cmds.append(f"""mkdir -p {os.path.dirname(fp32_model_path)}""") if not os.path.exists(fp32_model_path): cmds.append( - f"cp -r {env['CM_ML_MODEL_BERT_LARGE_FP32_PATH']} {fp32_model_path}") + f"""cp -r --remove-destination {env['CM_ML_MODEL_BERT_LARGE_FP32_PATH']} {fp32_model_path}""") if not os.path.exists(int8_model_path): cmds.append( - f"cp -r {env['CM_ML_MODEL_BERT_LARGE_INT8_PATH']} {int8_model_path}") + f"""cp -r --remove-destination {env['CM_ML_MODEL_BERT_LARGE_INT8_PATH']} {int8_model_path}""") if not os.path.exists(vocab_path): cmds.append( - f"cp -r {env['CM_ML_MODEL_BERT_VOCAB_FILE_WITH_PATH']} {vocab_path}") + f"""cp -r --remove-destination {env['CM_ML_MODEL_BERT_VOCAB_FILE_WITH_PATH']} {vocab_path}""") model_name = "bert" model_path = fp32_model_path @@ -112,9 +112,9 @@ def preprocess(i): # cmds.append("make download_data BENCHMARKS='stable-diffusion-xl'") env['CM_REQUIRE_COCO2014_DOWNLOAD'] = 'yes' cmds.append( - f"cp -r \\$CM_DATASET_PATH_ROOT/captions/captions.tsv {target_data_path}/captions_5k_final.tsv") + f"""cp -r \\$CM_DATASET_PATH_ROOT/captions/captions.tsv {target_data_path}/captions_5k_final.tsv""") cmds.append( - f"cp -r \\$CM_DATASET_PATH_ROOT/latents/latents.pt {target_data_path}/latents.pt") + f"""cp -r \\$CM_DATASET_PATH_ROOT/latents/latents.pt 
{target_data_path}/latents.pt""") fp16_model_path = os.path.join( env['MLPERF_SCRATCH_PATH'], 'models', @@ -124,7 +124,7 @@ def preprocess(i): 'stable_diffusion_fp16') if not os.path.exists(os.path.dirname(fp16_model_path)): - cmds.append(f"mkdir -p {os.path.dirname(fp16_model_path)}") + cmds.append(f"""mkdir -p {os.path.dirname(fp16_model_path)}""") if not os.path.exists(fp16_model_path): if os.path.islink(fp16_model_path): @@ -687,11 +687,15 @@ def preprocess(i): '') # will be ignored during build engine if "stable-diffusion" in env["CM_MODEL"]: - extra_build_engine_options_string += f" --model_path {os.path.join(env['MLPERF_SCRATCH_PATH'], 'models', 'SDXL/')}" + extra_build_engine_options_string += f""" --model_path { + os.path.join( + env['MLPERF_SCRATCH_PATH'], + 'models', + 'SDXL/')}""" run_config += " --no_audit_verify" - cmds.append(f"make {make_command} RUN_ARGS=' --benchmarks={model_name} --scenarios={scenario} {test_mode_string} {run_config} {extra_build_engine_options_string} {extra_run_options_string}'") + cmds.append(f"""make {make_command} RUN_ARGS=' --benchmarks={model_name} --scenarios={scenario} {test_mode_string} {run_config} {extra_build_engine_options_string} {extra_run_options_string}'""") run_cmd = " && ".join(cmds) env['CM_MLPERF_RUN_CMD'] = run_cmd diff --git a/script/get-mlperf-inference-utils/customize.py b/script/get-mlperf-inference-utils/customize.py index 6f7f0a49b..179342fc7 100644 --- a/script/get-mlperf-inference-utils/customize.py +++ b/script/get-mlperf-inference-utils/customize.py @@ -15,7 +15,7 @@ def preprocess(i): quiet = (env.get('CM_QUIET', False) == 'yes') - utils_path = i['run_script_input']['path'] + utils_path = env['CM_TMP_CURRENT_SCRIPT_PATH'] env['+PYTHONPATH'] = [utils_path] From 188708bd8e944a9c05175db7fd34dee6d7c1d5fc Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 21 Dec 2024 22:00:55 +0000 Subject: [PATCH 43/80] Update VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index c4c2d2b11..fa209468a 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.16 +0.6.17 From 26cf83375a3f55b8c7aaf9bebb7cf5b7508614f8 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Sat, 21 Dec 2024 22:01:06 +0000 Subject: [PATCH 44/80] Updated git_commit_hash.txt --- git_commit_hash.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_commit_hash.txt b/git_commit_hash.txt index 39ee5566a..d4ddba84d 100644 --- a/git_commit_hash.txt +++ b/git_commit_hash.txt @@ -1 +1 @@ -f5eb712934119192e9b562cf6d06987c7ec8106a +188708bd8e944a9c05175db7fd34dee6d7c1d5fc From 7f48c88e72c6036b555bb80d01f853bc468e8b06 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 21 Dec 2024 22:11:16 +0000 Subject: [PATCH 45/80] Support pull_inference_changes in run-mlperf-inference-app (#65) * Update test-scc24-sdxl.yaml * Update test-nvidia-mlperf-inference-implementations.yml * Added pull_inference_changes support to run-mlperf-inference-app --- .../test-nvidia-mlperf-inference-implementations.yml | 2 +- .github/workflows/test-scc24-sdxl.yaml | 2 +- script/run-mlperf-inference-app/_cm.yaml | 6 ++++++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml index 986fc2569..f25dab3d9 100644 --- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml +++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml @@ -2,7 +2,7 @@ name: MLPerf Inference 
Nvidia implementations on: schedule: - - cron: "08 13 * * *" #to be adjusted + - cron: "08 23 * * *" #to be adjusted jobs: run_nvidia: diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml index 7f402ce7d..b637b0226 100644 --- a/.github/workflows/test-scc24-sdxl.yaml +++ b/.github/workflows/test-scc24-sdxl.yaml @@ -2,7 +2,7 @@ name: MLPerf inference SDXL (SCC) on: schedule: - - cron: "56 12 * * *" + - cron: "56 22 * * *" jobs: build_reference: diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 8fe9b88d1..202da39d4 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -137,6 +137,12 @@ deps: - names: - inference-src tags: get,mlcommons,inference,src +- tags: pull,git,repo + env: + CM_GIT_CHECKOUT_PATH: '<<>>' + enable_if_env: + CM_MLPERF_INFERENCE_PULL_SRC_CHANGES: + - 'yes' - tags: get,sut,description skip_if_env: CM_MLPERF_USE_DOCKER: [ on ] From b051bb1858fc61de02fc68765fc11155fe457b2a Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 22 Dec 2024 03:45:09 +0530 Subject: [PATCH 46/80] Added pull_inference_changes support to run-mlperf-inference-app --- script/run-mlperf-inference-app/_cm.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index 202da39d4..a2e3688e7 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -138,11 +138,11 @@ deps: - inference-src tags: get,mlcommons,inference,src - tags: pull,git,repo - env: - CM_GIT_CHECKOUT_PATH: '<<>>' - enable_if_env: - CM_MLPERF_INFERENCE_PULL_SRC_CHANGES: - - 'yes' + env: + CM_GIT_CHECKOUT_PATH: '<<>>' + enable_if_env: + CM_MLPERF_INFERENCE_PULL_SRC_CHANGES: + - 'yes' - tags: get,sut,description skip_if_env: CM_MLPERF_USE_DOCKER: [ on ] From 7bc5f0d8059435374d8e789879754640707ae557 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 22 Dec 2024 22:42:29 +0000 Subject: [PATCH 47/80] Fix github action failures (#68) * Update test-nvidia-mlperf-inference-implementations.yml * Update test-mlperf-inference-mixtral.yml * Fix submission generation github action * Set predeps:False for mlperf-inference-submission-generation * Added version support for submission generation --- .../test-cm-based-submission-generation.yml | 15 +++++++++++---- .../workflows/test-mlperf-inference-mixtral.yml | 1 + ...st-nvidia-mlperf-inference-implementations.yml | 3 ++- automation/script/module.py | 2 +- .../generate-mlperf-inference-submission/_cm.yaml | 2 ++ 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-cm-based-submission-generation.yml b/.github/workflows/test-cm-based-submission-generation.yml index 22f8914d6..b0e91421a 100644 --- a/.github/workflows/test-cm-based-submission-generation.yml +++ b/.github/workflows/test-cm-based-submission-generation.yml @@ -80,19 +80,26 @@ jobs: fi # Dynamically set the log group to simulate a dynamic step name echo "::group::$description" - cm ${{ matrix.action }} script --tags=generate,inference,submission --clean --preprocess_submission=yes --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=${{ matrix.division }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet $extra_run_args + cm ${{ matrix.action }} script --tags=generate,inference,submission --version=v4.1 
--clean --preprocess_submission=yes --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=${{ matrix.division }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet $extra_run_args exit_status=$? + exit $? || echo "STEP_FAILED=true" >> $GITHUB_ENV echo "Exit status for the job ${description} ${exit_status}" if [[ "${{ matrix.case }}" == "case-5" || "${{ matrix.case }}" == "case-6" ]]; then # For cases 5 and 6, exit status should be 0 if cm command fails, 1 if it succeeds if [[ ${exit_status} -ne 0 ]]; then - exit 0 + echo "STEP_FAILED=false" >> $GITHUB_ENV else - exit ${exit_status} + echo "STEP_FAILED=true" >> $GITHUB_ENV fi else # For other cases, exit with the original status - test ${exit_status} -eq 0 || exit ${exit_status} + test ${exit_status} -eq 0 || echo "STEP_FAILED=true" >> $GITHUB_ENV fi echo "::endgroup::" + - name: Fail if Step Failed + if: env.STEP_FAILED == 'true' + continue-on-error: false + run: | + echo "Manually failing the workflow because the step failed." + exit 1 diff --git a/.github/workflows/test-mlperf-inference-mixtral.yml b/.github/workflows/test-mlperf-inference-mixtral.yml index 597a4cdbc..a12f58c5f 100644 --- a/.github/workflows/test-mlperf-inference-mixtral.yml +++ b/.github/workflows/test-mlperf-inference-mixtral.yml @@ -10,6 +10,7 @@ on: jobs: build_reference: if: github.repository_owner == 'gateoverflow' + timeout-minutes: 1440 runs-on: [ self-hosted, phoenix, linux, x64 ] strategy: fail-fast: false diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml index f25dab3d9..fec9fd549 100644 --- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml +++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml @@ -2,11 +2,12 @@ name: MLPerf Inference Nvidia implementations on: schedule: - - cron: "08 23 * * *" #to be adjusted + - cron: "08 01 * * *" #to be adjusted jobs: run_nvidia: if: github.repository_owner == 'gateoverflow' + timeout-minutes: 1440 runs-on: - self-hosted - linux diff --git a/automation/script/module.py b/automation/script/module.py index fe0582648..eda2920c7 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -1635,7 +1635,7 @@ def _run(self, i): 'self': self } - # Check if pre-process and detect + # Check and run predeps in customize.py if str(meta.get('predeps', 'True')).lower() not in ["0", "false", "no"] and os.path.isfile( path_to_customize_py): # possible duplicate execution - needs fix r = utils.load_python_module( diff --git a/script/generate-mlperf-inference-submission/_cm.yaml b/script/generate-mlperf-inference-submission/_cm.yaml index 064cbc307..5e43dd787 100644 --- a/script/generate-mlperf-inference-submission/_cm.yaml +++ b/script/generate-mlperf-inference-submission/_cm.yaml @@ -7,6 +7,7 @@ default_env: CM_MLPERF_RUN_STYLE: valid CM_MLPERF_SUBMISSION_DIR_SHARED: 'yes' CM_RUN_MLPERF_ACCURACY: 'on' +predeps: False deps: - names: - python @@ -84,6 +85,7 @@ input_mapping: sw_notes_extra: CM_MLPERF_SUT_SW_NOTES_EXTRA tar: CM_TAR_SUBMISSION_DIR get_platform_details: CM_GET_PLATFORM_DETAILS + version: CM_MLPERF_SUBMISSION_CHECKER_VERSION post_deps: - enable_if_env: CM_RUN_MLPERF_ACCURACY: From 225220c7d9bb7e66e5b9a1e1ebfc3e0180fbd094 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 22 Dec 2024 22:52:05 +0000 Subject: [PATCH 48/80] Update test-cm4mlops-wheel-ubuntu.yml --- 
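Note on the predeps gate changed above in automation/script/module.py: any
value other than an explicit false-like string counts as enabled, which is
why generate-mlperf-inference-submission now opts out with "predeps: False".
A minimal sketch of the same truthiness check (the helper name is
illustrative, not part of the codebase):

    def predeps_enabled(meta):
        # Anything other than "0"/"false"/"no" (case-insensitive) is
        # treated as enabled, mirroring the check in module.py.
        return str(meta.get('predeps', 'True')).lower() not in ["0", "false", "no"]

    assert predeps_enabled({}) is True                   # default: run predeps
    assert predeps_enabled({'predeps': False}) is False  # YAML predeps: False
    assert predeps_enabled({'predeps': 'no'}) is False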
.github/workflows/test-cm4mlops-wheel-ubuntu.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/test-cm4mlops-wheel-ubuntu.yml b/.github/workflows/test-cm4mlops-wheel-ubuntu.yml index 4a37a5290..6ec9905d6 100644 --- a/.github/workflows/test-cm4mlops-wheel-ubuntu.yml +++ b/.github/workflows/test-cm4mlops-wheel-ubuntu.yml @@ -5,7 +5,6 @@ on: branches: - main - dev - - mlperf-inference paths: - '.github/workflows/test-cm4mlops-wheel-ubuntu.yml' - 'setup.py' @@ -16,7 +15,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, ubuntu-20.04] - python-version: ['3.7', '3.8', '3.11', '3.12'] + python-version: ['3.8', '3.11', '3.12'] exclude: - os: ubuntu-latest python-version: "3.8" From bb79019139303a8b103c741526cc7afa59bd8370 Mon Sep 17 00:00:00 2001 From: sahilavaran <139779393+sahilavaran@users.noreply.github.com> Date: Mon, 23 Dec 2024 12:47:00 +0000 Subject: [PATCH 49/80] support --outdirname for ml models, partially fixed #63 (#71) --- script/get-ml-model-3d-unet-kits19/_cm.yaml | 8 ++++++++ script/get-ml-model-bert-large-squad/_cm.yaml | 4 ++++ script/get-ml-model-dlrm-terabyte/_cm.yaml | 2 ++ script/get-ml-model-gptj/_cm.yaml | 2 ++ script/get-ml-model-huggingface-zoo/_cm.yaml | 2 ++ script/get-ml-model-llama2/_cm.yaml | 2 ++ script/get-ml-model-mixtral/_cm.yaml | 2 ++ script/get-ml-model-retinanet-nvidia/_cm.yaml | 2 ++ script/get-ml-model-retinanet/_cm.yaml | 4 ++++ script/get-ml-model-stable-diffusion/_cm.yaml | 2 ++ script/get-ml-model-tiny-resnet/_cm.yaml | 2 ++ 11 files changed, 32 insertions(+) diff --git a/script/get-ml-model-3d-unet-kits19/_cm.yaml b/script/get-ml-model-3d-unet-kits19/_cm.yaml index 7dc7f5b06..658f306a7 100644 --- a/script/get-ml-model-3d-unet-kits19/_cm.yaml +++ b/script/get-ml-model-3d-unet-kits19/_cm.yaml @@ -43,6 +43,8 @@ variations: extra_cache_tags: 3d-unet,medical-imaging force-cache: true tags: download,file,download-file,_wget + force_env_keys: + - CM_OUTDIRNAME env: CM_ML_MODEL_ACCURACY: '0.86170' CM_ML_MODEL_FILE: 3dunet_kits19_128x128x128_dynbatch.onnx @@ -60,6 +62,8 @@ variations: extra_cache_tags: 3d-unet,medical-imaging force-cache: true tags: download,file,download-file,_wget + force_env_keys: + - CM_OUTDIRNAME env: CM_ML_MODEL_ACCURACY: '0.86170' CM_ML_MODEL_FILE: 3dunet_kits19_pytorch.ptc @@ -73,6 +77,8 @@ variations: extra_cache_tags: 3d-unet,medical-imaging force-cache: true tags: download-and-extract,_wget,_extract + force_env_keys: + - CM_OUTDIRNAME env: CM_ML_MODEL_ACCURACY: '0.86170' CM_ML_MODEL_FILE: 3dunet_kits19_pytorch_checkpoint.pth @@ -92,6 +98,8 @@ variations: extra_cache_tags: 3d-unet,medical-imaging force-cache: true tags: download-and-extract,_wget,_extract + force_env_keys: + - CM_OUTDIRNAME env: CM_ML_MODEL_ACCURACY: '0.86170' CM_ML_MODEL_FILE: 3dunet_kits19_128x128x128.tf diff --git a/script/get-ml-model-bert-large-squad/_cm.yaml b/script/get-ml-model-bert-large-squad/_cm.yaml index e5b4d11bb..e81819279 100644 --- a/script/get-ml-model-bert-large-squad/_cm.yaml +++ b/script/get-ml-model-bert-large-squad/_cm.yaml @@ -13,6 +13,8 @@ new_env_keys: - CM_ML_MODEL* post_deps: - tags: get,dataset-aux,squad-vocab + force_env_keys: + - CM_OUTDIRNAME prehook_deps: - env: CM_DOWNLOAD_FINAL_ENV_NAME: CM_ML_MODEL_FILE_WITH_PATH @@ -25,6 +27,8 @@ prehook_deps: CM_ML_MODEL_BERT_PACKED: - 'yes' tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/script/get-ml-model-dlrm-terabyte/_cm.yaml 
b/script/get-ml-model-dlrm-terabyte/_cm.yaml index 6227a9a17..4d5c93f1b 100644 --- a/script/get-ml-model-dlrm-terabyte/_cm.yaml +++ b/script/get-ml-model-dlrm-terabyte/_cm.yaml @@ -25,6 +25,8 @@ prehook_deps: names: - dae tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/script/get-ml-model-gptj/_cm.yaml b/script/get-ml-model-gptj/_cm.yaml index 25e8deca4..7d645976a 100644 --- a/script/get-ml-model-gptj/_cm.yaml +++ b/script/get-ml-model-gptj/_cm.yaml @@ -29,6 +29,8 @@ prehook_deps: names: - dae tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_DOWNLOAD_URL diff --git a/script/get-ml-model-huggingface-zoo/_cm.yaml b/script/get-ml-model-huggingface-zoo/_cm.yaml index d10c3f448..154675bc1 100644 --- a/script/get-ml-model-huggingface-zoo/_cm.yaml +++ b/script/get-ml-model-huggingface-zoo/_cm.yaml @@ -38,6 +38,8 @@ variations: - env: CM_GIT_CHECKOUT_PATH_ENV_NAME: CM_ML_MODEL_PATH tags: get,git,repo,_lfs + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _repo.https://huggingface.co/: - CM_MODEL_ZOO_STUB diff --git a/script/get-ml-model-llama2/_cm.yaml b/script/get-ml-model-llama2/_cm.yaml index 2ff45866b..fe082718e 100644 --- a/script/get-ml-model-llama2/_cm.yaml +++ b/script/get-ml-model-llama2/_cm.yaml @@ -26,6 +26,8 @@ prehook_deps: names: - hf-zoo tags: get,ml-model,huggingface,zoo,_clone-repo + force_env_keys: + - CM_OUTDIRNAME print_env_at_the_end: LLAMA2_CHECKPOINT_PATH: LLAMA2 checkpoint path tags: diff --git a/script/get-ml-model-mixtral/_cm.yaml b/script/get-ml-model-mixtral/_cm.yaml index 2542d4dc7..5b61ef9ca 100644 --- a/script/get-ml-model-mixtral/_cm.yaml +++ b/script/get-ml-model-mixtral/_cm.yaml @@ -22,6 +22,8 @@ prehook_deps: names: - hf-zoo tags: get,ml-model,huggingface,zoo,_clone-repo + force_env_keys: + - CM_OUTDIRNAME print_env_at_the_end: MIXTRAL_CHECKPOINT_PATH: MIXTRAL checkpoint path tags: diff --git a/script/get-ml-model-retinanet-nvidia/_cm.yaml b/script/get-ml-model-retinanet-nvidia/_cm.yaml index 4e114e43d..7f6880322 100644 --- a/script/get-ml-model-retinanet-nvidia/_cm.yaml +++ b/script/get-ml-model-retinanet-nvidia/_cm.yaml @@ -14,6 +14,8 @@ deps: - tags: get,mlperf,training,src,_nvidia-retinanet - tags: get,mlperf,inference,src - tags: get,ml-model,retinanet,_pytorch,_fp32,_weights + force_env_keys: + - CM_OUTDIRNAME - enable_if_env: CM_TORCH_DEVICE: cpu tags: get,generic-python-lib,_torch diff --git a/script/get-ml-model-retinanet/_cm.yaml b/script/get-ml-model-retinanet/_cm.yaml index 90e937000..8da05da0e 100644 --- a/script/get-ml-model-retinanet/_cm.yaml +++ b/script/get-ml-model-retinanet/_cm.yaml @@ -28,6 +28,8 @@ prehook_deps: CM_TMP_ML_MODEL_RETINANET_NO_NMS: - 'yes' tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL @@ -80,6 +82,8 @@ variations: extra_cache_tags: retinanet,training,patch,file force_cache: true tags: download,file,_url.https://raw.githubusercontent.com/arjunsuresh/ck-qaic/main/package/model-onnx-mlperf-retinanet-no-nms/remove-nms-and-extract-priors.patch + force_env_keys: + - CM_OUTDIRNAME - env: CM_GIT_CHECKOUT_PATH_ENV_NAME: CM_MLPERF_TRAINING_REPO_PATCHED_PATH CM_GIT_PATCH_FILEPATHS: <<>> diff --git a/script/get-ml-model-stable-diffusion/_cm.yaml b/script/get-ml-model-stable-diffusion/_cm.yaml index b2326daff..ae9ee2757 100644 --- a/script/get-ml-model-stable-diffusion/_cm.yaml +++ 
b/script/get-ml-model-stable-diffusion/_cm.yaml @@ -28,6 +28,8 @@ prehook_deps: names: - hf-zoo tags: get,ml-model,huggingface,zoo,_clone-repo,_model-stub.stabilityai/stable-diffusion-xl-base-1.0 + force_env_keys: + - CM_OUTDIRNAME - enable_if_env: CM_DOWNLOAD_TOOL: - rclone diff --git a/script/get-ml-model-tiny-resnet/_cm.yaml b/script/get-ml-model-tiny-resnet/_cm.yaml index 4f8406e29..791ecccee 100644 --- a/script/get-ml-model-tiny-resnet/_cm.yaml +++ b/script/get-ml-model-tiny-resnet/_cm.yaml @@ -22,6 +22,8 @@ prehook_deps: env: CM_EXTRACT_EXTRACTED_FILENAME: <<>> tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL From a9e8329cf5f036aea3c491f0a375cce2d89b5cd1 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 23 Dec 2024 13:05:24 +0000 Subject: [PATCH 50/80] Update test-cm-based-submission-generation.yml (#73) --- .github/workflows/test-cm-based-submission-generation.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-cm-based-submission-generation.yml b/.github/workflows/test-cm-based-submission-generation.yml index b0e91421a..65262dea6 100644 --- a/.github/workflows/test-cm-based-submission-generation.yml +++ b/.github/workflows/test-cm-based-submission-generation.yml @@ -82,7 +82,6 @@ jobs: echo "::group::$description" cm ${{ matrix.action }} script --tags=generate,inference,submission --version=v4.1 --clean --preprocess_submission=yes --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=${{ matrix.division }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet $extra_run_args exit_status=$? - exit $? || echo "STEP_FAILED=true" >> $GITHUB_ENV echo "Exit status for the job ${description} ${exit_status}" if [[ "${{ matrix.case }}" == "case-5" || "${{ matrix.case }}" == "case-6" ]]; then # For cases 5 and 6, exit status should be 0 if cm command fails, 1 if it succeeds @@ -93,7 +92,11 @@ jobs: fi else # For other cases, exit with the original status - test ${exit_status} -eq 0 || echo "STEP_FAILED=true" >> $GITHUB_ENV + if [[ ${exit_status} -eq 0 ]]; then + echo "STEP_FAILED=false" >> $GITHUB_ENV + else + echo "STEP_FAILED=true" >> $GITHUB_ENV + fi fi echo "::endgroup::" - name: Fail if Step Failed From 7dcef66c48436c29b6faae8f6b00ee4f81265617 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 23 Dec 2024 14:03:09 +0000 Subject: [PATCH 51/80] Fix exit code for docker run failures (#74) * Update test-cm-based-submission-generation.yml * Force exit code = 1, for docker failures with exit code=512 --- script/run-docker-container/customize.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/script/run-docker-container/customize.py b/script/run-docker-container/customize.py index 9703080d0..869386f91 100644 --- a/script/run-docker-container/customize.py +++ b/script/run-docker-container/customize.py @@ -314,6 +314,8 @@ def postprocess(i): print('') docker_out = os.system(CMD) if docker_out != 0: + if docker_out % 256 == 0: + docker_out = 1 return {'return': docker_out, 'error': 'docker run failed'} return {'return': 0} From d28df7ea880337a024f9c3854d3fdd14224b8e8e Mon Sep 17 00:00:00 2001 From: sahilavaran <139779393+sahilavaran@users.noreply.github.com> Date: Mon, 23 Dec 2024 15:07:11 +0000 Subject: [PATCH 52/80] Support --outdirname for datasets fixes #63 (#75) * support --outdirname for ml models, partially fixed #63 * support --outdirname for ml 
datasets fixes #63 --- script/get-dataset-imagenet-aux/_cm.yaml | 2 ++ script/get-dataset-imagenet-calibration/_cm.yaml | 2 ++ script/get-dataset-imagenet-val/_cm.yaml | 2 ++ script/get-dataset-mlperf-inference-mixtral/_cm.yaml | 4 +++- script/get-dataset-openimages-annotations/_cm.yaml | 2 ++ script/get-dataset-openimages-calibration/_cm.yaml | 2 ++ script/get-dataset-openorca/_cm.yaml | 2 ++ script/get-dataset-squad-vocab/_cm.yaml | 2 ++ script/get-dataset-squad/_cm.yaml | 2 ++ 9 files changed, 19 insertions(+), 1 deletion(-) diff --git a/script/get-dataset-imagenet-aux/_cm.yaml b/script/get-dataset-imagenet-aux/_cm.yaml index 242b53abc..c5944aedf 100644 --- a/script/get-dataset-imagenet-aux/_cm.yaml +++ b/script/get-dataset-imagenet-aux/_cm.yaml @@ -14,6 +14,8 @@ prehook_deps: extra_cache_tags: imagenet-aux,dataset-aux force_cache: true tags: download-and-extract,_extract,_wget + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/script/get-dataset-imagenet-calibration/_cm.yaml b/script/get-dataset-imagenet-calibration/_cm.yaml index 741d7e205..7e499146a 100644 --- a/script/get-dataset-imagenet-calibration/_cm.yaml +++ b/script/get-dataset-imagenet-calibration/_cm.yaml @@ -10,6 +10,8 @@ category: "AI/ML datasets" deps: - tags: download,file + force_env_keys: + - CM_OUTDIRNAME force_cache: true extra_cache_tags: imagenet-calibration,imagenet,calibration names: diff --git a/script/get-dataset-imagenet-val/_cm.yaml b/script/get-dataset-imagenet-val/_cm.yaml index 0b9923927..0a23afac1 100644 --- a/script/get-dataset-imagenet-val/_cm.yaml +++ b/script/get-dataset-imagenet-val/_cm.yaml @@ -35,6 +35,8 @@ prehook_deps: env: CM_EXTRACT_TO_FOLDER: imagenet-2012-val tags: download-and-extract,file,_extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env: - CM_DAE_EXTRA_TAGS update_tags_from_env_with_prefix: diff --git a/script/get-dataset-mlperf-inference-mixtral/_cm.yaml b/script/get-dataset-mlperf-inference-mixtral/_cm.yaml index f8684eef5..566f7bb05 100644 --- a/script/get-dataset-mlperf-inference-mixtral/_cm.yaml +++ b/script/get-dataset-mlperf-inference-mixtral/_cm.yaml @@ -8,9 +8,11 @@ new_env_keys: prehook_deps: - env: CM_DOWNLOAD_FINAL_ENV_NAME: CM_DATASET_PREPROCESSED_PATH - extra_cache_tags: mixtral,get-mixtral-dataset + extra_cache_tags: mixtral,get-mixtral-dataset force_cache: true tags: download-and-extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/script/get-dataset-openimages-annotations/_cm.yaml b/script/get-dataset-openimages-annotations/_cm.yaml index a96e7f58e..16158cef6 100644 --- a/script/get-dataset-openimages-annotations/_cm.yaml +++ b/script/get-dataset-openimages-annotations/_cm.yaml @@ -12,6 +12,8 @@ prehook_deps: extra_cache_tags: retinanet,get,dataset-openimages-annotations force_cache: true tags: download-and-extract,_wget,_extract + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/script/get-dataset-openimages-calibration/_cm.yaml b/script/get-dataset-openimages-calibration/_cm.yaml index b8bd73e12..6edd3716c 100644 --- a/script/get-dataset-openimages-calibration/_cm.yaml +++ b/script/get-dataset-openimages-calibration/_cm.yaml @@ -10,6 +10,8 @@ category: "AI/ML datasets" deps: - tags: download,file + force_env_keys: + - CM_OUTDIRNAME force_cache: true extra_cache_tags: openimages-calibration,openimages,calibration names: diff --git a/script/get-dataset-openorca/_cm.yaml 
b/script/get-dataset-openorca/_cm.yaml index 861c49575..c860b0213 100644 --- a/script/get-dataset-openorca/_cm.yaml +++ b/script/get-dataset-openorca/_cm.yaml @@ -15,6 +15,8 @@ deps: names: - openorca-src tags: get,git,repo,_lfs,_repo.https://huggingface.co/datasets/Open-Orca/OpenOrca + force_env_keys: + - CM_OUTDIRNAME env: CM_DATASET: OPENORCA new_env_keys: diff --git a/script/get-dataset-squad-vocab/_cm.yaml b/script/get-dataset-squad-vocab/_cm.yaml index a6ec2e902..aa1bad21c 100644 --- a/script/get-dataset-squad-vocab/_cm.yaml +++ b/script/get-dataset-squad-vocab/_cm.yaml @@ -12,6 +12,8 @@ prehook_deps: extra_cache_tags: bert,get,dataset-squad-vocab force_cache: true tags: download-and-extract,_wget + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL diff --git a/script/get-dataset-squad/_cm.yaml b/script/get-dataset-squad/_cm.yaml index a9dd6ed94..d47fc9ce3 100644 --- a/script/get-dataset-squad/_cm.yaml +++ b/script/get-dataset-squad/_cm.yaml @@ -16,6 +16,8 @@ prehook_deps: extra_cache_tags: bert,get,dataset-squad force_cache: true tags: download-and-extract,_wget + force_env_keys: + - CM_OUTDIRNAME update_tags_from_env_with_prefix: _url.: - CM_PACKAGE_URL From cf575d03d1856bb36cb31934416b30fd1afed3ed Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 23 Dec 2024 22:05:15 +0000 Subject: [PATCH 53/80] Support version in preprocess-submission, cleanups for coco2014 script (#76) * Dont use 'install' sub directory for coco2014 * Update test-scc24-sdxl.yaml * Update test-mlperf-inference-mixtral.yml * Support version in preprocess-mlperf-inference-submission --- .github/workflows/test-mlperf-inference-mixtral.yml | 2 +- .../test-nvidia-mlperf-inference-implementations.yml | 2 +- .github/workflows/test-scc24-sdxl.yaml | 2 +- script/get-dataset-coco2014/customize.py | 8 ++++---- script/get-dataset-coco2014/run.sh | 3 +-- script/preprocess-mlperf-inference-submission/_cm.yaml | 1 + .../preprocess-mlperf-inference-submission/customize.py | 5 ++++- 7 files changed, 13 insertions(+), 10 deletions(-) diff --git a/.github/workflows/test-mlperf-inference-mixtral.yml b/.github/workflows/test-mlperf-inference-mixtral.yml index a12f58c5f..04a944fd4 100644 --- a/.github/workflows/test-mlperf-inference-mixtral.yml +++ b/.github/workflows/test-mlperf-inference-mixtral.yml @@ -5,7 +5,7 @@ name: MLPerf inference MIXTRAL-8x7B on: schedule: - - cron: "08 23 * * *" # 30th minute and 20th hour => 20:30 UTC => 2 AM IST + - cron: "59 19 * * *" # 30th minute and 20th hour => 20:30 UTC => 2 AM IST jobs: build_reference: diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml index fec9fd549..2bbccae6b 100644 --- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml +++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml @@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations on: schedule: - - cron: "08 01 * * *" #to be adjusted + - cron: "08 01 * * */3" #to be adjusted jobs: run_nvidia: diff --git a/.github/workflows/test-scc24-sdxl.yaml b/.github/workflows/test-scc24-sdxl.yaml index b637b0226..65e2e7510 100644 --- a/.github/workflows/test-scc24-sdxl.yaml +++ b/.github/workflows/test-scc24-sdxl.yaml @@ -2,7 +2,7 @@ name: MLPerf inference SDXL (SCC) on: schedule: - - cron: "56 22 * * *" + - cron: "34 19 * * *" jobs: build_reference: diff --git a/script/get-dataset-coco2014/customize.py b/script/get-dataset-coco2014/customize.py index 
e231126a5..b03e75049 100644 --- a/script/get-dataset-coco2014/customize.py +++ b/script/get-dataset-coco2014/customize.py @@ -27,14 +27,14 @@ def postprocess(i): os.getcwd(), 'install', 'sample_ids.txt') print(env['CM_COCO2014_SAMPLE_ID_PATH']) if env.get('CM_DATASET_CALIBRATION', '') == "no": - env['CM_DATASET_PATH_ROOT'] = os.path.join(os.getcwd(), 'install') + env['CM_DATASET_PATH_ROOT'] = os.getcwd() # env['CM_DATASET_PATH'] = os.path.join(os.getcwd(), 'install', 'validation', 'data') env['CM_DATASET_CAPTIONS_DIR_PATH'] = os.path.join( - os.getcwd(), 'install', 'captions') + os.getcwd(), 'captions') env['CM_DATASET_LATENTS_DIR_PATH'] = os.path.join( - os.getcwd(), 'install', 'latents') + os.getcwd(), 'latents') else: env['CM_CALIBRATION_DATASET_PATH'] = os.path.join( - os.getcwd(), 'install', 'calibration', 'data') + os.getcwd(), 'calibration', 'data') return {'return': 0} diff --git a/script/get-dataset-coco2014/run.sh b/script/get-dataset-coco2014/run.sh index 61b9ffe52..3685b161c 100644 --- a/script/get-dataset-coco2014/run.sh +++ b/script/get-dataset-coco2014/run.sh @@ -5,8 +5,7 @@ python3() { export -f python3 CUR=${PWD} -mkdir -p install -INSTALL_DIR=${CUR}/install +INSTALL_DIR=${CUR} cd ${CM_RUN_DIR} diff --git a/script/preprocess-mlperf-inference-submission/_cm.yaml b/script/preprocess-mlperf-inference-submission/_cm.yaml index 40ff0c669..eb5f959b4 100644 --- a/script/preprocess-mlperf-inference-submission/_cm.yaml +++ b/script/preprocess-mlperf-inference-submission/_cm.yaml @@ -22,6 +22,7 @@ deps: input_mapping: input: CM_MLPERF_INFERENCE_SUBMISSION_DIR submission_dir: CM_MLPERF_INFERENCE_SUBMISSION_DIR + version: CM_MLPERF_SUBMISSION_CHECKER_VERSION submitter: CM_MLPERF_SUBMITTER tags: - run diff --git a/script/preprocess-mlperf-inference-submission/customize.py b/script/preprocess-mlperf-inference-submission/customize.py index fc91b08a8..f4ea74c8d 100644 --- a/script/preprocess-mlperf-inference-submission/customize.py +++ b/script/preprocess-mlperf-inference-submission/customize.py @@ -29,8 +29,11 @@ def preprocess(i): print(f"Cleaning {submission_processed}") shutil.rmtree(submission_processed) + version = env.get('CM_MLPERF_SUBMISSION_CHECKER_VERSION', '') + x_version = ' --version ' + version + ' ' if version != '' else '' + CMD = env['CM_PYTHON_BIN'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "tools", "submission", - "preprocess_submission.py") + "' --input '" + submission_dir + "' --submitter '" + submitter + "' --output '" + submission_processed + "'" + "preprocess_submission.py") + "' --input '" + submission_dir + "' --submitter '" + submitter + "' --output '" + submission_processed + "'" + x_version env['CM_RUN_CMD'] = CMD return {'return': 0} From 1fc32abb06bbad6dc5dc6593f50495d353f2496b Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 24 Dec 2024 14:56:55 +0530 Subject: [PATCH 54/80] Fixed stable-diffusion-xl name in SUT configs --- .../pytorch-framework/default-config.yaml | 38 ------------------- .../tensorrt-framework/default-config.yaml | 30 --------------- .../default-config.yaml | 5 +-- .../tensorrt-framework/default-config.yaml | 38 ------------------- .../default-config.yaml | 2 +- 5 files changed, 3 insertions(+), 110 deletions(-) delete mode 100644 script/get-mlperf-inference-sut-configs/configs/IntelSPR.24c/intel-implementation/cpu-device/pytorch-framework/default-config.yaml delete mode 100644 script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml 
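The key rename matters because per-SUT target QPS values are looked up by
the canonical benchmark name, so an "sdxl" entry is never found when the
tooling asks for "stable-diffusion-xl". A rough sketch of such a lookup,
with the file name and nesting assumed from the configs in this patch:

    import yaml

    with open("default-config.yaml") as f:
        cfg = yaml.safe_load(f)

    # The top-level key must match the canonical model name
    # ("stable-diffusion-xl"), not the "sdxl" alias removed here.
    target_qps = cfg.get("stable-diffusion-xl", {}).get(
        "Offline", {}).get("target_qps")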
delete mode 100644 script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml diff --git a/script/get-mlperf-inference-sut-configs/configs/IntelSPR.24c/intel-implementation/cpu-device/pytorch-framework/default-config.yaml b/script/get-mlperf-inference-sut-configs/configs/IntelSPR.24c/intel-implementation/cpu-device/pytorch-framework/default-config.yaml deleted file mode 100644 index 040ccbdcc..000000000 --- a/script/get-mlperf-inference-sut-configs/configs/IntelSPR.24c/intel-implementation/cpu-device/pytorch-framework/default-config.yaml +++ /dev/null @@ -1,38 +0,0 @@ ---- - resnet50: - Offline: - target_qps: 1000.0 - Server: - target_qps: 500.0 - retinanet: - Offline: - target_qps: 50.0 - Server: - target_qps: 30 - bert-99: - Offline: - target_qps: 100 - bert-99.9: - Offline: - target_qps: 100 - 3d-unet-99: - Offline: - target_qps: 1.0 - 3d-unet-99.9: - Offline: - target_qps: 1.0 - gptj-99.9: - Offline: - target_qps: 0.5 - Server: - target_qps: 0.3 - gptj-99: - Offline: - target_qps: 0.5 - Server: - target_qps: 0.3 - sdxl: - Offline: - target_qps: 0.1 - Server: - target_qps: 0.1 diff --git a/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml b/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml deleted file mode 100644 index f7a8477a8..000000000 --- a/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml +++ /dev/null @@ -1,30 +0,0 @@ -3d-unet-99: - Offline: - target_qps: 4.0 - SingleStream: - target_latency: 400 -3d-unet-99.9: - Offline: - target_qps: 4.0 - SingleStream: - target_latency: 400 -bert-99: - Offline: - target_qps: 4000.0 - Server: - target_qps: 3800.0 -bert-99.9: - Offline: - target_qps: 2000.0 - Server: - target_qps: 1600.0 -resnet50: - Offline: - target_qps: '42959.4' - Server: - target_qps: 35000.0 -retinanet: - Offline: - target_qps: 850.0 - Server: - target_qps: 630.0 diff --git a/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml b/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml index a9ad05a50..d764ab24d 100644 --- a/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml +++ b/script/get-mlperf-inference-sut-configs/configs/RTX4090x1/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml @@ -28,9 +28,8 @@ retinanet: target_qps: 850.0 Server: target_qps: 630.0 -sdxl: +stable-diffusion-xl: Offline: target_qps: 0.7 Server: - target_qps: 0.3 - + target_qps: 0.3 diff --git a/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml b/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml deleted file mode 100644 index 7b24138ac..000000000 --- a/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/default-config.yaml +++ 
/dev/null @@ -1,38 +0,0 @@ ---- - resnet50: - Offline: - target_qps: 43000.0 - Server: - target_qps: 37000.0 - retinanet: - Offline: - target_qps: 650.0 - Server: - target_qps: 600 - bert-99: - Offline: - target_qps: 4000 - bert-99.9: - Offline: - target_qps: 4000 - 3d-unet-99: - Offline: - target_qps: 2.0 - 3d-unet-99.9: - Offline: - target_qps: 2.0 - gptj-99.9: - Offline: - target_qps: 4 - Server: - target_qps: 3.5 - gptj-99: - Offline: - target_qps: 4 - Server: - target_qps: 3.5 - sdxl: - Offline: - target_qps: 2 - Server: - target_qps: 1 diff --git a/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml b/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml index 4820e8b52..294b2eda7 100644 --- a/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml +++ b/script/get-mlperf-inference-sut-configs/configs/RTX4090x2/nvidia_original-implementation/gpu-device/tensorrt-framework/framework-version-default/default-config.yaml @@ -35,7 +35,7 @@ target_qps: 8 Server: target_qps: 7 - sdxl: + stable-diffusion-xl: Offline: target_qps: 1.3 Server: From 79fb471a5e685a4a7cfb1835a249ddf51a9bc005 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 24 Dec 2024 13:19:04 +0000 Subject: [PATCH 55/80] Fix tensorrt detect on aarch64 --- script/get-tensorrt/customize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/get-tensorrt/customize.py b/script/get-tensorrt/customize.py index addc7322c..263c645e8 100644 --- a/script/get-tensorrt/customize.py +++ b/script/get-tensorrt/customize.py @@ -13,7 +13,7 @@ def preprocess(i): # Not enforcing dev requirement for now if env.get('CM_TENSORRT_TAR_FILE_PATH', '') == '' and env.get( - 'CM_TENSORRT_REQUIRE_DEV1', '') != 'yes' and env.get('CM_HOST_PLATFORM_FLAVOR', '') != 'aarch64': + 'CM_TENSORRT_REQUIRE_DEV1', '') != 'yes' and env.get('CM_HOST_PLATFORM_FLAVOR_', '') != 'aarch64': if os_info['platform'] == 'windows': extra_pre = '' From 51896966a62a0e11a3b5e7421d4a90929364e2f3 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 24 Dec 2024 20:38:22 +0530 Subject: [PATCH 56/80] Added torch deps for get-ml-model-gptj-nvidia --- script/get-ml-model-gptj/_cm.yaml | 4 ++++ script/get-ml-model-gptj/run-nvidia.sh | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/script/get-ml-model-gptj/_cm.yaml b/script/get-ml-model-gptj/_cm.yaml index 7d645976a..25b2ef981 100644 --- a/script/get-ml-model-gptj/_cm.yaml +++ b/script/get-ml-model-gptj/_cm.yaml @@ -161,6 +161,10 @@ variations: - python3 tags: get,python3 - tags: get,generic-python-lib,_package.safetensors + - tags: get,generic-python-lib,_torch + names: + - torch + - pytorch rclone: add_deps_recursive: dae: diff --git a/script/get-ml-model-gptj/run-nvidia.sh b/script/get-ml-model-gptj/run-nvidia.sh index 27e5a675c..b16ee45da 100644 --- a/script/get-ml-model-gptj/run-nvidia.sh +++ b/script/get-ml-model-gptj/run-nvidia.sh @@ -17,5 +17,5 @@ export DOCKER_RUN_ARGS=" -v ${CM_NVIDIA_MLPERF_SCRATCH_PATH}:/mnt" make -C docker run LOCAL_USER=1 test $? -eq 0 || exit $? 
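# The onnx_tune.py call below patches an FP8 scaler inside the quantized
# GPT-J checkpoint. Clearing PYTHONPATH for it, as the next change does, is
# presumably to keep CM-injected Python paths from shadowing the torch
# stack inside this environment -- an assumption from context, the commit
# message does not say.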
-${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_INFERENCE_NVIDIA_CODE_PATH}/code/gptj/tensorrt/onnx_tune.py --fp8-scalers-path=${CM_NVIDIA_MLPERF_SCRATCH_PATH}/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized/rank0.safetensors --scaler 1.005 --index 15 +PYTHONPATH='' ${CM_PYTHON_BIN_WITH_PATH} ${CM_MLPERF_INFERENCE_NVIDIA_CODE_PATH}/code/gptj/tensorrt/onnx_tune.py --fp8-scalers-path=${CM_NVIDIA_MLPERF_SCRATCH_PATH}/models/GPTJ-6B/fp8-quantized-ammo/GPTJ-FP8-quantized/rank0.safetensors --scaler 1.005 --index 15 test $? -eq 0 || exit $? From 76796b4c3966b04011c3cb6118412516c90ba50b Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 24 Dec 2024 15:15:53 +0000 Subject: [PATCH 57/80] Update VERSION --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index fa209468a..724e8d94e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.17 +0.6.18 From a90475d2de72bf0622cebe8d5ca8eb8c9d872fbd Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Tue, 24 Dec 2024 15:16:10 +0000 Subject: [PATCH 58/80] Updated git_commit_hash.txt --- git_commit_hash.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git_commit_hash.txt b/git_commit_hash.txt index d4ddba84d..b21cfb83e 100644 --- a/git_commit_hash.txt +++ b/git_commit_hash.txt @@ -1 +1 @@ -188708bd8e944a9c05175db7fd34dee6d7c1d5fc +76796b4c3966b04011c3cb6118412516c90ba50b From 3551660b68ffcff303ae7539ae9a62d34b19bc7e Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Wed, 25 Dec 2024 12:15:26 +0000 Subject: [PATCH 59/80] Fix coco2014 sample ids path --- script/get-dataset-coco2014/customize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/get-dataset-coco2014/customize.py b/script/get-dataset-coco2014/customize.py index b03e75049..3f80e76f0 100644 --- a/script/get-dataset-coco2014/customize.py +++ b/script/get-dataset-coco2014/customize.py @@ -24,7 +24,7 @@ def postprocess(i): env = i['env'] if env.get('CM_GENERATE_SAMPLE_ID', '') == "yes": env['CM_COCO2014_SAMPLE_ID_PATH'] = os.path.join( - os.getcwd(), 'install', 'sample_ids.txt') + os.getcwd(), 'sample_ids.txt') print(env['CM_COCO2014_SAMPLE_ID_PATH']) if env.get('CM_DATASET_CALIBRATION', '') == "no": env['CM_DATASET_PATH_ROOT'] = os.getcwd() From c4653788262678cd87478aaa8d5ae5a12f01978e Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 27 Dec 2024 12:06:26 +0000 Subject: [PATCH 60/80] Fixes for podman support (#79) * Fix coco2014 sample ids path * Fixes for podman, #33 * [Automated Commit] Format Codebase * Fixed rgat checkpoint path * Fixed rgat checkpoint path * [Automated Commit] Format Codebase --- automation/script/module.py | 51 ++++++++++++++---------- automation/script/module_misc.py | 11 ++++- script/get-ml-model-rgat/_cm.yaml | 4 +- script/get-ml-model-rgat/customize.py | 16 ++++++-- script/run-docker-container/_cm.yaml | 1 + script/run-docker-container/customize.py | 4 ++ 6 files changed, 59 insertions(+), 28 deletions(-) diff --git a/automation/script/module.py b/automation/script/module.py index eda2920c7..f37dc9830 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -507,28 +507,7 @@ def _run(self, i): if os.environ.get(key, '') != '' and env.get(key, '') == '': env[key] = os.environ[key] - # Check path/input/output in input and pass to env - for key in self.input_flags_converted_to_tmp_env: - value = i.get(key, '').strip() - if value != '': - env['CM_TMP_' + key.upper()] = value - - for key in self.input_flags_converted_to_env: - value = i.get( - key, - '').strip() if 
isinstance( - i.get( - key, - ''), - str) else i.get( - key, - '') - if value: - env[f"CM_{key.upper()}"] = value - - r = update_env_with_values(env) - if r['return'] > 0: - return r + r = self._update_env_from_input(env, i) ####################################################################### # Check if we want to skip cache (either by skip_cache or by fake_run) @@ -2294,6 +2273,34 @@ def _run(self, i): return rr + ########################################################################## + + def _update_env_from_input(self, env, i): + # Check path/input/output in input and pass to env + for key in self.input_flags_converted_to_tmp_env: + value = i.get(key, '').strip() + if value != '': + env['CM_TMP_' + key.upper()] = value + + for key in self.input_flags_converted_to_env: + value = i.get( + key, + '').strip() if isinstance( + i.get( + key, + ''), + str) else i.get( + key, + '') + if value: + env[f"CM_{key.upper()}"] = value + + r = update_env_with_values(env) + if r['return'] > 0: + return r + + return {'return': 0} + ########################################################################## def _fix_cache_paths(self, env): cm_repos_path = os.environ.get( diff --git a/automation/script/module_misc.py b/automation/script/module_misc.py index 5de7f0402..cf6e94597 100644 --- a/automation/script/module_misc.py +++ b/automation/script/module_misc.py @@ -2071,6 +2071,14 @@ def docker(i): continue ''' + r = script_automation._update_env_from_input(env, i) + if r['return'] > 0: + return r + + # mount outdirname path + if env.get('CM_OUTDIRNAME', '') != '': + mounts.append(f"""{env['CM_OUTDIRNAME']}:{env['CM_OUTDIRNAME']}""") + # Check if need to update/map/mount inputs and env r = process_inputs({'run_cmd_arc': i_run_cmd_arc, 'docker_settings': docker_settings, @@ -2409,7 +2417,8 @@ def docker(i): print(final_run_cmd) print('') - docker_recreate_image = 'yes' if not norecreate_docker_image else 'no' + docker_recreate_image = 'yes' if str(norecreate_docker_image).lower() not in [ + "yes", "true", "1"] else 'no' if i.get('docker_push_image', '') in ['True', True, 'yes']: env['CM_DOCKER_PUSH_IMAGE'] = 'yes' diff --git a/script/get-ml-model-rgat/_cm.yaml b/script/get-ml-model-rgat/_cm.yaml index bcec6df1c..54e5c119c 100644 --- a/script/get-ml-model-rgat/_cm.yaml +++ b/script/get-ml-model-rgat/_cm.yaml @@ -3,13 +3,13 @@ automation_alias: script automation_uid: 5b4e0237da074764 cache: true category: AI/ML models +docker: + fake_run_deps: True env: CM_ML_MODEL: RGAT CM_ML_MODEL_DATASET: ICBH input_mapping: checkpoint: RGAT_CHECKPOINT_PATH - download_path: CM_DOWNLOAD_PATH - to: CM_DOWNLOAD_PATH new_env_keys: - CM_ML_MODEL_* - CM_ML_MODEL_RGAT_CHECKPOINT_PATH diff --git a/script/get-ml-model-rgat/customize.py b/script/get-ml-model-rgat/customize.py index d920a8711..3cda5a7a8 100644 --- a/script/get-ml-model-rgat/customize.py +++ b/script/get-ml-model-rgat/customize.py @@ -7,10 +7,17 @@ def preprocess(i): os_info = i['os_info'] env = i['env'] + download_dir = env.get('CM_OUTDIRNAME', '') + path = env.get('RGAT_CHECKPOINT_PATH', '').strip() if path == '' or not os.path.exists(path): - env['CM_TMP_REQUIRE_DOWNLOAD'] = 'yes' + if download_dir != '' and os.path.exists( + os.path.join(download_dir, "RGAT", "RGAT.pt")): + env['RGAT_CHECKPOINT_PATH'] = os.path.join( + download_dir, "RGAT", "RGAT.pt") + else: + env['CM_TMP_REQUIRE_DOWNLOAD'] = 'yes' return {'return': 0} @@ -19,9 +26,12 @@ def postprocess(i): env = i['env'] + if env.get('RGAT_CHECKPOINT_PATH', '') == '': + env['RGAT_CHECKPOINT_PATH'] = 
os.path.join( + env['RGAT_DIR_PATH'], "RGAT.pt") + if env.get('CM_ML_MODEL_RGAT_CHECKPOINT_PATH', '') == '': - env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] = env.get( - 'RGAT_CHECKPOINT_PATH', os.path.join(env['RGAT_DIR_PATH'], "RGAT.pt")) + env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] = env['RGAT_CHECKPOINT_PATH'] if env.get('CM_ML_MODEL_PATH', '') == '': env['CM_ML_MODEL_PATH'] = env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH'] diff --git a/script/run-docker-container/_cm.yaml b/script/run-docker-container/_cm.yaml index 5135070b9..8a04e5b43 100644 --- a/script/run-docker-container/_cm.yaml +++ b/script/run-docker-container/_cm.yaml @@ -68,3 +68,4 @@ prehook_deps: CM_DOCKER_CONTAINER_ID: - on tags: build,docker,image +- tags: get,docker diff --git a/script/run-docker-container/customize.py b/script/run-docker-container/customize.py index 869386f91..2157d595f 100644 --- a/script/run-docker-container/customize.py +++ b/script/run-docker-container/customize.py @@ -167,6 +167,10 @@ def postprocess(i): if env.get('CM_DOCKER_EXTRA_RUN_ARGS', '') != '': run_opts += env['CM_DOCKER_EXTRA_RUN_ARGS'] + if env.get('CM_CONTAINER_TOOL', '') == 'podman' and env.get( + 'CM_PODMAN_MAP_USER_ID', '').lower() not in ["no", "0", "false"]: + run_opts += " --userns=keep-id" + if env.get('CM_DOCKER_PORT_MAPS', []): for ports in env['CM_DOCKER_PORT_MAPS']: port_map_cmds.append(ports) From c3550d2d24ed509ac3f996555a22517f9c65109e Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 27 Dec 2024 17:00:43 +0000 Subject: [PATCH 61/80] Not use SHELL command in CM docker (#82) * Dont use SHELL command in dockerfile --- script/build-dockerfile/customize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/build-dockerfile/customize.py b/script/build-dockerfile/customize.py index 9c6012aa4..baf0860f8 100644 --- a/script/build-dockerfile/customize.py +++ b/script/build-dockerfile/customize.py @@ -172,7 +172,7 @@ def preprocess(i): shell = get_value(env, config, 'SHELL', 'CM_DOCKER_IMAGE_SHELL') if shell: - f.write('SHELL ' + shell + EOL) + # f.write('SHELL ' + shell + EOL) f.write(EOL) for arg in config['ARGS_DEFAULT']: From f79e2f3ff8d53fe9da78000a4e03c96ce653d421 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 27 Dec 2024 17:35:22 +0000 Subject: [PATCH 62/80] Support adding dependent CM script commands in CM dockerfile --- .../_cm.yaml | 3 --- script/build-dockerfile/customize.py | 20 +++++++++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 45401431f..1c1a89eee 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -625,9 +625,6 @@ deps: enable_if_env: CM_MODEL: - rgat - skip_if_any_env: - CM_DATASET_IGBH_PATH: - - "on" skip_if_env: CM_RUN_STATE_DOCKER: - 'yes' diff --git a/script/build-dockerfile/customize.py b/script/build-dockerfile/customize.py index baf0860f8..8f451ebb6 100644 --- a/script/build-dockerfile/customize.py +++ b/script/build-dockerfile/customize.py @@ -379,6 +379,26 @@ def preprocess(i): if run_cmd_extra != '': x += ' ' + run_cmd_extra + if env.get('CM_DOCKER_RUN_SCRIPT_TAGS', '') != '' and str(env.get( + 'CM_DOCKER_ADD_DEPENDENT_SCRIPTS_RUN_COMMANDS', '')).lower() in ["yes", "1", "true"]: + cm_input = {'action': 'run', + 'automation': 'script', + 'tags': f"""{env['CM_DOCKER_RUN_SCRIPT_TAGS']}""", + 'print_deps': True, + 'quiet': True, + 'silent': True, + 'fake_run': True, + 
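                       # fake_run/fake_deps make this scripted run a dry run:
                       # the dependency commands are only collected (via
                       # print_deps below) and written into the Dockerfile as
                       # RUN lines rather than executed here.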
'fake_deps': True + } + r = self_module.cmind.access(cm_input) + if r['return'] > 0: + return r + print_deps = r['new_state']['print_deps'] + fake_run_str = " --fake_run" if env.get('CM_DOCKER_FAKE_DEPS') else "" + cmds = ["RUN " + dep for dep in print_deps] + for cmd in cmds: + f.write(cmd + fake_run_str + EOL) + f.write(x + EOL) # fake_run to install the dependent scripts and caching them From 6ba3117e564f526a2a3d5cb393d964c4f936f5ee Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 27 Dec 2024 23:23:22 +0000 Subject: [PATCH 63/80] Fixes for igbh dataset detection (#85) * Support adding dependent CM script commands in CM dockerfile * Fix dataset-igbh register in cache * [Automated Commit] Format Codebase * Fix dataset-igbh register in cache * Fix dataset-igbh register in cache --- script/get-dataset-igbh/_cm.yaml | 80 ++++++++++++++-------------- script/get-dataset-igbh/customize.py | 15 +++++- 2 files changed, 53 insertions(+), 42 deletions(-) diff --git a/script/get-dataset-igbh/_cm.yaml b/script/get-dataset-igbh/_cm.yaml index 1b7c86ae7..8e5c7b4cd 100644 --- a/script/get-dataset-igbh/_cm.yaml +++ b/script/get-dataset-igbh/_cm.yaml @@ -41,8 +41,8 @@ prehook_deps: - CM_OUTDIRNAME force_cache: true enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - paper-node-feat @@ -60,8 +60,8 @@ prehook_deps: - CM_OUTDIRNAME force_cache: true enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - paper-node-label19 @@ -79,8 +79,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - paper-node-label2k @@ -98,8 +98,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - paper-id-index-mapping @@ -118,8 +118,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - edge-index @@ -138,8 +138,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - author-id-index-mapping @@ -157,8 +157,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - author-node-feat @@ -177,8 +177,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - conference-id-index-mapping @@ -196,8 +196,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - conference-node-feat @@ -216,8 +216,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - institute-id-index-mapping @@ -235,8 +235,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - institute-node-feat @@ -255,8 +255,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - journal-id-index-mapping @@ -274,8 +274,8 @@ prehook_deps: force_env_keys: - 
CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - journal-node-feat @@ -294,8 +294,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - fos-id-index-mapping @@ -313,8 +313,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - fos-node-feat @@ -333,8 +333,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - author-to-institute-edge-index @@ -353,8 +353,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - paper-published-journal-edge-index @@ -373,8 +373,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - paper-topic-fos-edge-index @@ -393,8 +393,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - paper-venue-conference-edge-index @@ -413,8 +413,8 @@ prehook_deps: force_env_keys: - CM_OUTDIRNAME enable_if_env: - CM_DATASET_IGBH_TYPE: - - 'full' + CM_DATASET_IGBH_FULL_DOWNLOAD: + - 'yes' names: - dae - paper-written-by-author-edge-index diff --git a/script/get-dataset-igbh/customize.py b/script/get-dataset-igbh/customize.py index 8f789bcad..de85bd900 100644 --- a/script/get-dataset-igbh/customize.py +++ b/script/get-dataset-igbh/customize.py @@ -19,6 +19,11 @@ def preprocess(i): graph_folder = os.path.join( env['CM_MLPERF_INFERENCE_SOURCE'], 'graph', 'R-GAT') + if env.get('CM_DATASET_IGBH_PATH', + '') != '': # skip download, just register in cache + env['CM_DATASET_IGBH_OUT_PATH'] = env['CM_DATASET_IGBH_PATH'] + return {'return': 0} + download_loc = env.get('CM_DATASET_IGBH_OUT_PATH', os.getcwd()) env['CM_DATASET_IGBH_DOWNLOAD_LOCATION'] = download_loc @@ -31,14 +36,20 @@ def preprocess(i): run_cmd += x_sep + env['CM_PYTHON_BIN_WITH_PATH'] + \ f" tools/download_igbh_test.py --target-path {download_loc} " + else: + env['CM_DATASET_IGBH_FULL_DOWNLOAD'] = 'yes' + # split seeds run_cmd += x_sep + \ - f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} " + f"""{ + env['CM_PYTHON_BIN_WITH_PATH']} tools/split_seeds.py --path {download_loc} --dataset_size { + env['CM_DATASET_IGBH_SIZE']} """ # compress graph(for glt implementation) if env.get('CM_IGBH_GRAPH_COMPRESS', '') == "yes": run_cmd += x_sep + \ - f"{env['CM_PYTHON_BIN_WITH_PATH']} tools/compress_graph.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} --layout {env['CM_IGBH_GRAPH_COMPRESS_LAYOUT']}" + f"""{env['CM_PYTHON_BIN_WITH_PATH']} tools/compress_graph.py --path {download_loc} --dataset_size {env['CM_DATASET_IGBH_SIZE']} --layout {env['CM_IGBH_GRAPH_COMPRESS_LAYOUT']} + """ env['CM_RUN_CMD'] = run_cmd From c52956b27fa8d06ec8db53f885e1f05021e379e9 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 28 Dec 2024 22:32:38 +0000 Subject: [PATCH 64/80] Copied mlperf automotive CM scripts (#86) * Copied mlperf-automotive scripts * [Automated Commit] Format Codebase * Fixes to github actions for the mlperf automation repo changes * Fixed missing links * Run 
abtf test from mlperf-automations repo * Run abtf test from mlperf-automations repo * Fix formatting error --- .../workflows/run-individual-script-tests.yml | 6 +- .../test-mlperf-inference-abtf-poc.yml | 6 +- .../test-mlperf-inference-resnet50.yml | 2 +- .../test-mlperf-inference-retinanet.yml | 2 +- .../workflows/test-mlperf-inference-rgat.yml | 2 +- .../workflows/test-mlperf-inference-sdxl.yaml | 2 +- .../README-extra.md | 1 + .../_cm.yaml | 469 +++++++++++++ .../customize.py | 253 +++++++ .../ref/LICENSE.md | 177 +++++ .../ref/README.md | 2 + .../ref/python/__init__.py | 0 .../ref/python/backend.py | 23 + .../ref/python/backend_pytorch_native.py | 95 +++ .../ref/python/cognata.py | 351 ++++++++++ .../ref/python/cognata_labels.py | 49 ++ .../ref/python/dataset.py | 303 ++++++++ .../ref/python/main.py | 659 ++++++++++++++++++ .../user.conf | 6 + script/app-mlperf-automotive/_cm.yaml | 287 ++++++++ script/app-mlperf-automotive/customize.py | 103 +++ .../README-extra.md | 62 ++ script/get-dataset-cognata-mlcommons/_cm.yaml | 161 +++++ .../checksums/cognata_poc.txt | 41 ++ .../customize.py | 449 ++++++++++++ .../README-extra.md | 5 + script/get-ml-model-abtf-ssd-pytorch/_cm.yaml | 174 +++++ .../customize.py | 49 ++ .../_cm.yaml | 39 ++ .../customize.py | 40 ++ .../run.bat | 1 + .../run.sh | 27 + script/run-mlperf-automotive-app/_cm.yaml | 248 +++++++ script/run-mlperf-automotive-app/customize.py | 403 +++++++++++ .../test-cm-core/src/script/process_tests.py | 38 + 35 files changed, 4525 insertions(+), 10 deletions(-) create mode 100644 script/app-mlperf-automotive-mlcommons-python/README-extra.md create mode 100644 script/app-mlperf-automotive-mlcommons-python/_cm.yaml create mode 100644 script/app-mlperf-automotive-mlcommons-python/customize.py create mode 100644 script/app-mlperf-automotive-mlcommons-python/ref/LICENSE.md create mode 100644 script/app-mlperf-automotive-mlcommons-python/ref/README.md create mode 100644 script/app-mlperf-automotive-mlcommons-python/ref/python/__init__.py create mode 100644 script/app-mlperf-automotive-mlcommons-python/ref/python/backend.py create mode 100644 script/app-mlperf-automotive-mlcommons-python/ref/python/backend_pytorch_native.py create mode 100644 script/app-mlperf-automotive-mlcommons-python/ref/python/cognata.py create mode 100644 script/app-mlperf-automotive-mlcommons-python/ref/python/cognata_labels.py create mode 100644 script/app-mlperf-automotive-mlcommons-python/ref/python/dataset.py create mode 100644 script/app-mlperf-automotive-mlcommons-python/ref/python/main.py create mode 100644 script/app-mlperf-automotive-mlcommons-python/user.conf create mode 100644 script/app-mlperf-automotive/_cm.yaml create mode 100644 script/app-mlperf-automotive/customize.py create mode 100644 script/get-dataset-cognata-mlcommons/README-extra.md create mode 100644 script/get-dataset-cognata-mlcommons/_cm.yaml create mode 100644 script/get-dataset-cognata-mlcommons/checksums/cognata_poc.txt create mode 100644 script/get-dataset-cognata-mlcommons/customize.py create mode 100644 script/get-ml-model-abtf-ssd-pytorch/README-extra.md create mode 100644 script/get-ml-model-abtf-ssd-pytorch/_cm.yaml create mode 100644 script/get-ml-model-abtf-ssd-pytorch/customize.py create mode 100644 script/get-mlperf-automotive-scratch-space/_cm.yaml create mode 100644 script/get-mlperf-automotive-scratch-space/customize.py create mode 100644 script/get-mlperf-automotive-scratch-space/run.bat create mode 100644 script/get-mlperf-automotive-scratch-space/run.sh create mode 100644 
script/run-mlperf-automotive-app/_cm.yaml create mode 100644 script/run-mlperf-automotive-app/customize.py create mode 100644 script/test-cm-core/src/script/process_tests.py diff --git a/.github/workflows/run-individual-script-tests.yml b/.github/workflows/run-individual-script-tests.yml index c9fd7f599..560d704bf 100644 --- a/.github/workflows/run-individual-script-tests.yml +++ b/.github/workflows/run-individual-script-tests.yml @@ -3,10 +3,10 @@ name: Individual CM script Tests on: pull_request: - branches: [ "main", "mlperf-inference", "dev" ] + branches: [ "main", "dev" ] paths: - 'script/**_cm.json' - - 'script/**_cm.yml' + - 'script/**_cm.yaml' jobs: run-script-tests: @@ -34,4 +34,4 @@ jobs: done python3 -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - DOCKER_CM_REPO=${{ github.event.pull_request.head.repo.html_url }} DOCKER_CM_REPO_BRANCH=${{ github.event.pull_request.head.ref }} TEST_INPUT_INDEX=${{ matrix.test-input-index }} python3 tests/script/process_tests.py ${{ steps.getfile.outputs.files }} + DOCKER_CM_REPO=${{ github.event.pull_request.head.repo.html_url }} DOCKER_CM_REPO_BRANCH=${{ github.event.pull_request.head.ref }} TEST_INPUT_INDEX=${{ matrix.test-input-index }} python3 script/test-cm-core/src/script/process_tests.py ${{ steps.getfile.outputs.files }} diff --git a/.github/workflows/test-mlperf-inference-abtf-poc.yml b/.github/workflows/test-mlperf-inference-abtf-poc.yml index ad8636ad0..44b0d6b6d 100644 --- a/.github/workflows/test-mlperf-inference-abtf-poc.yml +++ b/.github/workflows/test-mlperf-inference-abtf-poc.yml @@ -1,10 +1,10 @@ # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: MLPerf inference ABTF POC Test +name: MLPerf Automotive POC Test on: pull_request: - branches: [ "main", "mlperf-inference" ] + branches: [ "main", "dev" ] paths: - '.github/workflows/test-mlperf-inference-abtf-poc.yml' - '**' @@ -55,7 +55,7 @@ jobs: run: | pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - cm pull repo mlcommons@cm4abtf --branch=poc + #cm pull repo mlcommons@cm4abtf --branch=poc - name: Install Docker on macos if: runner.os == 'macOS-deactivated' diff --git a/.github/workflows/test-mlperf-inference-resnet50.yml b/.github/workflows/test-mlperf-inference-resnet50.yml index f4ed3f4f1..9bcc53e8f 100644 --- a/.github/workflows/test-mlperf-inference-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-resnet50.yml @@ -64,4 +64,4 @@ jobs: git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from R50 GH action on ${{ matrix.os }}" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from R50 GH action on ${{ matrix.os }}" --quiet diff 
--git a/.github/workflows/test-mlperf-inference-retinanet.yml b/.github/workflows/test-mlperf-inference-retinanet.yml index f392d2d7d..3df5ea51b 100644 --- a/.github/workflows/test-mlperf-inference-retinanet.yml +++ b/.github/workflows/test-mlperf-inference-retinanet.yml @@ -64,4 +64,4 @@ jobs: git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from Retinanet GH action on ${{ matrix.os }}" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from Retinanet GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml index d2edb5855..94a2e174b 100644 --- a/.github/workflows/test-mlperf-inference-rgat.yml +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -45,4 +45,4 @@ jobs: git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from R-GAT GH action on ${{ matrix.os }}" --quiet + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from R-GAT GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-sdxl.yaml b/.github/workflows/test-mlperf-inference-sdxl.yaml index d1029e80d..402424b54 100644 --- a/.github/workflows/test-mlperf-inference-sdxl.yaml +++ b/.github/workflows/test-mlperf-inference-sdxl.yaml @@ -22,4 +22,4 @@ jobs: python3 -m pip install cm4mlops cm pull repo cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/script/app-mlperf-automotive-mlcommons-python/README-extra.md 
b/script/app-mlperf-automotive-mlcommons-python/README-extra.md new file mode 100644 index 000000000..582991f6d --- /dev/null +++ b/script/app-mlperf-automotive-mlcommons-python/README-extra.md @@ -0,0 +1 @@ +# CM script diff --git a/script/app-mlperf-automotive-mlcommons-python/_cm.yaml b/script/app-mlperf-automotive-mlcommons-python/_cm.yaml new file mode 100644 index 000000000..b22f119d6 --- /dev/null +++ b/script/app-mlperf-automotive-mlcommons-python/_cm.yaml @@ -0,0 +1,469 @@ +alias: app-mlperf-automotive-mlcommons-python +uid: 621240c5d30a437c + +automation_alias: script +automation_uid: 5b4e0237da074764 + +category: "Modular MLPerf inference benchmark pipeline for ABTF model" + + +# User-friendly tags to find this CM script +tags: +- demo +- run-mlperf-inference +- object-detection +- abtf-model + + +# Default environment +default_env: + CM_MLPERF_LOADGEN_MODE: accuracy + CM_MLPERF_LOADGEN_SCENARIO: Offline + CM_MLPERF_LOADGEN_BUILD_FROM_SRC: 'on' + CM_OUTPUT_FOLDER_NAME: test_results + CM_MLPERF_RUN_STYLE: test + CM_TEST_QUERY_COUNT: '10' + CM_MLPERF_QUANTIZATION: off + CM_MLPERF_SUT_NAME_IMPLEMENTATION_PREFIX: reference + CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX: '' + + +# Map script inputs to environment variables +input_mapping: + device: CM_MLPERF_DEVICE + count: CM_MLPERF_LOADGEN_QUERY_COUNT + docker: CM_RUN_DOCKER_CONTAINER + hw_name: CM_HW_NAME + imagenet_path: IMAGENET_PATH + max_batchsize: CM_MLPERF_LOADGEN_MAX_BATCHSIZE + mode: CM_MLPERF_LOADGEN_MODE + num_threads: CM_NUM_THREADS + threads: CM_NUM_THREADS + dataset: CM_MLPERF_VISION_DATASET_OPTION + model: CM_MLPERF_CUSTOM_MODEL_PATH + output_dir: OUTPUT_BASE_DIR + power: CM_MLPERF_POWER + power_server: CM_MLPERF_POWER_SERVER_ADDRESS + ntp_server: CM_MLPERF_POWER_NTP_SERVER + max_amps: CM_MLPERF_POWER_MAX_AMPS + max_volts: CM_MLPERF_POWER_MAX_VOLTS + regenerate_files: CM_REGENERATE_MEASURE_FILES + rerun: CM_RERUN + scenario: CM_MLPERF_LOADGEN_SCENARIO + test_query_count: CM_TEST_QUERY_COUNT + clean: CM_MLPERF_CLEAN_SUBMISSION_DIR + dataset_args: CM_MLPERF_EXTRA_DATASET_ARGS + target_qps: CM_MLPERF_LOADGEN_TARGET_QPS + target_latency: CM_MLPERF_LOADGEN_TARGET_LATENCY + offline_target_qps: CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS + server_target_qps: CM_MLPERF_LOADGEN_SERVER_TARGET_QPS + singlestream_target_latency: CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY + multistream_target_latency: CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY + output: CM_MLPERF_OUTPUT_DIR + +# Duplicate CM environment variables to the ones used in native apps +env_key_mappings: + CM_HOST_: HOST_ + CM_ML_: ML_ + CM_MLPERF_TVM: MLPERF_TVM + CM_MLPERF_DELETE: MLPERF_DELETE + +# Env keys which are exposed to higher level scripts +new_env_keys: + - CM_MLPERF_* + - CM_DATASET_* + - CM_HW_NAME + - CM_COGNATA_ACCURACY_DUMP_FILE + - CM_OUTPUT_PREDICTIONS_PATH + - CM_ML_MODEL_* + - CM_MAX_EXAMPLES + +new_state_keys: + - mlperf-inference-implementation + - CM_SUT_* + +# Dependencies on other CM scripts +deps: + + # Detect host OS features + - tags: detect,os + + # Detect host CPU features + - tags: detect,cpu + + # Install system dependencies on a given host + - tags: get,sys-utils-cm + + # Detect/install python + - tags: get,python + names: + - python + - python3 + + # Use cmind inside CM scripts + - tags: get,generic-python-lib,_package.cmind + + + # CUDA + - tags: get,cuda + enable_if_env: + USE_CUDA: + - yes + names: + - cuda + + + + ######################################################################## + # Install ML engines via CM + + ## Onnx CPU Runtime + 
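+  # Note: each engine dependency in this section is gated with
+  # enable_if_env / skip_if_env on CM_MLPERF_BACKEND and CM_MLPERF_DEVICE,
+  # so only the engine matching the chosen backend/device variation is
+  # resolved and installed at run time.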
- tags: get,generic-python-lib,_onnxruntime + names: + - ml-engine-onnxruntime + - onnxruntime + enable_if_env: + CM_MLPERF_BACKEND: + - onnxruntime + - tvm-onnx + CM_MLPERF_DEVICE: + - cpu + - rocm + + ## Onnx CUDA Runtime + - tags: get,generic-python-lib,_onnxruntime_gpu + names: + - ml-engine-onnxruntime-cuda + enable_if_env: + CM_MLPERF_BACKEND: + - onnxruntime + - tvm-onnx + CM_MLPERF_DEVICE: + - gpu + skip_if_env: + CM_MODEL: + - 3d-unet-99 + - 3d-unet-99.9 + + ## resnet50 and 3d-unet need both onnxruntime and onnxruntime_gpu on cuda + - tags: get,generic-python-lib,_onnxruntime + enable_if_env: + CM_MLPERF_BACKEND: + - onnxruntime + CM_MLPERF_DEVICE: + - gpu + CM_MODEL: + - 3d-unet-99 + - 3d-unet-99.9 + - resnet50 + - tags: get,generic-python-lib,_onnxruntime_gpu + env: + CM_GENERIC_PYTHON_PIP_UNINSTALL_DEPS: "" + enable_if_env: + CM_MLPERF_BACKEND: + - onnxruntime + CM_MLPERF_DEVICE: + - gpu + CM_MODEL: + - 3d-unet-99 + - 3d-unet-99.9 + - resnet50 + + ## Pytorch (CPU) + - tags: get,generic-python-lib,_torch + names: + - ml-engine-pytorch + - pytorch + enable_if_env: + CM_MLPERF_BACKEND: + - pytorch + - tvm-pytorch + CM_MLPERF_DEVICE: + - cpu + - rocm + + ## Pytorch (CUDA) + - tags: get,generic-python-lib,_torch_cuda + names: + - ml-engine-pytorch + - pytorch + enable_if_env: + CM_MLPERF_BACKEND: + - pytorch + - tvm-pytorch + - ray + CM_MLPERF_DEVICE: + - gpu + + ## Torchvision (CPU) + - tags: get,generic-python-lib,_torchvision + names: + - ml-engine-torchvision + enable_if_env: + CM_MLPERF_BACKEND: + - pytorch + - tvm-pytorch + CM_MLPERF_DEVICE: + - cpu + + ## Torchvision (CUDA) + - tags: get,generic-python-lib,_torchvision_cuda + names: + - ml-engine-torchvision + enable_if_env: + CM_MLPERF_BACKEND: + - pytorch + - tvm-pytorch + - ray + CM_MLPERF_DEVICE: + - gpu + + ## tensorrt + - tags: get,generic-python-lib,_tensorrt + names: + - ml-engine-tensorrt + enable_if_env: + CM_MLPERF_BACKEND: + - ray + + ## torch_tensorrt + - tags: get,generic-python-lib,_torch_tensorrt + names: + - ml-engine-torch_tensorrt + enable_if_env: + CM_MLPERF_BACKEND: + - ray + + ## Ray + - tags: get,generic-python-lib,_ray + names: + - ray + enable_if_env: + CM_MLPERF_BACKEND: + - ray + + + + ## Tensorflow + - tags: get,generic-python-lib,_tensorflow + names: + - ml-engine-tensorflow + - tensorflow + enable_if_env: + CM_MLPERF_BACKEND: + - tf + - tflite + + # Install MLPerf inference dependencies + + + + # Creates user conf for given SUT + - tags: generate,user-conf,mlperf,inference + names: + - user-conf-generator + + + # Install MLPerf loadgen + - tags: get,generic-python-lib,_package.mlcommons-loadgen + enable_if_env: + CM_MLPERF_LOADGEN_BUILD_FROM_SRC: + - "off" + names: + - loadgen + - mlperf-inference-loadgen + + - tags: get,loadgen + enable_if_any_env: + CM_MLPERF_LOADGEN_BUILD_FROM_SRC: + - "on" + names: + - loadgen + - mlperf-inference-loadgen + - mlperf-inference-loadgen-from-src + + +# +# # Download MLPerf inference source +# - tags: get,mlcommons,inference,src +# env: +# CM_GET_MLPERF_IMPLEMENTATION_ONLY: 'yes' +# names: +# - mlperf-implementation + + - tags: get,generic-python-lib,_package.psutil + + + + +prehook_deps: + - names: + - remote-run-cmds + tags: remote,run,cmds + enable_if_env: + CM_ASSH_RUN_COMMANDS: + - "on" + + + +posthook_deps: + - names: + - mlperf-runner + tags: benchmark-mlperf + skip_if_env: + CM_MLPERF_SKIP_RUN: + - "on" + + +post_deps: + - tags: save,mlperf,inference,state + names: + - save-mlperf-inference-state + + +docker: + real_run: false + +# Variations to customize 
dependencies +variations: + # Implementation + python: + group: implementation + default: true + env: + CM_MLPERF_PYTHON: 'yes' + CM_MLPERF_IMPLEMENTATION: reference + + + # ML engine + onnxruntime: + group: framework + env: + CM_MLPERF_BACKEND: onnxruntime + + onnxruntime,cpu: + env: + CM_MLPERF_BACKEND_VERSION: <<>> + + onnxruntime,cuda: + env: + CM_MLPERF_BACKEND_VERSION: <<>> + ONNXRUNTIME_PREFERRED_EXECUTION_PROVIDER: "CUDAExecutionProvider" + + + pytorch: + group: framework + default: true + add_deps_recursive: + imagenet-preprocessed: + tags: _NCHW + openimages-preprocessed: + tags: _NCHW + ml-model: + tags: raw,_pytorch + env: + CM_MLPERF_BACKEND: pytorch + CM_MLPERF_BACKEND_VERSION: <<>> + + + + +# retinanet: +# group: models +# deps: +# - tags: get,generic-python-lib,_opencv-python +# - tags: get,generic-python-lib,_numpy +# - tags: get,generic-python-lib,_pycocotools +# +# env: +# CM_MODEL: retinanet +# CM_MLPERF_USE_MLCOMMONS_RUN_SCRIPT: 'yes' +# CM_MLPERF_LOADGEN_MAX_BATCHSIZE: '1' + + + abtf-demo-model: + group: models + deps: + - tags: get,generic-python-lib,_opencv-python + - tags: get,generic-python-lib,_numpy + - tags: get,generic-python-lib,_pycocotools + - tags: get,generic-python-lib,_package.torchmetrics + - tags: get,generic-python-lib,_package.faster-coco-eval + version_max: "1.5.7" + version_max_usable: "1.5.7" + names: + - cocoeval + - tags: get,dataset,raw,mlcommons-cognata + names: + - raw-dataset-mlcommons-cognata + - tags: get,ml-model,abtf-ssd-pytorch,_abtf-mvp + names: + - ml-model-abtf + + env: + CM_MODEL: retinanet + + abtf-poc-model: + group: models + default: true + deps: + - tags: get,generic-python-lib,_opencv-python + - tags: get,generic-python-lib,_numpy + version_max: "1.26.4" + version_max_usable: "1.26.4" + - tags: get,generic-python-lib,_pycocotools + - tags: get,generic-python-lib,_package.torchmetrics + - tags: get,generic-python-lib,_package.faster-coco-eval + version_max: "1.5.7" + version_max_usable: "1.5.7" + names: + - cocoeval + - tags: get,dataset,raw,mlcommons-cognata,_abtf-poc + skip_if_env: + CM_RUN_STATE_DOCKER: + - 'yes' + names: + - raw-dataset-mlcommons-cognata + - tags: get,ml-model,abtf-ssd-pytorch,_abtf-poc + names: + - ml-model-abtf + + env: + CM_MODEL: retinanet + + # Target devices + cpu: + group: device + default: true + env: + CM_MLPERF_DEVICE: cpu + CUDA_VISIBLE_DEVICES: '' + USE_CUDA: no + USE_GPU: no + + cuda: + group: device + env: + CM_MLPERF_DEVICE: gpu + USE_CUDA: yes + USE_GPU: yes + + + + # Loadgen scenarios + offline: + env: + CM_MLPERF_LOADGEN_SCENARIO: Offline + multistream: + env: + CM_MLPERF_LOADGEN_SCENARIO: MultiStream + singlestream: + env: + CM_MLPERF_LOADGEN_SCENARIO: SingleStream + CM_MLPERF_LOADGEN_MAX_BATCHSIZE: 1 + server: + env: + CM_MLPERF_LOADGEN_SCENARIO: Server + + mvp_demo: + env: diff --git a/script/app-mlperf-automotive-mlcommons-python/customize.py b/script/app-mlperf-automotive-mlcommons-python/customize.py new file mode 100644 index 000000000..58ee04e1f --- /dev/null +++ b/script/app-mlperf-automotive-mlcommons-python/customize.py @@ -0,0 +1,253 @@ +from cmind import utils +import os +import json +import shutil +import subprocess + + +def preprocess(i): + + os_info = i['os_info'] + env = i['env'] + state = i['state'] + script_path = i['run_script_input']['path'] + + if env.get('CM_MLPERF_SKIP_RUN', '') == "yes": + return {'return': 0} + + if env.get('CM_RUN_DOCKER_CONTAINER', '') == "yes": + return {'return': 0} + + if env.get('CM_MLPERF_POWER', '') == "yes": + power = "yes" + else: + 
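+        # power measurement is opt-in: anything other than
+        # CM_MLPERF_POWER == "yes" falls back to a plain (non-power) run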
power = "no" + + rerun = True if env.get("CM_RERUN", "") != '' else False + + if 'CM_MLPERF_LOADGEN_SCENARIO' not in env: + env['CM_MLPERF_LOADGEN_SCENARIO'] = "Offline" + + if 'CM_MLPERF_LOADGEN_MODE' not in env: + env['CM_MLPERF_LOADGEN_MODE'] = "accuracy" + + if 'CM_MODEL' not in env: + return { + 'return': 1, 'error': "Please select a variation specifying the model to run"} + + # if env['CM_MODEL'] == "resnet50": + # cmd = "cp " + os.path.join(env['CM_DATASET_AUX_PATH'], "val.txt") + " " + os.path.join(env['CM_DATASET_PATH'], + # "val_map.txt") + # ret = os.system(cmd) + + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] = " " + \ + env.get('CM_MLPERF_LOADGEN_EXTRA_OPTIONS', '') + " " + + if 'CM_MLPERF_LOADGEN_QPS' not in env: + env['CM_MLPERF_LOADGEN_QPS_OPT'] = "" + else: + env['CM_MLPERF_LOADGEN_QPS_OPT'] = " --qps " + \ + env['CM_MLPERF_LOADGEN_QPS'] + + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += env['CM_MLPERF_LOADGEN_QPS_OPT'] + + if 'CM_NUM_THREADS' not in env: + if 'CM_MINIMIZE_THREADS' in env: + env['CM_NUM_THREADS'] = str(int(env['CM_HOST_CPU_TOTAL_CORES']) // + (int(env.get('CM_HOST_CPU_SOCKETS', '1')) * int(env.get('CM_HOST_CPU_TOTAL_CORES', '1')))) + else: + env['CM_NUM_THREADS'] = env.get('CM_HOST_CPU_TOTAL_CORES', '1') + + if env.get('CM_MLPERF_LOADGEN_MAX_BATCHSIZE', '') != '' and not env.get( + 'CM_MLPERF_MODEL_SKIP_BATCHING', False): + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --max-batchsize " + \ + str(env['CM_MLPERF_LOADGEN_MAX_BATCHSIZE']) + + if env.get('CM_MLPERF_LOADGEN_BATCH_SIZE', '') != '': + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --batch-size " + \ + str(env['CM_MLPERF_LOADGEN_BATCH_SIZE']) + + if env.get('CM_MLPERF_LOADGEN_QUERY_COUNT', '') != '' and not env.get( + 'CM_TMP_IGNORE_MLPERF_QUERY_COUNT', False) and env.get('CM_MLPERF_RUN_STYLE', '') != "valid": + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --count " + \ + env['CM_MLPERF_LOADGEN_QUERY_COUNT'] + + print("Using MLCommons Inference source from '" + + env['CM_MLPERF_INFERENCE_SOURCE'] + "'") + + if 'CM_MLPERF_CONF' not in env: + env['CM_MLPERF_CONF'] = os.path.join( + env['CM_MLPERF_INFERENCE_SOURCE'], "mlperf.conf") + + x = "" if os_info['platform'] == 'windows' else "'" + if "llama2-70b" in env['CM_MODEL']: + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --mlperf-conf " + \ + x + env['CM_MLPERF_CONF'] + x + else: + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --mlperf_conf " + \ + x + env['CM_MLPERF_CONF'] + x + + env['MODEL_DIR'] = env.get('CM_ML_MODEL_PATH') + if not env['MODEL_DIR']: + env['MODEL_DIR'] = os.path.dirname( + env.get( + 'CM_MLPERF_CUSTOM_MODEL_PATH', + env.get('CM_ML_MODEL_FILE_WITH_PATH'))) + + RUN_CMD = "" + + scenario = env['CM_MLPERF_LOADGEN_SCENARIO'] + scenario_extra_options = '' + + NUM_THREADS = env['CM_NUM_THREADS'] + if int(NUM_THREADS) > 2 and env['CM_MLPERF_DEVICE'] == "gpu": + NUM_THREADS = "2" # Don't use more than 2 threads when run on GPU + + if env['CM_MODEL'] in ['resnet50', 'retinanet', 'stable-diffusion-xl']: + scenario_extra_options += " --threads " + NUM_THREADS + + ml_model_name = env['CM_MODEL'] + if 'CM_MLPERF_USER_CONF' in env: + user_conf_path = env['CM_MLPERF_USER_CONF'] + x = "" if os_info['platform'] == 'windows' else "'" + scenario_extra_options += " --user_conf " + x + user_conf_path + x + + mode = env['CM_MLPERF_LOADGEN_MODE'] + mode_extra_options = "" + + # Grigori blocked for ABTF to preprocess data set on the fly for now + # we can later move it to a separate script to preprocess data set + +# if 'CM_DATASET_PREPROCESSED_PATH' in env and 
env['CM_MODEL'] in [ 'resnet50', 'retinanet' ]: +# #dataset_options = " --use_preprocessed_dataset --preprocessed_dir "+env['CM_DATASET_PREPROCESSED_PATH'] +# if env.get('CM_MLPERF_LAST_RELEASE') not in [ "v2.0", "v2.1" ]: +# dataset_options = " --use_preprocessed_dataset --cache_dir "+env['CM_DATASET_PREPROCESSED_PATH'] +# else: +# dataset_options = "" +# if env['CM_MODEL'] == "retinanet": +# dataset_options += " --dataset-list "+ env['CM_DATASET_ANNOTATIONS_FILE_PATH'] +# elif env['CM_MODEL'] == "resnet50": +# dataset_options += " --dataset-list "+ os.path.join(env['CM_DATASET_AUX_PATH'], "val.txt") +# env['DATA_DIR'] = env.get('CM_DATASET_PREPROCESSED_PATH') +# else: +# if 'CM_DATASET_PREPROCESSED_PATH' in env: +# env['DATA_DIR'] = env.get('CM_DATASET_PREPROCESSED_PATH') +# else: +# env['DATA_DIR'] = env.get('CM_DATASET_PATH') +# dataset_options = '' + + # Grigori added for ABTF +# dataset_path = env.get('CM_DATASET_PATH') +# env['DATA_DIR'] = dataset_path + +# dataset_options = " --dataset-list " + env['CM_DATASET_ANNOTATIONS_FILE_PATH'] +# dataset_options += " --cache_dir " + os.path.join(script_path, 'preprocessed-dataset') + + dataset_options = '' + + if env.get('CM_MLPERF_EXTRA_DATASET_ARGS', '') != '': + dataset_options += " " + env['CM_MLPERF_EXTRA_DATASET_ARGS'] + + if mode == "accuracy": + mode_extra_options += " --accuracy" + env['CM_OUTPUT_PREDICTIONS_PATH'] = os.path.join( + env['CM_DATASET_MLCOMMONS_COGNATA_PATH'], + env['CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS'], + 'Cognata_Camera_01_8M_png', + 'output') + + elif mode == "performance": + pass + + elif mode == "compliance": + + audit_full_path = env['CM_MLPERF_INFERENCE_AUDIT_PATH'] + mode_extra_options = " --audit '" + audit_full_path + "'" + + if env.get('CM_MLPERF_OUTPUT_DIR', '') == '': + env['CM_MLPERF_OUTPUT_DIR'] = os.getcwd() + + mlperf_implementation = env.get('CM_MLPERF_IMPLEMENTATION', 'reference') + + # Generate CMD + + # Grigori updated for ABTF demo +# cmd, run_dir = get_run_cmd(os_info, env, scenario_extra_options, mode_extra_options, dataset_options, mlperf_implementation) + cmd, run_dir = get_run_cmd_reference( + os_info, env, scenario_extra_options, mode_extra_options, dataset_options, script_path) + + if env.get('CM_NETWORK_LOADGEN', '') == "lon": + + run_cmd = i['state']['mlperf_inference_run_cmd'] + env['CM_SSH_RUN_COMMANDS'] = [] + env['CM_SSH_RUN_COMMANDS'].append( + run_cmd.replace( + "--network=lon", + "--network=sut") + " &") + + env['CM_MLPERF_RUN_CMD'] = cmd + env['CM_RUN_DIR'] = run_dir + env['CM_RUN_CMD'] = cmd + env['CK_PROGRAM_TMP_DIR'] = env.get('CM_ML_MODEL_PATH') # for tvm + + if env.get('CM_HOST_PLATFORM_FLAVOR', '') == "arm64": + env['CM_HOST_PLATFORM_FLAVOR'] = "aarch64" + + if not env.get('CM_COGNATA_ACCURACY_DUMP_FILE'): + env['CM_COGNATA_ACCURACY_DUMP_FILE'] = os.path.join( + env['OUTPUT_DIR'], "accuracy.txt") + + return {'return': 0} + + +def get_run_cmd_reference(os_info, env, scenario_extra_options, + mode_extra_options, dataset_options, script_path=None): + + q = '"' if os_info['platform'] == 'windows' else "'" + + ########################################################################## + # Grigori added for ABTF demo + + if env['CM_MODEL'] in ['retinanet']: + + run_dir = os.path.join(script_path, 'ref') + + env['RUN_DIR'] = run_dir + + env['OUTPUT_DIR'] = env['CM_MLPERF_OUTPUT_DIR'] + + cognata_dataset_path = env['CM_DATASET_MLCOMMONS_COGNATA_PATH'] +# cognata_dataset_path = env['CM_DATASET_PATH'] # Using open images +# dataset for some tests + + path_to_model = env.get( + 
'CM_MLPERF_CUSTOM_MODEL_PATH', + env.get( + 'CM_ML_MODEL_FILE_WITH_PATH', + env.get('CM_ML_MODEL_CODE_WITH_PATH'))) + env['MODEL_FILE'] = path_to_model + + cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " " + os.path.join(run_dir, "python", "main.py") + " --profile " + env['CM_MODEL'] + "-" + env['CM_MLPERF_BACKEND'] + \ + " --model=" + q + path_to_model + q + \ + " --dataset=" + env["CM_MLPERF_VISION_DATASET_OPTION"] + \ + " --dataset-path=" + q + cognata_dataset_path + q + \ + " --cache_dir=" + q + os.path.join(script_path, 'tmp-preprocessed-dataset') + q + \ + " --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + " " + \ + " --output " + q + env['OUTPUT_DIR'] + q + " " + \ + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] + \ + scenario_extra_options + mode_extra_options + dataset_options + + ########################################################################## + + return cmd, run_dir + + +def postprocess(i): + + env = i['env'] + + state = i['state'] + + inp = i['input'] + + return {'return': 0} diff --git a/script/app-mlperf-automotive-mlcommons-python/ref/LICENSE.md b/script/app-mlperf-automotive-mlcommons-python/ref/LICENSE.md new file mode 100644 index 000000000..f433b1a53 --- /dev/null +++ b/script/app-mlperf-automotive-mlcommons-python/ref/LICENSE.md @@ -0,0 +1,177 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS diff --git a/script/app-mlperf-automotive-mlcommons-python/ref/README.md b/script/app-mlperf-automotive-mlcommons-python/ref/README.md new file mode 100644 index 000000000..b2dcb039f --- /dev/null +++ b/script/app-mlperf-automotive-mlcommons-python/ref/README.md @@ -0,0 +1,2 @@ +Base code was taken from https://github.com/mlcommons/inference/tree/master/vision/classification_and_detection +and modified to prototype support for Cognata data set and ABTF model. 
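+
+As a rough illustration of how the harness talks to a model (this sketch is
+not part of the benchmark): the abstract `Backend` class in
+`python/backend.py` only requires `version`, `name`, `load` and `predict`,
+and the runner builds the feed as `{backend.inputs[0]: batch}`. The
+`BackendIdentity` name below is hypothetical:
+
+```python
+import backend  # abstract base class from python/backend.py
+
+
+class BackendIdentity(backend.Backend):
+    """Toy backend that echoes its input; useful only for wiring tests."""
+
+    def version(self):
+        return "0.0"
+
+    def name(self):
+        return "identity"
+
+    def load(self, model_path, inputs=None, outputs=None):
+        # the runner reads self.inputs[0] to build the predict() feed
+        self.inputs = inputs or ["image"]
+        self.outputs = outputs or ["boxes"]
+        return self
+
+    def predict(self, feed):
+        # called as predict({self.inputs[0]: batch}) by RunnerBase
+        return feed[self.inputs[0]]
+```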
diff --git a/script/app-mlperf-automotive-mlcommons-python/ref/python/__init__.py b/script/app-mlperf-automotive-mlcommons-python/ref/python/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/script/app-mlperf-automotive-mlcommons-python/ref/python/backend.py b/script/app-mlperf-automotive-mlcommons-python/ref/python/backend.py
new file mode 100644
index 000000000..955eddb88
--- /dev/null
+++ b/script/app-mlperf-automotive-mlcommons-python/ref/python/backend.py
@@ -0,0 +1,23 @@
+"""
+abstract backend class
+"""
+
+
+# pylint: disable=unused-argument,missing-docstring
+
+class Backend():
+    def __init__(self):
+        self.inputs = []
+        self.outputs = []
+
+    def version(self):
+        raise NotImplementedError("Backend:version")
+
+    def name(self):
+        raise NotImplementedError("Backend:name")
+
+    def load(self, model_path, inputs=None, outputs=None):
+        raise NotImplementedError("Backend:load")
+
+    def predict(self, feed):
+        raise NotImplementedError("Backend:predict")
diff --git a/script/app-mlperf-automotive-mlcommons-python/ref/python/backend_pytorch_native.py b/script/app-mlperf-automotive-mlcommons-python/ref/python/backend_pytorch_native.py
new file mode 100644
index 000000000..ec5401979
--- /dev/null
+++ b/script/app-mlperf-automotive-mlcommons-python/ref/python/backend_pytorch_native.py
@@ -0,0 +1,95 @@
+"""
+Pytorch native backend
+Extended by Grigori Fursin for the ABTF demo
+"""
+# pylint: disable=unused-argument,missing-docstring
+import torch  # currently supports pytorch1.0
+import torchvision
+import backend
+
+import os
+import sys
+import importlib
+
+
+class BackendPytorchNative(backend.Backend):
+    def __init__(self):
+        super(BackendPytorchNative, self).__init__()
+        self.sess = None
+        self.model = None
+        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
+
+        # Grigori added for ABTF model
+        self.config = None
+        self.num_classes = None
+        self.image_size = None
+
+    def version(self):
+        return torch.__version__
+
+    def name(self):
+        return "pytorch-native"
+
+    def image_format(self):
+        return "NCHW"
+
+    def load(self, model_path, inputs=None, outputs=None):
+
+        # From ABTF code
+        sys.path.insert(0, os.environ['CM_ML_MODEL_CODE_WITH_PATH'])
+
+        from src.transform import SSDTransformer
+        from src.utils import generate_dboxes, Encoder, colors, coco_classes
+        from src.model import SSD, ResNet
+
+        abtf_model_config = os.environ.get('CM_ABTF_ML_MODEL_CONFIG', '')
+
+        num_classes_str = os.environ.get('CM_ABTF_NUM_CLASSES', '').strip()
+        self.num_classes = int(
+            num_classes_str) if num_classes_str != '' else 15
+
+        self.config = importlib.import_module('config.'
+ abtf_model_config) + self.image_size = self.config.model['image_size'] + + self.model = SSD( + self.config.model, + backbone=ResNet( + self.config.model), + num_classes=self.num_classes) + + checkpoint = torch.load( + model_path, + map_location=torch.device( + self.device)) + + self.model.load_state_dict(checkpoint["model_state_dict"]) + + if self.device.startswith('cuda'): + self.model.cuda() + + self.model.eval() + + self.model = self.model.to(self.device) + + self.inputs = inputs + self.outputs = outputs + + return self + + def predict(self, feed): + # For ABTF + + # Always first element for now (later may stack for batching) + img = feed['image'][0] + + if torch.cuda.is_available(): + img = img.cuda() + + inp = img.unsqueeze(dim=0) + + with torch.no_grad(): + ploc, plabel = self.model(inp) + + output = (ploc, plabel) + + return output diff --git a/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata.py b/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata.py new file mode 100644 index 000000000..005fa4e2d --- /dev/null +++ b/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata.py @@ -0,0 +1,351 @@ +""" +Original code was extended by Grigori Fursin to support cognata data set +""" + +import json +import logging +import os +import time + +import cv2 +from PIL import Image + +import numpy as np +from pycocotools.cocoeval import COCOeval +# import pycoco +import dataset + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger("cognata") + + +class Cognata(dataset.Dataset): + def __init__(self, data_path, image_list, name, use_cache=0, image_size=None, + image_format="NHWC", pre_process=None, count=None, cache_dir=None, preprocessed_dir=None, use_label_map=False, threads=os.cpu_count(), + model_config=None, model_num_classes=None, model_image_size=None): # For ABTF + super().__init__() + + self.image_size = image_size + self.image_list = [] + self.label_list = [] + self.image_ids = [] + self.image_sizes = [] + self.count = count + self.use_cache = use_cache + self.data_path = data_path + self.pre_process = pre_process + self.use_label_map = use_label_map + + self.model_config = model_config + self.model_num_classes = model_num_classes + self.model_image_size = model_image_size + self.ignore_classes = None + self.files = None + self.dboxes = None + self.transform = None + self.label_map = None + self.label_info = None + self.image_bin = [] + self.encoder = None + self.targets = [] + + ####################################################################### + # From ABTF source + + import torch + from src.utils import generate_dboxes, Encoder + from src.transform import SSDTransformer + from src.dataset import prepare_cognata + import cognata_labels + import csv + import ast + + self.dboxes = generate_dboxes(model_config.model, model="ssd") + self.transform = SSDTransformer( + self.dboxes, self.model_image_size, val=True) + self.encoder = Encoder(self.dboxes) + + folders = model_config.dataset['folders'] + cameras = model_config.dataset['cameras'] + self.ignore_classes = [2, 25, 31] + if 'ignore_classes' in model_config.dataset: + self.ignore_classes = model_config.dataset['ignore_classes'] + + # Grigori added for tests + # Check if overridden by extrnal environment for tests + x = os.environ.get( + 'CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS', + '').strip() + if x != '': + folders = x.split(';') if ';' in x else [x] + + x = os.environ.get( + 'CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES', + '').strip() + if x != '': + cameras = x.split(';') if 
';' in x else [x] + + print('') + print('Cognata folders: {}'.format(str(folders))) + print('Cognata cameras: {}'.format(str(cameras))) + print('') + + # From ABTF source + print('') + print('Scanning Cognata dataset ...') + start = time.time() + files, label_map, label_info = prepare_cognata( + data_path, folders, cameras, self.ignore_classes) + + self.files = files + + print(' Number of files found: {}'.format(len(files))) + print(' Time: {:.2f} sec.'.format(time.time() - start)) + + if os.environ.get( + 'CM_ABTF_ML_MODEL_TRAINING_FORCE_COGNATA_LABELS', '') == 'yes': + label_map = cognata_labels.label_map + label_info = cognata_labels.label_info + + self.label_map = label_map + self.label_info = label_info + + if self.model_num_classes is not None: + self.model_num_classes = len(label_map.keys()) + + print('') + print('Preloading and preprocessing Cognata dataset on the fly ...') + + start = time.time() + + idx = 0 + + for f in self.files: + + image_name = self.files[idx]['img'] + + img = Image.open(image_name).convert('RGB') + + width, height = img.size + boxes = [] + boxes2 = [] + labels = [] + gt_boxes = [] + targets = [] + with open(self.files[idx]['ann']) as f: + reader = csv.reader(f) + rows = list(reader) + header = rows[0] + annotations = rows[1:] + bbox_index = header.index('bounding_box_2D') + class_index = header.index('object_class') + distance_index = header.index('center_distance') + for annotation in annotations: + bbox = annotation[bbox_index] + bbox = ast.literal_eval(bbox) + object_width = bbox[2] - bbox[0] + object_height = bbox[3] - bbox[1] + object_area = object_width * object_height + label = ast.literal_eval(annotation[class_index]) + distance = ast.literal_eval(annotation[distance_index]) + if object_area < 50 or int( + label) in self.ignore_classes or object_height < 8 or object_width < 8 or distance > 300: + continue + label = self.label_map[label] + boxes.append([bbox[0] / width, bbox[1] / height, + bbox[2] / width, bbox[3] / height]) + boxes2.append([bbox[0], bbox[1], bbox[2], bbox[3]]) + gt_boxes.append( + [bbox[0], bbox[1], bbox[2], bbox[3], label, 0, 0]) + labels.append(label) + + boxes = torch.tensor(boxes) + boxes2 = torch.tensor(boxes2) + labels = torch.tensor(labels) + gt_boxes = torch.tensor(gt_boxes) + + targets.append({'boxes': boxes2.to(device='cpu'), + 'labels': labels.to(device='cpu', + dtype=torch.int32)}) + + img, (height, width), boxes, labels = self.transform( + img, (height, width), boxes, labels, max_num=500) + + _, height, width = img.shape + + self.image_bin.append(img) + self.image_ids.append(idx) + self.image_list.append(image_name) + self.image_sizes.append((height, width)) + + self.label_list.append((labels, boxes)) + + self.targets.append(targets) + + # limit the dataset if requested + idx += 1 + if self.count is not None and idx >= self.count: + break + + print(' Time: {:.2f} sec.'.format(time.time() - start)) + print('') + + return + + def get_item(self, nr): + """Get image by number in the list.""" + + return self.image_bin[nr], self.label_list[nr] + + def get_item_loc(self, nr): + + return self.files[nr]['img'] + + # Grigori added here to be able to return Torch tensor and not Numpy + + def get_samples(self, id_list): + + data = [self.image_list_inmemory[idx] for idx in id_list] + labels = [self.label_list[idx] for idx in id_list] + + return data, labels + + +class PostProcessCognata: + """ + Post processing for tensorflow ssd-mobilenet style models + """ + + def __init__(self): + self.results = [] + self.good = 0 + self.total = 0 
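+        # good/total accumulate accuracy-mode statistics; the fields below
+        # are retained from the upstream COCO post-processor this class was
+        # adapted from and are not used by the Cognata flow itself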
+ self.content_ids = [] + self.use_inv_map = False + + def add_results(self, results): + self.results.extend(results) + + def __call__(self, results, ids, expected=None, result_dict=None, ): + + # Dummy + processed_results = [] + return processed_results + + def start(self): + self.results = [] + self.good = 0 + self.total = 0 + + def finalize(self, result_dict, ds=None, output_dir=None): + + # To be improved + + from torchmetrics.detection.mean_ap import MeanAveragePrecision + metric = MeanAveragePrecision( + iou_type="bbox", + class_metrics=True, + backend='faster_coco_eval') + + result_dict["good"] += self.good + result_dict["total"] += self.total + + preds = [] + targets = [] + # For now batch_size = 1 + for idx in range(0, len(self.results)): + preds.append(self.results[idx][0]) + id = self.results[idx][0]['id'] + targets.append(ds.targets[id][0]) + metric.update(preds, targets) + + metrics = metric.compute() + + print('=================================================') + import pprint + pp = pprint.PrettyPrinter(indent=4) + pp.pprint(metrics) + print('=================================================') + + classes = metrics['classes'].tolist() + map_per_classes = metrics['map_per_class'].tolist() + + final_map = {} + for c in range(0, len(classes)): + final_map[ds.label_info[classes[c]]] = float(map_per_classes[c]) + + result_dict["mAP"] = float(metrics['map']) + result_dict["mAP_classes"] = final_map + + +class PostProcessCognataPt(PostProcessCognata): + """ + Post processing required by ssd-resnet34 / pytorch + """ + + def __init__(self, nms_threshold, max_output, + score_threshold, height, width): + super().__init__() + self.nms_threshold = nms_threshold + self.max_output = max_output + self.score_threshold = score_threshold + self.height = height + self.width = width + + def __call__(self, results, ids, expected=None, result_dict=None): + # results come as: + # detection_boxes,detection_classes,detection_scores + + import torch + + processed_results = [] + + # For now 1 result (batch 1) - need to add support for batch size > 1 + # later + ploc = results[0] + plabel = results[1] + + # Get predictions (from cognata_eval) +# ploc, plabel = model(img) + ploc, plabel = ploc.float(), plabel.float() + + preds = [] + + for i in range(ploc.shape[0]): + dts = [] + labels = [] + scores = [] + + ploc_i = ploc[i, :, :].unsqueeze(0) + plabel_i = plabel[i, :, :].unsqueeze(0) + + result = self.encoder.decode_batch( + ploc_i, plabel_i, self.nms_threshold, self.max_output)[0] + + loc, label, prob = [r.cpu().numpy() for r in result] + for loc_, label_, prob_ in zip(loc, label, prob): + if label_ in expected[i][0]: + self.good += 1 + self.total += 1 + dts.append([loc_[0] * + self.width, loc_[1] * + self.height, loc_[2] * + self.width, loc_[3] * + self.height,]) + labels.append(label_) + scores.append(prob_) + + dts = torch.tensor(dts, device='cpu') + labels = torch.tensor(labels, device='cpu', dtype=torch.int32) + scores = torch.tensor(scores, device='cpu') + preds.append({'boxes': dts, 'labels': labels, + 'scores': scores, 'id': ids[i]}) + + # Only batch size supported + idx = 0 + + processed_results.append(preds) + + # self.total += 1 + + return processed_results diff --git a/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata_labels.py b/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata_labels.py new file mode 100644 index 000000000..b77f09b0e --- /dev/null +++ b/script/app-mlperf-automotive-mlcommons-python/ref/python/cognata_labels.py @@ -0,0 +1,49 @@ +label_map = { 
+ 0: 0, + 28: 1, + 29: 2, + 33: 3, + 8: 4, + 36: 5, + 13: 6, + 11: 7, + 35: 8, + 14: 9, + 9: 10, + 48: 11, + 10: 12, + 46: 13, + 44: 14} +label_info = {0: 'background', 1: 'Traffic_light', 2: 'Props', 3: 'TrafficSign', + 4: 'Car', 5: 'Van', 6: 'Rider', 7: 'Motorcycle', 8: 'Bicycle', + 9: 'Pedestrian', 10: 'Truck', 11: 'PersonalMobility', 12: 'Bus', 13: 'Trailer', 14: 'Animal'} + +colors = [None, (39, 129, 113), (164, 80, 133), (83, 122, 114), (99, 81, 172), (95, 56, 104), (37, 84, 86), + (14, 89, 122), + (80, 7, 65), (10, 102, 25), (90, 185, 109), (106, 110, + 132), (169, 158, 85), (188, 185, 26), (103, 1, 17), + (82, 144, 81), (92, 7, 184), (49, 81, 155), (179, + 177, 69), (93, 187, 158), (13, 39, 73), (12, 50, 60), + (16, 179, 33), (112, 69, 165), (15, 139, 63), (33, 191, + 159), (182, 173, 32), (34, 113, 133), (90, 135, 34), + (53, 34, 86), (141, 35, 190), (6, 171, 8), (118, 76, + 112), (89, 60, 55), (15, 54, 88), (112, 75, 181), + (42, 147, 38), (138, 52, 63), (128, 65, 149), (106, + 103, 24), (168, 33, 45), (28, 136, 135), (86, 91, 108), + (52, 11, 76), (142, 6, 189), (57, 81, 168), (55, 19, + 148), (182, 101, 89), (44, 65, 179), (1, 33, 26), + (122, 164, 26), (70, 63, 134), (137, 106, 82), (120, + 118, 52), (129, 74, 42), (182, 147, 112), (22, 157, 50), + (56, 50, 20), (2, 22, 177), (156, 100, 106), (21, + 35, 42), (13, 8, 121), (142, 92, 28), (45, 118, 33), + (105, 118, 30), (7, 185, 124), (46, 34, 146), (105, + 184, 169), (22, 18, 5), (147, 71, 73), (181, 64, 91), + (31, 39, 184), (164, 179, 33), (96, 50, 18), (95, 15, + 106), (113, 68, 54), (136, 116, 112), (119, 139, 130), + (31, 139, 34), (66, 6, 127), (62, 39, 2), (49, 99, + 180), (49, 119, 155), (153, 50, 183), (125, 38, 3), + (129, 87, 143), (49, 87, 40), (128, 62, 120), (73, 85, + 148), (28, 144, 118), (29, 9, 24), (175, 45, 108), + (81, 175, 64), (178, 19, 157), (74, 188, 190), (18, + 114, 2), (62, 128, 96), (21, 3, 150), (0, 6, 95), + (2, 20, 184), (122, 37, 185)] diff --git a/script/app-mlperf-automotive-mlcommons-python/ref/python/dataset.py b/script/app-mlperf-automotive-mlcommons-python/ref/python/dataset.py new file mode 100644 index 000000000..9b8af84f5 --- /dev/null +++ b/script/app-mlperf-automotive-mlcommons-python/ref/python/dataset.py @@ -0,0 +1,303 @@ +""" +dataset related classes and methods +""" + +# pylint: disable=unused-argument,missing-docstring + +import logging +import sys +import time + +import cv2 +import numpy as np + + +logging.basicConfig(level=logging.INFO) +log = logging.getLogger("dataset") + + +class Item(): + def __init__(self, label, img, idx): + self.label = label + self.img = img + self.idx = idx + self.start = time.time() + + +def usleep(sec): + if sys.platform == 'win32': + # on windows time.sleep() doesn't work to well + import ctypes + kernel32 = ctypes.windll.kernel32 + timer = kernel32.CreateWaitableTimerA( + ctypes.c_void_p(), True, ctypes.c_void_p()) + delay = ctypes.c_longlong(int(-1 * (10 * 1000000 * sec))) + kernel32.SetWaitableTimer( + timer, + ctypes.byref(delay), + 0, + ctypes.c_void_p(), + ctypes.c_void_p(), + False) + kernel32.WaitForSingleObject(timer, 0xffffffff) + else: + time.sleep(sec) + + +class Dataset(): + def __init__(self): + self.arrival = None + self.image_list = [] + self.label_list = [] + self.image_list_inmemory = {} + self.last_loaded = -1 + + def preprocess(self, use_cache=True): + raise NotImplementedError("Dataset:preprocess") + + def get_item_count(self): + return len(self.image_list) + + def get_list(self): + raise NotImplementedError("Dataset:get_list") 
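+    # The two methods below implement LoadGen's QuerySampleLibrary
+    # load/unload callbacks: samples are decoded into image_list_inmemory
+    # ahead of the timed phase so queries measure inference, not disk I/O.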
+ + def load_query_samples(self, sample_list): + self.image_list_inmemory = {} + for sample in sample_list: + self.image_list_inmemory[sample], _ = self.get_item(sample) + self.last_loaded = time.time() + + def unload_query_samples(self, sample_list): + if sample_list: + for sample in sample_list: + if sample in self.image_list_inmemory: + del self.image_list_inmemory[sample] + else: + self.image_list_inmemory = {} + + def get_samples(self, id_list): + data = np.array([self.image_list_inmemory[id] for id in id_list]) + return data, self.label_list[id_list] + + def get_item_loc(self, id): + raise NotImplementedError("Dataset:get_item_loc") + + +# +# Post processing +# +class PostProcessCommon: + def __init__(self, offset=0): + self.offset = offset + self.good = 0 + self.total = 0 + + def __call__(self, results, ids, expected=None, result_dict=None): + processed_results = [] + n = len(results[0]) + for idx in range(0, n): + result = results[0][idx] + self.offset + processed_results.append([result]) + if result == expected[idx]: + self.good += 1 + self.total += n + return processed_results + + def add_results(self, results): + pass + + def start(self): + self.good = 0 + self.total = 0 + + def finalize(self, results, ds=False, output_dir=None): + results["good"] = self.good + results["total"] = self.total + + +class PostProcessArgMax: + def __init__(self, offset=0): + self.offset = offset + self.good = 0 + self.total = 0 + + def __call__(self, results, ids, expected=None, result_dict=None): + processed_results = [] + results = np.argmax(results[0], axis=1) + n = results.shape[0] + for idx in range(0, n): + result = results[idx] + self.offset + processed_results.append([result]) + if result == expected[idx]: + self.good += 1 + self.total += n + return processed_results + + def add_results(self, results): + pass + + def start(self): + self.good = 0 + self.total = 0 + + def finalize(self, results, ds=False, output_dir=None): + results["good"] = self.good + results["total"] = self.total + + +# +# pre-processing +# + +def center_crop(img, out_height, out_width): + height, width, _ = img.shape + left = int((width - out_width) / 2) + right = int((width + out_width) / 2) + top = int((height - out_height) / 2) + bottom = int((height + out_height) / 2) + img = img[top:bottom, left:right] + return img + + +def resize_with_aspectratio( + img, out_height, out_width, scale=87.5, inter_pol=cv2.INTER_LINEAR): + height, width, _ = img.shape + new_height = int(100. * out_height / scale) + new_width = int(100. 
* out_width / scale) + if height > width: + w = new_width + h = int(new_height * height / width) + else: + h = new_height + w = int(new_width * width / height) + img = cv2.resize(img, (w, h), interpolation=inter_pol) + return img + + +def pre_process_vgg(img, dims=None, need_transpose=False): + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + output_height, output_width, _ = dims + cv2_interpol = cv2.INTER_AREA + img = resize_with_aspectratio( + img, + output_height, + output_width, + inter_pol=cv2_interpol) + img = center_crop(img, output_height, output_width) + img = np.asarray(img, dtype='float32') + + # normalize image + means = np.array([123.68, 116.78, 103.94], dtype=np.float32) + img -= means + + # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img + + +def pre_process_mobilenet(img, dims=None, need_transpose=False): + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + + output_height, output_width, _ = dims + img = resize_with_aspectratio( + img, + output_height, + output_width, + inter_pol=cv2.INTER_LINEAR) + img = center_crop(img, output_height, output_width) + img = np.asarray(img, dtype='float32') + + img /= 255.0 + img -= 0.5 + img *= 2 + + # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img + + +def pre_process_imagenet_pytorch(img, dims=None, need_transpose=False): + from PIL import Image + import torchvision.transforms.functional as F + + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + img = Image.fromarray(img) + img = F.resize(img, 256, Image.BILINEAR) + img = F.center_crop(img, 224) + img = F.to_tensor(img) + img = F.normalize( + img, mean=[ + 0.485, 0.456, 0.406], std=[ + 0.229, 0.224, 0.225], inplace=False) + if not need_transpose: + img = img.permute(1, 2, 0) # NHWC + img = np.asarray(img, dtype='float32') + return img + + +def maybe_resize(img, dims): + img = np.array(img, dtype=np.float32) + if len(img.shape) < 3 or img.shape[2] != 3: + # some images might be grayscale + img = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB) + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + if dims is not None: + im_height, im_width, _ = dims + img = cv2.resize(img, (im_width, im_height), + interpolation=cv2.INTER_LINEAR) + return img + + +def pre_process_coco_mobilenet(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + img = np.asarray(img, dtype=np.uint8) + # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img + + +def pre_process_coco_pt_mobilenet(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + img -= 127.5 + img /= 127.5 + # transpose if needed + if need_transpose: + img = img.transpose([2, 0, 1]) + return img + + +def pre_process_coco_resnet34(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + mean = np.array([0.485, 0.456, 0.406], dtype=np.float32) + std = np.array([0.229, 0.224, 0.225], dtype=np.float32) + + img = img / 255. - mean + img = img / std + + if need_transpose: + img = img.transpose([2, 0, 1]) + + return img + + +def pre_process_coco_resnet34_tf(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + mean = np.array([123.68, 116.78, 103.94], dtype=np.float32) + img = img - mean + if need_transpose: + img = img.transpose([2, 0, 1]) + + return img + + +def pre_process_openimages_retinanet(img, dims=None, need_transpose=False): + img = maybe_resize(img, dims) + img /= 255. 
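+    # inputs stay in [0, 1] here; any mean/std normalization is expected to
+    # happen inside the model (as in the torchvision RetinaNet transform)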
+    # transpose if needed
+    if need_transpose:
+        img = img.transpose([2, 0, 1])
+    return img
diff --git a/script/app-mlperf-automotive-mlcommons-python/ref/python/main.py b/script/app-mlperf-automotive-mlcommons-python/ref/python/main.py
new file mode 100644
index 000000000..e4462da8c
--- /dev/null
+++ b/script/app-mlperf-automotive-mlcommons-python/ref/python/main.py
@@ -0,0 +1,659 @@
+"""
+mlperf inference benchmarking tool
+"""
+
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+import argparse
+import array
+import collections
+import json
+import logging
+import os
+import sys
+import threading
+import time
+from queue import Queue
+from PIL import Image
+import mlperf_loadgen as lg
+import numpy as np
+import cv2
+import glob
+import dataset
+import cognata
+import cognata_labels
+
+# import imagenet
+# import coco
+# import openimages
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger("main")
+
+NANO_SEC = 1e9
+MILLI_SEC = 1000
+
+# pylint: disable=missing-docstring
+
+# the datasets we support
+SUPPORTED_DATASETS = {
+    "cognata-4mp-pt":
+        (cognata.Cognata, None, cognata.PostProcessCognataPt(0.5, 200, 0.05, 1440, 2560),
+         {"image_size": [1440, 2560, 3]}),
+    "cognata-8mp-pt":
+        (cognata.Cognata, None, cognata.PostProcessCognataPt(0.5, 200, 0.05, 2160, 3840),
+         {"image_size": [2160, 3840, 3]})
+}
+
+# pre-defined command line options to simplify things. They are used as
+# defaults and can be overwritten from the command line.
+
+SUPPORTED_PROFILES = {
+    "defaults": {
+        "dataset": "imagenet",
+        "backend": "tensorflow",
+        "cache": 0,
+        "max-batchsize": 32,
+    },
+
+    # retinanet
+    "retinanet-pytorch": {
+        "inputs": "image",
+        "outputs": "boxes,labels,scores",
+        "dataset": "openimages-800-retinanet",
+        "backend": "pytorch-native",
+        "model-name": "retinanet",
+    },
+}
+
+SCENARIO_MAP = {
+    "SingleStream": lg.TestScenario.SingleStream,
+    "MultiStream": lg.TestScenario.MultiStream,
+    "Server": lg.TestScenario.Server,
+    "Offline": lg.TestScenario.Offline,
+}
+
+last_timeing = []
+
+
+def get_args():
+    """Parse commandline."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--dataset",
+        choices=SUPPORTED_DATASETS.keys(),
+        help="dataset")
+    parser.add_argument(
+        "--dataset-path",
+        required=True,
+        help="path to the dataset")
+    parser.add_argument("--dataset-list", help="path to the dataset list")
+    parser.add_argument(
+        "--data-format",
+        choices=[
+            "NCHW",
+            "NHWC"],
+        help="data format")
+    parser.add_argument(
+        "--profile",
+        choices=SUPPORTED_PROFILES.keys(),
+        help="standard profiles")
+    parser.add_argument("--scenario", default="SingleStream",
+                        help="mlperf benchmark scenario, one of " + str(list(SCENARIO_MAP.keys())))
+    parser.add_argument(
+        "--max-batchsize",
+        type=int,
+        help="max batch size in a single inference")
+    parser.add_argument("--model", required=True, help="model file")
+    parser.add_argument("--output", default="output", help="test results")
+    parser.add_argument("--inputs", help="model inputs")
+    parser.add_argument("--outputs", help="model outputs")
+    parser.add_argument("--backend", help="runtime to use")
+    parser.add_argument(
+        "--model-name",
+        help="name of the mlperf model, e.g.
resnet50") + parser.add_argument( + "--threads", + default=os.cpu_count(), + type=int, + help="threads") + parser.add_argument("--qps", type=int, help="target qps") + parser.add_argument("--cache", type=int, default=0, help="use cache") + parser.add_argument( + "--cache_dir", + type=str, + default=None, + help="dir path for caching") + parser.add_argument( + "--preprocessed_dir", + type=str, + default=None, + help="dir path for storing preprocessed images (overrides cache_dir)") + parser.add_argument( + "--use_preprocessed_dataset", + action="store_true", + help="use preprocessed dataset instead of the original") + parser.add_argument( + "--accuracy", + action="store_true", + help="enable accuracy pass") + parser.add_argument( + "--find-peak-performance", + action="store_true", + help="enable finding peak performance pass") + parser.add_argument( + "--debug", + action="store_true", + help="debug, turn traces on") + + # file to use mlperf rules compliant parameters + parser.add_argument( + "--mlperf_conf", + default="../../mlperf.conf", + help="mlperf rules config") + # file for user LoadGen settings such as target QPS + parser.add_argument( + "--user_conf", + default="user.conf", + help="user config for user LoadGen settings such as target QPS") + # file for LoadGen audit settings + parser.add_argument( + "--audit_conf", + default="audit.config", + help="config for LoadGen audit settings") + + # below will override mlperf rules compliant settings - don't use for + # official submission + parser.add_argument("--time", type=int, help="time to scan in seconds") + parser.add_argument("--count", type=int, help="dataset items to use") + parser.add_argument( + "--performance-sample-count", + type=int, + help="performance sample count") + parser.add_argument( + "--max-latency", + type=float, + help="mlperf max latency in pct tile") + parser.add_argument( + "--samples-per-query", + default=8, + type=int, + help="mlperf multi-stream samples per query") + args = parser.parse_args() + + # don't use defaults in argparser. 
Instead we default to a dict, override that with a profile
+    # and take this as default unless the command line gives a value
+    defaults = SUPPORTED_PROFILES["defaults"]
+
+    if args.profile:
+        profile = SUPPORTED_PROFILES[args.profile]
+        defaults.update(profile)
+    for k, v in defaults.items():
+        kc = k.replace("-", "_")
+        if getattr(args, kc) is None:
+            setattr(args, kc, v)
+    if args.inputs:
+        args.inputs = args.inputs.split(",")
+    if args.outputs:
+        args.outputs = args.outputs.split(",")
+
+    if args.scenario not in SCENARIO_MAP:
+        parser.error("valid scenarios:" + str(list(SCENARIO_MAP.keys())))
+    return args
+
+
+def get_backend(backend):
+    if backend == "null":
+        from backend_null import BackendNull
+        backend = BackendNull()
+    elif backend == "pytorch":
+        from backend_pytorch import BackendPytorch
+        backend = BackendPytorch()
+    elif backend == "pytorch-native":
+        from backend_pytorch_native import BackendPytorchNative
+        backend = BackendPytorchNative()
+    else:
+        raise ValueError("unknown backend: " + backend)
+    return backend
+
+
+class Item:
+    """An item that we queue for processing by the thread pool."""
+
+    def __init__(self, query_id, content_id, img, label=None):
+        self.query_id = query_id
+        self.content_id = content_id
+        self.img = img
+        self.label = label
+        self.start = time.time()
+
+
+class RunnerBase:
+    def __init__(self, model, ds, threads, post_proc=None, max_batchsize=128):
+        self.take_accuracy = False
+        self.ds = ds
+        self.model = model
+        self.post_process = post_proc
+        self.threads = threads
+        self.take_accuracy = False
+        self.max_batchsize = max_batchsize
+        self.result_timing = []
+        self.proc_results = []
+
+    def handle_tasks(self, tasks_queue):
+        pass
+
+    def start_run(self, result_dict, take_accuracy):
+        self.result_dict = result_dict
+        self.result_timing = []
+        self.take_accuracy = take_accuracy
+        self.post_process.start()
+
+    def run_one_item(self, qitem):
+        # run the prediction
+        processed_results = []
+        try:
+            results = self.model.predict({self.model.inputs[0]: qitem.img})
+
+            processed_results = self.post_process(
+                results, qitem.content_id, qitem.label, self.result_dict)
+            if self.take_accuracy:
+                self.post_process.add_results(processed_results)
+
+            self.result_timing.append(time.time() - qitem.start)
+
+        except Exception as ex:  # pylint: disable=broad-except
+            src = [self.ds.get_item_loc(i) for i in qitem.content_id]
+            log.error("thread: failed on contentid=%s, %s", src, ex)
+            # since post_process will not run, fake empty responses
+            processed_results = [[]] * len(qitem.query_id)
+        finally:
+            response_array_refs = []
+            response = []
+            for idx, query_id in enumerate(qitem.query_id):
+
+                # Temporary hack for Cognata to add only boxes - fix
+                processed_results2 = [x['boxes'].numpy()
+                                      for x in processed_results[idx]]
+                self.proc_results.append([{'boxes': x['boxes'].tolist(), 'scores': x['scores'].tolist(), 'labels': x['labels'].tolist(), 'id': x['id']}
+                                          for x in processed_results[idx]])
+                response_array = array.array("B", np.array(
+                    processed_results2, np.float32).tobytes())
+                response_array_refs.append(response_array)
+                bi = response_array.buffer_info()
+                response.append(lg.QuerySampleResponse(query_id, bi[0], bi[1]))
+            lg.QuerySamplesComplete(response)
+
+    def enqueue(self, query_samples):
+        idx = [q.index for q in query_samples]
+        query_id = [q.id for q in query_samples]
+        if len(query_samples) < self.max_batchsize:
+            data, label = self.ds.get_samples(idx)
+            self.run_one_item(Item(query_id, idx, data, label))
+        else:
+            bs = self.max_batchsize
+            for i in range(0, len(idx), bs):
+                data, label = self.ds.get_samples(idx[i:i + bs])
+                self.run_one_item(
+                    Item(query_id[i:i + bs], idx[i:i + bs], data, label))
+
+    def finish(self):
+        pass
+
+
+class QueueRunner(RunnerBase):
+    def __init__(self, model, ds, threads, post_proc=None, max_batchsize=128):
+        super().__init__(model, ds, threads, post_proc, max_batchsize)
+        self.tasks = Queue(maxsize=threads * 4)
+        self.workers = []
+        self.result_dict = {}
+
+        for _ in range(self.threads):
+            worker = threading.Thread(
+                target=self.handle_tasks, args=(
+                    self.tasks,))
+            worker.daemon = True
+            self.workers.append(worker)
+            worker.start()
+
+    def handle_tasks(self, tasks_queue):
+        """Worker thread."""
+        while True:
+            qitem = tasks_queue.get()
+            if qitem is None:
+                # None in the queue indicates the parent wants us to exit
+                tasks_queue.task_done()
+                break
+            self.run_one_item(qitem)
+            tasks_queue.task_done()
+
+    def enqueue(self, query_samples):
+        idx = [q.index for q in query_samples]
+        query_id = [q.id for q in query_samples]
+
+        if len(query_samples) < self.max_batchsize:
+            data, label = self.ds.get_samples(idx)
+            self.tasks.put(Item(query_id, idx, data, label))
+        else:
+            bs = self.max_batchsize
+            for i in range(0, len(idx), bs):
+                ie = i + bs
+
+                data, label = self.ds.get_samples(idx[i:ie])
+                self.tasks.put(Item(query_id[i:ie], idx[i:ie], data, label))
+
+    def finish(self):
+        # exit all threads
+        for _ in self.workers:
+            self.tasks.put(None)
+        for worker in self.workers:
+            worker.join()
+
+
+def add_results(final_results, name, result_dict,
+                result_list, took, show_accuracy=False):
+    percentiles = [50., 80., 90., 95., 99., 99.9]
+    buckets = np.percentile(result_list, percentiles).tolist()
+    buckets_str = ",".join(["{}:{:.4f}".format(p, b)
+                            for p, b in zip(percentiles, buckets)])
+
+    if result_dict["total"] == 0:
+        result_dict["total"] = len(result_list)
+    # this is what we record for each run
+    result = {
+        "took": took,
+        "mean": np.mean(result_list),
+        "percentiles": {str(k): v for k, v in zip(percentiles, buckets)},
+        "qps": len(result_list) / took,
+        "count": len(result_list),
+        "good_items": result_dict["good"],
+        "total_items": result_dict["total"],
+    }
+    acc_str = ""
+    if show_accuracy:
+        result["accuracy"] = 100. * result_dict["good"] / result_dict["total"]
+        acc_str = ", acc={:.3f}%".format(result["accuracy"])
+    if "mAP" in result_dict:
+        result["mAP"] = 100.
* result_dict["mAP"] + acc_str += ", mAP={:.3f}%".format(result["mAP"]) + if os.environ.get('CM_COGNATA_ACCURACY_DUMP_FILE', '') != '': + accuracy_file = os.environ['CM_COGNATA_ACCURACY_DUMP_FILE'] + with open(accuracy_file, "w") as f: + f.write("{:.3f}%".format(result["mAP"])) + + if "mAP_classes" in result_dict: + result['mAP_per_classes'] = result_dict["mAP_classes"] + acc_str += ", mAP_classes={}".format(result_dict["mAP_classes"]) + + # add the result to the result dict + final_results[name] = result + + # to stdout + print("{} qps={:.2f}, mean={:.4f}, time={:.3f}{}, queries={}, tiles={}".format( + name, result["qps"], result["mean"], took, acc_str, + len(result_list), buckets_str)) + + print('======================================================================') + +######################################################################### + + +def main(): + print('======================================================================') + + global last_timeing + args = get_args() + + log.info(args) + + # Find backend + backend = get_backend(args.backend) + + # Load model to backend (Grigori moved here before dataset + # since we get various info about pre-processing from the model) + + print('') + print('Loading model ...') + print('') + + model = backend.load(args.model, inputs=args.inputs, outputs=args.outputs) + +# print (model.num_classes) +# print (model.image_size) + + # --count applies to accuracy mode only and can be used to limit the number of images + # for testing. + count_override = False + count = args.count + if count: + count_override = True + + # dataset to use + wanted_dataset, pre_proc, post_proc, kwargs = SUPPORTED_DATASETS[args.dataset] +# if args.use_preprocessed_dataset: +# pre_proc=None + + print('') + print('Loading dataset and preprocessing if needed ...') + print('* Dataset path: {}'.format(args.dataset_path)) + print('* Preprocessed cache path: {}'.format(args.cache_dir)) + print('') + + ds = wanted_dataset(data_path=args.dataset_path, + image_list=args.dataset_list, + name=args.dataset, + pre_process=pre_proc, + use_cache=args.cache, + count=count, + cache_dir=args.cache_dir, + preprocessed_dir=args.preprocessed_dir, + threads=args.threads, + model_config=model.config, # For ABTF + model_num_classes=model.num_classes, # For ABTF + model_image_size=model.image_size, # For ABTF + **kwargs) + + # For ABTF - maybe find cleaner way + post_proc.encoder = ds.encoder + + final_results = { + "runtime": model.name(), + "version": model.version(), + "time": int(time.time()), + "args": vars(args), + "cmdline": str(args), + } + + mlperf_conf = os.path.abspath(args.mlperf_conf) + if not os.path.exists(mlperf_conf): + log.error("{} not found".format(mlperf_conf)) + sys.exit(1) + + user_conf = os.path.abspath(args.user_conf) + if not os.path.exists(user_conf): + log.error("{} not found".format(user_conf)) + sys.exit(1) + + audit_config = os.path.abspath(args.audit_conf) + + if args.output: + output_dir = os.path.abspath(args.output) + os.makedirs(output_dir, exist_ok=True) + os.chdir(output_dir) + + # + # make one pass over the dataset to validate accuracy + # + count = ds.get_item_count() + + # warmup + if os.environ.get('CM_ABTF_ML_MODEL_SKIP_WARMUP', + '').strip().lower() != 'yes': + ds.load_query_samples([0]) + for _ in range(5): + img, _ = ds.get_samples([0]) + _ = backend.predict({backend.inputs[0]: img}) + ds.unload_query_samples(None) + + scenario = SCENARIO_MAP[args.scenario] + runner_map = { + lg.TestScenario.SingleStream: RunnerBase, + 
lg.TestScenario.MultiStream: QueueRunner, + lg.TestScenario.Server: QueueRunner, + lg.TestScenario.Offline: QueueRunner + } + + runner = runner_map[scenario]( + model, + ds, + args.threads, + post_proc=post_proc, + max_batchsize=args.max_batchsize) + + def issue_queries(query_samples): + runner.enqueue(query_samples) + + def flush_queries(): + pass + + log_output_settings = lg.LogOutputSettings() + log_output_settings.outdir = output_dir + log_output_settings.copy_summary_to_stdout = False + log_settings = lg.LogSettings() + log_settings.enable_trace = args.debug + log_settings.log_output = log_output_settings + + settings = lg.TestSettings() + settings.FromConfig(mlperf_conf, args.model_name, args.scenario) + settings.FromConfig(user_conf, args.model_name, args.scenario) + settings.scenario = scenario + settings.mode = lg.TestMode.PerformanceOnly + if args.accuracy: + settings.mode = lg.TestMode.AccuracyOnly + if args.find_peak_performance: + settings.mode = lg.TestMode.FindPeakPerformance + + if args.time: + # override the time we want to run + settings.min_duration_ms = args.time * MILLI_SEC + settings.max_duration_ms = args.time * MILLI_SEC + + if args.qps: + qps = float(args.qps) + settings.server_target_qps = qps + settings.offline_expected_qps = qps + + if count_override: + settings.min_query_count = count + settings.max_query_count = count + + if args.samples_per_query: + settings.multi_stream_samples_per_query = args.samples_per_query + + if args.max_latency: + settings.server_target_latency_ns = int(args.max_latency * NANO_SEC) + settings.multi_stream_expected_latency_ns = int( + args.max_latency * NANO_SEC) + + performance_sample_count = args.performance_sample_count if args.performance_sample_count else min( + count, 500) + sut = lg.ConstructSUT(issue_queries, flush_queries) + qsl = lg.ConstructQSL( + count, + performance_sample_count, + ds.load_query_samples, + ds.unload_query_samples) + + log.info("starting {}".format(scenario)) + result_dict = {"good": 0, "total": 0, "scenario": str(scenario)} + runner.start_run(result_dict, args.accuracy) + + lg.StartTestWithLogSettings(sut, qsl, settings, log_settings, audit_config) + + if not last_timeing: + last_timeing = runner.result_timing + if args.accuracy: + post_proc.finalize(result_dict, ds, output_dir=args.output) + + add_results(final_results, "{}".format(scenario), + result_dict, last_timeing, time.time() - ds.last_loaded, args.accuracy) + + runner.finish() + lg.DestroyQSL(qsl) + lg.DestroySUT(sut) + # + # write final results + # + if args.output: + with open("results.json", "w") as f: + json.dump(final_results, f, sort_keys=True, indent=4) + if args.accuracy: + print('Saving model output examples ...') + files = glob.glob( + os.path.join( + args.dataset_path, + '10002_Urban_Clear_Morning', + 'Cognata_Camera_01_8M_png', + '*.png')) + files = sorted(files) + for pred_batch in runner.proc_results: + for pred in pred_batch: + f = files[pred['id']] + cls_threshold = 0.3 + img = Image.open(f).convert("RGB") + loc, label, prob = np.array( + pred['boxes']), np.array( + pred['labels']), np.array( + pred['scores']) + best = np.argwhere(prob > cls_threshold).squeeze(axis=1) + + loc = loc[best] + label = label[best] + prob = prob[best] + + # Update input image with boxes and predictions + output_img = cv2.imread(f) + if len(loc) > 0: + + loc = loc.astype(np.int32) + + for box, lb, pr in zip(loc, label, prob): + category = cognata_labels.label_info[lb] + color = cognata_labels.colors[lb] + + xmin, ymin, xmax, ymax = box + + cv2.rectangle( 
+ output_img, (xmin, ymin), (xmax, ymax), color, 2) + + text_size = cv2.getTextSize( + category + " : %.2f" % + pr, cv2.FONT_HERSHEY_PLAIN, 1, 1)[0] + + cv2.rectangle( + output_img, (xmin, ymin), (xmin + text_size[0] + 3, ymin + text_size[1] + 4), color, -1) + + cv2.putText( + output_img, category + " : %.2f" % pr, + (xmin, ymin + + text_size[1] + + 4), cv2.FONT_HERSHEY_PLAIN, 1, + (255, 255, 255), 1) + output = "{}_prediction.jpg".format(f[:-4]) + + d1 = os.path.join(os.path.dirname(output), 'output') + if not os.path.isdir(d1): + os.makedirs(d1) + + d2 = os.path.basename(output) + + output = os.path.join(d1, d2) + cv2.imwrite(output, output_img) + with open("preds.json", "w") as f: + json.dump(runner.proc_results, f, indent=4) + + +if __name__ == "__main__": + main() diff --git a/script/app-mlperf-automotive-mlcommons-python/user.conf b/script/app-mlperf-automotive-mlcommons-python/user.conf new file mode 100644 index 000000000..edffe6912 --- /dev/null +++ b/script/app-mlperf-automotive-mlcommons-python/user.conf @@ -0,0 +1,6 @@ +# Please set these fields depending on the performance of your system to +# override default LoadGen settings. +*.SingleStream.target_latency = 10 +*.MultiStream.target_latency = 80 +*.Server.target_qps = 1.0 +*.Offline.target_qps = 1.0 diff --git a/script/app-mlperf-automotive/_cm.yaml b/script/app-mlperf-automotive/_cm.yaml new file mode 100644 index 000000000..cfb22101f --- /dev/null +++ b/script/app-mlperf-automotive/_cm.yaml @@ -0,0 +1,287 @@ +alias: app-mlperf-automotive +uid: f7488ce376484fd2 + +automation_alias: script +automation_uid: 5b4e0237da074764 + +category: "Modular MLPerf inference benchmark pipeline for ABTF model" + + +# User-friendly tags to find this CM script +tags: +- app +- app-mlperf-inference +- mlperf-inference +- abtf-inference + +predeps: no + +# Default environment +default_env: + CM_MLPERF_LOADGEN_MODE: accuracy + CM_MLPERF_LOADGEN_SCENARIO: Offline + CM_OUTPUT_FOLDER_NAME: test_results + CM_MLPERF_RUN_STYLE: test + CM_TEST_QUERY_COUNT: '10' + CM_MLPERF_QUANTIZATION: off + CM_MLPERF_SUT_NAME_IMPLEMENTATION_PREFIX: reference + CM_MLPERF_SUT_NAME_RUN_CONFIG_SUFFIX: '' + + +# Map script inputs to environment variables +input_mapping: + device: CM_MLPERF_DEVICE + count: CM_MLPERF_LOADGEN_QUERY_COUNT + docker: CM_RUN_DOCKER_CONTAINER + hw_name: CM_HW_NAME + imagenet_path: IMAGENET_PATH + max_batchsize: CM_MLPERF_LOADGEN_MAX_BATCHSIZE + mode: CM_MLPERF_LOADGEN_MODE + num_threads: CM_NUM_THREADS + threads: CM_NUM_THREADS + dataset: CM_MLPERF_VISION_DATASET_OPTION + model: CM_MLPERF_CUSTOM_MODEL_PATH + output_dir: OUTPUT_BASE_DIR + power: CM_MLPERF_POWER + power_server: CM_MLPERF_POWER_SERVER_ADDRESS + ntp_server: CM_MLPERF_POWER_NTP_SERVER + max_amps: CM_MLPERF_POWER_MAX_AMPS + max_volts: CM_MLPERF_POWER_MAX_VOLTS + regenerate_files: CM_REGENERATE_MEASURE_FILES + rerun: CM_RERUN + scenario: CM_MLPERF_LOADGEN_SCENARIO + test_query_count: CM_TEST_QUERY_COUNT + clean: CM_MLPERF_CLEAN_SUBMISSION_DIR + dataset_args: CM_MLPERF_EXTRA_DATASET_ARGS + target_qps: CM_MLPERF_LOADGEN_TARGET_QPS + target_latency: CM_MLPERF_LOADGEN_TARGET_LATENCY + offline_target_qps: CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS + server_target_qps: CM_MLPERF_LOADGEN_SERVER_TARGET_QPS + singlestream_target_latency: CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY + multistream_target_latency: CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY + output: CM_MLPERF_OUTPUT_DIR + +# Env keys which are exposed to higher level scripts +new_env_keys: + - CM_MLPERF_* + - 
CM_OUTPUT_PREDICTIONS_PATH + +new_state_keys: + - cm-mlperf-inference-results* + +# Dependencies on other CM scripts +deps: + + # Detect host OS features + - tags: detect,os + + # Detect host CPU features + - tags: detect,cpu + + # Install system dependencies on a given host + - tags: get,sys-utils-cm + + # Detect/install python + - tags: get,python + names: + - python + - python3 + + # Use cmind inside CM scripts + - tags: get,generic-python-lib,_package.cmind + + - tags: get,mlperf,inference,utils + + +docker: + cm_repo: gateoverflow@cm4mlops + use_host_group_id: True + use_host_user_id: True + real_run: false + interactive: True + cm_repos: 'cm pull repo mlcommons@cm4abtf --checkout=poc' + deps: + - tags: get,abtf,scratch,space + mounts: + - "${{ CM_ABTF_SCRATCH_PATH_DATASETS }}:${{ CM_ABTF_SCRATCH_PATH_DATASETS }}" + + +# Variations to customize dependencies +variations: + + # Implementation + mlcommons-python: + group: implementation + default: true + env: + CM_MLPERF_PYTHON: 'yes' + CM_MLPERF_IMPLEMENTATION: reference + prehook_deps: + - names: + - python-reference-abtf-inference + - abtf-inference-implementation + tags: run-mlperf-inference,demo,abtf-model + skip_if_env: + CM_SKIP_RUN: + - yes + + + # Execution modes + fast: + group: execution-mode + env: + CM_FAST_FACTOR: '5' + CM_OUTPUT_FOLDER_NAME: fast_results + CM_MLPERF_RUN_STYLE: fast + + test: + group: execution-mode + default: true + env: + CM_OUTPUT_FOLDER_NAME: test_results + CM_MLPERF_RUN_STYLE: test + + valid: + group: execution-mode + env: + CM_OUTPUT_FOLDER_NAME: valid_results + CM_MLPERF_RUN_STYLE: valid + + + # ML engine + onnxruntime: + group: framework + env: + CM_MLPERF_BACKEND: onnxruntime + add_deps_recursive: + abtf-inference-implementation: + tags: _onnxruntime + + + onnxruntime,cpu: + env: + CM_MLPERF_BACKEND_VERSION: <<>> + + onnxruntime,cuda: + env: + CM_MLPERF_BACKEND_VERSION: <<>> + ONNXRUNTIME_PREFERRED_EXECUTION_PROVIDER: "CUDAExecutionProvider" + + + pytorch: + group: framework + default: true + env: + CM_MLPERF_BACKEND: pytorch + CM_MLPERF_BACKEND_VERSION: <<>> + add_deps_recursive: + abtf-inference-implementation: + tags: _pytorch + + + abtf-demo-model: + env: + CM_MODEL: retinanet + group: models + add_deps_recursive: + abtf-inference-implementation: + tags: _abtf-demo-model + + abtf-poc-model: + env: + CM_MODEL: retinanet + default: true + group: models + add_deps_recursive: + abtf-inference-implementation: + tags: _abtf-poc-model + docker: + deps: + - tags: get,dataset,raw,mlcommons-cognata,_abtf-poc + names: + - raw-dataset-mlcommons-cognata + enable_if_env: + CM_DATASET_MLCOMMONS_COGNATA_DOWNLOAD_IN_HOST: + - yes + + mounts: + - "${{ CM_DATASET_MLCOMMONS_COGNATA_PATH }}:${{ CM_DATASET_MLCOMMONS_COGNATA_PATH }}" + + + # Target devices + cpu: + group: device + default: true + env: + CM_MLPERF_DEVICE: cpu + CUDA_VISIBLE_DEVICES: '' + USE_CUDA: no + USE_GPU: no + add_deps_recursive: + abtf-inference-implementation: + tags: _cpu + + cuda: + group: device + env: + CM_MLPERF_DEVICE: gpu + USE_CUDA: yes + USE_GPU: yes + add_deps_recursive: + abtf-inference-implementation: + tags: _cuda + docker: + all_gpus: 'yes' + base_image: nvcr.io/nvidia/pytorch:24.03-py3 + + + + # Loadgen scenarios + offline: + env: + CM_MLPERF_LOADGEN_SCENARIO: Offline + add_deps_recursive: + abtf-inference-implementation: + tags: _offline + multistream: + env: + CM_MLPERF_LOADGEN_SCENARIO: MultiStream + add_deps_recursive: + abtf-inference-implementation: + tags: _multistream + singlestream: + group: loadgen-scenario + default: 
true + env: + CM_MLPERF_LOADGEN_SCENARIO: SingleStream + add_deps_recursive: + abtf-inference-implementation: + tags: _singlestream + server: + env: + CM_MLPERF_LOADGEN_SCENARIO: Server + add_deps_recursive: + abtf-inference-implementation: + tags: _server + + mvp-demo: + env: + CM_ABTF_MVP_DEMO: yes + CM_MLPERF_VISION_DATASET_OPTION: cognata-8mp-pt + CM_ABTF_ML_MODEL_CONFIG: baseline_8MP_ss_scales_all + CM_ABTF_NUM_CLASSES: 15 + CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS: 10002_Urban_Clear_Morning + CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES: Cognata_Camera_01_8M + CM_ABTF_ML_MODEL_TRAINING_FORCE_COGNATA_LABELS: 'yes' + CM_ABTF_ML_MODEL_SKIP_WARMUP: 'yes' + + poc-demo: + env: + CM_ABTF_POC_DEMO: yes + CM_MLPERF_VISION_DATASET_OPTION: cognata-8mp-pt + CM_ABTF_ML_MODEL_CONFIG: baseline_8MP_ss_scales_fm1_5x5_all + CM_ABTF_NUM_CLASSES: 15 + CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS: 10002_Urban_Clear_Morning + CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES: Cognata_Camera_01_8M + CM_ABTF_ML_MODEL_TRAINING_FORCE_COGNATA_LABELS: 'yes' + CM_ABTF_ML_MODEL_SKIP_WARMUP: 'yes' diff --git a/script/app-mlperf-automotive/customize.py b/script/app-mlperf-automotive/customize.py new file mode 100644 index 000000000..070f2b3c1 --- /dev/null +++ b/script/app-mlperf-automotive/customize.py @@ -0,0 +1,103 @@ +from cmind import utils +import os +import json +import shutil +import subprocess +import mlperf_utils +from log_parser import MLPerfLog + + +def preprocess(i): + + os_info = i['os_info'] + env = i['env'] + state = i['state'] + script_path = i['run_script_input']['path'] + + if 'cmd' in i['input']: + state['mlperf_inference_run_cmd'] = "cm run script " + \ + " ".join(i['input']['cmd']) + + state['mlperf-inference-implementation'] = {} + + run_state = i['run_script_input']['run_state'] + state['mlperf-inference-implementation']['script_id'] = run_state['script_id'] + \ + ":" + ",".join(run_state['script_variation_tags']) + + return {'return': 0} + + +def postprocess(i): + + env = i['env'] + state = i['state'] + + inp = i['input'] + os_info = i['os_info'] + + xsep = '^' if os_info['platform'] == 'windows' else '\\' + + env['CMD'] = '' + + # if env.get('CM_MLPERF_USER_CONF', '') == '': + # return {'return': 0} + + output_dir = env['CM_MLPERF_OUTPUT_DIR'] + mode = env['CM_MLPERF_LOADGEN_MODE'] + + model = env['CM_MODEL'] + model_full_name = env.get('CM_ML_MODEL_FULL_NAME', model) + + scenario = env['CM_MLPERF_LOADGEN_SCENARIO'] + + if not os.path.exists(output_dir) or not os.path.exists( + os.path.join(output_dir, "mlperf_log_summary.txt")): + # No output, fake_run? 
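+        # (in a fake run CM prints the commands instead of executing them, so
+        # missing LoadGen logs are expected here and not treated as an error)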
+ return {'return': 0} + + mlperf_log = MLPerfLog(os.path.join(output_dir, "mlperf_log_detail.txt")) + if mode == "performance": + result = mlperf_log['result_mean_latency_ns'] / 1000000 + elif mode == "accuracy": + if not env.get( + 'CM_COGNATA_ACCURACY_DUMP_FILE'): # can happen while reusing old runs + env['CM_COGNATA_ACCURACY_DUMP_FILE'] = os.path.join( + output_dir, "accuracy.txt") + acc = "" + if os.path.exists(env['CM_COGNATA_ACCURACY_DUMP_FILE']): + with open(env['CM_COGNATA_ACCURACY_DUMP_FILE'], "r") as f: + acc = f.readline() + result = acc + else: + return {'return': 1, 'error': f"Unknown mode {mode}"} + + valid = {'performance': True, 'accuracy': True} # its POC + power_result = None # No power measurement in POC + + # result, valid, power_result = mlperf_utils.get_result_from_log(env['CM_MLPERF_LAST_RELEASE'], model, scenario, output_dir, mode) + + if not state.get('cm-mlperf-inference-results'): + state['cm-mlperf-inference-results'] = {} + if not state.get('cm-mlperf-inference-results-last'): + state['cm-mlperf-inference-results-last'] = {} + if not state['cm-mlperf-inference-results'].get( + state['CM_SUT_CONFIG_NAME']): + state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']] = {} + if not state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME'] + ].get(model): + state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME']][model] = {} + if not state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME'] + ][model].get(scenario): + state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME'] + ][model][scenario] = {} + + state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME'] + ][model][scenario][mode] = result + state['cm-mlperf-inference-results'][state['CM_SUT_CONFIG_NAME'] + ][model][scenario][mode + '_valid'] = valid.get(mode, False) + + state['cm-mlperf-inference-results-last'][mode] = result + state['cm-mlperf-inference-results-last'][mode + + '_valid'] = valid.get(mode, False) + + return {'return': 0} diff --git a/script/get-dataset-cognata-mlcommons/README-extra.md b/script/get-dataset-cognata-mlcommons/README-extra.md new file mode 100644 index 000000000..0bb16ad46 --- /dev/null +++ b/script/get-dataset-cognata-mlcommons/README-extra.md @@ -0,0 +1,62 @@ +Examples: + +### Check flags + +```bash +cm run script --tags=get,raw,dataset,mlcommons-cognata --help +``` + +### Import already downloaded dataset + +Note that this automation will attempt to install aria2 tool via sudo apt on Ubuntu. 
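+
+If you prefer to install aria2 yourself beforehand (a manual alternative; the
+package name below assumes Ubuntu/Debian):
+
+```bash
+sudo apt update && sudo apt install -y aria2
+```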
+
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata --import=${HOME}/datasets/cognata -j
+cm run script --tags=get,raw,dataset,mlcommons-cognata --import=${HOME}/datasets/cognata -j --private_url="{ADD PRIVATE URL FOR COGNATA} FOR FULL AUTOMATION"
+cm run script --tags=get,raw,dataset,mlcommons-cognata --import=%userprofile%\datasets\cognata -j
+cm run script --tags=get,raw,dataset,mlcommons-cognata --import=D:\Work2\cognata -j
+```
+
+### Download dataset to CM cache
+
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata
+```
+
+### Find dataset in CM cache
+
+```bash
+cm show cache --tags=dataset,mlcommons-cognata
+
+cm rm cache --tags=dataset,mlcommons-cognata
+```
+
+### Download dataset to some local directory
+
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata --path=${HOME}/datasets/cognata -j
+cm run script --tags=get,raw,dataset,mlcommons-cognata --path=%userprofile%\datasets\cognata -j
+cm run script --tags=get,raw,dataset,mlcommons-cognata --path=D:\Work2\cognata-downloaded -j
+```
+
+### Download subsets of this dataset
+
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata --serial_numbers=10002_Urban_Clear_Morning
+cm run script --tags=get,raw,dataset,mlcommons-cognata --serial_numbers=10002_Urban_Clear_Morning --group_names=Cognata_Camera_01_8M
+cm run script --tags=get,raw,dataset,mlcommons-cognata --serial_numbers=10002_Urban_Clear_Morning --group_names=Cognata_Camera_01_8M --file_names=Cognata_Camera_01_8M_ann.zip;Cognata_Camera_01_8M_ann_laneline.zip;Cognata_Camera_01_8M.zip
+```
+
+Compact way to download the ABTF demo dataset to the CM cache:
+
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata,_abtf-demo
+```
+
+or to a specific path:
+```bash
+cm run script --tags=get,raw,dataset,mlcommons-cognata,_abtf-demo --path=./cognata
+cm run script --tags=get,raw,dataset,mlcommons-cognata,_abtf-demo --path=.\cognata
+```
diff --git a/script/get-dataset-cognata-mlcommons/_cm.yaml b/script/get-dataset-cognata-mlcommons/_cm.yaml
new file mode 100644
index 000000000..1b8155d7b
--- /dev/null
+++ b/script/get-dataset-cognata-mlcommons/_cm.yaml
@@ -0,0 +1,161 @@
+# Written by Grigori Fursin
+
+alias: get-dataset-cognata-mlcommons
+uid: 464ce21f2dce464e
+
+automation_alias: script
+automation_uid: 5b4e0237da074764
+
+developers: "Grigori Fursin, Radoyeh Shojaei"
+
+tags:
+- get
+- raw
+- dataset
+- cognata
+- mlcommons-cognata
+- ml-task--object-detection
+- ml-task--image-segmentation
+
+min_cm_version: '2.2.0'
+
+private: true
+
+cache: false
+
+category: AI/ML datasets
+category_sort: 8500
+
+
+input_mapping:
+  update: CM_DATASET_MLCOMMONS_COGNATA_UPDATE
+  import: CM_DATASET_MLCOMMONS_COGNATA_IMPORT_PATH
+  private_url: CM_DATASET_MLCOMMONS_COGNATA_PRIVATE_URL
+  serial_numbers: CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS
+  group_names: CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES
+  file_names: CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES
+
+env:
+  CM_DATASET: MLCOMMONS_COGNATA
+  CM_DATASET_MLCOMMONS_COGNATA_KEY1: "Dataset 1.0"
+
+
+
+
+deps:
+# Prepare dummy CM cache entry to manage dataset
+- names:
+  - custom-cache-entry-mlcommons-cognata-dataset
+  tags: create,custom,cache,entry
+  extra_cache_tags: dataset,cognata,mlcommons-cognata
+  skip_if_env:
CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: + - 'off' + env_key: DATASET_MLCOMMONS_COGNATA + # this script will prepare env CM_CUSTOM_CACHE_ENTRY_{env_key}_PATH + + +prehook_deps: +- names: + - gdrive-downloader-cognata + skip_if_env: + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: + - 'on' + enable_if_env: + CM_DATASET_MLCOMMONS_COGNATA_IMPORTED: + - 'no' + CM_DATASET_MLCOMMONS_COGNATA_DOWNLOAD_TOOL: + - gdrive + tags: download,file,_gdown,_url.https://drive.google.com/drive/folders/1FS-qLbzB5htgMnfry6z4gx8J_ZH_7MsJ?usp=drive_link + env: + CM_DOWNLOAD_EXTRA_OPTIONS: " --folder" + CM_DOWNLOAD_FILENAME: 10002_Urban_Clear_Morning + CM_DOWNLOAD_FINAL_ENV_NAME: CM_CUSTOM_CACHE_ENTRY_DATASET_MLCOMMONS_COGNATA_PATH + force_cache: true + extra_cache_tags: abtf,cognata,poc,dataset + +- names: + - rclone-downloader-cognata + skip_if_env: + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: + - 'on' + enable_if_env: + CM_DATASET_MLCOMMONS_COGNATA_IMPORTED: + - 'no' + CM_DATASET_MLCOMMONS_COGNATA_DOWNLOAD_TOOL: + - rclone + tags: download-and-extract,file,_extract,_rclone,_url.https://automotive.mlcommons-storage.org/Cognata_Dataset_PoC_Demo%2F10002_Urban_Clear_Morning.zip + env: + CM_RCLONE_COPY_USING: copyurl + CM_RCLONE_CONFIG_CMD: '' + CM_DOWNLOAD_CHECKSUM: '76389b05b0ee1e08d354d3c1b696b8c0' + CM_EXTRACT_EXTRACTED_CHECKSUM_FILE: "<<>>" + CM_DOWNLOAD_PATH: <<>> + CM_EXTRACT_PATH: <<>> + CM_EXTRACT_EXTRACTED_FILENAME: 10002_Urban_Clear_Morning + CM_DAE_FINAL_ENV_NAME: CM_CUSTOM_CACHE_ENTRY_DATASET_MLCOMMONS_COGNATA_PATH + force_cache: true + extra_cache_tags: abtf,cognata,poc,dataset + +- names: + - python + - python3 + tags: get,python3 + skip_if_env: + CM_DATASET_MLCOMMONS_COGNATA_IMPORTED: + - 'yes' + enable_if_env: + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: + - 'on' + +# Python package to read/write Excel files +- tags: get,generic-python-lib,_package.openpyxl + skip_if_env: + CM_DATASET_MLCOMMONS_COGNATA_IMPORTED: + - 'yes' + enable_if_env: + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: + - 'on' + +# Tool to download large files +- tags: get,aria2 + skip_if_env: + CM_DATASET_MLCOMMONS_COGNATA_IMPORTED: + - 'yes' + enable_if_env: + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: + - 'on' + + +variations: + abtf-demo: + group: dataset-type + env: + CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS: "10002_Urban_Clear_Morning" + CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES: "Cognata_Camera_01_8M" + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: "Cognata_Camera_01_8M_ann.zip;Cognata_Camera_01_8M_ann_laneline.zip;Cognata_Camera_01_8M.zip" + + abtf-poc: + group: dataset-type + default: true + env: + CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS: "10002_Urban_Clear_Morning" + CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES: "Cognata_Camera_01_8M" + CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES: "" + + rclone: + group: download-tool + default: true + env: + CM_DATASET_MLCOMMONS_COGNATA_DOWNLOAD_TOOL: rclone + + gdrive: + group: download-tool + env: + CM_DATASET_MLCOMMONS_COGNATA_DOWNLOAD_TOOL: gdrive + +new_env_keys: +- CM_DATASET_MLCOMMONS_COGNATA* + +print_env_at_the_end: + CM_DATASET_MLCOMMONS_COGNATA_PATH: Path to Cognata dataset diff --git a/script/get-dataset-cognata-mlcommons/checksums/cognata_poc.txt b/script/get-dataset-cognata-mlcommons/checksums/cognata_poc.txt new file mode 100644 index 000000000..b119faf77 --- /dev/null +++ b/script/get-dataset-cognata-mlcommons/checksums/cognata_poc.txt @@ -0,0 +1,41 @@ +9791a229f8fcd33de5db5ad5a4d8ca93 ./Cognata_Camera_01_8M_ann/0000001666.csv +eb03eead214fad6e9a8964b9b0fdb2a6 
./Cognata_Camera_01_8M_ann/0000003333.csv +72fa733da768e725cd562fd4472ada05 ./Cognata_Camera_01_8M_ann/0000005000.csv +5b30693ca5a339d68244ab5eaf1bb13a ./Cognata_Camera_01_8M_ann/0000006666.csv +644060ed86d5ac1ae58e25dc2762f294 ./Cognata_Camera_01_8M_ann/0000008333.csv +20cf4818e7b68f50101bf19614c36fee ./Cognata_Camera_01_8M_ann/0000010000.csv +056637bd394a898899445bd9d9d638c4 ./Cognata_Camera_01_8M_ann/0000011666.csv +5c092bdb26838c22e6c970b85838a8c6 ./Cognata_Camera_01_8M_ann/0000013333.csv +58c0b653fc17e74d590e6a8448f37f20 ./Cognata_Camera_01_8M_ann/0000015000.csv +866628a72aeda956ee2c994a06efd67e ./Cognata_Camera_01_8M_ann/0000016666.csv +20181f8d8fb36616974355016292807c ./Cognata_Camera_01_8M_ann/0000018333.csv +0433e6949bea924a6329c17ca9125971 ./Cognata_Camera_01_8M_ann/0000020000.csv +eb910004272cab64fc41a0dfdf521ca8 ./Cognata_Camera_01_8M_ann/0000021666.csv +9c26c4a7360f3aad89bfea862d1bac93 ./Cognata_Camera_01_8M_ann/0000023333.csv +2f4a7ea573a26a59cb740a4072a8fe71 ./Cognata_Camera_01_8M_ann/0000025000.csv +398b5768046964d141d555ad313f9f47 ./Cognata_Camera_01_8M_ann/0000026666.csv +8c0c7fcef25efb87c041d785ee0d87a6 ./Cognata_Camera_01_8M_ann/0000028333.csv +43bf64024584aa1ca42738517a347599 ./Cognata_Camera_01_8M_ann/0000030000.csv +545dadaafca21841fcfd78404e7da7ba ./Cognata_Camera_01_8M_ann/0000031666.csv +77357f1b417fcd548be949a8b8d9131a ./Cognata_Camera_01_8M_ann/0000033333.csv +6b7de80e2b6114645c1a039761a0422b ./Cognata_Camera_01_8M_png/0000001666.png +776e04bb64d2d782012b6923bec62ae6 ./Cognata_Camera_01_8M_png/0000003333.png +aad25fa016258b71490299c53d588f32 ./Cognata_Camera_01_8M_png/0000005000.png +b7acf5249e09817f8d82469737c016bd ./Cognata_Camera_01_8M_png/0000006666.png +6bf8031926f4b3d6b82b30e7055855e5 ./Cognata_Camera_01_8M_png/0000008333.png +c3248feec77175811e8a85e6c88d5424 ./Cognata_Camera_01_8M_png/0000010000.png +ae1f50d358940d335ae89193bd78aca8 ./Cognata_Camera_01_8M_png/0000011666.png +1b98c42fd12819d14e980b72518ddb88 ./Cognata_Camera_01_8M_png/0000013333.png +b3d9b5a2fc5abffbebb5b63b2e1cce0a ./Cognata_Camera_01_8M_png/0000015000.png +3af5f660ed930d853a048a10a715104a ./Cognata_Camera_01_8M_png/0000016666.png +a52ae7aba6c56d6ef0d4d29f4a8267cb ./Cognata_Camera_01_8M_png/0000018333.png +9388222ca6e65beae42cf4a2f4b1d020 ./Cognata_Camera_01_8M_png/0000020000.png +a5d4d3312f5592d64b57b69a0a0edcea ./Cognata_Camera_01_8M_png/0000021666.png +35af823a8177abef9b72846a93063695 ./Cognata_Camera_01_8M_png/0000023333.png +75eb93f99c36135a16df23612c5802d2 ./Cognata_Camera_01_8M_png/0000025000.png +d9a86566fbcaead7f4a43659723014e7 ./Cognata_Camera_01_8M_png/0000026666.png +0ae5d8933fdb9a86e01e9192d2210340 ./Cognata_Camera_01_8M_png/0000028333.png +8b3775e07f902bb3644f7b1eb9de5a69 ./Cognata_Camera_01_8M_png/0000030000.png +5d2584af6566683784e78f7c71968fa7 ./Cognata_Camera_01_8M_png/0000031666.png +ed9f9570448a8fd3af8540169f0df6df ./Cognata_Camera_01_8M_png/0000033333.png +d17ff83e2bbbf012a54ecac2491144a7 ./demo_files.txt diff --git a/script/get-dataset-cognata-mlcommons/customize.py b/script/get-dataset-cognata-mlcommons/customize.py new file mode 100644 index 000000000..d4791c60a --- /dev/null +++ b/script/get-dataset-cognata-mlcommons/customize.py @@ -0,0 +1,449 @@ +from cmind import utils +import os +import json + + +def preprocess(i): + + env = i['env'] + + cm_cache_dataset_path = env.get( + 'CM_CUSTOM_CACHE_ENTRY_DATASET_MLCOMMONS_COGNATA_PATH', '').strip() + cfg = utils.safe_load_json(cm_cache_dataset_path, 'cfg.json')['meta'] + if cfg.get('imported', False): + 
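+        # cfg.json in the CM cache entry records whether this dataset was
+        # imported from an existing local path (the flag is set in postprocess)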
env['CM_DATASET_MLCOMMONS_COGNATA_IMPORTED'] = 'yes' + + if env.get('CM_ABTF_SCRATCH_PATH_DATASETS', '') != '': + env['CM_ABTF_SCRATCH_PATH_DATASET_COGNATA'] = os.path.join( + env['CM_ABTF_SCRATCH_PATH_DATASETS'], "cognata") + env['CM_ABTF_SCRATCH_PATH_DATASET_COGNATA_TMP'] = os.path.join( + env['CM_ABTF_SCRATCH_PATH_DATASETS'], "cognata_tmp") + + env['CM_DATASET_COGNATA_POC_TEXT_MD5_FILE_PATH'] = os.path.join( + i['run_script_input']['path'], 'checksums', 'cognata_poc.txt') + + # Check if user requests path not in CM cache + # + # --path (env CM_TMP_PATH) shows where to store Cognata data set instead of CM cahe + # --import tells CM to import existing Cognata from a given path and skip further download/processing + # + import_path = env.get( + 'CM_DATASET_MLCOMMONS_COGNATA_IMPORT_PATH', + '').strip() + if import_path != '': + if not os.path.isdir(import_path): + return {'return': 1, 'error': 'directory to import this dataset doesn\'t exist: {}'.format( + import_path)} + + env['CM_DATASET_MLCOMMONS_COGNATA_IMPORTED'] = 'yes' + env['CM_DATASET_MLCOMMONS_COGNATA_PATH'] = import_path + + else: + path = env.get('CM_TMP_PATH', '') + if path != '': + env['CM_DATASET_MLCOMMONS_COGNATA_IMPORTED'] = 'no' + + if not os.path.isdir(path): + os.makedirs(path) + + env['CM_DATASET_MLCOMMONS_COGNATA_PATH'] = path + + return {'return': 0} + + +def postprocess(i): + + env = i['env'] + + automation = i['automation'] + cm = automation.cmind + + cur_dir = os.getcwd() + + quiet = (env.get('CM_QUIET', False) == 'yes') + + cm_cache_dataset_path = env.get( + 'CM_CUSTOM_CACHE_ENTRY_DATASET_MLCOMMONS_COGNATA_PATH', '').strip() + + if not os.path.isdir(cm_cache_dataset_path): + return { + 'return': 1, 'error': 'Dataset corrupted - CM cache path not found: {}'.format(cm_cache_dataset_path)} + + if env.get('CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES', '') == '': + env['CM_DATASET_MLCOMMONS_COGNATA_PATH'] = os.path.dirname( + env['CM_CUSTOM_CACHE_ENTRY_DATASET_MLCOMMONS_COGNATA_PATH']) + env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_DATASET_MLCOMMONS_COGNATA_PATH'] + return {'return': 0} + + cm_cache_dataset_cfg_file = os.path.join(cm_cache_dataset_path, 'cfg.json') + env['CM_DATASET_MLCOMMONS_COGNATA_CFG_FILE'] = cm_cache_dataset_cfg_file + + cfg = utils.safe_load_json('', cm_cache_dataset_cfg_file)['meta'] + + dataset_path = cfg.get('real_path', '') + dataset_path_requested = env.get('CM_DATASET_MLCOMMONS_COGNATA_PATH', '') + if dataset_path == '': + if dataset_path_requested != '': + dataset_path = dataset_path_requested + else: + dataset_path = os.path.join(cm_cache_dataset_path, 'cognata') + else: + if dataset_path_requested != '': + dataset_path = dataset_path_requested + + cfg['real_path'] = dataset_path + + print('') + print('Used dataset path: {}'.format(dataset_path)) + + env['CM_DATASET_MLCOMMONS_COGNATA_PATH'] = dataset_path + + # If imported, don't process further + if env.get('CM_DATASET_MLCOMMONS_COGNATA_IMPORTED', '') == 'yes': + cfg['imported'] = True + else: + cfg['imported'] = False + + utils.save_json(cm_cache_dataset_cfg_file, cfg) + + if cfg.get('imported', False): + return {'return': 0} + + # If processed once, don't process unless forced + if cfg.get('processed', False): + if not utils.check_if_true_yes_on( + env, 'CM_DATASET_MLCOMMONS_COGNATA_UPDATE'): + print('') + print('Already processed: use --update to update this dataset') + + return {'return': 0} + + # First level dir + dataset_path1 = dataset_path + + if not os.path.isdir(dataset_path1): + os.makedirs(dataset_path1) + + # Check if has 
license and download URL + dataset_path_secret = os.path.join(dataset_path1, 'secret.json') + + first_url = '' + dataset_meta = {} + + if os.path.isfile(dataset_path_secret): + r = utils.load_json(dataset_path_secret) + if r['return'] > 0: + return r + + dataset_meta = r['meta'] + + first_url = dataset_meta.get('first_url', '').strip() + + if first_url == '': + x = env.get('CM_DATASET_MLCOMMONS_COGNATA_PRIVATE_URL', '').strip() + if x != '': + first_url = x + else: + print('') + first_url = input( + 'Please register at https://mlcommons.org/datasets/cognata and enter private URL: ') + + first_url = first_url.strip() + + if first_url == '': + return {'return': 1, + 'error': 'Private MLCommons Cognata URL was not provided'} + + dataset_meta['first_url'] = first_url + + with open(dataset_path_secret, 'w') as f: + f.write(json.dumps(dataset_meta, indent=2) + '\n') + + ########################################################################## + # Check if first.xlsx exists + file_first_xlsx = 'first.xlsx' + first_xlsx = os.path.join(dataset_path1, file_first_xlsx) + + if not os.path.isfile(first_xlsx): + # Attempting to download file + first_url_export, dummy = google_url_for_export(first_url) + + if first_url_export == '': + return { + 'return': 1, 'error': 'can\'t parse URL for export: {}'.format(first_url)} + + r = cm.access({'action': 'run', + 'automation': 'script', + 'tags': 'download,file,_wget', + 'verify': 'no', + 'url': first_url_export, + 'output_file': file_first_xlsx, + 'store': dataset_path1}) + if r['return'] > 0: + return r + + if not os.path.isfile(first_xlsx): + return {'return': 1, + 'error': 'File {} was not downloaded'.format(first_xlsx)} + + ########################################################################## + # Parse XLSX and check serial number + serial_numbers = [] + for s in env.get( + 'CM_DATASET_MLCOMMONS_COGNATA_SERIAL_NUMBERS', '').strip().split(','): + s = s.strip() + if s != '' and s not in serial_numbers: + serial_numbers.append(s) + + dataset_key = env['CM_DATASET_MLCOMMONS_COGNATA_KEY1'] + url_key = 'Link to Excel File (Download Links)' + serial_key = 'Serial Number' + + r = process_xlsx( + first_xlsx, + dataset_key, + url_key, + serial_key, + serial_numbers) + if r['return'] > 0: + return r + + headers = r['headers'] + data = r['data'] + all_data = r['all_data'] + + if len(all_data) != 0: + file_first_json = 'first.json' + first_json = os.path.join(dataset_path1, file_first_json) + + if not os.path.isfile(first_json): + with open(first_json, 'w') as f: + f.write(json.dumps(all_data, indent=2) + '\n') + + if len(data) == 0: + return {'return': 0, 'error': 'no sets found'} + + ########################################################################## + print('') + print('Available or selected serial numbers (use --serial_numbers=a,b,c to download specific subsets):') + print('') + for d in data: + s = d[serial_key] + print(s) + + for d in data: + url = d[url_key] + url_export, dummy = google_url_for_export(url) + + serial_file = d[serial_key] + '.xlsx' + + dataset_path2 = os.path.join(dataset_path1, serial_file) + dataset_path3 = os.path.join(dataset_path1, d[serial_key]) + + if not os.path.isdir(dataset_path3): + os.makedirs(dataset_path3) + + if not os.path.isfile(dataset_path2): + + print('') + print('Downloading {} ...'.format(url_export)) + + r = cm.access({'action': 'run', + 'automation': 'script', + 'tags': 'download,file,_wget', + 'verify': 'no', + 'url': url_export, + 'output_file': serial_file, + 'store': dataset_path1}) + if r['return'] > 0: + 
return r
+
+    ##########################################################################
+    print('')
+    print('Processing subsets ...')
+
+    group_names = []
+    for s in env.get('CM_DATASET_MLCOMMONS_COGNATA_GROUP_NAMES',
+                     '').strip().split(','):
+        s = s.strip()
+        if s != '' and s not in group_names:
+            group_names.append(s)
+
+    # Check if specific file names are forced
+    x = env.get('CM_DATASET_MLCOMMONS_COGNATA_FILE_NAMES', '').strip()
+    file_names = []
+    if x != '':
+        file_names = x.split(';') if ';' in x else [x]
+
+    for d in data:
+        serial_file = d[serial_key] + '.xlsx'
+
+        dataset_path2 = os.path.join(dataset_path1, serial_file)
+        dataset_path3 = os.path.join(dataset_path1, d[serial_key])
+
+        print('')
+        print('Processing {} ...'.format(serial_file))
+
+        dataset_key = 'File_Data'
+        url_key = 'File_Link'
+        serial_key = 'Group_Name'
+
+        r = process_xlsx(
+            dataset_path2,
+            dataset_key,
+            url_key,
+            serial_key,
+            group_names)
+        if r['return'] > 0:
+            return r
+
+        headers = r['headers']
+        data = r['data']
+        all_data = r['all_data']
+
+        if len(all_data) != 0:
+            file_all_json = 'all.json'
+            all_json = os.path.join(dataset_path3, file_all_json)
+
+            if not os.path.isfile(all_json):
+                with open(all_json, 'w') as f:
+                    f.write(json.dumps(all_data, indent=2) + '\n')
+
+        if len(data) == 0:
+            return {'return': 0, 'error': 'no sub-sets found'}
+
+        for d in data:
+            file_name = d['File_Name']
+
+            if len(file_names) > 0 and file_name not in file_names:
+                continue
+
+            file_name_with_path = os.path.join(dataset_path3, file_name)
+            file_name_with_path_done = os.path.join(
+                dataset_path3, file_name) + '.done'
+
+            url = d[url_key]
+
+            print('')
+            print('Downloading {} ...'.format(file_name))
+
+            if os.path.isfile(file_name_with_path_done):
+                print('')
+                print('  Already processed - skipping ...')
+                continue
+
+            if os.name == 'nt':
+                aria2_tool = env['CM_ARIA2_BIN_WITH_PATH']
+            else:
+                aria2_tool = 'aria2c'
+
+            cmd = aria2_tool + \
+                ' --async-dns=false -x15 -s15 "{}" --dir "{}" -o "{}"'.format(
+                    url, dataset_path3, file_name)
+
+            print('')
+            print(cmd)
+            print('')
+
+            os.system(cmd)
+
+            # Unarchive
+            print('')
+            print('Extracting file {} ...'.format(file_name_with_path))
+            print('')
+
+            if file_name.endswith('.zip'):
+
+                import zipfile
+                extractor = zipfile.ZipFile(file_name_with_path, "r")
+
+            elif file_name.endswith('.tar'):
+
+                import tarfile
+                extractor = tarfile.open(file_name_with_path, "r")
+
+            else:
+                extractor = None
+
+            if extractor is not None:
+
+                try:
+                    extractor.extractall(dataset_path3)
+                    extractor.close()
+
+                except Exception as e:
+                    return {'return': 1,
+                            'error': 'extracting failed: {}'.format(e)}
+
+            # Mark as downloaded
+            with open(file_name_with_path_done, 'w') as f:
+                f.write('DONE\n')
+
+            # Remove file
+            os.remove(file_name_with_path)
+
+    print('')
+
+    # Mark that this dataset was processed once correctly
+    cfg['processed'] = True
+    utils.save_json(cm_cache_dataset_cfg_file, cfg)
+
+    env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_DATASET_MLCOMMONS_COGNATA_PATH']
+
+    return {'return': 0}
+
+
+# Prepare Google URL for export
+def google_url_for_export(url):
+    url2 = ''
+
+    j = url.rfind('/')
+
+    if j > 0:
+        url = url[:j + 1]
+        url2 = url + 'export'
+
+    return (url2, url)
+
+# Download Cognata XLSX
+
+
+def process_xlsx(filename, dataset_key, url_key, serial_key, serial_numbers):
+    import openpyxl
+
+    ex = openpyxl.load_workbook(filename)
+
+    sets = ex[dataset_key]
+
+    headers = {}
+
+    data = []
+    all_data = []
+
+    for row in sets.iter_rows(values_only=True):
+        lrow = list(row)
+
+        if len(headers) ==
0: + for j in range(0, len(lrow)): + headers[j] = str(lrow[j]).strip() + else: + xrow = {} + + for j in range(0, len(lrow)): + xrow[headers[j]] = lrow[j] + + url = str(xrow.get(url_key, '')) + if 'https' in url: + all_data.append(xrow) + + if len(serial_numbers) > 0: + serial_number = xrow.get(serial_key, '') + + if serial_number not in serial_numbers: + continue + + if url != '': + data.append(xrow) + + return {'return': 0, 'headers': headers, + 'data': data, 'all_data': all_data} diff --git a/script/get-ml-model-abtf-ssd-pytorch/README-extra.md b/script/get-ml-model-abtf-ssd-pytorch/README-extra.md new file mode 100644 index 000000000..e08259617 --- /dev/null +++ b/script/get-ml-model-abtf-ssd-pytorch/README-extra.md @@ -0,0 +1,5 @@ +# Example to import local model + +```bash +cm run script --tags=get,ml-model,abtf-ssd-pytorch,_local.test_8mp.pth +``` diff --git a/script/get-ml-model-abtf-ssd-pytorch/_cm.yaml b/script/get-ml-model-abtf-ssd-pytorch/_cm.yaml new file mode 100644 index 000000000..b346288d2 --- /dev/null +++ b/script/get-ml-model-abtf-ssd-pytorch/_cm.yaml @@ -0,0 +1,174 @@ +# Written by Grigori Fursin + +alias: get-ml-model-abtf-ssd-pytorch +uid: b3750a4b222a485d + +automation_alias: script +automation_uid: 5b4e0237da074764 + +cache: true + +category: AI/ML models + +private: true + + +tags: +- get +- ml-model +- abtf-ssd-pytorch +- cmc + + +input_mapping: + model_code_git_url: CM_ABTF_MODEL_CODE_GIT_URL + model_code_git_branch: CM_ABTF_MODEL_CODE_GIT_BRANCH + + +default_env: + CM_ABTF_MODEL_CODE_GIT_URL: https://github.com/mlcommons/abtf-ssd-pytorch + CM_ABTF_MODEL_CODE_GIT_BRANCH: cognata + + +deps: + +- tags: detect,os + +- tags: get,git,repo + names: + - abtf-ssd-pytorch-git-repo + - abtf-ml-model-code-git-repo + skip_if_env: + CM_SKIP_MODEL_CODE_DOWNLOAD: + - 'yes' + env: + CM_GIT_AUTH: 'yes' + CM_GIT_CHECKOUT_PATH_ENV_NAME: CM_ABTF_SSD_PYTORCH + extra_cache_tags: abtf,ssd,pytorch,ml-model,cmc + update_tags_from_env_with_prefix: + _repo.: + - CM_ABTF_MODEL_CODE_GIT_URL + _branch.: + - CM_ABTF_MODEL_CODE_GIT_BRANCH + + +- tags: download,file + env: + CM_DOWNLOAD_CHECKSUM: <<>> + CM_DOWNLOAD_FINAL_ENV_NAME: CM_ML_MODEL_FILE_WITH_PATH + CM_DOWNLOAD_FILENAME: <<>> + CM_VERIFY_SSL: 'no' + force_cache: true + names: + - abtf-ml-model-weights + - abtf-ml-model-weights-download + skip_if_env: + CM_SKIP_MODEL_WEIGHTS_DOWNLOAD: + - 'yes' + update_tags_from_env_with_prefix: + _url.: + - CM_ML_MODEL_URL + + + +new_env_keys: +- CM_ML_MODEL_* + +print_env_at_the_end: + CM_ML_MODEL_FILE_WITH_PATH: Path to the ML model weights + CM_ML_MODEL_CODE_WITH_PATH: Path to the ML model code + + +variations: + e01: + env: + CM_ML_MODEL: abtf-ssd-pytorch + CM_ML_MODEL_CHECKSUM: 31d177228308bbe43917c912b01c2d67 + CM_ML_MODEL_DATASET: coco + CM_ML_MODEL_FILENAME: SSD_e1.pth + CM_ML_MODEL_IMAGE_HEIGHT: '300' + CM_ML_MODEL_IMAGE_WIDTH: '300' + CM_ML_MODEL_URL: https://www.dropbox.com/scl/fi/7nqt5z8gplgeaveo933eo/SSD_e1.pth?rlkey=7lyb4qs2hzg491bfprwcuvx54&dl=0 + group: model-weights + + e65: + env: + CM_ML_MODEL: abtf-ssd-pytorch + CM_ML_MODEL_CHECKSUM: f769eb0321ac7fc1c16f982db6131d2f + CM_ML_MODEL_DATASET: coco + CM_ML_MODEL_FILENAME: SSD_e65.pth + CM_ML_MODEL_IMAGE_HEIGHT: '300' + CM_ML_MODEL_IMAGE_WIDTH: '300' + CM_ML_MODEL_URL: https://www.dropbox.com/scl/fi/wkegl2qxvm8cefbqq00o3/SSD_e65.pth?rlkey=ez26jafjdcly665npl6pdqxl8&dl=0 + group: model-weights + + abtf-mvp: + env: + CM_ML_MODEL: abtf-ssd-pytorch + CM_ML_MODEL_CHECKSUM: 1ab66f523715f9564603626e94e59c8c + CM_ML_MODEL_DATASET: cognata + 
CM_ML_MODEL_FILENAME: baseline_8MP_ss_scales_all_ep60.pth + CM_ML_MODEL_IMAGE_SIZE: '8M' + CM_ML_MODEL_URL: https://www.dropbox.com/scl/fi/9un2i2169rgebui4xklnm/baseline_8MP_ss_scales_all_ep60.pth?rlkey=sez3dnjep4waa09s5uy4r3wmk&st=z859czgk&dl=0 + group: model-weights + + abtf-poc: + default_variations: + download-tool: rclone + env: + CM_ML_MODEL: abtf-ssd-pytorch + CM_ML_MODEL_CHECKSUM: 26845c3b9573ce115ef29dca4ae5be14 + CM_ML_MODEL_DATASET: cognata + CM_ML_MODEL_FILENAME: baseline_8MP_ss_scales_fm1_5x5_all_ep60.pth + CM_ML_MODEL_IMAGE_SIZE: '8M' + group: model-weights + + abtf-poc,gdrive: + env: + CM_ML_MODEL_URL: https://drive.google.com/file/d/1kfJR_bs54KONprVd51kZu0PYmmh1wZZa/view + + abtf-poc,rclone: + env: + CM_RCLONE_COPY_USING: copyurl + CM_ML_MODEL_URL: https://automotive.mlcommons-storage.org/SSD_ResNet50%2Fbaseline_8MP_ss_scales_fm1_5x5_all_ep60.pth + CM_RCLONE_CONFIG_CMD: '' + + + local.#: + env: + CM_ML_MODEL_FILENAME: '#' + CM_ML_MODEL_LOCAL: 'yes' + CM_SKIP_MODEL_WEIGHTS_DOWNLOAD: 'yes' + group: model-weights + + skip_weights: + default: true + env: + CM_SKIP_MODEL_WEIGHTS_DOWNLOAD: 'yes' + group: model-weights + + skip_code: + env: + CM_SKIP_MODEL_CODE_DOWNLOAD: 'yes' + + rclone: + group: download-tool + env: + CM_RCLONE_COPY_USING: copyurl + adr: + abtf-ml-model-weights-download: + tags: _rclone + + wget: + group: download-tool + default: true + adr: + abtf-ml-model-weights-download: + tags: _wget + gdown: + group: download-tool + env: + CM_DOWNLOAD_EXTRA_OPTIONS: " --fuzzy" + adr: + abtf-ml-model-weights-download: + tags: _gdown diff --git a/script/get-ml-model-abtf-ssd-pytorch/customize.py b/script/get-ml-model-abtf-ssd-pytorch/customize.py new file mode 100644 index 000000000..10a3a7f83 --- /dev/null +++ b/script/get-ml-model-abtf-ssd-pytorch/customize.py @@ -0,0 +1,49 @@ +from cmind import utils +import os + + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + if env.get('CM_ML_MODEL_LOCAL', '') == 'yes': + ml_model = env.get('CM_ML_MODEL_FILENAME', '') + if ml_model == '': + return {'return': 1, 'error': '_local.{model name.pth} is not specified'} + + if not os.path.isabs(ml_model): + ml_model = os.path.join( + env.get( + 'CM_TMP_CURRENT_PATH', + ''), + ml_model) + + if not os.path.isfile(ml_model): + return {'return': 1, + 'error': 'ML model {} is not found'.format(ml_model)} + + env['CM_ML_MODEL_FILE_WITH_PATH'] = ml_model + + return {'return': 0} + + +def postprocess(i): + + env = i['env'] + + if env.get('CM_ML_MODEL_FILE_WITH_PATH', '') == '': + env['CM_ML_MODEL_FILE_WITH_PATH'] = 'model-weights-skipped' + + env['CM_ML_MODEL_FILE'] = os.path.basename( + env['CM_ML_MODEL_FILE_WITH_PATH']) + + if env.get('CM_ABTF_SSD_PYTORCH', '') == '': + env['CM_ABTF_SSD_PYTORCH'] = 'model-code-skipped' + + env['CM_ML_MODEL_CODE_WITH_PATH'] = env['CM_ABTF_SSD_PYTORCH'] + + env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_ML_MODEL_FILE_WITH_PATH'] + + return {'return': 0} diff --git a/script/get-mlperf-automotive-scratch-space/_cm.yaml b/script/get-mlperf-automotive-scratch-space/_cm.yaml new file mode 100644 index 000000000..cefe6da4c --- /dev/null +++ b/script/get-mlperf-automotive-scratch-space/_cm.yaml @@ -0,0 +1,39 @@ +alias: get-mlperf-automotive-scratch-space +automation_alias: script +automation_uid: 5b4e0237da074764 +cache: true +category: MLPerf benchmark support +deps: [] +docker: + run: false +input_description: {} +input_mapping: + scratch_path: CM_ABTF_SCRATCH_PATH +new_env_keys: +- CM_ABTF_SCRATCH_PATH +- CM_ABTF_SCRATCH_PATH_MODELS +- 
CM_ABTF_SCRATCH_PATH_DATASETS +- CM_ABTF_SCRATCH_VERSION +new_state_keys: [] +post_deps: [] +posthook_deps: [] +prehook_deps: [] +tags: +- get +- abtf +- inference +- scratch +- space +uid: c384b7604e5c47d5 +variations: + version.#: + env: + CM_ABTF_SCRATCH_VERSION: '#' + group: version + version.4_0: + default: true + env: + CM_ABTF_SCRATCH_VERSION: '4_0' + group: version +versions: {} + diff --git a/script/get-mlperf-automotive-scratch-space/customize.py b/script/get-mlperf-automotive-scratch-space/customize.py new file mode 100644 index 000000000..057acd2c4 --- /dev/null +++ b/script/get-mlperf-automotive-scratch-space/customize.py @@ -0,0 +1,40 @@ +from cmind import utils +import os + + +def preprocess(i): + + os_info = i['os_info'] + + env = i['env'] + + meta = i['meta'] + + automation = i['automation'] + + quiet = (env.get('CM_QUIET', False) == 'yes') + + if env.get('CM_ABTF_SCRATCH_PATH', '') == '': + env['CM_ABTF_SCRATCH_PATH'] = os.getcwd() + + return {'return': 0} + + +def postprocess(i): + + env = i['env'] + + env['CM_ABTF_SCRATCH_PATH_MODELS'] = os.path.join( + env['CM_ABTF_SCRATCH_PATH'], "models") + env['CM_ABTF_SCRATCH_PATH_DATASETS'] = os.path.join( + env['CM_ABTF_SCRATCH_PATH'], "datasets") + + if not os.path.exists(env['CM_ABTF_SCRATCH_PATH_MODELS']): + os.makedirs(env['CM_ABTF_SCRATCH_PATH_MODELS']) + + if not os.path.exists(env['CM_ABTF_SCRATCH_PATH_DATASETS']): + os.makedirs(env['CM_ABTF_SCRATCH_PATH_DATASETS']) + + env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_ABTF_SCRATCH_PATH'] + + return {'return': 0} diff --git a/script/get-mlperf-automotive-scratch-space/run.bat b/script/get-mlperf-automotive-scratch-space/run.bat new file mode 100644 index 000000000..648302ca7 --- /dev/null +++ b/script/get-mlperf-automotive-scratch-space/run.bat @@ -0,0 +1 @@ +rem native script diff --git a/script/get-mlperf-automotive-scratch-space/run.sh b/script/get-mlperf-automotive-scratch-space/run.sh new file mode 100644 index 000000000..3a584c10c --- /dev/null +++ b/script/get-mlperf-automotive-scratch-space/run.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +#CM Script location: ${CM_TMP_CURRENT_SCRIPT_PATH} + +#To export any variable +#echo "VARIABLE_NAME=VARIABLE_VALUE" >>tmp-run-env.out + +#${CM_PYTHON_BIN_WITH_PATH} contains the path to python binary if "get,python" is added as a dependency + + + +function exit_if_error() { + test $? -eq 0 || exit $? +} + +function run() { + echo "Running: " + echo "$1" + echo "" + if [[ ${CM_FAKE_RUN} != 'yes' ]]; then + eval "$1" + exit_if_error + fi +} + +#Add your run commands here... 
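+#Example (hypothetical command; CM normally prepares CM_RUN_CMD itself):
+#run "${CM_PYTHON_BIN_WITH_PATH} --version"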
+# run "$CM_RUN_CMD" diff --git a/script/run-mlperf-automotive-app/_cm.yaml b/script/run-mlperf-automotive-app/_cm.yaml new file mode 100644 index 000000000..942f499e8 --- /dev/null +++ b/script/run-mlperf-automotive-app/_cm.yaml @@ -0,0 +1,248 @@ +alias: run-mlperf-automotive-app +uid: 2a7315d2dff74898 + +automation_alias: script +automation_uid: 5b4e0237da074764 + +category: Modular MLPerf inference benchmark pipeline + +developers: "[Arjun Suresh](https://www.linkedin.com/in/arjunsuresh), [Grigori Fursin](https://cKnowledge.org/gfursin)" + + +clean_output_files: +- open.tar.gz +- summary.csv +- summary.json + +tags: +- run +- run-abtf +- run-abtf-inference +- mlcommons +- inference +- reference + +tags_help: "run-abtf,inference" + +default_env: + CM_MLPERF_IMPLEMENTATION: reference + CM_MLPERF_MODEL: retinanet + CM_MLPERF_RUN_STYLE: test + +input_mapping: + backend: CM_MLPERF_BACKEND + clean: CM_MLPERF_CLEAN_ALL + compliance: CM_MLPERF_LOADGEN_COMPLIANCE + dashboard_wb_project: CM_MLPERF_DASHBOARD_WANDB_PROJECT + dashboard_wb_user: CM_MLPERF_DASHBOARD_WANDB_USER + debug: CM_DEBUG_SCRIPT_BENCHMARK_PROGRAM + device: CM_MLPERF_DEVICE + division: CM_MLPERF_SUBMISSION_DIVISION + docker: CM_MLPERF_USE_DOCKER + dump_version_info: CM_DUMP_VERSION_INFO + save_console_log: CM_SAVE_CONSOLE_LOG + execution_mode: CM_MLPERF_RUN_STYLE + find_performance: CM_MLPERF_FIND_PERFORMANCE_MODE + gh_token: CM_GH_TOKEN + gpu_name: CM_NVIDIA_GPU_NAME + hw_name: CM_HW_NAME + hw_notes_extra: CM_MLPERF_SUT_SW_NOTES_EXTRA + imagenet_path: IMAGENET_PATH + implementation: CM_MLPERF_IMPLEMENTATION + lang: CM_MLPERF_IMPLEMENTATION + mode: CM_MLPERF_LOADGEN_MODE + model: CM_MLPERF_MODEL + multistream_target_latency: CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY + offline_target_qps: CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS + output_dir: OUTPUT_BASE_DIR + output_summary: MLPERF_INFERENCE_SUBMISSION_SUMMARY + output_tar: MLPERF_INFERENCE_SUBMISSION_TAR_FILE + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT + power: CM_SYSTEM_POWER + precision: CM_MLPERF_MODEL_PRECISION + preprocess_submission: CM_RUN_MLPERF_SUBMISSION_PREPROCESSOR + push_to_github: CM_MLPERF_RESULT_PUSH_TO_GITHUB + readme: CM_MLPERF_README + regenerate_accuracy_file: CM_MLPERF_REGENERATE_ACCURACY_FILE + regenerate_files: CM_REGENERATE_MEASURE_FILES + rerun: CM_RERUN + results_dir: OUTPUT_BASE_DIR + results_git_url: CM_MLPERF_RESULTS_GIT_REPO_URL + run_checker: CM_RUN_SUBMISSION_CHECKER + run_style: CM_MLPERF_RUN_STYLE + scenario: CM_MLPERF_LOADGEN_SCENARIO + server_target_qps: CM_MLPERF_LOADGEN_SERVER_TARGET_QPS + singlestream_target_latency: CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY + skip_submission_generation: CM_MLPERF_SKIP_SUBMISSION_GENERATION + skip_truncation: CM_SKIP_TRUNCATE_ACCURACY + submission_dir: CM_MLPERF_INFERENCE_SUBMISSION_DIR + submitter: CM_MLPERF_SUBMITTER + sut_servers: CM_NETWORK_LOADGEN_SUT_SERVERS + sw_notes_extra: CM_MLPERF_SUT_SW_NOTES_EXTRA + system_type: CM_MLPERF_SUBMISSION_SYSTEM_TYPE + target_latency: CM_MLPERF_LOADGEN_TARGET_LATENCY + target_qps: CM_MLPERF_LOADGEN_TARGET_QPS + test_query_count: CM_TEST_QUERY_COUNT + threads: CM_NUM_THREADS + batch_size: CM_MLPERF_LOADGEN_MAX_BATCHSIZE + sut: CM_MLPERF_INFERENCE_SUT_VARIATION + +new_state_keys: +- app_mlperf_inference_* +- cm-mlperf-inference-results* + +deps: +- tags: detect,os + skip_if_env: + CM_MLPERF_USE_DOCKER: [ on ] +- tags: detect,cpu + skip_if_env: + CM_MLPERF_USE_DOCKER: [ on ] +- names: + - python + - python3 + tags: get,python3 + 
skip_if_env: + CM_MLPERF_USE_DOCKER: [ on ] +- names: + - inference-src + tags: get,mlcommons,inference,src + skip_if_env: + CM_MLPERF_USE_DOCKER: [ on ] +- tags: get,sut,description + skip_if_env: + CM_MLPERF_USE_DOCKER: [ on ] + +- tags: get,mlperf,inference,results,dir + names: + - get-mlperf-inference-results-dir + enable_if_env: + CM_MLPERF_USE_DOCKER: [ off ] + skip_if_env: + OUTPUT_BASE_DIR: [ on ] +- tags: install,pip-package,for-cmind-python,_package.tabulate +- tags: get,mlperf,inference,utils + skip_if_env: + CM_MLPERF_USE_DOCKER: [ on ] + +variations: + accuracy-only: + default_variations: + submission-generation-style: full + env: + CM_MLPERF_LOADGEN_MODE: accuracy + CM_MLPERF_SUBMISSION_RUN: 'yes' + CM_RUN_MLPERF_ACCURACY: 'on' + CM_RUN_SUBMISSION_CHECKER: 'no' + group: submission-generation + + all-modes: + env: + CM_MLPERF_LOADGEN_ALL_MODES: 'yes' + group: mode + + all-scenarios: + env: + CM_MLPERF_LOADGEN_ALL_SCENARIOS: 'yes' + + compliance: + env: + CM_MLPERF_LOADGEN_COMPLIANCE: 'yes' + + dashboard: + default_gui: false + env: + CM_MLPERF_DASHBOARD: 'on' + + find-performance: + env: + CM_MLPERF_FIND_PERFORMANCE_MODE: 'yes' + CM_MLPERF_LOADGEN_ALL_MODES: 'no' + CM_MLPERF_LOADGEN_MODE: performance + CM_MLPERF_RESULT_PUSH_TO_GITHUB: false + group: submission-generation + + full: + add_deps_recursive: + coco2014-original: + tags: _full + coco2014-preprocessed: + tags: _full + env: + CM_MLPERF_SUBMISSION_GENERATION_STYLE: full + CM_MLPERF_SKIP_SUBMISSION_GENERATION: 'yes' + group: submission-generation-style + + performance-only: + default_variations: + submission-generation-style: full + env: + CM_MLPERF_LOADGEN_MODE: performance + CM_MLPERF_SUBMISSION_RUN: 'yes' + CM_RUN_SUBMISSION_CHECKER: 'no' + group: submission-generation + + mvp-demo: + default_env: + CM_MLPERF_DEVICE: cpu + + env: + CM_MLPERF_INFERENCE_VERSION: mvp-demo + CM_MLPERF_MODEL: abtf-demo-model + CM_MLPERF_BACKEND: pytorch + CM_MLPERF_IMPLEMENTATION: mlcommons-python + CM_MLPERF_LOADGEN_SCENARIO: SingleStream + CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: mvp-demo + adr: + compiler: + tags: gcc + group: benchmark-version + + poc-demo: + default_env: + CM_MLPERF_DEVICE: cpu + CM_TEST_QUERY_COUNT: "20" + + env: + CM_MLPERF_INFERENCE_VERSION: poc-demo + CM_MLPERF_MODEL: abtf-poc-model + CM_MLPERF_BACKEND: pytorch + CM_MLPERF_IMPLEMENTATION: mlcommons-python + CM_MLPERF_LOADGEN_SCENARIO: SingleStream + CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS: poc-demo + adr: + compiler: + tags: gcc + group: benchmark-version + + performance-and-accuracy: + default: true + base: + - all-modes + default_variations: + submission-generation-style: full + group: submission-generation + + submission: + base: + - all-modes + default_gui: true + default_variations: + submission-generation-style: full + env: + CM_MLPERF_LOADGEN_COMPLIANCE: 'yes' + CM_MLPERF_SUBMISSION_RUN: 'yes' + CM_RUN_MLPERF_ACCURACY: 'on' + CM_RUN_SUBMISSION_CHECKER: 'yes' + CM_TAR_SUBMISSION_DIR: 'yes' + group: submission-generation + post_deps: + - names: + - submission-generator + enable_if_env: + CM_MLPERF_SKIP_SUBMISSION_GENERATION: + - 'no' + - 'false' + - 'False' + - '0' + tags: generate,mlperf,inference,submission diff --git a/script/run-mlperf-automotive-app/customize.py b/script/run-mlperf-automotive-app/customize.py new file mode 100644 index 000000000..14cb9c237 --- /dev/null +++ b/script/run-mlperf-automotive-app/customize.py @@ -0,0 +1,403 @@ +from cmind import utils +import os +import json +import shutil +import subprocess +import cmind as cm +import copy +from 
tabulate import tabulate + +summary_ext = ['.csv', '.json', '.xlsx'] + +########################################################################## + + +def preprocess(i): + + os_info = i['os_info'] + env = i['env'] + + inp = i['input'] + state = i['state'] + script_path = i['run_script_input']['path'] + + if env.get('CM_RUN_DOCKER_CONTAINER', '') == "yes": + return {'return': 0} + + dump_version_info = env.get('CM_DUMP_VERSION_INFO', True) + system_meta = state.get('CM_SUT_META', {}) + if system_meta: + env['CM_SUT_META_EXISTS'] = "yes" + + env['CM_MODEL'] = env['CM_MLPERF_MODEL'] + + # Clean MLPerf inference output tar file if non-standard + x = env.get('MLPERF_INFERENCE_SUBMISSION_TAR_FILE', '') + if x != '' and os.path.isfile(x): + os.remove(x) + + # Clean MLPerf inference submission summary files + x = env.get('MLPERF_INFERENCE_SUBMISSION_SUMMARY', '') + if x != '': + for y in summary_ext: + z = x + y + if os.path.isfile(z): + os.remove(z) + + if env.get('CM_MLPERF_SUBMISSION_SYSTEM_TYPE', '') != '': + system_type = env['CM_MLPERF_SUBMISSION_SYSTEM_TYPE'] + system_meta['system_type'] = system_type + + if env.get('CM_MLPERF_SUBMISSION_DIVISION', '') != '': + division = env['CM_MLPERF_SUBMISSION_DIVISION'] + system_meta['division'] = division + + if system_meta.get('division', '') != "closed": + # no compliance runs needed for open division + env["CM_MLPERF_LOADGEN_COMPLIANCE"] = "no" + + clean = False + + if 'CM_MLPERF_CLEAN_ALL' in env: + clean = True + if 'CM_MLPERF_CLEAN_SUBMISSION_DIR' not in env: + env['CM_MLPERF_CLEAN_SUBMISSION_DIR'] = "yes" + if 'CM_RERUN' not in env: + env['CM_RERUN'] = "yes" + + if str(env.get('CM_SYSTEM_POWER', 'no')).lower( + ) != "no" or env.get('CM_MLPERF_POWER', '') == "yes": + power_variation = ",_power" + env['CM_MLPERF_POWER'] = "yes" + else: + power_variation = "" + + if env.get('CM_RUN_STYLE', + '') == "valid" and 'CM_RUN_MLPERF_ACCURACY' not in env: + env['CM_RUN_MLPERF_ACCURACY'] = "on" + + if env.get('CM_MLPERF_INFERENCE_SOURCE', '') != '': + print( + "Using MLCommons Inference source from " + + env['CM_MLPERF_INFERENCE_SOURCE']) + + if 'CM_MLPERF_LOADGEN_EXTRA_OPTIONS' not in env: + env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] = "" + + if 'CM_MLPERF_LOADGEN_MODES' not in env: + if 'CM_MLPERF_LOADGEN_MODE' not in env: + env['CM_MLPERF_LOADGEN_MODE'] = "performance" + + if 'CM_MLPERF_LOADGEN_SCENARIOS' not in env: + if 'CM_MLPERF_LOADGEN_SCENARIO' not in env: + env['CM_MLPERF_LOADGEN_SCENARIO'] = "Offline" + + if env.get('CM_MLPERF_LOADGEN_ALL_SCENARIOS', '') == "yes": + env['CM_MLPERF_LOADGEN_SCENARIOS'] = get_valid_scenarios( + env['CM_MODEL'], + system_meta['system_type'], + env['CM_MLPERF_LAST_RELEASE'], + env['CM_MLPERF_INFERENCE_SOURCE']) + else: + system_meta = {} + env['CM_MLPERF_LOADGEN_SCENARIOS'] = [ + env['CM_MLPERF_LOADGEN_SCENARIO']] + + if env.get('CM_MLPERF_LOADGEN_ALL_MODES', '') == "yes": + env['CM_MLPERF_LOADGEN_MODES'] = ["performance", "accuracy"] + else: + env['CM_MLPERF_LOADGEN_MODES'] = [env['CM_MLPERF_LOADGEN_MODE']] + + if env.get('OUTPUT_BASE_DIR', '') == '': + env['OUTPUT_BASE_DIR'] = env.get( + 'CM_MLPERF_INFERENCE_RESULTS_DIR', os.getcwd()) + + test_list = [] + + variation_implementation = "_" + \ + env.get("CM_MLPERF_IMPLEMENTATION", "reference") + variation_model = ",_" + env["CM_MLPERF_MODEL"] + variation_backend = ",_" + \ + env["CM_MLPERF_BACKEND"] if env.get( + "CM_MLPERF_BACKEND", "") != "" else "" + variation_device = ",_" + \ + env["CM_MLPERF_DEVICE"] if env.get( + "CM_MLPERF_DEVICE", "") != "" else "" + 
variation_run_style = ",_" + env.get("CM_MLPERF_RUN_STYLE", "test") + variation_reproducibility = ",_" + env["CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS"] if env.get( + "CM_RUN_MLPERF_INFERENCE_APP_DEFAULTS", "") != "" else "" + + if env.get("CM_MLPERF_MODEL_PRECISION", '') != '': + variation_quantization_string = ",_" + env["CM_MLPERF_MODEL_PRECISION"] + else: + variation_quantization_string = "" + + tags = "app,abtf-inference," + variation_implementation + variation_model + variation_backend + variation_device + \ + variation_run_style + variation_reproducibility + \ + variation_quantization_string + power_variation + verbose = inp.get('v', False) + print_env = inp.get('print_env', False) + print_deps = inp.get('print_deps', False) + add_deps_recursive = inp.get('add_deps_recursive', {}) + add_deps = inp.get('add_deps', {}) + ad = inp.get('ad', {}) + adr = inp.get('adr', {}) + adr_from_meta = i['run_script_input'].get('add_deps_recursive') + + for key in adr_from_meta: + add_deps_recursive[key] = adr_from_meta[key] + + if env.get('CM_MLPERF_LOADGEN_MAX_BATCHSIZE', '') != '': + if not add_deps_recursive.get('mlperf-inference-implementation', {}): + add_deps_recursive['mlperf-inference-implementation'] = {} + if add_deps_recursive['mlperf-inference-implementation'].get( + 'tags', '') == '': + add_deps_recursive['mlperf-inference-implementation']['tags'] = '' + else: + add_deps_recursive['mlperf-inference-implementation']['tags'] += ',' + add_deps_recursive['mlperf-inference-implementation']['tags'] += "_batch_size." + \ + env['CM_MLPERF_LOADGEN_MAX_BATCHSIZE'] + + if env.get('CM_MLPERF_INFERENCE_SUT_VARIATION', '') != '': + if not add_deps_recursive.get('mlperf-inference-implementation', {}): + add_deps_recursive['mlperf-inference-implementation'] = {} + if add_deps_recursive['mlperf-inference-implementation'].get( + 'tags', '') == '': + add_deps_recursive['mlperf-inference-implementation']['tags'] = '' + else: + add_deps_recursive['mlperf-inference-implementation']['tags'] += ',' + add_deps_recursive['mlperf-inference-implementation']['tags'] += "_" + \ + env['CM_MLPERF_INFERENCE_SUT_VARIATION'] + + if env.get('CM_NETWORK_LOADGEN', '') != '': + if not add_deps_recursive.get('mlperf-inference-implementation', {}): + add_deps_recursive['mlperf-inference-implementation'] = {} + network_variation_tag = f"_network-{env['CM_NETWORK_LOADGEN']}" + if add_deps_recursive['mlperf-inference-implementation'].get( + 'tags', '') == '': + add_deps_recursive['mlperf-inference-implementation']['tags'] = '' + else: + add_deps_recursive['mlperf-inference-implementation']['tags'] += ',' + add_deps_recursive['mlperf-inference-implementation']['tags'] += network_variation_tag + + if env.get('CM_OUTPUT_FOLDER_NAME', '') == '': + env['CM_OUTPUT_FOLDER_NAME'] = env['CM_MLPERF_RUN_STYLE'] + "_results" + + output_dir = os.path.join( + env['OUTPUT_BASE_DIR'], + env['CM_OUTPUT_FOLDER_NAME']) + if clean: + path_to_clean = output_dir + + print('=========================================================') + print('Cleaning results in {}'.format(path_to_clean)) + if os.path.exists(path_to_clean): + shutil.rmtree(path_to_clean) + + print('=========================================================') + + if str(env.get('CM_MLPERF_USE_DOCKER', '') + ).lower() in ["1", "true", "yes"]: + action = "docker" + del (env['OUTPUT_BASE_DIR']) + state = {} + docker_extra_input = {} + + if env.get('CM_HW_NAME'): + del (env['CM_HW_NAME']) + + for k in inp: + if k.startswith("docker_"): + docker_extra_input[k] = inp[k] + inp = {} + else: + action 
= "run" + + # local_keys = [ 'CM_MLPERF_SKIP_RUN', 'CM_MLPERF_LOADGEN_QUERY_COUNT', + # 'CM_MLPERF_LOADGEN_TARGET_QPS', 'CM_MLPERF_LOADGEN_TARGET_LATENCY' ] + + for scenario in env['CM_MLPERF_LOADGEN_SCENARIOS']: + scenario_tags = tags + ",_" + scenario.lower() + env['CM_MLPERF_LOADGEN_SCENARIO'] = scenario + + if scenario == "Offline": + if env.get('CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS'): + env['CM_MLPERF_LOADGEN_TARGET_QPS'] = env['CM_MLPERF_LOADGEN_OFFLINE_TARGET_QPS'] + elif scenario == "Server": + if env.get('CM_MLPERF_LOADGEN_SERVER_TARGET_QPS'): + env['CM_MLPERF_LOADGEN_TARGET_QPS'] = env['CM_MLPERF_LOADGEN_SERVER_TARGET_QPS'] + elif scenario == "SingleStream": + if env.get('CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY'): + env['CM_MLPERF_LOADGEN_TARGET_LATENCY'] = env['CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY'] + elif scenario == "MultiStream": + if env.get('CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY'): + env['CM_MLPERF_LOADGEN_TARGET_LATENCY'] = env['CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY'] + + for mode in env['CM_MLPERF_LOADGEN_MODES']: + env_copy = copy.deepcopy(env) + env_copy['CM_MLPERF_LOADGEN_MODE'] = mode + for key in env_copy: + if isinstance(env_copy[key], str) and env_copy[key].startswith( + "CM_TMP_"): + del env_copy[key] + + print(f"\nRunning loadgen scenario: {scenario} and mode: {mode}") + ii = {'action': action, 'automation': 'script', 'tags': scenario_tags, 'quiet': 'true', + 'env': env_copy, 'input': inp, 'state': state, 'add_deps': copy.deepcopy(add_deps), 'add_deps_recursive': + copy.deepcopy(add_deps_recursive), 'ad': ad, 'adr': copy.deepcopy(adr), 'v': verbose, 'print_env': print_env, 'print_deps': print_deps, 'dump_version_info': dump_version_info} + + if action == "docker": + for k in docker_extra_input: + ii[k] = docker_extra_input[k] + r = cm.access(ii) + if r['return'] > 0: + return r + if action == "docker": + # We run commands interactively inside the docker container + return {'return': 0} + + if env_copy.get('CM_OUTPUT_PREDICTIONS_PATH'): + print( + f"\nOutput predictions can be seen by opening the images inside {env_copy['CM_OUTPUT_PREDICTIONS_PATH']}\n") + + if state.get('docker', {}): + del (state['docker']) + + if env.get("CM_MLPERF_LOADGEN_COMPLIANCE", "") == "yes": + for test in test_list: + env_copy = copy.deepcopy(env) + for key in env_copy: + if isinstance(env_copy[key], str) and env_copy[key].startswith( + "CM_TMP_"): + del env_copy[key] + env_copy['CM_MLPERF_LOADGEN_COMPLIANCE_TEST'] = test + env_copy['CM_MLPERF_LOADGEN_MODE'] = "compliance" + ii = {'action': action, 'automation': 'script', 'tags': scenario_tags, 'quiet': 'true', + 'env': env_copy, 'input': inp, 'state': state, 'add_deps': copy.deepcopy(add_deps), 'add_deps_recursive': + copy.deepcopy(add_deps_recursive), 'adr': copy.deepcopy(adr), 'ad': ad, 'v': verbose, 'print_env': print_env, 'print_deps': print_deps, 'dump_version_info': dump_version_info} + if action == "docker": + for k in docker_extra_input: + ii[k] = docker_extra_input[k] + r = cm.access(ii) + if r['return'] > 0: + return r + if state.get('docker', {}): + del (state['docker']) + + if state.get("cm-mlperf-inference-results"): + # print(state["cm-mlperf-inference-results"]) + for sut in state["cm-mlperf-inference-results"]: # only one sut will be there + # Better to do this in a stand alone CM script with proper deps but + # currently we manage this by modifying the sys path of the python + # executing CM + import mlperf_utils # noqa + + print(sut) + result_table, headers = mlperf_utils.get_result_table( 
+ state["cm-mlperf-inference-results"][sut]) + print(tabulate(result_table, headers=headers, tablefmt="pretty")) + + print( + f"\nThe MLPerf inference results are stored at {output_dir}\n") + + return {'return': 0} + + +def get_valid_scenarios(model, category, mlperf_version, mlperf_path): + + import sys + + submission_checker_dir = os.path.join(mlperf_path, "tools", "submission") + + sys.path.append(submission_checker_dir) + if not os.path.exists(os.path.join( + submission_checker_dir, "submission_checker.py")): + shutil.copy(os.path.join(submission_checker_dir, "submission-checker.py"), os.path.join(submission_checker_dir, + "submission_checker.py")) + + import submission_checker as checker + + if "dlrm-99" in model: + model = model.replace("dlrm-99", "dlrm-v2-99") + if "sdxl" in model: + model = "stable-diffusion-xl" + + config = checker.MODEL_CONFIG + + internal_model_name = config[mlperf_version]["model_mapping"].get( + model, model) + + valid_scenarios = config[mlperf_version]["required-scenarios-" + + category][internal_model_name] + + print( + "Valid Scenarios for " + + model + + " in " + + category + + " category are :" + + str(valid_scenarios)) + + return valid_scenarios + +########################################################################## + + +def postprocess(i): + + env = i['env'] + state = i['state'] + + if env.get('CM_MLPERF_IMPLEMENTATION', '') == 'reference': + x1 = env.get('CM_MLPERF_INFERENCE_SOURCE', '') + x2 = env.get('CM_MLPERF_INFERENCE_CONF_PATH', '') + + if x1 != '' and x2 != '': + print('') + print( + 'Path to the MLPerf inference benchmark reference sources: {}'.format(x1)) + print( + 'Path to the MLPerf inference reference configuration file: {}'.format(x2)) + print('') + + return {'return': 0} + + +########################################################################## + + +def load_md(path, path2, name): + + fn = os.path.join(path, path2, name + '.md') + + s = '' + + if os.path.isfile(fn): + r = utils.load_txt(fn) + if r['return'] > 0: + return r + + s = r['string'] + + return {'return': 0, 'string': s} + +########################################################################## + + +def get_url(url, path, path2, name, text): + + name_md = name + '.md' + fn = os.path.join(path, path2, name_md) + + urlx = '' + url_online = '' + if os.path.isfile(fn): + if not url.endswith('/'): + url += '/' + urlx = url + path2 + '/' + name_md + + url_online = '[{}]({})'.format(text, urlx) + + return {'return': 0, 'url_online': url_online} + +########################################################################## diff --git a/script/test-cm-core/src/script/process_tests.py b/script/test-cm-core/src/script/process_tests.py new file mode 100644 index 000000000..8012d097b --- /dev/null +++ b/script/test-cm-core/src/script/process_tests.py @@ -0,0 +1,38 @@ +import sys +import os +import cmind as cm +import check as checks +import json +import yaml + +files = sys.argv[1:] + +for file in files: + print(file) + if not os.path.isfile(file) or not "script" in file: + continue + if not file.endswith("_cm.json") and not file.endswith("_cm.yaml"): + continue + script_path = os.path.dirname(file) + f = open(file) + if file.endswith(".json"): + data = json.load(f) + elif file.endswith(".yaml"): + data = yaml.safe_load(f) + if data.get('uid', '') == '': + continue # not a CM script meta + uid = data['uid'] + + ii = { + 'action': 'test', 'automation': 'script', 'artifact': uid, 'quiet': 'yes', 'out': 'con' + } + if os.environ.get('DOCKER_CM_REPO', '') != '': + 
ii['docker_cm_repo'] = os.environ['DOCKER_CM_REPO'] + if os.environ.get('DOCKER_CM_REPO_BRANCH', '') != '': + ii['docker_cm_repo_branch'] = os.environ['DOCKER_CM_REPO_BRANCH'] + if os.environ.get('TEST_INPUT_INDEX', '') != '': + ii['test_input_index'] = os.environ['TEST_INPUT_INDEX'] + print(ii) + r = cm.access(ii) + + checks.check_return(r) From ca9263aff2a56ee495a03382fb678506581d9da9 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Sun, 29 Dec 2024 17:20:51 +0530 Subject: [PATCH 65/80] Generated docker image name - always lower case (#87) * docker image name - always lowercase --- script/build-docker-image/customize.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/script/build-docker-image/customize.py b/script/build-docker-image/customize.py index d3e62ed08..b8956de6c 100644 --- a/script/build-docker-image/customize.py +++ b/script/build-docker-image/customize.py @@ -44,7 +44,8 @@ def preprocess(i): docker_image_name = "cm-script-" + \ env.get('CM_DOCKER_RUN_SCRIPT_TAGS', '').replace( ',', '-').replace('_', '-') - env['CM_DOCKER_IMAGE_NAME'] = docker_image_name + + env['CM_DOCKER_IMAGE_NAME'] = docker_image_name.lower() if env.get("CM_DOCKER_IMAGE_TAG", "") == '': env['CM_DOCKER_IMAGE_TAG'] = "latest" From 664215fd49579d79a2ed3726d5eb33a07dfa5e3d Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 29 Dec 2024 16:50:39 +0000 Subject: [PATCH 66/80] Fixes for podman (#88) * Support default root user in podman, fixes #81, #84 * [Automated Commit] Format Codebase * Fix dockerfile WORKDIR * Fixes for podman in build-dockerfile * [Automated Commit] Format Codebase * Fix dockerfile WORKDIR * Explicitly set user home in dockerfile --- automation/script/module_misc.py | 3 +++ script/app-mlperf-inference/_cm.yaml | 26 ++++++++++++++++++++------ script/build-dockerfile/_cm.yaml | 5 +++++ script/build-dockerfile/customize.py | 22 +++++++++++++++++----- script/get-ml-model-rgat/_cm.yaml | 1 - script/run-docker-container/_cm.yaml | 1 + 6 files changed, 46 insertions(+), 12 deletions(-) diff --git a/automation/script/module_misc.py b/automation/script/module_misc.py index cf6e94597..505af8d8f 100644 --- a/automation/script/module_misc.py +++ b/automation/script/module_misc.py @@ -1902,6 +1902,9 @@ def docker(i): noregenerate_docker_file = i.get('docker_noregenerate', False) norecreate_docker_image = i.get('docker_norecreate', True) + recreate_docker_image = i.get('docker_recreate', False) + if recreate_docker_image: # force recreate + norecreate_docker_image = False if i.get('docker_skip_build', False): noregenerate_docker_file = True diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index 9ba86635c..f8a1be0a8 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -1775,13 +1775,27 @@ input_description: debug: desc: "Debug MLPerf script" -gui: - title: "CM GUI for the MLPerf inference benchmark" +update_meta_if_env: + - enable_if_env: + CM_CONTAINER_TOOL: + - podman + # podman maps the host userid to the root user inside the container + docker: + use_host_group_id: False + use_host_user_id: False + pass_user_group: False #useful if docker is run by a different user from the one who built it and under the same group + default_env: + CM_DOCKER_USE_DEFAULT_USER: 'yes' + - skip_if_env: + CM_CONTAINER_TOOL: + - podman + docker: + use_host_group_id: True + use_host_user_id: True + pass_user_group: True #useful if docker is run by a different user from the one 
who built it and under the same group + docker: - use_host_group_id: True - use_host_user_id: True - pass_user_group: True #useful if docker is run by a different user fromt he one who built it and under the same group deps: - tags: get,mlperf,inference,results,dir,local names: @@ -1816,7 +1830,7 @@ docker: skip_run_cmd: 'no' shm_size: '32gb' interactive: True - extra_run_args: ' --dns 8.8.8.8 --dns 8.8.4.4 --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' + extra_run_args: ' --dns 8.8.8.8 --dns 8.8.4.4 --cap-add SYS_ADMIN --ulimit=host --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' os: ubuntu cm_repo: mlcommons@mlperf-automations cm_repo_branch: dev diff --git a/script/build-dockerfile/_cm.yaml b/script/build-dockerfile/_cm.yaml index f54d3a216..9f91c0775 100644 --- a/script/build-dockerfile/_cm.yaml +++ b/script/build-dockerfile/_cm.yaml @@ -57,6 +57,11 @@ input_mapping: new_env_keys: - CM_DOCKERFILE_* +deps: + - tags: get,docker + names: + - docker + post_deps: - enable_if_env: CM_BUILD_DOCKER_IMAGE: diff --git a/script/build-dockerfile/customize.py b/script/build-dockerfile/customize.py index 8f451ebb6..d6dbd061e 100644 --- a/script/build-dockerfile/customize.py +++ b/script/build-dockerfile/customize.py @@ -250,7 +250,12 @@ def preprocess(i): docker_user = get_value(env, config, 'USER', 'CM_DOCKER_USER') docker_group = get_value(env, config, 'GROUP', 'CM_DOCKER_GROUP') - if docker_user: + if env.get('CM_CONTAINER_TOOL', '') == 'podman' and env.get( + 'CM_DOCKER_USE_DEFAULT_USER', '') == '': + env['CM_DOCKER_USE_DEFAULT_USER'] = 'yes' + + if docker_user and str(env.get('CM_DOCKER_USE_DEFAULT_USER', '')).lower() not in [ + "yes", "1", "true"]: f.write('RUN groupadd -g $GID -o ' + docker_group + EOL) @@ -266,14 +271,20 @@ def preprocess(i): ' ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers' + EOL) f.write('USER ' + docker_user + ":" + docker_group + EOL) + f.write('ENV HOME=/home/cmuser' + EOL) + + else: + f.write('ENV HOME=/root' + EOL) dockerfile_env = env.get('CM_DOCKERFILE_ENV', {}) dockerfile_env_input_string = "" for docker_env_key in dockerfile_env: dockerfile_env_input_string = dockerfile_env_input_string + " --env." + \ docker_env_key + "=" + str(dockerfile_env[docker_env_key]) + workdir = get_value(env, config, 'WORKDIR', 'CM_DOCKER_WORKDIR') - if workdir: + if workdir and ("/home/cmuser" not in workdir or str(env.get('CM_DOCKER_USE_DEFAULT_USER', '')).lower() not in [ + "yes", "1", "true"]): f.write('WORKDIR ' + workdir + EOL) f.write(EOL + '# Install python packages' + EOL) @@ -281,9 +292,10 @@ def preprocess(i): docker_use_virtual_python = env.get('CM_DOCKER_USE_VIRTUAL_PYTHON', "yes") if str(docker_use_virtual_python).lower() not in ["no", "0", "false"]: - f.write('RUN {} -m venv /home/cmuser/venv/cm'.format(python) + " " + EOL) - f.write('ENV PATH="/home/cmuser/venv/cm/bin:$PATH"' + EOL) + f.write('RUN {} -m venv $HOME/venv/cm'.format(python) + " " + EOL) + f.write('ENV PATH="$HOME/venv/cm/bin:$PATH"' + EOL) # f.write('RUN . 
/opt/venv/cm/bin/activate' + EOL) + f.write( 'RUN {} -m pip install '.format(python) + " ".join( @@ -299,7 +311,7 @@ def preprocess(i): f.write(EOL + '# Download CM repo for scripts' + EOL) if use_copy_repo: - docker_repo_dest = "/home/cmuser/CM/repos/mlcommons@mlperf-automations" + docker_repo_dest = "$HOME/CM/repos/mlcommons@mlperf-automations" f.write( f'COPY --chown=cmuser:cm {relative_repo_path} {docker_repo_dest}' + EOL) diff --git a/script/get-ml-model-rgat/_cm.yaml b/script/get-ml-model-rgat/_cm.yaml index 54e5c119c..27a7e39e2 100644 --- a/script/get-ml-model-rgat/_cm.yaml +++ b/script/get-ml-model-rgat/_cm.yaml @@ -7,7 +7,6 @@ docker: fake_run_deps: True env: CM_ML_MODEL: RGAT - CM_ML_MODEL_DATASET: ICBH input_mapping: checkpoint: RGAT_CHECKPOINT_PATH new_env_keys: diff --git a/script/run-docker-container/_cm.yaml b/script/run-docker-container/_cm.yaml index 8a04e5b43..253a48c91 100644 --- a/script/run-docker-container/_cm.yaml +++ b/script/run-docker-container/_cm.yaml @@ -17,6 +17,7 @@ default_env: CM_DOCKER_DETACHED_MODE: 'yes' CM_DOCKER_REUSE_EXISTING_CONTAINER: 'no' CM_DOCKER_PRIVILEGED_MODE: 'no' + CM_PODMAN_MAP_USER_ID: 'no' input_mapping: all_gpus: CM_DOCKER_ADD_ALL_GPUS From 59785a11995983996ed44dc117d3b285c09d172e Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 30 Dec 2024 00:10:03 +0530 Subject: [PATCH 67/80] Don't use ulimit in docker extra args --- script/app-mlperf-inference/_cm.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index f8a1be0a8..180bad643 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -1830,7 +1830,7 @@ docker: skip_run_cmd: 'no' shm_size: '32gb' interactive: True - extra_run_args: ' --dns 8.8.8.8 --dns 8.8.4.4 --cap-add SYS_ADMIN --ulimit=host --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' + extra_run_args: ' --dns 8.8.8.8 --dns 8.8.4.4 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' os: ubuntu cm_repo: mlcommons@mlperf-automations cm_repo_branch: dev From b3149a237d16f4c3cab90ce2a4b1b2a81969c156 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 30 Dec 2024 02:58:36 +0530 Subject: [PATCH 68/80] CM_MLPERF_PERFORMANCE_SAMPLE_COUNT -> CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT --- script/app-mlperf-inference-amd/_cm.yaml | 2 +- script/app-mlperf-inference-dummy/_cm.yaml | 2 +- script/app-mlperf-inference-intel/_cm.yaml | 2 +- script/app-mlperf-inference-nvidia/_cm.yaml | 2 +- script/app-mlperf-inference-qualcomm/_cm.yaml | 2 +- script/app-mlperf-inference-redhat/_cm.yaml | 2 +- script/generate-mlperf-inference-user-conf/_cm.yaml | 2 +- script/generate-mlperf-inference-user-conf/customize.py | 4 ++-- 8 files changed, 9 insertions(+), 9 deletions(-) diff --git a/script/app-mlperf-inference-amd/_cm.yaml b/script/app-mlperf-inference-amd/_cm.yaml index 305578a17..f073011f8 100644 --- a/script/app-mlperf-inference-amd/_cm.yaml +++ b/script/app-mlperf-inference-amd/_cm.yaml @@ -38,7 +38,7 @@ input_mapping: mlperf_conf: CM_MLPERF_CONF mode: CM_MLPERF_LOADGEN_MODE output_dir: CM_MLPERF_OUTPUT_DIR - performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT scenario: CM_MLPERF_LOADGEN_SCENARIO user_conf: CM_MLPERF_USER_CONF skip_preprocess: CM_SKIP_PREPROCESS_DATASET diff --git a/script/app-mlperf-inference-dummy/_cm.yaml 
b/script/app-mlperf-inference-dummy/_cm.yaml index a1f311cc7..1343835b6 100644 --- a/script/app-mlperf-inference-dummy/_cm.yaml +++ b/script/app-mlperf-inference-dummy/_cm.yaml @@ -38,7 +38,7 @@ input_mapping: mlperf_conf: CM_MLPERF_CONF mode: CM_MLPERF_LOADGEN_MODE output_dir: CM_MLPERF_OUTPUT_DIR - performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT scenario: CM_MLPERF_LOADGEN_SCENARIO user_conf: CM_MLPERF_USER_CONF skip_preprocess: CM_SKIP_PREPROCESS_DATASET diff --git a/script/app-mlperf-inference-intel/_cm.yaml b/script/app-mlperf-inference-intel/_cm.yaml index 0975f0b0b..9a7c042d7 100644 --- a/script/app-mlperf-inference-intel/_cm.yaml +++ b/script/app-mlperf-inference-intel/_cm.yaml @@ -48,7 +48,7 @@ input_mapping: mlperf_conf: CM_MLPERF_CONF mode: CM_MLPERF_LOADGEN_MODE output_dir: CM_MLPERF_OUTPUT_DIR - performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT scenario: CM_MLPERF_LOADGEN_SCENARIO user_conf: CM_MLPERF_USER_CONF skip_preprocess: CM_SKIP_PREPROCESS_DATASET diff --git a/script/app-mlperf-inference-nvidia/_cm.yaml b/script/app-mlperf-inference-nvidia/_cm.yaml index 0547783f6..5b96c7f65 100644 --- a/script/app-mlperf-inference-nvidia/_cm.yaml +++ b/script/app-mlperf-inference-nvidia/_cm.yaml @@ -1823,7 +1823,7 @@ variations: default_variations: batch-size: batch_size.2048 env: - CM_MLPERF_PERFORMANCE_SAMPLE_COUNT: "2048" + CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT: "2048" a100,sxm,retinanet,offline,run_harness: default_variations: diff --git a/script/app-mlperf-inference-qualcomm/_cm.yaml b/script/app-mlperf-inference-qualcomm/_cm.yaml index 8de84ac08..5e3de4302 100644 --- a/script/app-mlperf-inference-qualcomm/_cm.yaml +++ b/script/app-mlperf-inference-qualcomm/_cm.yaml @@ -49,7 +49,7 @@ input_mapping: mlperf_conf: CM_MLPERF_CONF mode: CM_MLPERF_LOADGEN_MODE output_dir: CM_MLPERF_OUTPUT_DIR - performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT scenario: CM_MLPERF_LOADGEN_SCENARIO user_conf: CM_MLPERF_USER_CONF devices: CM_QAIC_DEVICES diff --git a/script/app-mlperf-inference-redhat/_cm.yaml b/script/app-mlperf-inference-redhat/_cm.yaml index 75f460f37..2c7011bd5 100644 --- a/script/app-mlperf-inference-redhat/_cm.yaml +++ b/script/app-mlperf-inference-redhat/_cm.yaml @@ -38,7 +38,7 @@ input_mapping: mlperf_conf: CM_MLPERF_CONF mode: CM_MLPERF_LOADGEN_MODE output_dir: CM_MLPERF_OUTPUT_DIR - performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT scenario: CM_MLPERF_LOADGEN_SCENARIO user_conf: CM_MLPERF_USER_CONF skip_preprocess: CM_SKIP_PREPROCESS_DATASET diff --git a/script/generate-mlperf-inference-user-conf/_cm.yaml b/script/generate-mlperf-inference-user-conf/_cm.yaml index 99fc35995..c19bdcba3 100644 --- a/script/generate-mlperf-inference-user-conf/_cm.yaml +++ b/script/generate-mlperf-inference-user-conf/_cm.yaml @@ -49,7 +49,7 @@ input_mapping: server_target_qps: CM_MLPERF_LOADGEN_SERVER_TARGET_QPS singlestream_target_latency: CM_MLPERF_LOADGEN_SINGLESTREAM_TARGET_LATENCY multistream_target_latency: CM_MLPERF_LOADGEN_MULTISTREAM_TARGET_LATENCY - performance_sample_count: CM_MLPERF_PERFORMANCE_SAMPLE_COUNT + performance_sample_count: CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT # Env keys which are exposed to higher level scripts new_env_keys: diff --git 
a/script/generate-mlperf-inference-user-conf/customize.py b/script/generate-mlperf-inference-user-conf/customize.py index 4829b08b3..1972092fd 100644 --- a/script/generate-mlperf-inference-user-conf/customize.py +++ b/script/generate-mlperf-inference-user-conf/customize.py @@ -217,8 +217,8 @@ def preprocess(i): user_conf += ml_model_name + "." + scenario + \ "." + metric + " = " + str(metric_value) + "\n" - if env.get('CM_MLPERF_PERFORMANCE_SAMPLE_COUNT', '') != '': - performance_sample_count = env['CM_MLPERF_PERFORMANCE_SAMPLE_COUNT'] + if env.get('CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT', '') != '': + performance_sample_count = env['CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT'] user_conf += ml_model_name + ".*.performance_sample_count_override = " + \ performance_sample_count + "\n" From 477f80ff22a22b0503f54e7b5751525a1a520fe0 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 30 Dec 2024 15:23:36 +0000 Subject: [PATCH 69/80] Fix env corruption in docker run command (#92) * Don't use ulimit in docker extra args * CM_MLPERF_PERFORMANCE_SAMPLE_COUNT -> CM_MLPERF_LOADGEN_PERFORMANCE_SAMPLE_COUNT * Fix env corruption in docker run command, fixes #91 --- automation/script/module_misc.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/automation/script/module_misc.py b/automation/script/module_misc.py index 505af8d8f..17483c2e7 100644 --- a/automation/script/module_misc.py +++ b/automation/script/module_misc.py @@ -2172,7 +2172,7 @@ def docker(i): # env keys corresponding to container mounts are explicitly passed to # the container run cmd - container_env_string = '' + container_env = {} for index in range(len(mounts)): mount = mounts[index] # Since windows may have 2 :, we search from the right @@ -2214,7 +2214,6 @@ def docker(i): new_container_mount, new_container_mount_env = get_container_path( env[tmp_value]) container_env_key = new_container_mount_env - # container_env_string += " --env.{}={} ".format(tmp_value, new_container_mount_env) else: # we skip those mounts mounts[index] = None skip = True @@ -2226,8 +2225,7 @@ def docker(i): continue mounts[index] = new_host_mount + ":" + new_container_mount if host_env_key: - container_env_string += " --env.{}={} ".format( - host_env_key, container_env_key) + container_env[host_env_key] = container_env_key for v in docker_input_mapping: if docker_input_mapping[v] == host_env_key: @@ -2258,10 +2256,16 @@ def docker(i): for key in proxy_keys: if os.environ.get(key, '') != '': value = os.environ[key] - container_env_string += " --env.{}={} ".format(key, value) + container_env[key] = value env['+ CM_DOCKER_BUILD_ARGS'].append( "{}={}".format(key, value)) + if container_env: + if not i_run_cmd.get('env'): + i_run_cmd['env'] = container_env + else: + i_run_cmd['env'] = {**i_run_cmd['env'], **container_env} + docker_use_host_group_id = i.get( 'docker_use_host_group_id', docker_settings.get('use_host_group_id')) @@ -2403,8 +2407,7 @@ def docker(i): 'docker_run_cmd_prefix': i.get('docker_run_cmd_prefix', '')}) if r['return'] > 0: return r - run_cmd = r['run_cmd_string'] + ' ' + \ - container_env_string + ' --docker_run_deps ' + run_cmd = r['run_cmd_string'] + ' ' + ' --docker_run_deps ' env['CM_RUN_STATE_DOCKER'] = True From 48ea6b46a7606d1c5d74909e94d5599dbe7ff9e1 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 31 Dec 2024 10:20:10 +0000 Subject: [PATCH 70/80] Fixes for R-GAT submission generation (#93) * Fixes for rgat closed division, submission generation improvements * [Automated Commit] Format 
Codebase * Update customize.py --- script/app-mlperf-inference/customize.py | 32 +++++++++++++++---- .../customize.py | 9 ++++-- script/get-mlperf-inference-src/_cm.yaml | 1 + script/get-mlperf-inference-src/customize.py | 2 ++ 4 files changed, 35 insertions(+), 9 deletions(-) diff --git a/script/app-mlperf-inference/customize.py b/script/app-mlperf-inference/customize.py index f62ae947d..2c34dad70 100644 --- a/script/app-mlperf-inference/customize.py +++ b/script/app-mlperf-inference/customize.py @@ -118,6 +118,8 @@ def postprocess(i): if mode == "accuracy" or mode == "compliance" and env[ 'CM_MLPERF_LOADGEN_COMPLIANCE_TEST'] == "TEST01": + out_baseline_accuracy_string = f"""> {os.path.join(output_dir, "accuracy", "baseline_accuracy.txt")} """ + out_compliance_accuracy_string = f"""> {os.path.join(output_dir, "accuracy", "compliance_accuracy.txt")} """ if model == "resnet50": accuracy_filename = "accuracy-imagenet.py" accuracy_filepath = os.path.join(env['CM_MLPERF_INFERENCE_CLASSIFICATION_AND_DETECTION_PATH'], "tools", @@ -147,6 +149,17 @@ def postprocess(i): datatype_option = " --output_dtype " + \ env['CM_SQUAD_ACCURACY_DTYPE'] + elif 'rgat' in model: + accuracy_filename = "accuracy_igbh.py" + accuracy_filepath = os.path.join( + env['CM_MLPERF_INFERENCE_RGAT_PATH'], "tools", accuracy_filename) + dataset_args = " --dataset-path '" + env['CM_DATASET_IGBH_PATH'] + "' --dataset-size '" + \ + env['CM_DATASET_IGBH_SIZE'] + "'" + accuracy_log_file_option_name = " --mlperf-accuracy-file " + datatype_option = "" + out_baseline_accuracy_string = f""" --output-file {os.path.join(output_dir, "accuracy", "baseline_accuracy.txt")} """ + out_compliance_accuracy_string = f""" --output-file {os.path.join(output_dir, "accuracy", "compliance_accuracy.txt")} """ + elif 'stable-diffusion-xl' in model: pass # No compliance check for now elif 'gpt' in model: @@ -390,9 +403,7 @@ def postprocess(i): cmd = "" xcmd = "" - readme_init = "This experiment is generated using the [MLCommons Collective Mind automation framework (CM)](https://github.com/mlcommons/cm4mlops).\n\n" - - readme_init += "*Check [CM MLPerf docs](https://docs.mlcommons.org/inference) for more details.*\n\n" + readme_init = "*Check [CM MLPerf docs](https://docs.mlcommons.org/inference) for more details.*\n\n" readme_body = "## Host platform\n\n* OS version: {}\n* CPU version: {}\n* Python version: {}\n* MLCommons CM version: {}\n\n".format(platform.platform(), platform.processor(), sys.version, cm.__version__) @@ -502,6 +513,7 @@ def postprocess(i): cmd = "cd " + TEST01_DIR + " && bash " + SCRIPT_PATH + " " + os.path.join(ACCURACY_DIR, "mlperf_log_accuracy.json") + " " + \ os.path.join(COMPLIANCE_DIR, "mlperf_log_accuracy.json") env['CMD'] = cmd + print(cmd) r = automation.run_native_script( {'run_script_input': run_script_input, 'env': env, 'script_name': 'verify_accuracy'}) if r['return'] > 0: @@ -516,9 +528,11 @@ def postprocess(i): print("\nDeterministic TEST01 failed... 
Trying with non-determinism.\n") # #Normal test failed, trying the check with non-determinism + baseline_accuracy_file = os.path.join( + TEST01_DIR, "mlperf_log_accuracy_baseline.json") CMD = "cd " + ACCURACY_DIR + " && " + env['CM_PYTHON_BIN_WITH_PATH'] + ' ' + accuracy_filepath + accuracy_log_file_option_name + \ - os.path.join(TEST01_DIR, "mlperf_log_accuracy_baseline.json") + dataset_args + datatype_option + " > " + \ - os.path.join(OUTPUT_DIR, "baseline_accuracy.txt") + baseline_accuracy_file + ' ' + dataset_args + \ + datatype_option + out_baseline_accuracy_string env['CMD'] = CMD r = automation.run_native_script( @@ -526,9 +540,13 @@ def postprocess(i): if r['return'] > 0: return r + if os.stat(baseline_accuracy_file).st_size == 0: + return {'return': 1, + 'error': f"{baseline_accuracy_file} is empty"} + CMD = "cd " + ACCURACY_DIR + " && " + env['CM_PYTHON_BIN_WITH_PATH'] + ' ' + accuracy_filepath + accuracy_log_file_option_name + \ - os.path.join(TEST01_DIR, "mlperf_log_accuracy.json") + dataset_args + datatype_option + " > " + \ - os.path.join(OUTPUT_DIR, "compliance_accuracy.txt") + os.path.join(TEST01_DIR, "mlperf_log_accuracy.json") + \ + dataset_args + datatype_option + out_compliance_accuracy_string env['CMD'] = CMD r = automation.run_native_script( diff --git a/script/generate-mlperf-inference-submission/customize.py b/script/generate-mlperf-inference-submission/customize.py index a27086237..73cd70052 100644 --- a/script/generate-mlperf-inference-submission/customize.py +++ b/script/generate-mlperf-inference-submission/customize.py @@ -582,8 +582,11 @@ def generate_submission(env, state, inp, submission_division): os.makedirs(target) for log_file in os.listdir( compliance_accuracy_run_path): - if log_file.startswith( - "mlperf_log_accuracy.json") or log_file.endswith("accuracy.txt"): + log_file_name = os.path.basename( + log_file) + # print(os.path.join(compliance_accuracy_run_path, log_file)) + if log_file_name in [ + "mlperf_log_accuracy.json", "accuracy.txt", "baseline_accuracy.txt", "compliance_accuracy.txt"]: shutil.copy( os.path.join( compliance_accuracy_run_path, log_file), os.path.join( @@ -732,6 +735,8 @@ def postprocess(i): # submission_generation function if env.get('CM_MLPERF_SUBMISSION_DIVISION', '') == '': r = generate_submission(env, state, inp, submission_division="") + if r['return'] > 0: + return r else: for submission_division in submission_divisions: r = generate_submission(env, state, inp, submission_division) diff --git a/script/get-mlperf-inference-src/_cm.yaml b/script/get-mlperf-inference-src/_cm.yaml index b8bd39092..0ff1d31e4 100644 --- a/script/get-mlperf-inference-src/_cm.yaml +++ b/script/get-mlperf-inference-src/_cm.yaml @@ -24,6 +24,7 @@ new_env_keys: - CM_MLPERF_INFERENCE_DLRM_V2_PATH - CM_MLPERF_INFERENCE_GPTJ_PATH - CM_MLPERF_INFERENCE_RNNT_PATH +- CM_MLPERF_INFERENCE_RGAT_PATH - CM_MLPERF_INFERENCE_SOURCE - CM_MLPERF_INFERENCE_SOURCE_VERSION - CM_MLPERF_INFERENCE_VERSION diff --git a/script/get-mlperf-inference-src/customize.py b/script/get-mlperf-inference-src/customize.py index 16669e2d5..1a62da3b6 100644 --- a/script/get-mlperf-inference-src/customize.py +++ b/script/get-mlperf-inference-src/customize.py @@ -109,6 +109,8 @@ def postprocess(i): inference_root, 'recommendation', 'dlrm') env['CM_MLPERF_INFERENCE_DLRM_V2_PATH'] = os.path.join( inference_root, 'recommendation', 'dlrm_v2') + env['CM_MLPERF_INFERENCE_RGAT_PATH'] = os.path.join( + inference_root, 'graph', 'R-GAT') env['CM_MLPERF_INFERENCE_3DUNET_PATH'] = os.path.join( 
inference_root, 'vision', 'medical_imaging', '3d-unet-kits19') From 5faf15abe8521376226c9d408ed058bfca7ecdce Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Wed, 1 Jan 2025 23:38:30 +0530 Subject: [PATCH 71/80] Fix mounting of host cache entries inside docker for mlperf-inference-mixtral --- script/app-mlperf-inference-mlcommons-python/_cm.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/script/app-mlperf-inference-mlcommons-python/_cm.yaml b/script/app-mlperf-inference-mlcommons-python/_cm.yaml index 1c1a89eee..8ff24cad4 100644 --- a/script/app-mlperf-inference-mlcommons-python/_cm.yaml +++ b/script/app-mlperf-inference-mlcommons-python/_cm.yaml @@ -584,6 +584,8 @@ deps: skip_if_env: CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST: - 'yes' + CM_RUN_STATE_DOCKER: + - 'yes' ## Kits19 for 3d-unet - tags: get,dataset,kits19,preprocessed @@ -596,6 +598,8 @@ deps: skip_if_env: CM_MLPERF_DATASET_3DUNET_DOWNLOAD_TO_HOST: - 'yes' + CM_RUN_STATE_DOCKER: + - 'yes' ## Librispeech for rnnt - tags: get,dataset,librispeech,preprocessed From 19aed593cf0520b3eecf3133d441fca87ec92218 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 2 Jan 2025 15:14:59 +0000 Subject: [PATCH 72/80] Fixes for podman run, github actions (#95) * Update test-mlperf-inference-mixtral.yml * Update test-mlperf-inference-rgat.yml * Support native podman calls * Added gcc tag to compiler on linux platform for app-mlperf-inference * Added open division as default for mlperf-inference * Update test-nvidia-mlperf-inference-implementations.yml --- .../test-mlperf-inference-mixtral.yml | 2 +- .../workflows/test-mlperf-inference-rgat.yml | 2 +- ...vidia-mlperf-inference-implementations.yml | 17 +++++++++-- script/app-mlperf-inference/_cm.yaml | 7 ++++- script/build-docker-image/_cm.yaml | 3 ++ script/build-docker-image/customize.py | 3 +- script/run-docker-container/_cm.yaml | 4 ++- script/run-docker-container/customize.py | 30 +++++++++---------- script/run-mlperf-inference-app/_cm.yaml | 1 + 9 files changed, 46 insertions(+), 23 deletions(-) diff --git a/.github/workflows/test-mlperf-inference-mixtral.yml b/.github/workflows/test-mlperf-inference-mixtral.yml index 04a944fd4..6687ff048 100644 --- a/.github/workflows/test-mlperf-inference-mixtral.yml +++ b/.github/workflows/test-mlperf-inference-mixtral.yml @@ -31,5 +31,5 @@ jobs: git config --global credential.helper store huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential cm pull repo - cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1 + cm run script --tags=run-mlperf,inference,_submission,_short --adr.inference-src.tags=_branch.dev --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} 
--backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1 cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - GO-phoenix" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-rgat.yml b/.github/workflows/test-mlperf-inference-rgat.yml index 94a2e174b..62d5a5139 100644 --- a/.github/workflows/test-mlperf-inference-rgat.yml +++ b/.github/workflows/test-mlperf-inference-rgat.yml @@ -31,7 +31,7 @@ jobs: cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} - name: Test MLPerf Inference R-GAT using ${{ matrix.backend }} on ${{ matrix.os }} run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --pull_changes=yes --pull_inference_changes=yes --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --adr.inference-src.tags=_branch.dev --pull_changes=yes --pull_inference_changes=yes --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1 - name: Push Results if: github.repository_owner == 'gateoverflow' env: diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml index 2bbccae6b..bb83bd49a 100644 --- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml +++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml @@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations on: schedule: - - cron: "08 01 * * */3" #to be adjusted + - cron: "58 10 * * *" #to be adjusted jobs: run_nvidia: @@ -17,7 +17,8 @@ jobs: strategy: fail-fast: false matrix: - system: [ "GO-spr", "phoenix-Amd-Am5", "GO-i9" ] + # system: [ "GO-spr", "phoenix-Amd-Am5", "GO-i9", "mlc-server" ] + system: [ "mlc-server" ] python-version: [ "3.12" ] model: [ "resnet50", "retinanet", "bert-99", "bert-99.9", "gptj-99.9", "3d-unet-99.9", "sdxl" ] exclude: @@ -25,12 +26,22 @@ jobs: steps: - name: Test MLPerf Inference NVIDIA ${{ matrix.model }} + env: + gpu_name: rtx_4090 run: | # Set hw_name based on matrix.system if [ "${{ matrix.system }}" = "GO-spr" ]; then hw_name="RTX4090x2" + gpu_name=rtx_4090 + docker_string=" --docker" + elif [ "${{ matrix.system }}" = "mlc-server" ]; then + hw_name="H100x8" + gpu_name=h100 + docker_string=" " else hw_name="RTX4090x1" + gpu_name=rtx_4090 + 
docker_string=" --docker" fi if [ -f "gh_action/bin/deactivate" ]; then source gh_action/bin/deactivate; fi @@ -40,6 +51,6 @@ jobs: pip install --upgrade cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=rtx_4090 --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt=yes --docker_it=no --docker_cm_repo=mlcommons@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet + cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=$gpu_name --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt=yes --docker_it=no --docker_cm_repo=mlcommons@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean $docker_string --quiet cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from GH action on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index 180bad643..6c939a225 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -1793,7 +1793,12 @@ update_meta_if_env: use_host_group_id: True use_host_user_id: True pass_user_group: True #useful if docker is run by a different user from the one who built it and under the same group - + - enable_if_env: + CM_HOST_OS_TYPE: + - linux + adr: + compiler: + tags: gcc docker: deps: diff --git a/script/build-docker-image/_cm.yaml b/script/build-docker-image/_cm.yaml index a9dc8cb67..8fd7c2571 100644 --- a/script/build-docker-image/_cm.yaml +++ b/script/build-docker-image/_cm.yaml @@ -38,6 +38,9 @@ input_mapping: new_env_keys: - CM_DOCKER_* +deps: + - tags: get,docker + prehook_deps: - enable_if_env: CM_BUILD_DOCKERFILE: diff --git a/script/build-docker-image/customize.py b/script/build-docker-image/customize.py index b8956de6c..6d971f804 100644 --- a/script/build-docker-image/customize.py +++ b/script/build-docker-image/customize.py @@ -66,7 +66,8 @@ def preprocess(i): # Prepare CMD to build image XCMD = [ - 'docker build ' + env.get('CM_DOCKER_CACHE_ARG', ''), + f'{env["CM_CONTAINER_TOOL"]} build ' + + env.get('CM_DOCKER_CACHE_ARG', ''), ' ' + build_args, ' -f "' + dockerfile_path + '"', ' -t "' + image_name, diff --git a/script/run-docker-container/_cm.yaml b/script/run-docker-container/_cm.yaml index 253a48c91..78f8397d2 100644 --- a/script/run-docker-container/_cm.yaml +++ b/script/run-docker-container/_cm.yaml @@ -58,6 +58,9 @@ input_mapping: new_env_keys: - 
'CM_DOCKER_CONTAINER_ID' +deps: + - tags: get,docker + prehook_deps: - names: - build-docker-image @@ -69,4 +72,3 @@ prehook_deps: CM_DOCKER_CONTAINER_ID: - on tags: build,docker,image -- tags: get,docker diff --git a/script/run-docker-container/customize.py b/script/run-docker-container/customize.py index 2157d595f..68acccab6 100644 --- a/script/run-docker-container/customize.py +++ b/script/run-docker-container/customize.py @@ -51,7 +51,7 @@ def preprocess(i): print('') print('Checking existing Docker container:') print('') - CMD = f"""docker ps --filter "ancestor={DOCKER_CONTAINER}" """ + CMD = f"""{env['CM_CONTAINER_TOOL']} ps --filter "ancestor={DOCKER_CONTAINER}" """ if os_info['platform'] == 'windows': CMD += " 2> nul" else: @@ -78,7 +78,7 @@ def preprocess(i): if env.get('CM_DOCKER_CONTAINER_ID', '') != '': del (env['CM_DOCKER_CONTAINER_ID']) # not valid ID - CMD = "docker images -q " + DOCKER_CONTAINER + CMD = f"""{env['CM_CONTAINER_TOOL']} images -q """ + DOCKER_CONTAINER if os_info['platform'] == 'windows': CMD += " 2> nul" @@ -196,11 +196,6 @@ def postprocess(i): return {'return': 1, 'error': 'Can\'t find separator : in a mount string: {}'.format( mount_cmd)} -# mount_parts = mount_cmd.split(":") -# if len(mount_parts) != 2: -# return {'return': 1, 'error': 'Invalid mount {} -# specified'.format(mount_parts)} - host_mount = mount_parts[0] if not os.path.exists(host_mount): @@ -240,14 +235,14 @@ def postprocess(i): existing_container_id = env.get('CM_DOCKER_CONTAINER_ID', '') if existing_container_id: - CMD = f"ID={existing_container_id} && docker exec $ID bash -c '" + run_cmd + "'" + CMD = f"""ID={existing_container_id} && {env['CM_CONTAINER_TOOL']} exec $ID bash -c '""" + run_cmd + "'" else: - CONTAINER = f"docker run -dt {run_opts} --rm {docker_image_repo}/{docker_image_name}:{docker_image_tag} bash" - CMD = f"ID=`{CONTAINER}` && docker exec $ID bash -c '{run_cmd}'" + CONTAINER = f"""{env['CM_CONTAINER_TOOL']} run -dt {run_opts} --rm {docker_image_repo}/{docker_image_name}:{docker_image_tag} bash""" + CMD = f"""ID=`{CONTAINER}` && {env['CM_CONTAINER_TOOL']} exec $ID bash -c '{run_cmd}'""" if False and str(env.get('CM_KEEP_DETACHED_CONTAINER', '')).lower() not in [ 'yes', "1", 'true']: - CMD += " && docker kill $ID >/dev/null" + CMD += f""" && {env['CM_CONTAINER_TOOL']} kill $ID >/dev/null""" CMD += ' && echo "ID=$ID"' @@ -256,7 +251,10 @@ def postprocess(i): print('') print(CMD) print('') - print("Running " + run_cmd + " inside docker container") + print( + "Running " + + run_cmd + + f""" inside {env['CM_CONTAINER_TOOL']} container""") record_script({'cmd': CMD, 'env': env}) @@ -280,7 +278,8 @@ def postprocess(i): docker_out = result.stdout # if docker_out != 0: - # return {'return': docker_out, 'error': 'docker run failed'} + # return {'return': docker_out, 'error': f""{env['CM_CONTAINER_TOOL']} + # run failed""} lines = docker_out.split("\n") @@ -304,7 +303,7 @@ def postprocess(i): x1 = '-it' x2 = " && bash ) || bash" - CONTAINER = "docker run " + x1 + " --entrypoint " + x + x + " " + run_opts + \ + CONTAINER = f"{env['CM_CONTAINER_TOOL']} run " + x1 + " --entrypoint " + x + x + " " + run_opts + \ " " + docker_image_repo + "/" + docker_image_name + ":" + docker_image_tag CMD = CONTAINER + " bash -c " + x + run_cmd_prefix + run_cmd + x2 + x @@ -320,7 +319,8 @@ def postprocess(i): if docker_out != 0: if docker_out % 256 == 0: docker_out = 1 - return {'return': docker_out, 'error': 'docker run failed'} + return {'return': docker_out, + 'error': f"""{env['CM_CONTAINER_TOOL']} 
run failed"""} return {'return': 0} diff --git a/script/run-mlperf-inference-app/_cm.yaml b/script/run-mlperf-inference-app/_cm.yaml index a2e3688e7..ea2185060 100644 --- a/script/run-mlperf-inference-app/_cm.yaml +++ b/script/run-mlperf-inference-app/_cm.yaml @@ -34,6 +34,7 @@ default_env: CM_MLPERF_RUN_STYLE: test CM_MLPERF_SKIP_SUBMISSION_GENERATION: no CM_DOCKER_PRIVILEGED_MODE: yes + CM_MLPERF_SUBMISSION_DIVISION: open input_mapping: api_server: CM_MLPERF_INFERENCE_API_SERVER From d3babb6305c078d3c915e02605545e0771537845 Mon Sep 17 00:00:00 2001 From: Arjun Date: Thu, 2 Jan 2025 19:12:38 +0000 Subject: [PATCH 73/80] Fix docker detached mode with podman --- automation/script/module_misc.py | 10 ++++++---- script/app-mlperf-inference/_cm.yaml | 4 ++-- script/build-docker-image/customize.py | 2 +- script/run-docker-container/customize.py | 21 ++++++++++---------- script/run-mlperf-inference-app/customize.py | 3 ++- 5 files changed, 22 insertions(+), 18 deletions(-) diff --git a/automation/script/module_misc.py b/automation/script/module_misc.py index 17483c2e7..336073969 100644 --- a/automation/script/module_misc.py +++ b/automation/script/module_misc.py @@ -1977,8 +1977,6 @@ def docker(i): env['CM_DOCKER_CACHE'] = docker_cache image_repo = i.get('docker_image_repo', '') - if image_repo == '': - image_repo = 'local' # Host system needs to have docker r = self_module.cmind.access({'action': 'run', @@ -2438,10 +2436,8 @@ def docker(i): 'docker_os_version': docker_os_version, 'cm_repo': cm_repo, 'env': env, - 'image_repo': image_repo, 'interactive': interactive, 'mounts': mounts, - 'image_name': image_name, # 'image_tag': script_alias, 'image_tag_extra': image_tag_extra, 'detached': detached, @@ -2458,6 +2454,12 @@ def docker(i): } } + if image_repo: + cm_docker_input['image_repo'] = image_repo + + if image_name: + cm_docker_input['image_name'] = image_name + if all_gpus: cm_docker_input['all_gpus'] = True diff --git a/script/app-mlperf-inference/_cm.yaml b/script/app-mlperf-inference/_cm.yaml index 6c939a225..c50df363b 100644 --- a/script/app-mlperf-inference/_cm.yaml +++ b/script/app-mlperf-inference/_cm.yaml @@ -409,7 +409,7 @@ variations: nvidia-original: docker: interactive: True - extra_run_args: ' --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' + extra_run_args: ' --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-l4-public os: "ubuntu" os_version: "20.04" @@ -434,7 +434,7 @@ variations: CM_HOST_OS_VERSION: - 20.04 docker: - extra_run_args: ' --runtime=nvidia --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' + extra_run_args: ' --runtime=nvidia --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' default_variations: backend: tensorrt diff --git a/script/build-docker-image/customize.py b/script/build-docker-image/customize.py index 6d971f804..f674ea677 100644 --- a/script/build-docker-image/customize.py +++ b/script/build-docker-image/customize.py @@ -37,7 +37,7 @@ def preprocess(i): # env['CM_BUILD_DOCKERFILE'] = "no" # if env.get("CM_DOCKER_IMAGE_REPO", "") == '': - env['CM_DOCKER_IMAGE_REPO'] = "local" + env['CM_DOCKER_IMAGE_REPO'] = "localhost/local" docker_image_name = env.get('CM_DOCKER_IMAGE_NAME', '') if 
docker_image_name == '': diff --git a/script/run-docker-container/customize.py b/script/run-docker-container/customize.py index 68acccab6..4f0fa8277 100644 --- a/script/run-docker-container/customize.py +++ b/script/run-docker-container/customize.py @@ -3,7 +3,7 @@ import os import subprocess from os.path import exists - +import json def preprocess(i): @@ -51,7 +51,7 @@ def preprocess(i): print('') print('Checking existing Docker container:') print('') - CMD = f"""{env['CM_CONTAINER_TOOL']} ps --filter "ancestor={DOCKER_CONTAINER}" """ + CMD = f"""{env['CM_CONTAINER_TOOL']} ps --format=json --filter "ancestor={DOCKER_CONTAINER}" """ if os_info['platform'] == 'windows': CMD += " 2> nul" else: @@ -60,17 +60,18 @@ def preprocess(i): print('') try: - docker_container = subprocess.check_output( - CMD, shell=True).decode("utf-8") + out = subprocess.check_output( + CMD, shell=True, text=True).strip() except Exception as e: return { - 'return': 1, 'error': 'Docker is either not installed or not started:\n{}'.format(e)} + 'return': 1, + 'error': 'Unexpected error occurred with docker run:\n{}'.format(e) + } - output_split = docker_container.split("\n") - if len(output_split) > 1 and str(env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', + out_json = json.loads(out) + if len(out_json) > 0 and str(env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', '')).lower() in ["1", "true", "yes"]: # container exists - out = output_split[1].split(" ") - existing_container_id = out[0] + existing_container_id = out_json[0]['Id'] print(f"Reusing existing container {existing_container_id}") env['CM_DOCKER_CONTAINER_ID'] = existing_container_id @@ -355,7 +356,7 @@ def record_script(i): def update_docker_info(env): # Updating Docker info - docker_image_repo = env.get('CM_DOCKER_IMAGE_REPO', 'local') + docker_image_repo = env.get('CM_DOCKER_IMAGE_REPO', 'localhost/local') env['CM_DOCKER_IMAGE_REPO'] = docker_image_repo docker_image_base = env.get('CM_DOCKER_IMAGE_BASE') diff --git a/script/run-mlperf-inference-app/customize.py b/script/run-mlperf-inference-app/customize.py index 480beda5a..10f653e17 100644 --- a/script/run-mlperf-inference-app/customize.py +++ b/script/run-mlperf-inference-app/customize.py @@ -233,7 +233,8 @@ def preprocess(i): inp = {} if str(docker_dt).lower() in ["yes", "true", "1"]: # turning it off for the first run and after that we turn it on - env['CM_DOCKER_REUSE_EXISTING_CONTAINER'] = 'no' + if env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', '') == '': + env['CM_DOCKER_REUSE_EXISTING_CONTAINER'] = 'no' env['CM_DOCKER_DETACHED_MODE'] = 'yes' if env.get('CM_DOCKER_IMAGE_NAME', '') != '': From ed8d52599ad22ddaf39b3f236016d443948183c3 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Thu, 2 Jan 2025 19:13:23 +0000 Subject: [PATCH 74/80] [Automated Commit] Format Codebase --- script/run-docker-container/customize.py | 3 ++- script/run-mlperf-inference-app/customize.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/script/run-docker-container/customize.py b/script/run-docker-container/customize.py index 4f0fa8277..2c3a5789e 100644 --- a/script/run-docker-container/customize.py +++ b/script/run-docker-container/customize.py @@ -5,6 +5,7 @@ from os.path import exists import json + def preprocess(i): os_info = i['os_info'] @@ -70,7 +71,7 @@ def preprocess(i): out_json = json.loads(out) if len(out_json) > 0 and str(env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', - '')).lower() in ["1", "true", "yes"]: # container exists + '')).lower() in ["1", "true", "yes"]: # container exists 
existing_container_id = out_json[0]['Id'] print(f"Reusing existing container {existing_container_id}") env['CM_DOCKER_CONTAINER_ID'] = existing_container_id diff --git a/script/run-mlperf-inference-app/customize.py b/script/run-mlperf-inference-app/customize.py index 10f653e17..fc857c468 100644 --- a/script/run-mlperf-inference-app/customize.py +++ b/script/run-mlperf-inference-app/customize.py @@ -233,7 +233,7 @@ def preprocess(i): inp = {} if str(docker_dt).lower() in ["yes", "true", "1"]: # turning it off for the first run and after that we turn it on - if env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', '') == '': + if env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', '') == '': env['CM_DOCKER_REUSE_EXISTING_CONTAINER'] = 'no' env['CM_DOCKER_DETACHED_MODE'] = 'yes' From 5e8daea53038964bdf1d9b455f7a0743cfbf9203 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 3 Jan 2025 03:39:49 +0530 Subject: [PATCH 75/80] Fix bug in docker container detect --- script/run-docker-container/customize.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/script/run-docker-container/customize.py b/script/run-docker-container/customize.py index 2c3a5789e..9b3d26db6 100644 --- a/script/run-docker-container/customize.py +++ b/script/run-docker-container/customize.py @@ -69,9 +69,9 @@ def preprocess(i): 'error': 'Unexpected error occurred with docker run:\n{}'.format(e) } - out_json = json.loads(out) - if len(out_json) > 0 and str(env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', + if len(out) > 0 and str(env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', '')).lower() in ["1", "true", "yes"]: # container exists + out_json = json.loads(out) existing_container_id = out_json[0]['Id'] print(f"Reusing existing container {existing_container_id}") env['CM_DOCKER_CONTAINER_ID'] = existing_container_id From 53ebc8d1c0a325af09a1473c1a845fbff5acc29c Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Thu, 2 Jan 2025 22:10:09 +0000 Subject: [PATCH 76/80] [Automated Commit] Format Codebase --- script/run-docker-container/customize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/run-docker-container/customize.py b/script/run-docker-container/customize.py index 9b3d26db6..9bcb13037 100644 --- a/script/run-docker-container/customize.py +++ b/script/run-docker-container/customize.py @@ -70,7 +70,7 @@ def preprocess(i): } if len(out) > 0 and str(env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', - '')).lower() in ["1", "true", "yes"]: # container exists + '')).lower() in ["1", "true", "yes"]: # container exists out_json = json.loads(out) existing_container_id = out_json[0]['Id'] print(f"Reusing existing container {existing_container_id}") From e20bcaef3b6e15df2cac02db1a677ae775751ea0 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 2 Jan 2025 22:17:44 +0000 Subject: [PATCH 77/80] Update format.yml --- .github/workflows/format.yml | 27 +++++++++++++++++++-------- 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index f11721be2..dbf9a78bb 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -3,7 +3,7 @@ name: "Code formatting" on: push: branches: - - "**" + - "**" env: python_version: "3.9" @@ -12,16 +12,25 @@ jobs: format-code: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - name: Retrieve secrets from Keeper + id: ksecrets + uses: Keeper-Security/ksm-action@master + with: + keeper-secret-config: ${{ secrets.KSM_CONFIG }} + secrets: |- + v2h4jKiZlJywDSoKzRMnRw/field/Access Token 
> env:PAT # Fetch PAT and store in environment variable + + - name: Checkout code + uses: actions/checkout@v4 with: fetch-depth: 0 - ssh-key: ${{ secrets.DEPLOY_KEY }} + - name: Set up Python ${{ env.python_version }} uses: actions/setup-python@v3 with: python-version: ${{ env.python_version }} - - name: Format modified python files + - name: Format modified Python files env: filter: ${{ github.event.before }} run: | @@ -48,7 +57,9 @@ jobs: fi done - - name: Commit and Push + - name: Commit and push changes + env: + PAT: ${{ env.PAT }} # Use PAT fetched from Keeper run: | HAS_CHANGES=$(git diff --staged --name-only) if [ ${#HAS_CHANGES} -gt 0 ]; then @@ -56,6 +67,6 @@ jobs: git config --global user.email "mlcommons-bot@users.noreply.github.com" # Commit changes git commit -m '[Automated Commit] Format Codebase' - git push - - fi + # Use the PAT to push changes + git push https://x-access-token:${PAT}@github.com/${{ github.repository }} HEAD:${{ github.ref_name }} + fi From 9825d35d0c416960b13b3326ba06885ba470fd05 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 2 Jan 2025 22:41:03 +0000 Subject: [PATCH 78/80] Fix SUT name update in mlperf-inference-submission-generation (#96) * Remove ulimit memlock option for nvidia-mlperf-inference * Added an option to override SUT name during submission generation, turn on checker by default --- script/generate-mlperf-inference-submission/_cm.yaml | 4 ++-- script/generate-mlperf-inference-submission/customize.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/script/generate-mlperf-inference-submission/_cm.yaml b/script/generate-mlperf-inference-submission/_cm.yaml index 5e43dd787..8dad87f25 100644 --- a/script/generate-mlperf-inference-submission/_cm.yaml +++ b/script/generate-mlperf-inference-submission/_cm.yaml @@ -101,9 +101,9 @@ post_deps: - 'yes' - true tags: preprocess,mlperf,submission -- enable_if_env: +- skip_if_env: CM_RUN_SUBMISSION_CHECKER: - - 'yes' + - 'no' names: - mlperf-inference-submission-checker - submission-checker diff --git a/script/generate-mlperf-inference-submission/customize.py b/script/generate-mlperf-inference-submission/customize.py index 73cd70052..d3ab67a2b 100644 --- a/script/generate-mlperf-inference-submission/customize.py +++ b/script/generate-mlperf-inference-submission/customize.py @@ -190,7 +190,7 @@ def generate_submission(env, state, inp, submission_division): result_path, 'system_meta.json') # checks for json file containing system meta sut_info = { - "hardware_name": None, + "system_name": None, "implementation": None, "device": None, "framework": None, @@ -276,7 +276,7 @@ def generate_submission(env, state, inp, submission_division): {model: returned_model_name}) if check_dict_filled(sut_info.keys(), sut_info): - system = sut_info["hardware_name"] + system = env.get('CM_HW_NAME', sut_info["system_name"]) implementation = sut_info["implementation"] device = sut_info["device"] framework = sut_info["framework"].replace(" ", "_") From 7be8b1c68f773ba53218e941d1f713314b3821da Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 2 Jan 2025 22:47:06 +0000 Subject: [PATCH 79/80] Update format.yml --- .github/workflows/format.yml | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index dbf9a78bb..e1490bc0f 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -12,14 +12,6 @@ jobs: format-code: runs-on: ubuntu-latest steps: - - name: Retrieve secrets from Keeper - id: ksecrets 
- uses: Keeper-Security/ksm-action@master - with: - keeper-secret-config: ${{ secrets.KSM_CONFIG }} - secrets: |- - v2h4jKiZlJywDSoKzRMnRw/field/Access Token > env:PAT # Fetch PAT and store in environment variable - - name: Checkout code uses: actions/checkout@v4 with: @@ -58,8 +50,6 @@ jobs: done - name: Commit and push changes - env: - PAT: ${{ env.PAT }} # Use PAT fetched from Keeper run: | HAS_CHANGES=$(git diff --staged --name-only) if [ ${#HAS_CHANGES} -gt 0 ]; then @@ -68,5 +58,5 @@ jobs: # Commit changes git commit -m '[Automated Commit] Format Codebase' # Use the PAT to push changes - git push https://x-access-token:${PAT}@github.com/${{ github.repository }} HEAD:${{ github.ref_name }} + git push fi From 62ed33df388ee0527d17ab3388d43459bd64e058 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 3 Jan 2025 15:41:40 +0000 Subject: [PATCH 80/80] Added submit-mlperf-results CM script for automatic mlperf result submissions (#98) * Update format.yml * Added submit-mlperf-results script for auto upload of mlperf results * [Automated Commit] Format Codebase --- script/submit-mlperf-results/_cm.yaml | 22 +++ script/submit-mlperf-results/customize.py | 194 ++++++++++++++++++++++ 2 files changed, 216 insertions(+) create mode 100644 script/submit-mlperf-results/_cm.yaml create mode 100644 script/submit-mlperf-results/customize.py diff --git a/script/submit-mlperf-results/_cm.yaml b/script/submit-mlperf-results/_cm.yaml new file mode 100644 index 000000000..b7a10ce83 --- /dev/null +++ b/script/submit-mlperf-results/_cm.yaml @@ -0,0 +1,22 @@ +alias: submit-mlperf-results +automation_alias: script +automation_uid: 5b4e0237da074764 +category: MLPerf benchmark support +default_env: + CM_MLPERF_SUBMISSION_URL: https://submissions-ui.mlcommons.org + +input_mapping: + input: CM_MLPERF_SUBMISSION_FILE + submitter_id: CM_MLPERF_SUBMITTER_ID +tags: +- submit +- mlperf +- results +- mlperf-results +- publish-results +- submission +uid: cc01f0a82bef4216 +variations: + inference: + env: + CM_MLPERF_BENCHMARK: "Inference" diff --git a/script/submit-mlperf-results/customize.py b/script/submit-mlperf-results/customize.py new file mode 100644 index 000000000..d39b233f3 --- /dev/null +++ b/script/submit-mlperf-results/customize.py @@ -0,0 +1,194 @@ +import requests +from cmind import utils +import cmind as cm +import os +import json + + +def preprocess(i): + + os_info = i['os_info'] + env = i['env'] + meta = i['meta'] + automation = i['automation'] + + server = env['CM_MLPERF_SUBMISSION_URL'] + benchmark = env['CM_MLPERF_BENCHMARK'] + submitter_id = env['CM_MLPERF_SUBMITTER_ID'] + file_path = env['CM_MLPERF_SUBMISSION_FILE'] + + r = get_signed_url(server, benchmark, submitter_id, file_path) + if r['return'] > 0: + return r + + signed_url = r['signed_url'] + submission_id = r['submission_id'] + + # print(signed_url) + # print(submission_id) + r = upload_file_to_signed_url(file_path, signed_url) + if r['return'] > 0: + return r + + r = trigger_submission_checker( + server, submitter_id, benchmark, submission_id) + if r['return'] > 0: + return r + + return {'return': 0} + + +def get_signed_url(server, benchmark, submitter_id, file_path): + # Define the URL + url = f"{server}/index/url" + + # Define the headers + headers = { + "Content-Type": "application/json" + } + + # Define the payload + payload = { + "submitter_id": submitter_id, + "benchmark": benchmark, + "filename": file_path + } + + try: + # Make the POST request + response = requests.post(url, json=payload, headers=headers) + + # Check the response 
status + if response.status_code == 200: + # print("Request successful!") + # print("Response:", response.json()) + pass + else: + # print(f"Request failed with status code {response.status_code}") + # print("Response:", response.text) + pass + + except requests.exceptions.RequestException as e: + return {"return": 1, + "error": f"An error occurred in connecting to the server: {e}"} + + response_json = response.json() + # print(response_json) + # response = json.loads(response_json) + try: + signed_url = response_json['signed_url'] + submission_id = response_json['submission_id'] + except Exception as e: + return { + "return": 1, "error": f"An error occurred while processing the response: {e}"} + + return {'return': 0, 'signed_url': signed_url, + 'submission_id': submission_id} + + +def upload_file_to_signed_url(file_path, signed_url): + """ + Uploads a file to a signed URL using HTTP PUT. + + Parameters: + file_path (str): The path to the file you want to upload. + signed_url (str): The pre-signed URL for uploading the file. + + Returns: + dict: A dictionary with 'status_code' and 'response' keys. + """ + headers = { + 'Content-Type': 'application/octet-stream', + 'Access-Control-Allow-Headers': '*' + } + + try: + # Open the file in binary mode + with open(file_path, 'rb') as file: + response = requests.put( + signed_url, + data=file, + headers=headers + ) + + if response.status_code in [200, 201, 204]: + print("File uploaded successfully!") + return { + 'return': 0 + } + else: + print( + f"Failed to upload file. Status code: {response.status_code}") + print("Response:", response.text) + + return { + 'return': response.status_code, + 'error': response.text + } + + except FileNotFoundError: + print("Error: File not found.") + return { + 'return': 400, + 'error': f'''File {file_path} not found''' + } + + except requests.exceptions.RequestException as e: + print(f"Request failed: {e}") + return { + 'return': 500, + 'error': str(e) + } + + +def trigger_submission_checker( + server_url, submitter_id, benchmark, submission_id): + """ + Sends a POST request with URL-encoded form data. + + Parameters: + server_url (str): The server endpoint URL (e.g., https://example.com/index). + submitter_id (str): The ID of the submitter. + benchmark (str): The benchmark identifier. + submission_id (str): The submission ID. + + Returns: + dict: A dictionary containing status code and response content. + """ + url = f"{server_url}/index" + headers = { + "Content-Type": "application/x-www-form-urlencoded" + } + payload = { + "submitter_id": submitter_id, + "benchmark": benchmark, + "submission_id": submission_id + } + + try: + # Make the POST request with URL-encoded data + response = requests.post(url, data=payload, headers=headers) + + if response.ok: + print("Submission Check Request successful!") + pass + else: + print( + f"Submission Check Request failed with status code: {response.status_code}") + print("Response:", response.text) + + return { + "return": 0, + "response": response.text + } + + except requests.exceptions.RequestException as e: + print("An error occurred:", e) + return { + "return": 500, + "error": str(e) + } + + +def postprocess(i): + return {'return': 0}
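
Usage sketch for the new submit-mlperf-results script (an illustration, not part of the patches above): given the tags, input_mapping, and `_inference` variation declared in the new _cm.yaml, a submission upload could be triggered with a command along the following lines. The archive path and submitter ID are placeholders, and the patch does not show the expected packaging of CM_MLPERF_SUBMISSION_FILE, so the tarball name here is an assumption.

    cm run script --tags=submit,mlperf,results,_inference \
        --input=$HOME/gh_action_submissions/submission.tar.gz \
        --submitter_id=<submitter-id>

In this invocation, `_inference` selects the variation that sets CM_MLPERF_BENCHMARK to "Inference", while `--input` and `--submitter_id` map to CM_MLPERF_SUBMISSION_FILE and CM_MLPERF_SUBMITTER_ID via the script's input_mapping; CM_MLPERF_SUBMISSION_URL defaults to https://submissions-ui.mlcommons.org. The customize.py added above then performs the three-step flow: POST to <server>/index/url to obtain a signed URL and submission ID, PUT the file to the signed URL, and POST to <server>/index to trigger the submission checker.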