From a6dcd31a1c2d8fec5f6a45a9385f1adc88c95b11 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 25 Jan 2025 20:45:48 +0000 Subject: [PATCH 01/16] Update test-mlperf-inference-abtf-poc.yml | Run docker from PR branch --- .github/workflows/test-mlperf-inference-abtf-poc.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-mlperf-inference-abtf-poc.yml b/.github/workflows/test-mlperf-inference-abtf-poc.yml index 026db0b14..507223512 100644 --- a/.github/workflows/test-mlperf-inference-abtf-poc.yml +++ b/.github/workflows/test-mlperf-inference-abtf-poc.yml @@ -18,7 +18,7 @@ jobs: python-version: [ "3.8", "3.12" ] backend: [ "pytorch" ] implementation: [ "python" ] - docker: [ "", " --docker --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --docker_dt" ] + docker: [ "", " --docker --docker_mlc_repo=${{ github.event.pull_request.head.repo.html_url }} --docker_mlc_repo_branch=${{ github.event.pull_request.head.ref }} --docker_dt" ] extra-args: [ "--adr.compiler.tags=gcc", "--env.MLC_MLPERF_LOADGEN_BUILD_FROM_SRC=off" ] exclude: - os: ubuntu-24.04 @@ -28,16 +28,16 @@ jobs: - os: windows-latest extra-args: "--adr.compiler.tags=gcc" - os: windows-latest - docker: " --docker --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --docker_dt" + docker: " --docker --docker_mlc_repo=${{ github.event.pull_request.head.repo.html_url }} --docker_mlc_repo_branch=${{ github.event.pull_request.head.ref }} --docker_dt" # windows docker image is not supported in CM yet - os: macos-latest python-version: "3.8" - os: macos-13 python-version: "3.8" - os: macos-latest - docker: " --docker --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --docker_dt" + docker: " --docker --docker_mlc_repo=${{ github.event.pull_request.head.repo.html_url }} --docker_mlc_repo_branch=${{ github.event.pull_request.head.ref }} --docker_dt" - os: macos-13 - docker: " --docker --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --docker_dt" + docker: " --docker --docker_mlc_repo=${{ github.event.pull_request.head.repo.html_url }} --docker_mlc_repo_branch=${{ github.event.pull_request.head.ref }} --docker_dt" steps: - uses: actions/checkout@v3 From d2e1f4efb243504699817ca4addf1c32c393c332 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 25 Jan 2025 21:30:01 +0000 Subject: [PATCH 02/16] Fixes for docker mounts (#150) * Conditionally enable mitten from src * Fix docker mounts, docker_input_mapping -> input_mapping * Fix ENV string for mounts * Update userid if the docker user is existing --- .github/workflows/check-broken-links.yml | 1 + .../workflows/test-mlc-script-features.yml | 1 + automation/script/docker.py | 40 +++-- automation/script/docker_utils.py | 142 +++++++++++++++--- automation/script/module.py | 9 -- script/app-mlperf-inference-nvidia/meta.yaml | 9 +- script/app-mlperf-inference/meta.yaml | 4 +- script/build-docker-image/customize.py | 13 +- script/build-dockerfile/customize.py | 2 +- .../meta.yaml | 2 +- script/convert-csv-to-md/meta.yaml | 1 - .../meta.yaml | 2 +- script/get-docker/meta.yaml | 1 - script/get-nvidia-docker/meta.yaml | 1 - .../install-mlperf-logging-from-src/meta.yaml | 2 - .../run.sh | 2 +- script/run-docker-container/meta.yaml | 1 + script/run-mlperf-inference-app/meta.yaml | 2 +- .../meta.yaml | 2 +- script/set-device-settings-qaic/meta.yaml | 1 - script/set-performance-mode/meta.yaml | 1 - 21 files changed, 166 insertions(+), 73 deletions(-) diff --git a/.github/workflows/check-broken-links.yml b/.github/workflows/check-broken-links.yml index aaaf83d14..8a192b29a 100644 --- a/.github/workflows/check-broken-links.yml +++ b/.github/workflows/check-broken-links.yml @@ -8,6 +8,7 @@ on: jobs: markdown-link-check: runs-on: ubuntu-latest + # check out the latest version of the code steps: - uses: actions/checkout@v4 diff --git a/.github/workflows/test-mlc-script-features.yml b/.github/workflows/test-mlc-script-features.yml index 5e67059a5..693b23da0 100644 --- a/.github/workflows/test-mlc-script-features.yml +++ b/.github/workflows/test-mlc-script-features.yml @@ -71,6 +71,7 @@ jobs: mlc run script --tags=run,docker,container --adr.compiler.tags=gcc --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --image_name=mlc-script-app-image-classification-onnx-py --env.MLC_DOCKER_RUN_SCRIPT_TAGS=app,image-classification,onnx,python --env.MLC_DOCKER_IMAGE_BASE=ubuntu:22.04 --env.MLC_DOCKER_IMAGE_REPO=local --quiet - name: Run MLPerf Inference Retinanet with native and virtual Python + if: runner.os == 'linux' run: | mlcr --tags=app,mlperf,inference,generic,_cpp,_retinanet,_onnxruntime,_cpu --adr.python.version_min=3.8 --adr.compiler.tags=gcc --adr.openimages-preprocessed.tags=_50 --scenario=Offline --mode=accuracy --test_query_count=10 --rerun --quiet diff --git a/automation/script/docker.py b/automation/script/docker.py index d5b179943..9852937c1 100644 --- a/automation/script/docker.py +++ b/automation/script/docker.py @@ -213,12 +213,7 @@ def docker_run(self_module, i): env = i.get('env', {}) regenerate_docker_file = not i.get('docker_noregenerate', False) - recreate_docker_image = i.get('docker_recreate', False) - - if is_true(i.get('docker_skip_build', False)): - regenerate_docker_file = False - recreate_docker_image = False - env['MLC_DOCKER_SKIP_BUILD'] = 'yes' + rebuild_docker_image = i.get('docker_rebuild', False) # Prune unnecessary Docker-related input keys r = prune_input({'input': i, 'extra_keys_starts_with': ['docker_']}) @@ -269,7 +264,10 @@ def docker_run(self_module, i): 'alias', ''), meta.get( 'uid', '') - mounts = copy.deepcopy(i.get('docker_mounts', [])) + mounts = copy.deepcopy( + i.get( + 'docker_mounts', + [])) # do we need a copy here? variations = meta.get('variations', {}) docker_settings = meta.get('docker', {}) state['docker'] = docker_settings @@ -334,11 +332,6 @@ def docker_run(self_module, i): if r['return'] > 0: return r - # Handle environment variable-based mounts - mounts = process_mounts(mounts, env, i, docker_settings) - if mounts is None: - return {'return': 1, 'error': 'Error processing mounts'} - # Prepare Docker-specific inputs docker_inputs, dockerfile_path = prepare_docker_inputs( i, docker_settings, script_path, True) @@ -346,6 +339,25 @@ def docker_run(self_module, i): if docker_inputs is None: return {'return': 1, 'error': 'Error preparing Docker inputs'} + docker_input_mapping = docker_settings.get('input_mapping') + + # Update env based on docker_input_mapping if they are in input + if docker_input_mapping and i: + env.update({docker_input_mapping[key]: i[key] + for key in docker_input_mapping if key in i}) + + # Handle environment variable-based mounts + res = process_mounts(mounts, env, docker_settings, f_run_cmd) + if res['return'] > 0: + return res + docker_inputs['mounts'] = res['mounts'] + container_env_string = res['container_env_string'] + + res = update_docker_environment( + docker_settings, env, container_env_string) + if res['return'] > 0: + return res + # Generate the run command r = regenerate_script_cmd({'script_uid': script_uid, 'script_alias': script_alias, @@ -353,12 +365,12 @@ def docker_run(self_module, i): 'run_cmd': f_run_cmd}) if r['return'] > 0: return r - final_run_cmd = r['run_cmd_string'] + final_run_cmd = f"""{r['run_cmd_string']} {container_env_string} --docker_run_deps """ # Execute the Docker container mlc_docker_input = { 'action': 'run', 'automation': 'script', 'tags': 'run,docker,container', - 'recreate': recreate_docker_image, + 'rebuild': rebuild_docker_image, 'env': env, 'mounts': mounts, 'script_tags': i.get('tags'), 'run_cmd': final_run_cmd, 'v': verbose, 'quiet': True, 'real_run': True, 'add_deps_recursive': {'build-docker-image': {'dockerfile': dockerfile_path}}, diff --git a/automation/script/docker_utils.py b/automation/script/docker_utils.py index 2d70a68f4..49f025cf4 100644 --- a/automation/script/docker_utils.py +++ b/automation/script/docker_utils.py @@ -7,7 +7,7 @@ import copy -def process_mounts(mounts, env, i, docker_settings): +def process_mounts(mounts, env, docker_settings, f_run_cmd): """ Processes and updates the Docker mounts based on the provided inputs and environment variables. @@ -20,21 +20,71 @@ def process_mounts(mounts, env, i, docker_settings): Returns: Updated mounts list or None in case of an error. """ - try: - # Add mounts specified via `env` variables - for mount_key in docker_settings.get('env_mounts', []): - mount_path = env.get(mount_key, '') - if mount_path: - mounts.append(mount_path) - - # Include user-specified additional mounts - if 'docker_additional_mounts' in i: - mounts.extend(i['docker_additional_mounts']) - - return mounts - except Exception as e: - logging.error(f"Error processing mounts: {e}") - return None + if 'mounts' in docker_settings: + mounts.extend(docker_settings['mounts']) + + docker_input_mapping = docker_settings.get("input_mapping", {}) + container_env_string = "" + + for index in range(len(mounts)): + mount = mounts[index] + + # Locate the last ':' to separate the mount into host and container + # paths + j = mount.rfind(':') + if j <= 0: + return { + 'return': 1, + 'error': f"Can't find separator ':' in the mount string: {mount}" + } + + host_mount, container_mount = mount[:j], mount[j + 1:] + new_host_mount = host_mount + new_container_mount = container_mount + host_env_key, container_env_key = None, str(container_mount) + + # Process host mount for environment variables + host_placeholders = re.findall(r'\${{ (.*?) }}', host_mount) + if host_placeholders: + for placeholder in host_placeholders: + if placeholder in env: + host_env_key = placeholder + new_host_mount = get_host_path(env[placeholder]) + else: # Skip mount if variable is missing + mounts[index] = None + break + + # Process container mount for environment variables + container_placeholders = re.findall(r'\${{ (.*?) }}', container_mount) + if container_placeholders: + for placeholder in container_placeholders: + if placeholder in env: + new_container_mount, container_env_key = get_container_path( + env[placeholder]) + else: # Skip mount if variable is missing + mounts[index] = None + break + + # Skip further processing if the mount was invalid + if mounts[index] is None: + continue + + # Update mount entry + mounts[index] = f"{new_host_mount}:{new_container_mount}" + + # Update container environment string and mappings + if host_env_key: + container_env_string += f" --env.{host_env_key}={container_env_key} " + for key, value in docker_input_mapping.items(): + if value == host_env_key: + i[key] = container_env_key + f_run_cmd[key] = container_env_key + + # Remove invalid mounts and construct mount string + mounts = [item for item in mounts if item is not None] + + return {'return': 0, 'mounts': mounts, + 'container_env_string': container_env_string} def prepare_docker_inputs(input_params, docker_settings, @@ -61,7 +111,7 @@ def prepare_docker_inputs(input_params, docker_settings, keys += [ "skip_run_cmd", "pre_run_cmds", "run_cmd_prefix", "all_gpus", "num_gpus", "device", "gh_token", "port_maps", "shm_size", "pass_user_id", "pass_user_group", "extra_run_args", "detached", "interactive", - "dt", "it" + "dt", "it", "use_host_group_id", "use_host_user_id" ] # Collect Dockerfile inputs docker_inputs = { @@ -102,7 +152,57 @@ def prepare_docker_inputs(input_params, docker_settings, return docker_inputs, dockerfile_path -def update_docker_paths(path, mounts=None, force_target_path=''): +def update_docker_environment(docker_settings, env, container_env_string): + """ + Updates the Docker environment variables and build arguments. + + Args: + docker_settings (dict): Docker configuration settings. + env (dict): The environment dictionary to update. + container_env_string (str): A string to store Docker container environment variable options. + + Returns: + dict: A dictionary with a return code indicating success or failure. + """ + # Define proxy-related environment variable keys to propagate + proxy_keys = [ + "ftp_proxy", "FTP_PROXY", + "http_proxy", "HTTP_PROXY", + "https_proxy", "HTTPS_PROXY", + "no_proxy", "NO_PROXY", + "socks_proxy", "SOCKS_PROXY", + "GH_TOKEN" + ] + + # Ensure the '+ CM_DOCKER_BUILD_ARGS' key exists in the environment + if '+ MLC_DOCKER_BUILD_ARGS' not in env: + env['+ MLC_DOCKER_BUILD_ARGS'] = [] + + # Add proxy environment variables to Docker build arguments and container + # environment string + for proxy_key in proxy_keys: + proxy_value = os.environ.get(proxy_key) + if proxy_value: + container_env_string += f" --env.{proxy_key}={proxy_value} " + env['+ MLC_DOCKER_BUILD_ARGS'].append(f"{proxy_key}={proxy_value}") + + # Add host group ID if specified in the Docker settings and not on Windows + if not is_false(docker_settings.get('pass_group_id')) and os.name != 'nt': + env['+ MLC_DOCKER_BUILD_ARGS'].append( + f"GID=\\\" $(id -g $USER) \\\"" + ) + + # Add host user ID if specified in the Docker settings and not on Windows + if not is_false(docker_settings.get( + 'use_host_user_id')) and os.name != 'nt': + env['+ MLC_DOCKER_BUILD_ARGS'].append( + f"UID=\\\" $(id -u $USER) \\\"" + ) + + return {'return': 0} + + +def update_container_paths(path, mounts=None, force_target_path=''): """ Update and return the absolute paths for a given host path and its container equivalent. Optionally updates a mounts list with the mapping of host and container paths. @@ -275,7 +375,9 @@ def get_docker_default(key): "skip_run_cmd": False, "pre_run_cmds": [], "run_cmd_prefix": '', - "port_maps": [] + "port_maps": [], + "use_host_user_id": True, + "use_host_group_id": True, } if key in defaults: return defaults[key] @@ -317,5 +419,5 @@ def get_container_path(value): new_path_split2 = new_path_split + path_split[repo_entry_index:] return "/".join(new_path_split1), "/".join(new_path_split2) else: - orig_path, target_path = update_path_for_docker(path=value) + orig_path, target_path = update_container_paths(path=value) return target_path, target_path diff --git a/automation/script/module.py b/automation/script/module.py index 6728d5136..d1c1812fe 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -823,10 +823,6 @@ def _run(self, i): posthook_deps = meta.get('posthook_deps', []) input_mapping = meta.get('input_mapping', {}) docker_settings = meta.get('docker') - docker_input_mapping = {} - if docker_settings: - docker_input_mapping = docker_settings.get( - 'docker_input_mapping', {}) new_env_keys_from_meta = meta.get('new_env_keys', []) new_state_keys_from_meta = meta.get('new_state_keys', []) @@ -6058,11 +6054,6 @@ def update_state_from_meta(meta, env, state, const, const_state, deps, post_deps new_docker_settings = meta.get('docker') if new_docker_settings: docker_settings = state.get('docker', {}) - # docker_input_mapping = docker_settings.get('docker_input_mapping', {}) - # new_docker_input_mapping = new_docker_settings.get('docker_input_mapping', {}) - # if new_docker_input_mapping: - # # update_env_from_input_mapping(env, i['input'], docker_input_mapping) - # utils.merge_dicts({'dict1':docker_input_mapping, 'dict2':new_docker_input_mapping, 'append_lists':True, 'append_unique':True}) utils.merge_dicts({'dict1': docker_settings, 'dict2': new_docker_settings, 'append_lists': True, diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml index 473c336c6..eac133277 100644 --- a/script/app-mlperf-inference-nvidia/meta.yaml +++ b/script/app-mlperf-inference-nvidia/meta.yaml @@ -281,15 +281,14 @@ deps: enable_if_env: MLC_RUN_STATE_DOCKER: - 'yes' - - True - - 'True' - tags: get,nvidia,mitten skip_if_env: MLC_RUN_STATE_DOCKER: - 'yes' - - True - - 'True' + enable_if_env: + MLC_NVIDIA_MITTEN_FROM_SRC: + - 'yes' prehook_deps: ######################################################################## @@ -446,7 +445,7 @@ variations: group: model env: MLC_MODEL: stable-diffusion-xl - MLC_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://github.com/mlcommons/cm4mlops/blob/main/script/get-ml-model-stable-diffusion/_cm.json#L174" + MLC_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://github.com/mlcommons/mlperf-automations/blob/main/script/get-ml-model-stable-diffusion/meta.yaml" MLC_ML_MODEL_WEIGHT_TRANSFORMATIONS: "quantization, affine fusion" MLC_ML_MODEL_INPUTS_DATA_TYPE: int32 MLC_ML_MODEL_WEIGHTS_DATA_TYPE: int8 diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml index 6732b193a..3efd5120d 100644 --- a/script/app-mlperf-inference/meta.yaml +++ b/script/app-mlperf-inference/meta.yaml @@ -519,7 +519,7 @@ variations: os: ubuntu real_run: false run: true - docker_input_mapping: + input_mapping: criteo_preprocessed_path: CRITEO_PREPROCESSED_PATH dlrm_data_path: DLRM_DATA_PATH intel_gptj_int8_model_path: MLC_MLPERF_INFERENCE_INTEL_GPTJ_INT8_MODEL_PATH @@ -1905,7 +1905,7 @@ docker: mlc_repo_branch: dev real_run: False os_version: '22.04' - docker_input_mapping: + input_mapping: imagenet_path: IMAGENET_PATH gptj_checkpoint_path: GPTJ_CHECKPOINT_PATH criteo_preprocessed_path: CRITEO_PREPROCESSED_PATH diff --git a/script/build-docker-image/customize.py b/script/build-docker-image/customize.py index be7c33035..acf660261 100644 --- a/script/build-docker-image/customize.py +++ b/script/build-docker-image/customize.py @@ -1,6 +1,7 @@ from mlc import utils import os from os.path import exists +from utils import * def preprocess(i): @@ -50,7 +51,7 @@ def preprocess(i): if env.get("MLC_DOCKER_IMAGE_TAG", "") == '': env['MLC_DOCKER_IMAGE_TAG'] = "latest" - if str(env.get("MLC_DOCKER_CACHE", "yes")).lower() in ["no", "false", "0"]: + if is_false(env.get("MLC_DOCKER_CACHE", "True")): env["MLC_DOCKER_CACHE_ARG"] = " --no-cache" CMD = '' @@ -82,13 +83,8 @@ def preprocess(i): CMD = ''.join(XCMD) - print('================================================') - print('CM generated the following Docker build command:') - print('') print(CMD) - print('') - env['MLC_DOCKER_BUILD_CMD'] = CMD return {'return': 0} @@ -108,7 +104,7 @@ def postprocess(i): env = i['env'] # Check if need to push docker image to the Docker Hub - if env.get('MLC_DOCKER_PUSH_IMAGE', '') in ['True', True, 'yes']: + if is_true(env.get('MLC_DOCKER_PUSH_IMAGE', '')): image_name = get_image_name(env) # Prepare CMD to build image @@ -122,9 +118,6 @@ def postprocess(i): with open(dockerfile_path + '.build.bat', 'w') as f: f.write(PCMD + '\n') - print('================================================') - print('CM generated the following Docker push command:') - print('') print(PCMD) print('') diff --git a/script/build-dockerfile/customize.py b/script/build-dockerfile/customize.py index 23e7891dc..50a4c987c 100644 --- a/script/build-dockerfile/customize.py +++ b/script/build-dockerfile/customize.py @@ -264,7 +264,7 @@ def preprocess(i): DOCKER_GROUP = "-g $GID -o" user_shell = json.loads(shell) - f.write(f"""RUN id -u {docker_user} > /dev/null 2>&1 || useradd """ + DOCKER_USER_ID + DOCKER_GROUP + ' --create-home --shell ' + user_shell[0] + ' ' + f.write(f"""RUN (id -u {docker_user} > /dev/null 2>&1 && usermod -u $UID {docker_user}) || useradd """ + DOCKER_USER_ID + DOCKER_GROUP + ' --create-home --shell ' + user_shell[0] + ' ' + docker_user + EOL) f.write( 'RUN echo "' + diff --git a/script/build-mlperf-inference-server-nvidia/meta.yaml b/script/build-mlperf-inference-server-nvidia/meta.yaml index b5ca1340b..884d75348 100644 --- a/script/build-mlperf-inference-server-nvidia/meta.yaml +++ b/script/build-mlperf-inference-server-nvidia/meta.yaml @@ -331,7 +331,7 @@ docker: interactive: True os_version: '20.04' base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-l4-public - docker_input_mapping: + input_mapping: imagenet_path: IMAGENET_PATH gptj_checkpoint_path: GPTJ_CHECKPOINT_PATH criteo_preprocessed_path: CRITEO_PREPROCESSED_PATH diff --git a/script/convert-csv-to-md/meta.yaml b/script/convert-csv-to-md/meta.yaml index e1ed6f82e..74413ee56 100644 --- a/script/convert-csv-to-md/meta.yaml +++ b/script/convert-csv-to-md/meta.yaml @@ -14,7 +14,6 @@ deps: - names: - tabulate tags: get,generic-python-lib,_package.tabulate -docker_input_mapping: {} input_description: {} input_mapping: csv_file: MLC_CSV_FILE diff --git a/script/generate-mlperf-inference-submission/meta.yaml b/script/generate-mlperf-inference-submission/meta.yaml index 51f45bba7..e36381834 100644 --- a/script/generate-mlperf-inference-submission/meta.yaml +++ b/script/generate-mlperf-inference-submission/meta.yaml @@ -45,7 +45,7 @@ docker: MLC_MLPERF_INFERENCE_SUBMISSION_BASE_DIR: - 'on' tags: get,mlperf,inference,submission,dir,local - docker_input_mapping: + input_mapping: results_dir: MLC_MLPERF_INFERENCE_RESULTS_DIR_ submission_base_dir: MLC_MLPERF_INFERENCE_SUBMISSION_BASE_DIR extra_run_args: ' --cap-add SYS_ADMIN' diff --git a/script/get-docker/meta.yaml b/script/get-docker/meta.yaml index b3a5f1f89..e26954685 100644 --- a/script/get-docker/meta.yaml +++ b/script/get-docker/meta.yaml @@ -5,7 +5,6 @@ cache: true category: Detection or installation of tools and artifacts deps: - tags: detect,os -docker_input_mapping: {} input_description: {} input_mapping: {} new_env_keys: [ diff --git a/script/get-nvidia-docker/meta.yaml b/script/get-nvidia-docker/meta.yaml index 303124799..39cf8d146 100644 --- a/script/get-nvidia-docker/meta.yaml +++ b/script/get-nvidia-docker/meta.yaml @@ -6,7 +6,6 @@ category: Detection or installation of tools and artifacts deps: - tags: detect,os - tags: get,docker -docker_input_mapping: {} input_description: {} input_mapping: {} new_env_keys: [] diff --git a/script/install-mlperf-logging-from-src/meta.yaml b/script/install-mlperf-logging-from-src/meta.yaml index a41c3de26..bf74707b1 100644 --- a/script/install-mlperf-logging-from-src/meta.yaml +++ b/script/install-mlperf-logging-from-src/meta.yaml @@ -12,8 +12,6 @@ deps: extra_cache_tags: mlperf_logging env: MLC_GIT_CHECKOUT_PATH_ENV_NAME: MLC_MLPERF_LOGGING_REPO_PATH -docker_input_mapping: -input_description: new_env_keys: - MLC_MLPERF_LOGGING_REPO_PATH new_state_keys: [] diff --git a/script/push-mlperf-inference-results-to-github/run.sh b/script/push-mlperf-inference-results-to-github/run.sh index a2d07b80a..53a297cf9 100644 --- a/script/push-mlperf-inference-results-to-github/run.sh +++ b/script/push-mlperf-inference-results-to-github/run.sh @@ -21,6 +21,6 @@ git commit -a -m "${MLC_MLPERF_RESULTS_REPO_COMMIT_MESSAGE}" echo ${MLC_GIT_PUSH_CMD} ${MLC_GIT_PUSH_CMD} -test $? -eq 0 || (sleep $((RANDOM % 200 + 1)) && git pull && ${MLC_GIT_PUSH_CMD}) +test $? -eq 0 || (sleep $((RANDOM % 200 + 1)) && git pull --rebase && ${MLC_GIT_PUSH_CMD}) test $? -eq 0 || exit $? diff --git a/script/run-docker-container/meta.yaml b/script/run-docker-container/meta.yaml index e18855026..f6f3d19f0 100644 --- a/script/run-docker-container/meta.yaml +++ b/script/run-docker-container/meta.yaml @@ -52,6 +52,7 @@ input_mapping: pre_run_cmds: MLC_DOCKER_PRE_RUN_COMMANDS real_run: MLC_REAL_RUN recreate: MLC_DOCKER_IMAGE_RECREATE + rebuild: MLC_DOCKER_IMAGE_RECREATE run_cmd: MLC_DOCKER_RUN_CMD run_cmd_extra: MLC_DOCKER_RUN_CMD_EXTRA save_script: MLC_DOCKER_SAVE_SCRIPT diff --git a/script/run-mlperf-inference-app/meta.yaml b/script/run-mlperf-inference-app/meta.yaml index c2f64bb3f..9dc4408d6 100644 --- a/script/run-mlperf-inference-app/meta.yaml +++ b/script/run-mlperf-inference-app/meta.yaml @@ -174,7 +174,7 @@ docker_off: real_run: false run: true interactive: true - docker_input_mapping: + input_mapping: imagenet_path: IMAGENET_PATH gptj_checkpoint_path: GPTJ_CHECKPOINT_PATH criteo_preprocessed_path: CRITEO_PREPROCESSED_PATH diff --git a/script/run-mlperf-inference-mobilenet-models/meta.yaml b/script/run-mlperf-inference-mobilenet-models/meta.yaml index b87bf4896..df19cbbfd 100644 --- a/script/run-mlperf-inference-mobilenet-models/meta.yaml +++ b/script/run-mlperf-inference-mobilenet-models/meta.yaml @@ -11,7 +11,7 @@ default_env: deps: - tags: get,sys-utils-cm docker: - docker_input_mapping: + input_mapping: imagenet_path: IMAGENET_PATH results_dir: RESULTS_DIR submission_dir: SUBMISSION_DIR diff --git a/script/set-device-settings-qaic/meta.yaml b/script/set-device-settings-qaic/meta.yaml index 1599f0067..86caaad46 100644 --- a/script/set-device-settings-qaic/meta.yaml +++ b/script/set-device-settings-qaic/meta.yaml @@ -8,7 +8,6 @@ default_env: deps: - tags: detect-os - tags: get,qaic,platform,sdk -docker_input_mapping: {} input_description: {} input_mapping: {} new_env_keys: diff --git a/script/set-performance-mode/meta.yaml b/script/set-performance-mode/meta.yaml index 9d954ce60..0ead7bc75 100644 --- a/script/set-performance-mode/meta.yaml +++ b/script/set-performance-mode/meta.yaml @@ -6,7 +6,6 @@ category: DevOps automation deps: - tags: detect-os - tags: detect-cpu -docker_input_mapping: {} input_description: {} input_mapping: {} new_env_keys: From 1fcac9474fcfca2363668bcf662d115d8421a21b Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 26 Jan 2025 00:29:39 +0000 Subject: [PATCH 03/16] Retry github push on failure on Windows for mlperf results * Update run.bat --- .github/workflows/check-broken-links.yml | 1 + script/push-mlperf-inference-results-to-github/run.bat | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/check-broken-links.yml b/.github/workflows/check-broken-links.yml index 8a192b29a..06034c32a 100644 --- a/.github/workflows/check-broken-links.yml +++ b/.github/workflows/check-broken-links.yml @@ -13,6 +13,7 @@ jobs: steps: - uses: actions/checkout@v4 + # Checks the status of hyperlinks in .md files in verbose mode - name: Check links uses: gaurav-nelson/github-action-markdown-link-check@v1 diff --git a/script/push-mlperf-inference-results-to-github/run.bat b/script/push-mlperf-inference-results-to-github/run.bat index 385235737..923218f9e 100644 --- a/script/push-mlperf-inference-results-to-github/run.bat +++ b/script/push-mlperf-inference-results-to-github/run.bat @@ -28,8 +28,12 @@ git commit -a -m "%MLC_MLPERF_RESULTS_REPO_COMMIT_MESSAGE%" if defined MLC_MLPERF_INFERENCE_SUBMISSION_DIR call %MLC_SET_REMOTE_URL_CMD% -echo "%MLC_GIT_PUSH_CMD%" -%MLC_GIT_PUSH_CMD% +@if errorlevel 1 ( + timeout /t %random:~0,3% /nobreak > nul + git pull --rebase + %MLC_GIT_PUSH_CMD% +) + REM Check if the previous command was successful if %errorlevel% neq 0 exit /b %errorlevel% From ff187818e784fca5a417a0134d83074a84113714 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 26 Jan 2025 02:50:53 +0000 Subject: [PATCH 04/16] Fixes for nvidia-mlperf-inference (#152) * Added variation for nvidia scratch space * Fix docker_utils import in script module --- automation/script/docker_utils.py | 2 +- automation/script/module.py | 2 +- script/app-mlperf-inference/meta.yaml | 9 +++++++++ script/build-dockerfile/meta.yaml | 1 + script/build-mlperf-inference-server-nvidia/meta.yaml | 4 ++-- .../get-mlperf-inference-nvidia-scratch-space/meta.yaml | 6 +++++- 6 files changed, 19 insertions(+), 5 deletions(-) diff --git a/automation/script/docker_utils.py b/automation/script/docker_utils.py index 49f025cf4..6379c515f 100644 --- a/automation/script/docker_utils.py +++ b/automation/script/docker_utils.py @@ -104,7 +104,7 @@ def prepare_docker_inputs(input_params, docker_settings, keys = [ "mlc_repo", "mlc_repo_branch", "base_image", "os", "os_version", "mlc_repos", "skip_mlc_sys_upgrade", "extra_sys_deps", - "gh_token", "fake_run_deps", "run_final_cmds", "real_run", "copy_files", "path" + "gh_token", "fake_run_deps", "run_final_cmds", "real_run", "copy_files", "path", "user" ] if run_stage: diff --git a/automation/script/module.py b/automation/script/module.py index d1c1812fe..b4141bbd8 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -4950,7 +4950,7 @@ def find_cached_script(i): # TODO Need to restrict the below check to within container # env i['tmp_dep_cached_path'] = dependent_cached_path - import script.docker_utils + from script import docker_utils r = docker_utils.utils.get_container_path_script(i) if not os.path.exists(r['value_env']): # Need to rm this cache entry diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml index 3efd5120d..a8381c323 100644 --- a/script/app-mlperf-inference/meta.yaml +++ b/script/app-mlperf-inference/meta.yaml @@ -1671,6 +1671,8 @@ variations: tags: _ctuning intel-harness: tags: _v3.1 + nvidia-scratch-space: + tags: _version.4_0-dev default_env: MLC_SKIP_SYS_UTILS: 'yes' MLC_REGENERATE_MEASURE_FILES: 'yes' @@ -1709,6 +1711,8 @@ variations: tags: _mlcommons intel-harness: tags: _v4.0 + nvidia-scratch-space: + tags: _version.4_1-dev default_env: MLC_SKIP_SYS_UTILS: 'yes' MLC_REGENERATE_MEASURE_FILES: 'yes' @@ -1725,6 +1729,8 @@ variations: tags: _go intel-harness: tags: _v4.1 + nvidia-scratch-space: + tags: _version.4_1 default_env: MLC_SKIP_SYS_UTILS: 'yes' MLC_REGENERATE_MEASURE_FILES: 'yes' @@ -1746,6 +1752,8 @@ variations: tags: _v4.1 inference-src: version: r5.0 + nvidia-scratch-space: + tags: _version.5.0-dev default_env: MLC_SKIP_SYS_UTILS: 'yes' MLC_REGENERATE_MEASURE_FILES: 'yes' @@ -1901,6 +1909,7 @@ docker: interactive: True extra_run_args: ' --dns 8.8.8.8 --dns 8.8.4.4 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' os: ubuntu + user: mlcuser mlc_repo: mlcommons@mlperf-automations mlc_repo_branch: dev real_run: False diff --git a/script/build-dockerfile/meta.yaml b/script/build-dockerfile/meta.yaml index 0c85f1606..68ea20689 100644 --- a/script/build-dockerfile/meta.yaml +++ b/script/build-dockerfile/meta.yaml @@ -56,6 +56,7 @@ input_mapping: skip_mlc_sys_upgrade: MLC_DOCKER_SKIP_MLC_SYS_UPGRADE push_image: MLC_DOCKER_PUSH_IMAGE docker_not_pull_update: MLC_DOCKER_NOT_PULL_UPDATE + user: MLC_DOCKER_USER new_env_keys: - MLC_DOCKERFILE_* diff --git a/script/build-mlperf-inference-server-nvidia/meta.yaml b/script/build-mlperf-inference-server-nvidia/meta.yaml index 884d75348..3530c9482 100644 --- a/script/build-mlperf-inference-server-nvidia/meta.yaml +++ b/script/build-mlperf-inference-server-nvidia/meta.yaml @@ -191,8 +191,6 @@ variations: add_deps_recursive: nvidia-inference-common-code: version: r4.0 - nvidia-scratch-space: - tags: _version.4_1 deps: - tags: get,generic,sys-util,_git-lfs - tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v4.0 @@ -344,6 +342,8 @@ docker: scratch_path: MLPERF_SCRATCH_PATH deps: - tags: get,mlperf,inference,nvidia,scratch,space + names: + - nvidia-scratch-space - tags: get,mlperf,inference,results,dir,local - tags: get,mlperf,inference,submission,dir,local - tags: get,nvidia-docker diff --git a/script/get-mlperf-inference-nvidia-scratch-space/meta.yaml b/script/get-mlperf-inference-nvidia-scratch-space/meta.yaml index 826db4016..fbdd96c18 100644 --- a/script/get-mlperf-inference-nvidia-scratch-space/meta.yaml +++ b/script/get-mlperf-inference-nvidia-scratch-space/meta.yaml @@ -39,8 +39,12 @@ variations: MLC_NVIDIA_SCRATCH_SPACE_VERSION: '4_1' group: version version.4_1-dev: - default: true env: MLC_NVIDIA_SCRATCH_SPACE_VERSION: 4_1-dev group: version + version.5_0-dev: + default: true + env: + MLC_NVIDIA_SCRATCH_SPACE_VERSION: 5_0-dev + group: version versions: {} From 1ee225822d407cac5b7d8234da99b4ecae0f8fcd Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 26 Jan 2025 02:52:12 +0000 Subject: [PATCH 05/16] Update module.py --- automation/script/module.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/automation/script/module.py b/automation/script/module.py index b4141bbd8..868178f49 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -4951,7 +4951,7 @@ def find_cached_script(i): # env i['tmp_dep_cached_path'] = dependent_cached_path from script import docker_utils - r = docker_utils.utils.get_container_path_script(i) + r = docker_utils.get_container_path_script(i) if not os.path.exists(r['value_env']): # Need to rm this cache entry skip_cached_script = True From 35b1da6a6b6933cb4634037a61af278385565326 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 26 Jan 2025 03:10:49 +0000 Subject: [PATCH 06/16] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index cd6972d2f..44f4602e6 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![License](https://img.shields.io/badge/License-Apache%202.0-green)](LICENSE.md) [![Downloads](https://static.pepy.tech/badge/mlcflow)](https://pepy.tech/project/mlcflow) -[![MLC Script Automation Test](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml/badge.svg)](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml) +[![MLC script automation features test](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml/badge.svg)](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml) [![MLPerf Inference ABTF POC Test](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlperf-inference-abtf-poc.yml/badge.svg)](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlperf-inference-abtf-poc.yml) From 74b8238b4e4fba7a10db4f00667fe4fb7b3fc2a5 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 26 Jan 2025 03:14:38 +0000 Subject: [PATCH 07/16] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 44f4602e6..52e265982 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![License](https://img.shields.io/badge/License-Apache%202.0-green)](LICENSE.md) [![Downloads](https://static.pepy.tech/badge/mlcflow)](https://pepy.tech/project/mlcflow) -[![MLC script automation features test](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml/badge.svg)](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml) +[![MLC script automation features test](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml/badge.svg)](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml?cache-bust=1) [![MLPerf Inference ABTF POC Test](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlperf-inference-abtf-poc.yml/badge.svg)](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlperf-inference-abtf-poc.yml) From a7a4a22716d12ed05942e0607cd9fd74970209f9 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 26 Jan 2025 03:15:39 +0000 Subject: [PATCH 08/16] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 52e265982..28731911f 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![License](https://img.shields.io/badge/License-Apache%202.0-green)](LICENSE.md) [![Downloads](https://static.pepy.tech/badge/mlcflow)](https://pepy.tech/project/mlcflow) -[![MLC script automation features test](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml/badge.svg)](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml?cache-bust=1) +[![MLC script automation features test](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml/badge.svg?cache-bust=1)](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml) [![MLPerf Inference ABTF POC Test](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlperf-inference-abtf-poc.yml/badge.svg)](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlperf-inference-abtf-poc.yml) From f05ab068eda776d54b0584809a9049e35d800e10 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 26 Jan 2025 13:26:31 +0000 Subject: [PATCH 09/16] Update build_wheel.yml --- .github/workflows/build_wheel.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index 85039c7d5..867f93586 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -9,7 +9,6 @@ on: - dev paths: - VERSION - - setup.py jobs: build_wheels: From 16006bb5881deb081afcbaafeef02f3ad0017131 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 26 Jan 2025 13:51:39 +0000 Subject: [PATCH 10/16] Update README.md --- README.md | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index 28731911f..6ee549b1b 100644 --- a/README.md +++ b/README.md @@ -23,12 +23,9 @@ Starting **January 2025**, MLPerf automation scripts are built on the powerful [ ## 🧰 MLCFlow (MLC) Automations -Building on the foundation of its predecessor, the **Collective Mind (CM)** framework, MLCFlow takes ML workflows to the next level by streamlining complex tasks like Docker container management and caching. The `mlcflow` package, written in Python, provides seamless support through both a command-line interface (CLI) and an API, making it easy to access and manage automation scripts. - -### Core Automations -- **Script Automation** – Automates script execution across different environments. -- **Cache Management** – Manages reusable cached results to accelerate workflow processes. +Building upon the robust foundation of its predecessor, the Collective Mind (CM) framework, MLCFlow elevates machine learning workflows by simplifying complex tasks such as Docker container management and caching. Written in Python, the mlcflow package offers a versatile interface, supporting both a user-friendly command-line interface (CLI) and a flexible API for effortless automation script management. +At its core, MLCFlow relies on a single powerful automation, the Script, which is extended by two actions: CacheAction and DockerAction. Together, these components provide streamlined functionality to optimize and enhance your ML workflow automation experience. --- @@ -40,10 +37,17 @@ We welcome contributions from the community! To contribute: Your contributions help drive the project forward! + +--- + +## 💬 Join the Discussion +Connect with us on the [MLCommons Benchmark Infra Discord channel](https://discord.gg/T9rHVwQFNX) to engage in discussions about **MLCFlow** and **MLPerf Automations**. We’d love to hear your thoughts, questions, and ideas! + --- -## 📰 News -Stay tuned for upcoming updates and announcements. +## 📰 Stay Updated +Keep track of the latest development progress and tasks on our [MLPerf Automations Development Board](https://github.com/orgs/mlcommons/projects/50/views/7?sliceBy%5Bvalue%5D=_noValue). +Stay tuned for exciting updates and announcements! --- From d3a564b86931f44a852be889c3899a134199c5af Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 26 Jan 2025 14:03:22 +0000 Subject: [PATCH 11/16] Update test-mlperf-inference-resnet50.yml --- .../test-mlperf-inference-resnet50.yml | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/.github/workflows/test-mlperf-inference-resnet50.yml b/.github/workflows/test-mlperf-inference-resnet50.yml index 84c16e306..85bcd3cc2 100644 --- a/.github/workflows/test-mlperf-inference-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-resnet50.yml @@ -58,8 +58,18 @@ jobs: if: matrix.os != 'windows-latest' run: | mlcr --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --hw_name=gh_${{ matrix.os }}_x86 --model=resnet50 --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --target_qps=1 -v --quiet + - name: Randomly Execute Step + id: random-check + run: | + RANDOM_NUMBER=$((RANDOM % 10)) + echo "Random number is $RANDOM_NUMBER" + if [ "$RANDOM_NUMBER" -eq 0 ]; then + echo "run_step=true" >> $GITHUB_ENV + else + echo "run_step=false" >> $GITHUB_ENV + fi - name: Retrieve secrets from Keeper - if: github.repository_owner == 'mlcommons' + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' id: ksecrets uses: Keeper-Security/ksm-action@master with: @@ -69,7 +79,7 @@ jobs: - name: Push Results env: GITHUB_TOKEN: ${{ env.PAT }} - if: github.repository_owner == 'mlcommons' + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' run: | git config --global user.name "mlcommons-bot" git config --global user.email "mlcommons-bot@users.noreply.github.com" @@ -78,15 +88,4 @@ jobs: git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" mlcr --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from R50 GH action on ${{ matrix.os }}" --quiet - - name: Push Results - env: - GITHUB_TOKEN: ${{ secrets.PAT1 }} - if: github.repository_owner == 'gateoverflow' - run: | - git config --global user.name "mlcommons-bot" - git config --global user.email "mlcommons-bot@users.noreply.github.com" - git config --global credential.https://github.com.helper "" - git config --global credential.https://github.com.helper "!gh auth git-credential" - git config --global credential.https://gist.github.com.helper "" - git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - mlcr --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from R50 GH action on ${{ matrix.os }}" --quiet + From bc88acd60a72218371b6fdbdc0506220a88ace01 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 26 Jan 2025 14:04:31 +0000 Subject: [PATCH 12/16] Update test-mlperf-inference-retinanet.yml --- .../workflows/test-mlperf-inference-retinanet.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/test-mlperf-inference-retinanet.yml b/.github/workflows/test-mlperf-inference-retinanet.yml index ca7d8c980..373eef9b4 100644 --- a/.github/workflows/test-mlperf-inference-retinanet.yml +++ b/.github/workflows/test-mlperf-inference-retinanet.yml @@ -52,7 +52,18 @@ jobs: if: matrix.os != 'windows-latest' run: | mlcr --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --hw_name=gh_${{ matrix.os }}_x86 --model=retinanet --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --quiet -v --target_qps=1 + - name: Randomly Execute Step + id: random-check + run: | + RANDOM_NUMBER=$((RANDOM % 10)) + echo "Random number is $RANDOM_NUMBER" + if [ "$RANDOM_NUMBER" -eq 0 ]; then + echo "run_step=true" >> $GITHUB_ENV + else + echo "run_step=false" >> $GITHUB_ENV + fi - name: Retrieve secrets from Keeper + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' id: ksecrets uses: Keeper-Security/ksm-action@master with: @@ -62,6 +73,7 @@ jobs: - name: Push Results env: GITHUB_TOKEN: ${{ env.PAT }} + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' run: | git config --global user.name "mlcommons-bot" git config --global user.email "mlcommons-bot@users.noreply.github.com" From b0a1711ce658c4512e465e1db707b4331ee3573d Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 26 Jan 2025 14:04:59 +0000 Subject: [PATCH 13/16] Update test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml --- ...erence-bert-deepsparse-tf-onnxruntime-pytorch.yml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml b/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml index 0cea2d152..13b4a0288 100644 --- a/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml +++ b/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml @@ -47,7 +47,18 @@ jobs: if: matrix.os != 'windows-latest' run: | mlcr --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=bert-99 --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --precision=${{ matrix.precision }} --target_qps=1 -v --quiet + - name: Randomly Execute Step + id: random-check + run: | + RANDOM_NUMBER=$((RANDOM % 10)) + echo "Random number is $RANDOM_NUMBER" + if [ "$RANDOM_NUMBER" -eq 0 ]; then + echo "run_step=true" >> $GITHUB_ENV + else + echo "run_step=false" >> $GITHUB_ENV + fi - name: Retrieve secrets from Keeper + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' id: ksecrets uses: Keeper-Security/ksm-action@master with: @@ -57,6 +68,7 @@ jobs: - name: Push Results env: GITHUB_TOKEN: ${{ env.PAT }} + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' run: | git config --global user.name "mlcommons-bot" git config --global user.email "mlcommons-bot@users.noreply.github.com" From 8d11fc79a38855271d12815d0f1664d51ac13328 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 26 Jan 2025 14:07:49 +0000 Subject: [PATCH 14/16] Update test-mlperf-inference-mlcommons-cpp-resnet50.yml --- ...lperf-inference-mlcommons-cpp-resnet50.yml | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml b/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml index 57be88642..5bbec09b8 100644 --- a/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml @@ -48,12 +48,30 @@ jobs: if: matrix.os != 'windows-latest' run: | mlcr --tags=app,mlperf,inference,mlcommons,cpp --submitter="MLCommons" --hw_name=gh_${{ matrix.os }} -v --quiet + - name: Randomly Execute Step + id: random-check + run: | + RANDOM_NUMBER=$((RANDOM % 10)) + echo "Random number is $RANDOM_NUMBER" + if [ "$RANDOM_NUMBER" -eq 0 ]; then + echo "run_step=true" >> $GITHUB_ENV + else + echo "run_step=false" >> $GITHUB_ENV + fi + - name: Retrieve secrets from Keeper + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' + id: ksecrets + uses: Keeper-Security/ksm-action@master + with: + keeper-secret-config: ${{ secrets.KSM_CONFIG }} + secrets: |- + ubwkjh-Ii8UJDpG2EoU6GQ/field/Access Token > env:PAT - name: Push Results - if: github.repository_owner == 'gateoverflow' env: - USER: "GitHub Action" - EMAIL: "admin@gateoverflow.com" - GITHUB_TOKEN: ${{ secrets.TEST_RESULTS_GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ env.PAT }} + USER: mlcommons-bot + EMAIL: mlcommons-bot@users.noreply.github.com + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' run: | git config --global user.name "${{ env.USER }}" git config --global user.email "${{ env.EMAIL }}" From a9d299b7768b9c3d123a19d3afadf47d0787ac9e Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 26 Jan 2025 14:09:16 +0000 Subject: [PATCH 15/16] Update test-mlperf-inference-tvm-resnet50.yml --- .../test-mlperf-inference-tvm-resnet50.yml | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-mlperf-inference-tvm-resnet50.yml b/.github/workflows/test-mlperf-inference-tvm-resnet50.yml index 9ef9a8e22..044310dcc 100644 --- a/.github/workflows/test-mlperf-inference-tvm-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-tvm-resnet50.yml @@ -34,8 +34,19 @@ jobs: mlcr --quiet --tags=get,sys-utils-cm - name: Test MLC Tutorial TVM run: | - mlcr --tags=run-mlperf,inference,_submission,_short --adr.python.name=mlperf --adr.python.version_min=3.8 --submitter=Community --implementation=python --hw_name=default --model=resnet50 --backend=tvm-onnx --device=cpu --scenario=Offline --mode=accuracy --test_query_count=5 --clean --quiet ${{ matrix.extra-options }} + mlcr --tags=run-mlperf,inference,_submission,_short --adr.python.name=mlperf --adr.python.version_min=3.8 --submitter=MLCommons --implementation=python --hw_name=gh_ubuntu-latest --model=resnet50 --backend=tvm-onnx --device=cpu --scenario=Offline --mode=accuracy --test_query_count=5 --clean --quiet ${{ matrix.extra-options }} + - name: Randomly Execute Step + id: random-check + run: | + RANDOM_NUMBER=$((RANDOM % 10)) + echo "Random number is $RANDOM_NUMBER" + if [ "$RANDOM_NUMBER" -eq 0 ]; then + echo "run_step=true" >> $GITHUB_ENV + else + echo "run_step=false" >> $GITHUB_ENV + fi - name: Retrieve secrets from Keeper + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' id: ksecrets uses: Keeper-Security/ksm-action@master with: @@ -45,9 +56,12 @@ jobs: - name: Push Results env: GITHUB_TOKEN: ${{ env.PAT }} + USER: mlcommons-bot + EMAIL: mlcommons-bot@users.noreply.github.com + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' run: | - git config --global user.name "mlcommons-bot" - git config --global user.email "mlcommons-bot@users.noreply.github.com" + git config --global user.name "${{ env.USER }}" + git config --global user.email "${{ env.EMAIL }}" git config --global credential.https://github.com.helper "" git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" From f84d23738b1edab748d9b9e4004b995bc143886c Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 26 Jan 2025 14:34:52 +0000 Subject: [PATCH 16/16] Update test-mlc-script-features.yml --- .../workflows/test-mlc-script-features.yml | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test-mlc-script-features.yml b/.github/workflows/test-mlc-script-features.yml index 693b23da0..898512b7e 100644 --- a/.github/workflows/test-mlc-script-features.yml +++ b/.github/workflows/test-mlc-script-features.yml @@ -35,12 +35,12 @@ jobs: - name: Test Python venv run: | - mlc run script --tags=install,python-venv --name=test --quiet + mlcr --tags=install,python-venv --name=test --quiet mlc search cache --tags=get,python,virtual,name-test --quiet - name: Test variations run: | - mlc run script --tags=get,dataset,preprocessed,imagenet,_NHWC --quiet + mlcr --tags=get,dataset,preprocessed,imagenet,_NHWC --quiet mlc search cache --tags=get,dataset,preprocessed,imagenet,-_NCHW mlc search cache --tags=get,dataset,preprocessed,imagenet,-_NHWC @@ -48,27 +48,28 @@ jobs: continue-on-error: true if: runner.os == 'linux' run: | - mlc run script --tags=get,generic-python-lib,_package.scipy --version=1.9.3 --quiet + mlcr --tags=get,generic-python-lib,_package.scipy --version=1.9.3 --quiet test $? -eq 0 || exit $? - mlc run script --tags=get,generic-python-lib,_package.scipy --version=1.9.2 --quiet + mlcr --tags=get,generic-python-lib,_package.scipy --version=1.9.2 --quiet test $? -eq 0 || exit $? - mlc run script --tags=get,generic-python-lib,_package.scipy --version=1.9.3 --quiet --only_execute_from_cache=True - test $? -eq 0 || exit 0 + # Need to add find cache here + # mlcr --tags=get,generic-python-lib,_package.scipy --version=1.9.3 --quiet --only_execute_from_cache=True + # test $? -eq 0 || exit 0 - name: Test python install from src run: | - mlc run script --tags=python,src,install,_shared --version=3.9.10 --quiet + mlcr --tags=python,src,install,_shared --version=3.9.10 --quiet mlc search cache --tags=python,src,install,_shared,version-3.9.10 - name: Run docker container from dockerhub on linux if: runner.os == 'linux' run: | - mlc run script --tags=run,docker,container --adr.compiler.tags=gcc --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --image_name=cm-script-app-image-classification-onnx-py --env.MLC_DOCKER_RUN_SCRIPT_TAGS=app,image-classification,onnx,python --env.MLC_DOCKER_IMAGE_BASE=ubuntu:22.04 --env.MLC_DOCKER_IMAGE_REPO=cknowledge --quiet + mlcr --tags=run,docker,container --adr.compiler.tags=gcc --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --image_name=cm-script-app-image-classification-onnx-py --env.MLC_DOCKER_RUN_SCRIPT_TAGS=app,image-classification,onnx,python --env.MLC_DOCKER_IMAGE_BASE=ubuntu:22.04 --env.MLC_DOCKER_IMAGE_REPO=cknowledge --quiet - name: Run docker container locally on linux if: runner.os == 'linux' run: | - mlc run script --tags=run,docker,container --adr.compiler.tags=gcc --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --image_name=mlc-script-app-image-classification-onnx-py --env.MLC_DOCKER_RUN_SCRIPT_TAGS=app,image-classification,onnx,python --env.MLC_DOCKER_IMAGE_BASE=ubuntu:22.04 --env.MLC_DOCKER_IMAGE_REPO=local --quiet + mlcr --tags=run,docker,container --adr.compiler.tags=gcc --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --image_name=mlc-script-app-image-classification-onnx-py --env.MLC_DOCKER_RUN_SCRIPT_TAGS=app,image-classification,onnx,python --env.MLC_DOCKER_IMAGE_BASE=ubuntu:22.04 --env.MLC_DOCKER_IMAGE_REPO=local --quiet - name: Run MLPerf Inference Retinanet with native and virtual Python if: runner.os == 'linux'