diff --git a/.github/workflows/build_wheel.yml b/.github/workflows/build_wheel.yml index 85039c7d5..867f93586 100644 --- a/.github/workflows/build_wheel.yml +++ b/.github/workflows/build_wheel.yml @@ -9,7 +9,6 @@ on: - dev paths: - VERSION - - setup.py jobs: build_wheels: diff --git a/.github/workflows/check-broken-links.yml b/.github/workflows/check-broken-links.yml index aaaf83d14..06034c32a 100644 --- a/.github/workflows/check-broken-links.yml +++ b/.github/workflows/check-broken-links.yml @@ -8,10 +8,12 @@ on: jobs: markdown-link-check: runs-on: ubuntu-latest + # check out the latest version of the code steps: - uses: actions/checkout@v4 + # Checks the status of hyperlinks in .md files in verbose mode - name: Check links uses: gaurav-nelson/github-action-markdown-link-check@v1 diff --git a/.github/workflows/test-mlc-script-features.yml b/.github/workflows/test-mlc-script-features.yml index 5e67059a5..898512b7e 100644 --- a/.github/workflows/test-mlc-script-features.yml +++ b/.github/workflows/test-mlc-script-features.yml @@ -35,12 +35,12 @@ jobs: - name: Test Python venv run: | - mlc run script --tags=install,python-venv --name=test --quiet + mlcr --tags=install,python-venv --name=test --quiet mlc search cache --tags=get,python,virtual,name-test --quiet - name: Test variations run: | - mlc run script --tags=get,dataset,preprocessed,imagenet,_NHWC --quiet + mlcr --tags=get,dataset,preprocessed,imagenet,_NHWC --quiet mlc search cache --tags=get,dataset,preprocessed,imagenet,-_NCHW mlc search cache --tags=get,dataset,preprocessed,imagenet,-_NHWC @@ -48,29 +48,31 @@ jobs: continue-on-error: true if: runner.os == 'linux' run: | - mlc run script --tags=get,generic-python-lib,_package.scipy --version=1.9.3 --quiet + mlcr --tags=get,generic-python-lib,_package.scipy --version=1.9.3 --quiet test $? -eq 0 || exit $? 
- mlc run script --tags=get,generic-python-lib,_package.scipy --version=1.9.2 --quiet + mlcr --tags=get,generic-python-lib,_package.scipy --version=1.9.2 --quiet test $? -eq 0 || exit $? - mlc run script --tags=get,generic-python-lib,_package.scipy --version=1.9.3 --quiet --only_execute_from_cache=True - test $? -eq 0 || exit 0 + # Need to add find cache here + # mlcr --tags=get,generic-python-lib,_package.scipy --version=1.9.3 --quiet --only_execute_from_cache=True + # test $? -eq 0 || exit 0 - name: Test python install from src run: | - mlc run script --tags=python,src,install,_shared --version=3.9.10 --quiet + mlcr --tags=python,src,install,_shared --version=3.9.10 --quiet mlc search cache --tags=python,src,install,_shared,version-3.9.10 - name: Run docker container from dockerhub on linux if: runner.os == 'linux' run: | - mlc run script --tags=run,docker,container --adr.compiler.tags=gcc --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --image_name=cm-script-app-image-classification-onnx-py --env.MLC_DOCKER_RUN_SCRIPT_TAGS=app,image-classification,onnx,python --env.MLC_DOCKER_IMAGE_BASE=ubuntu:22.04 --env.MLC_DOCKER_IMAGE_REPO=cknowledge --quiet + mlcr --tags=run,docker,container --adr.compiler.tags=gcc --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --image_name=cm-script-app-image-classification-onnx-py --env.MLC_DOCKER_RUN_SCRIPT_TAGS=app,image-classification,onnx,python --env.MLC_DOCKER_IMAGE_BASE=ubuntu:22.04 --env.MLC_DOCKER_IMAGE_REPO=cknowledge --quiet - name: Run docker container locally on linux if: runner.os == 'linux' run: | - mlc run script --tags=run,docker,container --adr.compiler.tags=gcc --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --image_name=mlc-script-app-image-classification-onnx-py --env.MLC_DOCKER_RUN_SCRIPT_TAGS=app,image-classification,onnx,python --env.MLC_DOCKER_IMAGE_BASE=ubuntu:22.04 --env.MLC_DOCKER_IMAGE_REPO=local --quiet + mlcr 
--tags=run,docker,container --adr.compiler.tags=gcc --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --image_name=mlc-script-app-image-classification-onnx-py --env.MLC_DOCKER_RUN_SCRIPT_TAGS=app,image-classification,onnx,python --env.MLC_DOCKER_IMAGE_BASE=ubuntu:22.04 --env.MLC_DOCKER_IMAGE_REPO=local --quiet - name: Run MLPerf Inference Retinanet with native and virtual Python + if: runner.os == 'linux' run: | mlcr --tags=app,mlperf,inference,generic,_cpp,_retinanet,_onnxruntime,_cpu --adr.python.version_min=3.8 --adr.compiler.tags=gcc --adr.openimages-preprocessed.tags=_50 --scenario=Offline --mode=accuracy --test_query_count=10 --rerun --quiet diff --git a/.github/workflows/test-mlperf-inference-abtf-poc.yml b/.github/workflows/test-mlperf-inference-abtf-poc.yml index 026db0b14..507223512 100644 --- a/.github/workflows/test-mlperf-inference-abtf-poc.yml +++ b/.github/workflows/test-mlperf-inference-abtf-poc.yml @@ -18,7 +18,7 @@ jobs: python-version: [ "3.8", "3.12" ] backend: [ "pytorch" ] implementation: [ "python" ] - docker: [ "", " --docker --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --docker_dt" ] + docker: [ "", " --docker --docker_mlc_repo=${{ github.event.pull_request.head.repo.html_url }} --docker_mlc_repo_branch=${{ github.event.pull_request.head.ref }} --docker_dt" ] extra-args: [ "--adr.compiler.tags=gcc", "--env.MLC_MLPERF_LOADGEN_BUILD_FROM_SRC=off" ] exclude: - os: ubuntu-24.04 @@ -28,16 +28,16 @@ jobs: - os: windows-latest extra-args: "--adr.compiler.tags=gcc" - os: windows-latest - docker: " --docker --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --docker_dt" + docker: " --docker --docker_mlc_repo=${{ github.event.pull_request.head.repo.html_url }} --docker_mlc_repo_branch=${{ github.event.pull_request.head.ref }} --docker_dt" # windows docker image is not supported in CM yet - os: macos-latest python-version: "3.8" - os: macos-13 python-version: "3.8" - os: 
macos-latest - docker: " --docker --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --docker_dt" + docker: " --docker --docker_mlc_repo=${{ github.event.pull_request.head.repo.html_url }} --docker_mlc_repo_branch=${{ github.event.pull_request.head.ref }} --docker_dt" - os: macos-13 - docker: " --docker --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --docker_dt" + docker: " --docker --docker_mlc_repo=${{ github.event.pull_request.head.repo.html_url }} --docker_mlc_repo_branch=${{ github.event.pull_request.head.ref }} --docker_dt" steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml b/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml index 0cea2d152..13b4a0288 100644 --- a/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml +++ b/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml @@ -47,7 +47,18 @@ jobs: if: matrix.os != 'windows-latest' run: | mlcr --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=bert-99 --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --precision=${{ matrix.precision }} --target_qps=1 -v --quiet + - name: Randomly Execute Step + id: random-check + run: | + RANDOM_NUMBER=$((RANDOM % 10)) + echo "Random number is $RANDOM_NUMBER" + if [ "$RANDOM_NUMBER" -eq 0 ]; then + echo "run_step=true" >> $GITHUB_ENV + else + echo "run_step=false" >> $GITHUB_ENV + fi - name: Retrieve secrets from Keeper + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' id: ksecrets uses: Keeper-Security/ksm-action@master with: @@ -57,6 +68,7 @@ jobs: - name: Push Results env: GITHUB_TOKEN: ${{ env.PAT }} + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' run: | git config --global user.name 
"mlcommons-bot" git config --global user.email "mlcommons-bot@users.noreply.github.com" diff --git a/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml b/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml index 57be88642..5bbec09b8 100644 --- a/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-mlcommons-cpp-resnet50.yml @@ -48,12 +48,30 @@ jobs: if: matrix.os != 'windows-latest' run: | mlcr --tags=app,mlperf,inference,mlcommons,cpp --submitter="MLCommons" --hw_name=gh_${{ matrix.os }} -v --quiet + - name: Randomly Execute Step + id: random-check + run: | + RANDOM_NUMBER=$((RANDOM % 10)) + echo "Random number is $RANDOM_NUMBER" + if [ "$RANDOM_NUMBER" -eq 0 ]; then + echo "run_step=true" >> $GITHUB_ENV + else + echo "run_step=false" >> $GITHUB_ENV + fi + - name: Retrieve secrets from Keeper + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' + id: ksecrets + uses: Keeper-Security/ksm-action@master + with: + keeper-secret-config: ${{ secrets.KSM_CONFIG }} + secrets: |- + ubwkjh-Ii8UJDpG2EoU6GQ/field/Access Token > env:PAT - name: Push Results - if: github.repository_owner == 'gateoverflow' env: - USER: "GitHub Action" - EMAIL: "admin@gateoverflow.com" - GITHUB_TOKEN: ${{ secrets.TEST_RESULTS_GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ env.PAT }} + USER: mlcommons-bot + EMAIL: mlcommons-bot@users.noreply.github.com + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' run: | git config --global user.name "${{ env.USER }}" git config --global user.email "${{ env.EMAIL }}" diff --git a/.github/workflows/test-mlperf-inference-resnet50.yml b/.github/workflows/test-mlperf-inference-resnet50.yml index 84c16e306..85bcd3cc2 100644 --- a/.github/workflows/test-mlperf-inference-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-resnet50.yml @@ -58,8 +58,18 @@ jobs: if: matrix.os != 'windows-latest' run: | mlcr 
--tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --hw_name=gh_${{ matrix.os }}_x86 --model=resnet50 --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --target_qps=1 -v --quiet + - name: Randomly Execute Step + id: random-check + run: | + RANDOM_NUMBER=$((RANDOM % 10)) + echo "Random number is $RANDOM_NUMBER" + if [ "$RANDOM_NUMBER" -eq 0 ]; then + echo "run_step=true" >> $GITHUB_ENV + else + echo "run_step=false" >> $GITHUB_ENV + fi - name: Retrieve secrets from Keeper - if: github.repository_owner == 'mlcommons' + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' id: ksecrets uses: Keeper-Security/ksm-action@master with: @@ -69,7 +79,7 @@ jobs: - name: Push Results env: GITHUB_TOKEN: ${{ env.PAT }} - if: github.repository_owner == 'mlcommons' + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' run: | git config --global user.name "mlcommons-bot" git config --global user.email "mlcommons-bot@users.noreply.github.com" @@ -78,15 +88,4 @@ jobs: git config --global credential.https://gist.github.com.helper "" git config --global credential.https://gist.github.com.helper "!gh auth git-credential" mlcr --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from R50 GH action on ${{ matrix.os }}" --quiet - - name: Push Results - env: - GITHUB_TOKEN: ${{ secrets.PAT1 }} - if: github.repository_owner == 'gateoverflow' - run: | - git config --global user.name "mlcommons-bot" - git config --global user.email "mlcommons-bot@users.noreply.github.com" - git config --global credential.https://github.com.helper "" - git config --global credential.https://github.com.helper "!gh auth git-credential" - git config --global credential.https://gist.github.com.helper "" - git 
config --global credential.https://gist.github.com.helper "!gh auth git-credential" - mlcr --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from R50 GH action on ${{ matrix.os }}" --quiet + diff --git a/.github/workflows/test-mlperf-inference-retinanet.yml b/.github/workflows/test-mlperf-inference-retinanet.yml index ca7d8c980..373eef9b4 100644 --- a/.github/workflows/test-mlperf-inference-retinanet.yml +++ b/.github/workflows/test-mlperf-inference-retinanet.yml @@ -52,7 +52,18 @@ jobs: if: matrix.os != 'windows-latest' run: | mlcr --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --hw_name=gh_${{ matrix.os }}_x86 --model=retinanet --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --quiet -v --target_qps=1 + - name: Randomly Execute Step + id: random-check + run: | + RANDOM_NUMBER=$((RANDOM % 10)) + echo "Random number is $RANDOM_NUMBER" + if [ "$RANDOM_NUMBER" -eq 0 ]; then + echo "run_step=true" >> $GITHUB_ENV + else + echo "run_step=false" >> $GITHUB_ENV + fi - name: Retrieve secrets from Keeper + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' id: ksecrets uses: Keeper-Security/ksm-action@master with: @@ -62,6 +73,7 @@ jobs: - name: Push Results env: GITHUB_TOKEN: ${{ env.PAT }} + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' run: | git config --global user.name "mlcommons-bot" git config --global user.email "mlcommons-bot@users.noreply.github.com" diff --git a/.github/workflows/test-mlperf-inference-tvm-resnet50.yml b/.github/workflows/test-mlperf-inference-tvm-resnet50.yml index 9ef9a8e22..044310dcc 100644 --- a/.github/workflows/test-mlperf-inference-tvm-resnet50.yml +++ 
b/.github/workflows/test-mlperf-inference-tvm-resnet50.yml @@ -34,8 +34,19 @@ jobs: mlcr --quiet --tags=get,sys-utils-cm - name: Test MLC Tutorial TVM run: | - mlcr --tags=run-mlperf,inference,_submission,_short --adr.python.name=mlperf --adr.python.version_min=3.8 --submitter=Community --implementation=python --hw_name=default --model=resnet50 --backend=tvm-onnx --device=cpu --scenario=Offline --mode=accuracy --test_query_count=5 --clean --quiet ${{ matrix.extra-options }} + mlcr --tags=run-mlperf,inference,_submission,_short --adr.python.name=mlperf --adr.python.version_min=3.8 --submitter=MLCommons --implementation=python --hw_name=gh_ubuntu-latest --model=resnet50 --backend=tvm-onnx --device=cpu --scenario=Offline --mode=accuracy --test_query_count=5 --clean --quiet ${{ matrix.extra-options }} + - name: Randomly Execute Step + id: random-check + run: | + RANDOM_NUMBER=$((RANDOM % 10)) + echo "Random number is $RANDOM_NUMBER" + if [ "$RANDOM_NUMBER" -eq 0 ]; then + echo "run_step=true" >> $GITHUB_ENV + else + echo "run_step=false" >> $GITHUB_ENV + fi - name: Retrieve secrets from Keeper + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' id: ksecrets uses: Keeper-Security/ksm-action@master with: @@ -45,9 +56,12 @@ jobs: - name: Push Results env: GITHUB_TOKEN: ${{ env.PAT }} + USER: mlcommons-bot + EMAIL: mlcommons-bot@users.noreply.github.com + if: github.repository_owner == 'mlcommons' && env.run_step == 'true' run: | - git config --global user.name "mlcommons-bot" - git config --global user.email "mlcommons-bot@users.noreply.github.com" + git config --global user.name "${{ env.USER }}" + git config --global user.email "${{ env.EMAIL }}" git config --global credential.https://github.com.helper "" git config --global credential.https://github.com.helper "!gh auth git-credential" git config --global credential.https://gist.github.com.helper "" diff --git a/README.md b/README.md index cd6972d2f..6ee549b1b 100644 --- a/README.md +++ b/README.md 
@@ -2,7 +2,7 @@ [![License](https://img.shields.io/badge/License-Apache%202.0-green)](LICENSE.md) [![Downloads](https://static.pepy.tech/badge/mlcflow)](https://pepy.tech/project/mlcflow) -[![MLC Script Automation Test](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml/badge.svg)](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml) +[![MLC script automation features test](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml/badge.svg?cache-bust=1)](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlc-script-features.yml) [![MLPerf Inference ABTF POC Test](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlperf-inference-abtf-poc.yml/badge.svg)](https://github.com/mlcommons/mlperf-automations/actions/workflows/test-mlperf-inference-abtf-poc.yml) @@ -23,12 +23,9 @@ Starting **January 2025**, MLPerf automation scripts are built on the powerful [ ## 🧰 MLCFlow (MLC) Automations -Building on the foundation of its predecessor, the **Collective Mind (CM)** framework, MLCFlow takes ML workflows to the next level by streamlining complex tasks like Docker container management and caching. The `mlcflow` package, written in Python, provides seamless support through both a command-line interface (CLI) and an API, making it easy to access and manage automation scripts. - -### Core Automations -- **Script Automation** – Automates script execution across different environments. -- **Cache Management** – Manages reusable cached results to accelerate workflow processes. +Building upon the robust foundation of its predecessor, the Collective Mind (CM) framework, MLCFlow elevates machine learning workflows by simplifying complex tasks such as Docker container management and caching. 
Written in Python, the mlcflow package offers a versatile interface, supporting both a user-friendly command-line interface (CLI) and a flexible API for effortless automation script management. +At its core, MLCFlow relies on a single powerful automation, the Script, which is extended by two actions: CacheAction and DockerAction. Together, these components provide streamlined functionality to optimize and enhance your ML workflow automation experience. --- @@ -40,10 +37,17 @@ We welcome contributions from the community! To contribute: Your contributions help drive the project forward! + +--- + +## 💬 Join the Discussion +Connect with us on the [MLCommons Benchmark Infra Discord channel](https://discord.gg/T9rHVwQFNX) to engage in discussions about **MLCFlow** and **MLPerf Automations**. We’d love to hear your thoughts, questions, and ideas! + --- -## 📰 News -Stay tuned for upcoming updates and announcements. +## 📰 Stay Updated +Keep track of the latest development progress and tasks on our [MLPerf Automations Development Board](https://github.com/orgs/mlcommons/projects/50/views/7?sliceBy%5Bvalue%5D=_noValue). +Stay tuned for exciting updates and announcements! 
--- diff --git a/automation/script/docker.py b/automation/script/docker.py index d5b179943..9852937c1 100644 --- a/automation/script/docker.py +++ b/automation/script/docker.py @@ -213,12 +213,7 @@ def docker_run(self_module, i): env = i.get('env', {}) regenerate_docker_file = not i.get('docker_noregenerate', False) - recreate_docker_image = i.get('docker_recreate', False) - - if is_true(i.get('docker_skip_build', False)): - regenerate_docker_file = False - recreate_docker_image = False - env['MLC_DOCKER_SKIP_BUILD'] = 'yes' + rebuild_docker_image = i.get('docker_rebuild', False) # Prune unnecessary Docker-related input keys r = prune_input({'input': i, 'extra_keys_starts_with': ['docker_']}) @@ -269,7 +264,10 @@ def docker_run(self_module, i): 'alias', ''), meta.get( 'uid', '') - mounts = copy.deepcopy(i.get('docker_mounts', [])) + mounts = copy.deepcopy( + i.get( + 'docker_mounts', + [])) # do we need a copy here? variations = meta.get('variations', {}) docker_settings = meta.get('docker', {}) state['docker'] = docker_settings @@ -334,11 +332,6 @@ def docker_run(self_module, i): if r['return'] > 0: return r - # Handle environment variable-based mounts - mounts = process_mounts(mounts, env, i, docker_settings) - if mounts is None: - return {'return': 1, 'error': 'Error processing mounts'} - # Prepare Docker-specific inputs docker_inputs, dockerfile_path = prepare_docker_inputs( i, docker_settings, script_path, True) @@ -346,6 +339,25 @@ def docker_run(self_module, i): if docker_inputs is None: return {'return': 1, 'error': 'Error preparing Docker inputs'} + docker_input_mapping = docker_settings.get('input_mapping') + + # Update env based on docker_input_mapping if they are in input + if docker_input_mapping and i: + env.update({docker_input_mapping[key]: i[key] + for key in docker_input_mapping if key in i}) + + # Handle environment variable-based mounts + res = process_mounts(mounts, env, docker_settings, f_run_cmd) + if res['return'] > 0: + return res + 
docker_inputs['mounts'] = res['mounts'] + container_env_string = res['container_env_string'] + + res = update_docker_environment( + docker_settings, env, container_env_string) + if res['return'] > 0: + return res + # Generate the run command r = regenerate_script_cmd({'script_uid': script_uid, 'script_alias': script_alias, @@ -353,12 +365,12 @@ def docker_run(self_module, i): 'run_cmd': f_run_cmd}) if r['return'] > 0: return r - final_run_cmd = r['run_cmd_string'] + final_run_cmd = f"""{r['run_cmd_string']} {container_env_string} --docker_run_deps """ # Execute the Docker container mlc_docker_input = { 'action': 'run', 'automation': 'script', 'tags': 'run,docker,container', - 'recreate': recreate_docker_image, + 'rebuild': rebuild_docker_image, 'env': env, 'mounts': mounts, 'script_tags': i.get('tags'), 'run_cmd': final_run_cmd, 'v': verbose, 'quiet': True, 'real_run': True, 'add_deps_recursive': {'build-docker-image': {'dockerfile': dockerfile_path}}, diff --git a/automation/script/docker_utils.py b/automation/script/docker_utils.py index 2d70a68f4..6379c515f 100644 --- a/automation/script/docker_utils.py +++ b/automation/script/docker_utils.py @@ -7,7 +7,7 @@ import copy -def process_mounts(mounts, env, i, docker_settings): +def process_mounts(mounts, env, docker_settings, f_run_cmd): """ Processes and updates the Docker mounts based on the provided inputs and environment variables. @@ -20,21 +20,71 @@ def process_mounts(mounts, env, i, docker_settings): Returns: Updated mounts list or None in case of an error. 
""" - try: - # Add mounts specified via `env` variables - for mount_key in docker_settings.get('env_mounts', []): - mount_path = env.get(mount_key, '') - if mount_path: - mounts.append(mount_path) - - # Include user-specified additional mounts - if 'docker_additional_mounts' in i: - mounts.extend(i['docker_additional_mounts']) - - return mounts - except Exception as e: - logging.error(f"Error processing mounts: {e}") - return None + if 'mounts' in docker_settings: + mounts.extend(docker_settings['mounts']) + + docker_input_mapping = docker_settings.get("input_mapping", {}) + container_env_string = "" + + for index in range(len(mounts)): + mount = mounts[index] + + # Locate the last ':' to separate the mount into host and container + # paths + j = mount.rfind(':') + if j <= 0: + return { + 'return': 1, + 'error': f"Can't find separator ':' in the mount string: {mount}" + } + + host_mount, container_mount = mount[:j], mount[j + 1:] + new_host_mount = host_mount + new_container_mount = container_mount + host_env_key, container_env_key = None, str(container_mount) + + # Process host mount for environment variables + host_placeholders = re.findall(r'\${{ (.*?) }}', host_mount) + if host_placeholders: + for placeholder in host_placeholders: + if placeholder in env: + host_env_key = placeholder + new_host_mount = get_host_path(env[placeholder]) + else: # Skip mount if variable is missing + mounts[index] = None + break + + # Process container mount for environment variables + container_placeholders = re.findall(r'\${{ (.*?) 
}}', container_mount) + if container_placeholders: + for placeholder in container_placeholders: + if placeholder in env: + new_container_mount, container_env_key = get_container_path( + env[placeholder]) + else: # Skip mount if variable is missing + mounts[index] = None + break + + # Skip further processing if the mount was invalid + if mounts[index] is None: + continue + + # Update mount entry + mounts[index] = f"{new_host_mount}:{new_container_mount}" + + # Update container environment string and mappings + if host_env_key: + container_env_string += f" --env.{host_env_key}={container_env_key} " + for key, value in docker_input_mapping.items(): + if value == host_env_key: + i[key] = container_env_key + f_run_cmd[key] = container_env_key + + # Remove invalid mounts and construct mount string + mounts = [item for item in mounts if item is not None] + + return {'return': 0, 'mounts': mounts, + 'container_env_string': container_env_string} def prepare_docker_inputs(input_params, docker_settings, @@ -54,14 +104,14 @@ def prepare_docker_inputs(input_params, docker_settings, keys = [ "mlc_repo", "mlc_repo_branch", "base_image", "os", "os_version", "mlc_repos", "skip_mlc_sys_upgrade", "extra_sys_deps", - "gh_token", "fake_run_deps", "run_final_cmds", "real_run", "copy_files", "path" + "gh_token", "fake_run_deps", "run_final_cmds", "real_run", "copy_files", "path", "user" ] if run_stage: keys += [ "skip_run_cmd", "pre_run_cmds", "run_cmd_prefix", "all_gpus", "num_gpus", "device", "gh_token", "port_maps", "shm_size", "pass_user_id", "pass_user_group", "extra_run_args", "detached", "interactive", - "dt", "it" + "dt", "it", "use_host_group_id", "use_host_user_id" ] # Collect Dockerfile inputs docker_inputs = { @@ -102,7 +152,57 @@ def prepare_docker_inputs(input_params, docker_settings, return docker_inputs, dockerfile_path -def update_docker_paths(path, mounts=None, force_target_path=''): +def update_docker_environment(docker_settings, env, container_env_string): + """ + 
Updates the Docker environment variables and build arguments. + + Args: + docker_settings (dict): Docker configuration settings. + env (dict): The environment dictionary to update. + container_env_string (str): A string to store Docker container environment variable options. + + Returns: + dict: A dictionary with a return code indicating success or failure. + """ + # Define proxy-related environment variable keys to propagate + proxy_keys = [ + "ftp_proxy", "FTP_PROXY", + "http_proxy", "HTTP_PROXY", + "https_proxy", "HTTPS_PROXY", + "no_proxy", "NO_PROXY", + "socks_proxy", "SOCKS_PROXY", + "GH_TOKEN" + ] + + # Ensure the '+ MLC_DOCKER_BUILD_ARGS' key exists in the environment + if '+ MLC_DOCKER_BUILD_ARGS' not in env: + env['+ MLC_DOCKER_BUILD_ARGS'] = [] + + # Add proxy environment variables to Docker build arguments and container + # environment string + for proxy_key in proxy_keys: + proxy_value = os.environ.get(proxy_key) + if proxy_value: + container_env_string += f" --env.{proxy_key}={proxy_value} " + env['+ MLC_DOCKER_BUILD_ARGS'].append(f"{proxy_key}={proxy_value}") + + # Add host group ID if specified in the Docker settings and not on Windows + if not is_false(docker_settings.get('pass_group_id')) and os.name != 'nt': + env['+ MLC_DOCKER_BUILD_ARGS'].append( + f"GID=\\\" $(id -g $USER) \\\"" + ) + + # Add host user ID if specified in the Docker settings and not on Windows + if not is_false(docker_settings.get( + 'use_host_user_id')) and os.name != 'nt': + env['+ MLC_DOCKER_BUILD_ARGS'].append( + f"UID=\\\" $(id -u $USER) \\\"" + ) + + return {'return': 0} + + +def update_container_paths(path, mounts=None, force_target_path=''): """ Update and return the absolute paths for a given host path and its container equivalent. Optionally updates a mounts list with the mapping of host and container paths. 
@@ -275,7 +375,9 @@ def get_docker_default(key): "skip_run_cmd": False, "pre_run_cmds": [], "run_cmd_prefix": '', - "port_maps": [] + "port_maps": [], + "use_host_user_id": True, + "use_host_group_id": True, } if key in defaults: return defaults[key] @@ -317,5 +419,5 @@ def get_container_path(value): new_path_split2 = new_path_split + path_split[repo_entry_index:] return "/".join(new_path_split1), "/".join(new_path_split2) else: - orig_path, target_path = update_path_for_docker(path=value) + orig_path, target_path = update_container_paths(path=value) return target_path, target_path diff --git a/automation/script/module.py b/automation/script/module.py index 6728d5136..868178f49 100644 --- a/automation/script/module.py +++ b/automation/script/module.py @@ -823,10 +823,6 @@ def _run(self, i): posthook_deps = meta.get('posthook_deps', []) input_mapping = meta.get('input_mapping', {}) docker_settings = meta.get('docker') - docker_input_mapping = {} - if docker_settings: - docker_input_mapping = docker_settings.get( - 'docker_input_mapping', {}) new_env_keys_from_meta = meta.get('new_env_keys', []) new_state_keys_from_meta = meta.get('new_state_keys', []) @@ -4954,8 +4950,8 @@ def find_cached_script(i): # TODO Need to restrict the below check to within container # env i['tmp_dep_cached_path'] = dependent_cached_path - import script.docker_utils - r = docker_utils.utils.get_container_path_script(i) + from script import docker_utils + r = docker_utils.get_container_path_script(i) if not os.path.exists(r['value_env']): # Need to rm this cache entry skip_cached_script = True @@ -6058,11 +6054,6 @@ def update_state_from_meta(meta, env, state, const, const_state, deps, post_deps new_docker_settings = meta.get('docker') if new_docker_settings: docker_settings = state.get('docker', {}) - # docker_input_mapping = docker_settings.get('docker_input_mapping', {}) - # new_docker_input_mapping = new_docker_settings.get('docker_input_mapping', {}) - # if new_docker_input_mapping: - # 
# update_env_from_input_mapping(env, i['input'], docker_input_mapping) - # utils.merge_dicts({'dict1':docker_input_mapping, 'dict2':new_docker_input_mapping, 'append_lists':True, 'append_unique':True}) utils.merge_dicts({'dict1': docker_settings, 'dict2': new_docker_settings, 'append_lists': True, diff --git a/script/app-mlperf-inference-nvidia/meta.yaml b/script/app-mlperf-inference-nvidia/meta.yaml index 473c336c6..eac133277 100644 --- a/script/app-mlperf-inference-nvidia/meta.yaml +++ b/script/app-mlperf-inference-nvidia/meta.yaml @@ -281,15 +281,14 @@ deps: enable_if_env: MLC_RUN_STATE_DOCKER: - 'yes' - - True - - 'True' - tags: get,nvidia,mitten skip_if_env: MLC_RUN_STATE_DOCKER: - 'yes' - - True - - 'True' + enable_if_env: + MLC_NVIDIA_MITTEN_FROM_SRC: + - 'yes' prehook_deps: ######################################################################## @@ -446,7 +445,7 @@ variations: group: model env: MLC_MODEL: stable-diffusion-xl - MLC_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://github.com/mlcommons/cm4mlops/blob/main/script/get-ml-model-stable-diffusion/_cm.json#L174" + MLC_ML_MODEL_STARTING_WEIGHTS_FILENAME: "https://github.com/mlcommons/mlperf-automations/blob/main/script/get-ml-model-stable-diffusion/meta.yaml" MLC_ML_MODEL_WEIGHT_TRANSFORMATIONS: "quantization, affine fusion" MLC_ML_MODEL_INPUTS_DATA_TYPE: int32 MLC_ML_MODEL_WEIGHTS_DATA_TYPE: int8 diff --git a/script/app-mlperf-inference/meta.yaml b/script/app-mlperf-inference/meta.yaml index 6732b193a..a8381c323 100644 --- a/script/app-mlperf-inference/meta.yaml +++ b/script/app-mlperf-inference/meta.yaml @@ -519,7 +519,7 @@ variations: os: ubuntu real_run: false run: true - docker_input_mapping: + input_mapping: criteo_preprocessed_path: CRITEO_PREPROCESSED_PATH dlrm_data_path: DLRM_DATA_PATH intel_gptj_int8_model_path: MLC_MLPERF_INFERENCE_INTEL_GPTJ_INT8_MODEL_PATH @@ -1671,6 +1671,8 @@ variations: tags: _ctuning intel-harness: tags: _v3.1 + nvidia-scratch-space: + tags: _version.4_0-dev default_env: 
MLC_SKIP_SYS_UTILS: 'yes' MLC_REGENERATE_MEASURE_FILES: 'yes' @@ -1709,6 +1711,8 @@ variations: tags: _mlcommons intel-harness: tags: _v4.0 + nvidia-scratch-space: + tags: _version.4_1-dev default_env: MLC_SKIP_SYS_UTILS: 'yes' MLC_REGENERATE_MEASURE_FILES: 'yes' @@ -1725,6 +1729,8 @@ variations: tags: _go intel-harness: tags: _v4.1 + nvidia-scratch-space: + tags: _version.4_1 default_env: MLC_SKIP_SYS_UTILS: 'yes' MLC_REGENERATE_MEASURE_FILES: 'yes' @@ -1746,6 +1752,8 @@ variations: tags: _v4.1 inference-src: version: r5.0 + nvidia-scratch-space: + tags: _version.5_0-dev default_env: MLC_SKIP_SYS_UTILS: 'yes' MLC_REGENERATE_MEASURE_FILES: 'yes' @@ -1901,11 +1909,12 @@ docker: interactive: True extra_run_args: ' --dns 8.8.8.8 --dns 8.8.4.4 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined' os: ubuntu + user: mlcuser mlc_repo: mlcommons@mlperf-automations mlc_repo_branch: dev real_run: False os_version: '22.04' - docker_input_mapping: + input_mapping: imagenet_path: IMAGENET_PATH gptj_checkpoint_path: GPTJ_CHECKPOINT_PATH criteo_preprocessed_path: CRITEO_PREPROCESSED_PATH diff --git a/script/build-docker-image/customize.py b/script/build-docker-image/customize.py index be7c33035..acf660261 100644 --- a/script/build-docker-image/customize.py +++ b/script/build-docker-image/customize.py @@ -1,6 +1,7 @@ from mlc import utils import os from os.path import exists +from utils import * def preprocess(i): @@ -50,7 +51,7 @@ def preprocess(i): if env.get("MLC_DOCKER_IMAGE_TAG", "") == '': env['MLC_DOCKER_IMAGE_TAG'] = "latest" - if str(env.get("MLC_DOCKER_CACHE", "yes")).lower() in ["no", "false", "0"]: + if is_false(env.get("MLC_DOCKER_CACHE", "True")): env["MLC_DOCKER_CACHE_ARG"] = " --no-cache" CMD = '' @@ -82,13 +83,8 @@ def preprocess(i): CMD = ''.join(XCMD) - print('================================================') - print('CM generated the following Docker build command:') - print('') print(CMD) - print('') -
env['MLC_DOCKER_BUILD_CMD'] = CMD return {'return': 0} @@ -108,7 +104,7 @@ def postprocess(i): env = i['env'] # Check if need to push docker image to the Docker Hub - if env.get('MLC_DOCKER_PUSH_IMAGE', '') in ['True', True, 'yes']: + if is_true(env.get('MLC_DOCKER_PUSH_IMAGE', '')): image_name = get_image_name(env) # Prepare CMD to build image @@ -122,9 +118,6 @@ def postprocess(i): with open(dockerfile_path + '.build.bat', 'w') as f: f.write(PCMD + '\n') - print('================================================') - print('CM generated the following Docker push command:') - print('') print(PCMD) print('') diff --git a/script/build-dockerfile/customize.py b/script/build-dockerfile/customize.py index 23e7891dc..50a4c987c 100644 --- a/script/build-dockerfile/customize.py +++ b/script/build-dockerfile/customize.py @@ -264,7 +264,7 @@ def preprocess(i): DOCKER_GROUP = "-g $GID -o" user_shell = json.loads(shell) - f.write(f"""RUN id -u {docker_user} > /dev/null 2>&1 || useradd """ + DOCKER_USER_ID + DOCKER_GROUP + ' --create-home --shell ' + user_shell[0] + ' ' + f.write(f"""RUN (id -u {docker_user} > /dev/null 2>&1 && usermod -u $UID {docker_user}) || useradd """ + DOCKER_USER_ID + DOCKER_GROUP + ' --create-home --shell ' + user_shell[0] + ' ' + docker_user + EOL) f.write( 'RUN echo "' + diff --git a/script/build-dockerfile/meta.yaml b/script/build-dockerfile/meta.yaml index 0c85f1606..68ea20689 100644 --- a/script/build-dockerfile/meta.yaml +++ b/script/build-dockerfile/meta.yaml @@ -56,6 +56,7 @@ input_mapping: skip_mlc_sys_upgrade: MLC_DOCKER_SKIP_MLC_SYS_UPGRADE push_image: MLC_DOCKER_PUSH_IMAGE docker_not_pull_update: MLC_DOCKER_NOT_PULL_UPDATE + user: MLC_DOCKER_USER new_env_keys: - MLC_DOCKERFILE_* diff --git a/script/build-mlperf-inference-server-nvidia/meta.yaml b/script/build-mlperf-inference-server-nvidia/meta.yaml index b5ca1340b..3530c9482 100644 --- a/script/build-mlperf-inference-server-nvidia/meta.yaml +++ 
b/script/build-mlperf-inference-server-nvidia/meta.yaml @@ -191,8 +191,6 @@ variations: add_deps_recursive: nvidia-inference-common-code: version: r4.0 - nvidia-scratch-space: - tags: _version.4_1 deps: - tags: get,generic,sys-util,_git-lfs - tags: install,pytorch,from.src,_for-nvidia-mlperf-inference-v4.0 @@ -331,7 +329,7 @@ docker: interactive: True os_version: '20.04' base_image: nvcr.io/nvidia/mlperf/mlperf-inference:mlpinf-v3.1-cuda12.2-cudnn8.9-x86_64-ubuntu20.04-l4-public - docker_input_mapping: + input_mapping: imagenet_path: IMAGENET_PATH gptj_checkpoint_path: GPTJ_CHECKPOINT_PATH criteo_preprocessed_path: CRITEO_PREPROCESSED_PATH @@ -344,6 +342,8 @@ docker: scratch_path: MLPERF_SCRATCH_PATH deps: - tags: get,mlperf,inference,nvidia,scratch,space + names: + - nvidia-scratch-space - tags: get,mlperf,inference,results,dir,local - tags: get,mlperf,inference,submission,dir,local - tags: get,nvidia-docker diff --git a/script/convert-csv-to-md/meta.yaml b/script/convert-csv-to-md/meta.yaml index e1ed6f82e..74413ee56 100644 --- a/script/convert-csv-to-md/meta.yaml +++ b/script/convert-csv-to-md/meta.yaml @@ -14,7 +14,6 @@ deps: - names: - tabulate tags: get,generic-python-lib,_package.tabulate -docker_input_mapping: {} input_description: {} input_mapping: csv_file: MLC_CSV_FILE diff --git a/script/generate-mlperf-inference-submission/meta.yaml b/script/generate-mlperf-inference-submission/meta.yaml index 51f45bba7..e36381834 100644 --- a/script/generate-mlperf-inference-submission/meta.yaml +++ b/script/generate-mlperf-inference-submission/meta.yaml @@ -45,7 +45,7 @@ docker: MLC_MLPERF_INFERENCE_SUBMISSION_BASE_DIR: - 'on' tags: get,mlperf,inference,submission,dir,local - docker_input_mapping: + input_mapping: results_dir: MLC_MLPERF_INFERENCE_RESULTS_DIR_ submission_base_dir: MLC_MLPERF_INFERENCE_SUBMISSION_BASE_DIR extra_run_args: ' --cap-add SYS_ADMIN' diff --git a/script/get-docker/meta.yaml b/script/get-docker/meta.yaml index b3a5f1f89..e26954685 100644 --- 
a/script/get-docker/meta.yaml +++ b/script/get-docker/meta.yaml @@ -5,7 +5,6 @@ cache: true category: Detection or installation of tools and artifacts deps: - tags: detect,os -docker_input_mapping: {} input_description: {} input_mapping: {} new_env_keys: [ diff --git a/script/get-mlperf-inference-nvidia-scratch-space/meta.yaml b/script/get-mlperf-inference-nvidia-scratch-space/meta.yaml index 826db4016..fbdd96c18 100644 --- a/script/get-mlperf-inference-nvidia-scratch-space/meta.yaml +++ b/script/get-mlperf-inference-nvidia-scratch-space/meta.yaml @@ -39,8 +39,12 @@ variations: MLC_NVIDIA_SCRATCH_SPACE_VERSION: '4_1' group: version version.4_1-dev: - default: true env: MLC_NVIDIA_SCRATCH_SPACE_VERSION: 4_1-dev group: version + version.5_0-dev: + default: true + env: + MLC_NVIDIA_SCRATCH_SPACE_VERSION: 5_0-dev + group: version versions: {} diff --git a/script/get-nvidia-docker/meta.yaml b/script/get-nvidia-docker/meta.yaml index 303124799..39cf8d146 100644 --- a/script/get-nvidia-docker/meta.yaml +++ b/script/get-nvidia-docker/meta.yaml @@ -6,7 +6,6 @@ category: Detection or installation of tools and artifacts deps: - tags: detect,os - tags: get,docker -docker_input_mapping: {} input_description: {} input_mapping: {} new_env_keys: [] diff --git a/script/install-mlperf-logging-from-src/meta.yaml b/script/install-mlperf-logging-from-src/meta.yaml index a41c3de26..bf74707b1 100644 --- a/script/install-mlperf-logging-from-src/meta.yaml +++ b/script/install-mlperf-logging-from-src/meta.yaml @@ -12,8 +12,6 @@ deps: extra_cache_tags: mlperf_logging env: MLC_GIT_CHECKOUT_PATH_ENV_NAME: MLC_MLPERF_LOGGING_REPO_PATH -docker_input_mapping: -input_description: new_env_keys: - MLC_MLPERF_LOGGING_REPO_PATH new_state_keys: [] diff --git a/script/push-mlperf-inference-results-to-github/run.bat b/script/push-mlperf-inference-results-to-github/run.bat index 385235737..923218f9e 100644 --- a/script/push-mlperf-inference-results-to-github/run.bat +++ 
b/script/push-mlperf-inference-results-to-github/run.bat @@ -28,8 +28,14 @@ git commit -a -m "%MLC_MLPERF_RESULTS_REPO_COMMIT_MESSAGE%" if defined MLC_MLPERF_INFERENCE_SUBMISSION_DIR call %MLC_SET_REMOTE_URL_CMD% echo "%MLC_GIT_PUSH_CMD%" %MLC_GIT_PUSH_CMD% +@if errorlevel 1 ( + timeout /t %random:~0,3% /nobreak > nul + git pull --rebase + %MLC_GIT_PUSH_CMD% +) + REM Check if the previous command was successful if %errorlevel% neq 0 exit /b %errorlevel% diff --git a/script/push-mlperf-inference-results-to-github/run.sh b/script/push-mlperf-inference-results-to-github/run.sh index a2d07b80a..53a297cf9 100644 --- a/script/push-mlperf-inference-results-to-github/run.sh +++ b/script/push-mlperf-inference-results-to-github/run.sh @@ -21,6 +21,6 @@ git commit -a -m "${MLC_MLPERF_RESULTS_REPO_COMMIT_MESSAGE}" echo ${MLC_GIT_PUSH_CMD} ${MLC_GIT_PUSH_CMD} -test $? -eq 0 || (sleep $((RANDOM % 200 + 1)) && git pull && ${MLC_GIT_PUSH_CMD}) +test $? -eq 0 || (sleep $((RANDOM % 200 + 1)) && git pull --rebase && ${MLC_GIT_PUSH_CMD}) test $? -eq 0 || exit $?
diff --git a/script/run-docker-container/meta.yaml b/script/run-docker-container/meta.yaml index e18855026..f6f3d19f0 100644 --- a/script/run-docker-container/meta.yaml +++ b/script/run-docker-container/meta.yaml @@ -52,6 +52,7 @@ input_mapping: pre_run_cmds: MLC_DOCKER_PRE_RUN_COMMANDS real_run: MLC_REAL_RUN recreate: MLC_DOCKER_IMAGE_RECREATE + rebuild: MLC_DOCKER_IMAGE_RECREATE run_cmd: MLC_DOCKER_RUN_CMD run_cmd_extra: MLC_DOCKER_RUN_CMD_EXTRA save_script: MLC_DOCKER_SAVE_SCRIPT diff --git a/script/run-mlperf-inference-app/meta.yaml b/script/run-mlperf-inference-app/meta.yaml index c2f64bb3f..9dc4408d6 100644 --- a/script/run-mlperf-inference-app/meta.yaml +++ b/script/run-mlperf-inference-app/meta.yaml @@ -174,7 +174,7 @@ docker_off: real_run: false run: true interactive: true - docker_input_mapping: + input_mapping: imagenet_path: IMAGENET_PATH gptj_checkpoint_path: GPTJ_CHECKPOINT_PATH criteo_preprocessed_path: CRITEO_PREPROCESSED_PATH diff --git a/script/run-mlperf-inference-mobilenet-models/meta.yaml b/script/run-mlperf-inference-mobilenet-models/meta.yaml index b87bf4896..df19cbbfd 100644 --- a/script/run-mlperf-inference-mobilenet-models/meta.yaml +++ b/script/run-mlperf-inference-mobilenet-models/meta.yaml @@ -11,7 +11,7 @@ default_env: deps: - tags: get,sys-utils-cm docker: - docker_input_mapping: + input_mapping: imagenet_path: IMAGENET_PATH results_dir: RESULTS_DIR submission_dir: SUBMISSION_DIR diff --git a/script/set-device-settings-qaic/meta.yaml b/script/set-device-settings-qaic/meta.yaml index 1599f0067..86caaad46 100644 --- a/script/set-device-settings-qaic/meta.yaml +++ b/script/set-device-settings-qaic/meta.yaml @@ -8,7 +8,6 @@ default_env: deps: - tags: detect-os - tags: get,qaic,platform,sdk -docker_input_mapping: {} input_description: {} input_mapping: {} new_env_keys: diff --git a/script/set-performance-mode/meta.yaml b/script/set-performance-mode/meta.yaml index 9d954ce60..0ead7bc75 100644 --- a/script/set-performance-mode/meta.yaml +++ 
b/script/set-performance-mode/meta.yaml @@ -6,7 +6,6 @@ category: DevOps automation deps: - tags: detect-os - tags: detect-cpu -docker_input_mapping: {} input_description: {} input_mapping: {} new_env_keys: