diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index e1490bc0f..0f5384d7b 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -53,10 +53,10 @@ jobs: run: | HAS_CHANGES=$(git diff --staged --name-only) if [ ${#HAS_CHANGES} -gt 0 ]; then - git config --global user.name mlcommons-bot - git config --global user.email "mlcommons-bot@users.noreply.github.com" + # Use the GitHub actor's name and email + git config --global user.name "${GITHUB_ACTOR}" + git config --global user.email "${GITHUB_ACTOR}@users.noreply.github.com" # Commit changes git commit -m '[Automated Commit] Format Codebase' - # Use the PAT to push changes git push fi diff --git a/.github/workflows/test-mlperf-inference-dlrm.yml b/.github/workflows/test-mlperf-inference-dlrm.yml index 3ed51759d..f18b51b4d 100644 --- a/.github/workflows/test-mlperf-inference-dlrm.yml +++ b/.github/workflows/test-mlperf-inference-dlrm.yml @@ -25,7 +25,7 @@ jobs: export CM_REPOS=$HOME/GH_CM python3 -m pip install cm4mlops cm pull repo - cm run script --tags=run-mlperf,inference,_performance-only --pull_changes=yes --pull_inference_changes=yes --submitter="MLCommons" --model=dlrm-v2-99 --implementation=reference --backend=pytorch --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --clean + cm run script --tags=run-mlperf,inference,_performance-only --pull_changes=yes --pull_inference_changes=yes --submitter="MLCommons" --model=dlrm-v2-99 --implementation=reference --backend=pytorch --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=1 --docker_it=no --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes 
--results_dir=$HOME/gh_action_results --clean build_intel: if: github.repository_owner == 'gateoverflow_off' diff --git a/.github/workflows/test-mlperf-inference-gptj.yml b/.github/workflows/test-mlperf-inference-gptj.yml index 6a1152893..bf2921bd2 100644 --- a/.github/workflows/test-mlperf-inference-gptj.yml +++ b/.github/workflows/test-mlperf-inference-gptj.yml @@ -27,5 +27,5 @@ jobs: python3 -m pip install cm4mlops cm pull repo cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --pull_changes=yes --pull_inference_changes=yes --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --get_platform_details=yes --implementation=reference --clean - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-llama2.yml b/.github/workflows/test-mlperf-inference-llama2.yml index 184940330..986ee21be 100644 --- a/.github/workflows/test-mlperf-inference-llama2.yml +++ b/.github/workflows/test-mlperf-inference-llama2.yml @@ -32,4 +32,4 @@ jobs: git config --global credential.helper store huggingface-cli login --token ${{ 
secrets.HF_TOKEN }} --add-to-git-credential cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --model=llama2-70b-99 --implementation=reference --backend=${{ matrix.backend }} --precision=${{ matrix.precision }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker --quiet --test_query_count=1 --target_qps=0.001 --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.CM_MLPERF_MODEL_LLAMA2_70B_DOWNLOAD_TO_HOST=yes --clean - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions" --quiet --submission_dir=$HOME/gh_action_submissions + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from self hosted Github actions" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-mixtral.yml b/.github/workflows/test-mlperf-inference-mixtral.yml index 6687ff048..c234d464e 100644 --- a/.github/workflows/test-mlperf-inference-mixtral.yml +++ b/.github/workflows/test-mlperf-inference-mixtral.yml @@ -32,4 +32,4 @@ jobs: huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential cm pull repo cm run script --tags=run-mlperf,inference,_submission,_short --adr.inference-src.tags=_branch.dev --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline 
--execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1 - cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - GO-phoenix" --quiet --submission_dir=$HOME/gh_action_submissions + cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from self hosted Github actions - GO-phoenix" --quiet --submission_dir=$HOME/gh_action_submissions diff --git a/.github/workflows/test-mlperf-inference-resnet50.yml b/.github/workflows/test-mlperf-inference-resnet50.yml index 9bcc53e8f..9700e4b79 100644 --- a/.github/workflows/test-mlperf-inference-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-resnet50.yml @@ -1,11 +1,10 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions +# Run MLPerf inference ResNet50 name: MLPerf inference ResNet50 on: pull_request_target: - branches: [ "main", "dev", "mlperf-inference" ] + branches: [ "main", "dev" ] paths: - '.github/workflows/test-mlperf-inference-resnet50.yml' - '**' @@ -39,10 +38,20 @@ jobs: if: matrix.os == 'windows-latest' run: | git config --system core.longpaths true - - name: Install dependencies + + - name: Install 
cm4mlops on Windows + if: matrix.os == 'windows-latest' + run: | + $env:CM_PULL_DEFAULT_MLOPS_REPO = "no"; pip install cm4mlops + - name: Install dependencies on Unix Platforms + if: matrix.os != 'windows-latest' + run: | + CM_PULL_DEFAULT_MLOPS_REPO=no pip install cm4mlops + + - name: Pull MLOps repo run: | - pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} + - name: Test MLPerf Inference ResNet50 (Windows) if: matrix.os == 'windows-latest' run: | @@ -51,17 +60,19 @@ jobs: if: matrix.os != 'windows-latest' run: | cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --hw_name=gh_${{ matrix.os }}_x86 --model=resnet50 --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --target_qps=1 -v --quiet + - name: Retrieve secrets from Keeper + id: ksecrets + uses: Keeper-Security/ksm-action@master + with: + keeper-secret-config: ${{ secrets.KSM_CONFIG }} + secrets: |- + ubwkjh-Ii8UJDpG2EoU6GQ/field/Access Token > env:PAT # Fetch PAT and store in environment variable + - name: Push Results - if: github.repository_owner == 'gateoverflow' + if: github.repository_owner == 'mlcommons' env: - USER: "GitHub Action" - EMAIL: "admin@gateoverflow.com" - GITHUB_TOKEN: ${{ secrets.TEST_RESULTS_GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ env.PAT }} run: | - git config --global user.name "${{ env.USER }}" - git config --global user.email "${{ env.EMAIL }}" - git config --global credential.https://github.com.helper "" - git config --global credential.https://github.com.helper "!gh auth git-credential" - git config --global credential.https://gist.github.com.helper "" - git config --global credential.https://gist.github.com.helper "!gh auth git-credential" - cm run 
script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from R50 GH action on ${{ matrix.os }}" --quiet + git config --global user.name mlcommons-bot + git config --global user.email "mlcommons-bot@users.noreply.github.com" + cm run script --tags=push,github,mlperf,inference,submission --env.CM_GITHUB_PAT=${{ env.PAT }} --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from R50 GH action on ${{ matrix.os }}" --quiet diff --git a/.github/workflows/test-mlperf-inference-retinanet.yml b/.github/workflows/test-mlperf-inference-retinanet.yml index 3df5ea51b..182f04321 100644 --- a/.github/workflows/test-mlperf-inference-retinanet.yml +++ b/.github/workflows/test-mlperf-inference-retinanet.yml @@ -1,11 +1,10 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions -# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions +# Run MLPerf inference Retinanet name: MLPerf inference retinanet on: pull_request_target: - branches: [ "main", "dev", "mlperf-inference" ] + branches: [ "main", "dev" ] paths: - '.github/workflows/test-mlperf-inference-retinanet.yml' - '**' @@ -39,10 +38,18 @@ jobs: if: matrix.os == 'windows-latest' run: | git config --system core.longpaths true - - name: Install dependencies + - name: Install cm4mlops on Windows + if: matrix.os == 'windows-latest' + run: | + $env:CM_PULL_DEFAULT_MLOPS_REPO = "no"; pip install cm4mlops + - name: Install dependencies on Unix Platforms + if: matrix.os != 'windows-latest' + run: | + CM_PULL_DEFAULT_MLOPS_REPO=no pip install cm4mlops + - name: Pull MLOps repo run: | - python3 -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm" cm pull repo --url=${{ 
github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} + - name: Test MLPerf Inference Retinanet using ${{ matrix.backend }} on ${{ matrix.os }} if: matrix.os == 'windows-latest' run: | diff --git a/HISTORY.md b/HISTORY.md new file mode 100644 index 000000000..f9e62e1e8 --- /dev/null +++ b/HISTORY.md @@ -0,0 +1,72 @@ +## Timeline of CM developments + +### **🚀 2022: Foundation and Early Developments** + +- **March 2022:** Grigori Fursin began developing **CM (Collective Mind)**, also referred to as **CK2**, as a successor to CK [at OctoML](https://github.com/octoml/ck/commits/master/?since=2022-03-01&until=2022-03-31). +- **April 2022:** **Arjun Suresh** joined OctoML and collaborated with Grigori on developing **CM Automation** tools. +- **May 2022:** The **CM CLI** and **Python interface** were successfully [implemented and stabilized](https://github.com/octoml/ck/commits/master/?since=2022-04-01&until=2022-05-31) by Grigori. + +--- + +### **🛠️ July–September 2022: MLPerf Integration and First Submission** + +- Arjun completed the development of the **MLPerf Inference Script** within CM. +- OctoML achieved its **first MLPerf Inference submission (v2.1)** using **CM Automation** ([progress here](https://github.com/octoml/ck/commits/master/?since=2022-06-01&until=2022-09-30)). + +--- + +### **📊 October 2022 – March 2023: End-to-End Automation** + +- End-to-end MLPerf inference automations using CM were successfully [completed in CM](https://github.com/octoml/ck/commits/master/?since=2022-10-01&until=2023-03-31). +- **Additional benchmarks** and **Power Measurement support** were integrated into CM. +- **cTuning** achieved a successful MLPerf Inference **v3.0 submission** using CM Automation. + +--- + +### **🔄 April 2023: Transition and New Funding** + +- Arjun and Grigori departed OctoML and resumed **CM development** under funding from **cKnowledge.org** and **cTuning**. 
+ +--- + +### **🚀 April–October 2023: Expanded Support and Milestone Submission** + +- MLPerf inference automations were [extended](https://github.com/mlcommons/ck/commits/master?since=2023-04-01&until=2023-10-31) to support **NVIDIA implementations**. +- **cTuning** achieved the **largest-ever MLPerf Inference submission (v3.1)** using CM Automation. + +--- + +### **🤝 November 2023: MLCommons Partnership** + +- **MLCommons** began funding CM development to enhance support for **NVIDIA MLPerf inference** and introduce support for **Intel** and **Qualcomm MLPerf inference** implementations. + +--- + +### **🌐 October 2023 – March 2024: Multi-Platform Expansion** + +- MLPerf inference automations were [expanded](https://github.com/mlcommons/ck/commits/master?since=2023-10-01&until=2024-03-15) to support **NVIDIA, Intel, and Qualcomm implementations**. +- **cTuning** completed the **MLPerf Inference v4.0 submission** using CM Automation. + +--- + +### **📝 April 2024: Documentation Improvements** + +- MLCommons contracted **Arjun Suresh** via **GATEOverflow** to improve **MLPerf inference documentation** and enhance CM Automation on various platforms. + +--- + +### **👥 May 2024: Team Expansion** + +- **Anandhu Sooraj** joined MLCommons to collaborate with **Arjun Suresh** on CM development. + +--- + +### **📖 June–December 2024: Enhanced Documentation and Automation** + +- **Dedicated documentation site** launched for **MLPerf inference**. +- **CM scripts** were developed for **MLPerf Automotive**. +- **CM Docker support** was stabilized. +- **GitHub Actions workflows** were added for **MLPerf inference reference implementations** and **NVIDIA integrations** ([see updates](https://github.com/mlcommons/mlperf-automations/commits/main?since=2024-06-01&until=2024-12-31)). 
+ +--- + diff --git a/VERSION b/VERSION index 724e8d94e..4528c7c55 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.6.18 +0.6.19 diff --git a/git_commit_hash.txt b/git_commit_hash.txt index b21cfb83e..0ed8ff909 100644 --- a/git_commit_hash.txt +++ b/git_commit_hash.txt @@ -1 +1 @@ -76796b4c3966b04011c3cb6118412516c90ba50b +81816f94c4a396a012412cb3a1cf4096b4ad103e diff --git a/script/draw-graph-from-json-data/_cm.yaml b/script/draw-graph-from-json-data/_cm.yaml index 4cea12c42..eb1d1a157 100644 --- a/script/draw-graph-from-json-data/_cm.yaml +++ b/script/draw-graph-from-json-data/_cm.yaml @@ -19,3 +19,4 @@ deps: - python3 - tags: get,generic-python-lib,_package.networkx - tags: get,generic-python-lib,_package.matplotlib + - tags: get,generic-python-lib,_package.typing_extensions diff --git a/script/push-mlperf-inference-results-to-github/customize.py b/script/push-mlperf-inference-results-to-github/customize.py index f1cfe1eba..3fab7359d 100644 --- a/script/push-mlperf-inference-results-to-github/customize.py +++ b/script/push-mlperf-inference-results-to-github/customize.py @@ -1,6 +1,7 @@ from cmind import utils import cmind as cm import os +from giturlparse import parse def preprocess(i): @@ -32,6 +33,11 @@ def preprocess(i): env['CM_MLPERF_RESULTS_REPO_COMMIT_MESSAGE'] = env.get( 'CM_MLPERF_RESULTS_REPO_COMMIT_MESSAGE', 'Added new results') + p = parse(repo) + if env.get('CM_GITHUB_PAT', '') != '': + token = env['CM_GITHUB_PAT'] + env['CM_SET_REMOTE_URL_CMD'] = f"""git remote set-url origin https://git:{token}@{p.host}/{p.owner}/{p.repo}""" + return {'return': 0} diff --git a/script/push-mlperf-inference-results-to-github/run.bat b/script/push-mlperf-inference-results-to-github/run.bat index 2052eb564..0784e055e 100644 --- a/script/push-mlperf-inference-results-to-github/run.bat +++ b/script/push-mlperf-inference-results-to-github/run.bat @@ -25,6 +25,9 @@ REM Check if the previous command was successful if %errorlevel% neq 0 exit /b %errorlevel% git commit -a 
-m "%CM_MLPERF_RESULTS_REPO_COMMIT_MESSAGE%" + +if defined CM_MLPERF_INFERENCE_SUBMISSION_DIR call %CM_SET_REMOTE_URL_CMD% + git push REM Check if the previous command was successful diff --git a/script/push-mlperf-inference-results-to-github/run.sh b/script/push-mlperf-inference-results-to-github/run.sh index 1eb4f663e..7fb95eb3d 100644 --- a/script/push-mlperf-inference-results-to-github/run.sh +++ b/script/push-mlperf-inference-results-to-github/run.sh @@ -16,5 +16,10 @@ fi test $? -eq 0 || exit $? git commit -a -m "${CM_MLPERF_RESULTS_REPO_COMMIT_MESSAGE}" + +if [[ -n ${CM_SET_REMOTE_URL_CMD} ]]; then + ${CM_SET_REMOTE_URL_CMD} +fi + git push test $? -eq 0 || exit $? diff --git a/script/run-docker-container/customize.py b/script/run-docker-container/customize.py index 9bcb13037..d9518f516 100644 --- a/script/run-docker-container/customize.py +++ b/script/run-docker-container/customize.py @@ -71,7 +71,20 @@ def preprocess(i): if len(out) > 0 and str(env.get('CM_DOCKER_REUSE_EXISTING_CONTAINER', '')).lower() in ["1", "true", "yes"]: # container exists - out_json = json.loads(out) + # print(out) + out_split = out.splitlines() + if len(out_split) > 0: + try: + out_json = json.loads(out_split[0]) + # print("JSON successfully loaded:", out_json) + except json.JSONDecodeError as e: + print(f"Error: First line of 'out' is not valid JSON: {e}") + return { + 'return': 1, 'error': f"Error: First line of 'out' is not valid JSON: {e}"} + else: + out_json = [] + + if isinstance(out_json, list) and len(out_json) > 0: existing_container_id = out_json[0]['Id'] print(f"Reusing existing container {existing_container_id}") env['CM_DOCKER_CONTAINER_ID'] = existing_container_id diff --git a/setup.py b/setup.py index 8cc2aec00..0e040665e 100644 --- a/setup.py +++ b/setup.py @@ -146,14 +146,18 @@ def custom_function(self): 'force': True, 'all': True}) branch = os.environ.get('CM_MLOPS_REPO_BRANCH', 'dev') - r = cmind.access({'action': 'pull', - 'automation': 'repo', - 'artifact': 
'mlcommons@mlperf-automations', - 'checkout': commit_hash, - 'branch': branch}) - print(r) - if r['return'] > 0: - return r['return'] + pull_default_mlops_repo = os.environ.get( + 'CM_PULL_DEFAULT_MLOPS_REPO', 'true') + + if str(pull_default_mlops_repo).lower() not in ["no", "0", "false"]: + r = cmind.access({'action': 'pull', + 'automation': 'repo', + 'artifact': 'mlcommons@mlperf-automations', + 'checkout': commit_hash, + 'branch': branch}) + print(r) + if r['return'] > 0: + return r['return'] def get_sys_platform(self): self.system = platform.system()