Skip to content

Fixes for nvidia mlperf inference #156

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jan 26, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
python3 -m venv gh_action
source gh_action/bin/activate
export MLC_REPOS=$HOME/GH_MLC
pip install --upgrade cm4mlops
cm pull repo
pip install --upgrade mlc-scripts
mlc pull repo
mlcr --tags=run-mlperf,inference,_all-scenarios,_full,_r4.1-dev --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=amd --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_mlc_repo=gateoverflow@mlperf-automations --docker_mlc_repo_branch=dev --adr.compiler.tags=gcc --device=rocm --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet --docker_skip_run_cmd=yes
# mlcr --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=dev --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
python3 -m venv gh_action_conda
source gh_action_conda/bin/activate
export MLC_REPOS=$HOME/GH_MLC
pip install --upgrade cm4mlops
pip install --upgrade mlc-scripts
pip install tabulate
mlcr --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --execution_mode=valid --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=IntelSPR.24c --implementation=intel --backend=pytorch --category=datacenter --division=open --scenario=Offline --docker_dt=yes --docker_it=no --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --adr.compiler.tags=gcc --device=cpu --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet
mlcr --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from GH action on SPR.24c" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=IntelSPR.24c
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
name: Build Python Wheel
name: Build mlc-scripts Wheel

on:
pull_request:
branches:
- main
- dev
paths:
- '.github/workflows/test-mlperf-wheel.yml'
- '.github/workflows/test-mlcscripts-wheel.yml'
- 'setup.py'

jobs:
Expand All @@ -16,6 +16,9 @@ jobs:
matrix:
os: [macos-latest, ubuntu-latest, windows-latest]
python-version: [ '3.8', '3.13']
exclude:
- os: windows-latest
python-version: "3.8"

runs-on: ${{ matrix.os }}

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-mlperf-inference-gptj.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ jobs:
source gh_action/bin/deactivate || python3 -m venv gh_action
source gh_action/bin/activate
export MLC_REPOS=$HOME/GH_MLC
python3 -m pip install cm4mlops
cm pull repo
python3 -m pip install --upgrade mlc-scripts
mlc pull repo
mlcr --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --docker --pull_changes=yes --pull_inference_changes=yes --model=gptj-99 --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --target_qps=1 --quiet --docker_it=no --docker_mlc_repo=gateoverflow@mlperf-automations --docker_mlc_repo_branch=dev --adr.compiler.tags=gcc --beam_size=1 --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --get_platform_details=yes --implementation=reference --clean
mlcr --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions

4 changes: 2 additions & 2 deletions .github/workflows/test-mlperf-inference-llama2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ jobs:
source gh_action/bin/deactivate || python3 -m venv gh_action
source gh_action/bin/activate
export MLC_REPOS=$HOME/GH_MLC
pip install cm4mlops
pip install mlc-scripts
pip install tabulate
cm pull repo
mlc pull repo
pip install "huggingface_hub[cli]"
git config --global credential.helper store
huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-mlperf-inference-mixtral.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ jobs:
source gh_action/bin/deactivate || python3 -m venv gh_action
source gh_action/bin/activate
export MLC_REPOS=$HOME/GH_MLC
pip install cm4mlops
pip install --upgrade mlc-scripts
pip install "huggingface_hub[cli]"
git config --global credential.helper store
huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential
cm pull repo
mlc pull repo
mlcr --tags=run-mlperf,inference,_submission,_short --adr.inference-src.tags=_branch.dev --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_mlc_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.MLC_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.MLC_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1
mlcr --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from self hosted Github actions - GO-phoenix" --quiet --submission_dir=$HOME/gh_action_submissions
11 changes: 11 additions & 0 deletions .github/workflows/test-mlperf-inference-resnet50.yml
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,24 @@ jobs:
- name: Randomly Execute Step
id: random-check
run: |
# Decide at random (1 chance in 10) whether the gated steps that follow run.
# Appends run_step=true or run_step=false to the file named by $GITHUB_ENV;
# later steps read it back as env.run_step.
#
# This is plain bash on purpose. Bash parses a whole if/fi before executing
# any of it, so PowerShell statements inside an OS-specific branch are a
# syntax error on every platform, not only on Windows. On a Windows runner
# the step has to be run with `shell: bash` for this script to apply.
decide_run_step() {
  # Fallback only matters for local dry runs where GITHUB_ENV is not set.
  local env_file="${GITHUB_ENV:-/dev/null}"
  local random_number=$((RANDOM % 10))

  echo "Random number is $random_number"
  if [[ "$random_number" -eq 0 ]]; then
    echo "run_step=true" >> "$env_file"
  else
    echo "run_step=false" >> "$env_file"
  fi
}

decide_run_step

- name: Retrieve secrets from Keeper
if: github.repository_owner == 'mlcommons' && env.run_step == 'true'
id: ksecrets
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-mlperf-inference-rnnt.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,10 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install dependencies on Unix Platforms
run: |
MLC_PULL_DEFAULT_MLOPS_REPO=no pip install cm4mlops
pip install mlcflow
- name: Pull MLOps repository
run: |
cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
mlc pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
mlcr --quiet --tags=get,sys-utils-cm
- name: Test MLPerf Inference RNNT
run: |
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-mlperf-inference-sdxl.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
source gh_action/bin/deactivate || python3 -m venv gh_action
source gh_action/bin/activate
export MLC_REPOS=$HOME/GH_MLC
python3 -m pip install cm4mlops
cm pull repo
python3 -m pip install mlc-scripts
mlc pull repo
mlcr --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --docker --model=sdxl --backend=${{ matrix.backend }} --device=cuda --scenario=Offline --test_query_count=1 --precision=${{ matrix.precision }} --quiet --docker_it=no --docker_mlc_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --env.MLC_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes --clean
mlcr --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from self hosted Github actions - NVIDIARTX4090" --quiet --submission_dir=$HOME/gh_action_submissions
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations

on:
schedule:
- cron: "58 23 * * *" #to be adjusted
- cron: "45 17 * * *" #to be adjusted

jobs:
run_nvidia:
Expand All @@ -17,7 +17,7 @@ jobs:
strategy:
fail-fast: false
matrix:
system: [ "GO-spr", "phoenix-Amd-Am5", "GO-i9"]
system: [ "GO-spr", "phoenix", "GO-i9"]
# system: [ "mlc-server" ]
python-version: [ "3.12" ]
model: [ "resnet50", "retinanet", "bert-99", "bert-99.9", "gptj-99.9", "3d-unet-99.9", "sdxl" ]
Expand Down Expand Up @@ -48,9 +48,9 @@ jobs:
python3 -m venv gh_action
source gh_action/bin/activate
export MLC_REPOS=$HOME/GH_MLC
MLC_PULL_DEFAULT_MLOPS_REPO=no pip install --upgrade cm4mlops
cm pull repo
pip install --upgrade mlcflow
mlc pull repo mlcommons@mlperf-automations --branch=dev

mlcr --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=$gpu_name --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt=yes --docker_it=no --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean $docker_string --quiet
mlcr --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r5.0-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=$gpu_name --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean $docker_string --quiet

mlcr --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from GH action on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name
2 changes: 1 addition & 1 deletion .github/workflows/test-qaic-compute-sdk-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
MLC_PULL_DEFAULT_MLOPS_REPO=no pip install cm4mlops
pip install mlc-scripts
mlcr --tags=get,sys-utils-cm --quiet

- name: Test QAIC Compute SDK for compilation
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test-qaic-software-kit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Pull MLOps repository
run: |
pip install mlperf
pip install mlc-scripts
mlcr --tags=get,sys-utils-mlc --quiet

- name: Test Software Kit for compilation on Ubuntu 20.04
Expand Down
9 changes: 5 additions & 4 deletions automation/script/docker_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def process_mounts(mounts, env, docker_settings, f_run_cmd):
for placeholder in container_placeholders:
if placeholder in env:
new_container_mount, container_env_key = get_container_path(
env[placeholder])
env[placeholder], docker_settings.get('user', 'mlcuser'))
else: # Skip mount if variable is missing
mounts[index] = None
break
Expand Down Expand Up @@ -400,18 +400,19 @@ def get_host_path(value):

def get_container_path_script(i):
    """Map a cached host dependency path to its in-container equivalents.

    Args:
        i: dict holding the host path under the key 'tmp_dep_cached_path'.

    Returns:
        dict with 'return' set to 0 plus 'value_mnt' and 'value_env', the
        two values produced by get_container_path() for that path and the
        current user name.
    """
    import getpass

    tmp_dep_cached_path = i['tmp_dep_cached_path']

    # os.getlogin() needs a controlling terminal and raises OSError without
    # one (CI runners, cron jobs, containers), so fall back progressively
    # instead of crashing.
    try:
        username = os.getlogin()
    except OSError:
        try:
            username = getpass.getuser()
        except (KeyError, OSError, ImportError):
            # Same name get_container_path() uses as its own default.
            username = "mlcuser"

    value_mnt, value_env = get_container_path(tmp_dep_cached_path, username)
    return {'return': 0, 'value_mnt': value_mnt, 'value_env': value_env}


def get_container_path(value):
def get_container_path(value, username="mlcuser"):
path_split = value.split(os.sep)
if len(path_split) == 1:
return value

new_value = ''
if "cache" in path_split and "local" in path_split:
new_path_split = ["", "home", "mlcuser", "MLC", "repos"]
new_path_split = ["", "home", username, "MLC", "repos"]
repo_entry_index = path_split.index("local")
if len(path_split) >= repo_entry_index + 3:
new_path_split1 = new_path_split + \
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "mlc-scripts"
version="0.0.5"
description = "Automation scripts for running ML applications using MLC interface"
authors = [
{ name = "MLCommons", email = "systems@mlcommons.org" }
Expand Down
32 changes: 16 additions & 16 deletions script/build-dockerfile/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import json
import re
import shutil
from utils import *


def preprocess(i):
Expand Down Expand Up @@ -255,8 +256,7 @@ def preprocess(i):
'MLC_DOCKER_USE_DEFAULT_USER', '') == '':
env['MLC_DOCKER_USE_DEFAULT_USER'] = 'yes'

if docker_user and str(env.get('MLC_DOCKER_USE_DEFAULT_USER', '')).lower() not in [
"yes", "1", "true"]:
if docker_user and not is_true(env.get('MLC_DOCKER_USE_DEFAULT_USER', '')):

f.write('RUN groupadd -g $GID -o ' + docker_group + EOL)

Expand All @@ -283,16 +283,19 @@ def preprocess(i):
dockerfile_env_input_string = dockerfile_env_input_string + " --env." + \
docker_env_key + "=" + str(dockerfile_env[docker_env_key])

workdir = get_value(env, config, 'WORKDIR', 'MLC_DOCKER_WORKDIR')
if workdir and (f"""/home/{docker_user}""" not in workdir or str(env.get('MLC_DOCKER_USE_DEFAULT_USER', '')).lower() not in [
"yes", "1", "true"]):
workdir = env.get('WORKDIR', '')
if workdir == '':
workdir = f"""/home/{docker_user}"""

if f"""/home/{docker_user}""" not in workdir or not is_true(
env.get('MLC_DOCKER_USE_DEFAULT_USER', '')):
f.write('WORKDIR ' + workdir + EOL)

f.write(EOL + '# Install python packages' + EOL)
python = get_value(env, config, 'PYTHON', 'MLC_DOCKERFILE_PYTHON')

docker_use_virtual_python = env.get('MLC_DOCKER_USE_VIRTUAL_PYTHON', "yes")
if str(docker_use_virtual_python).lower() not in ["no", "0", "false"]:
if not is_false(docker_use_virtual_python):
f.write('RUN {} -m venv $HOME/venv/mlc'.format(python) + " " + EOL)
f.write('ENV PATH="$HOME/venv/mlc/bin:$PATH"' + EOL)
# f.write('RUN . /opt/venv/mlc/bin/activate' + EOL)
Expand Down Expand Up @@ -342,8 +345,7 @@ def preprocess(i):
for y in x.split(','):
f.write('RUN ' + y + EOL)

if str(env.get('MLC_DOCKER_SKIP_MLC_SYS_UPGRADE', False)
).lower() not in ["true", "1", "yes"]:
if not is_true(env.get('MLC_DOCKER_SKIP_MLC_SYS_UPGRADE', False)):
f.write(EOL + '# Install all system dependencies' + EOL)
f.write('RUN mlc run script --tags=get,sys-utils-mlc --quiet' + EOL)

Expand All @@ -368,14 +370,12 @@ def preprocess(i):
env['MLC_DOCKER_RUN_CMD'] += "mlc version"
skip_extra = True
else:
if str(env.get('MLC_DOCKER_NOT_PULL_UPDATE', 'False')
).lower() not in ["yes", "1", "true"]:
if not is_true(env.get('MLC_DOCKER_NOT_PULL_UPDATE', 'False')):
env['MLC_DOCKER_RUN_CMD'] += "mlc pull repo && "
env['MLC_DOCKER_RUN_CMD'] += "mlc run script --tags=" + \
env['MLC_DOCKER_RUN_SCRIPT_TAGS'] + ' --quiet'
else:
if str(env.get('MLC_DOCKER_NOT_PULL_UPDATE', 'False')
).lower() not in ["yes", "1", "true"]:
if not is_true(env.get('MLC_DOCKER_NOT_PULL_UPDATE', 'False')):
env['MLC_DOCKER_RUN_CMD'] = "mlc pull repo && " + \
env['MLC_DOCKER_RUN_CMD']

Expand All @@ -394,8 +394,8 @@ def preprocess(i):
if run_cmd_extra != '':
x += ' ' + run_cmd_extra

if env.get('MLC_DOCKER_RUN_SCRIPT_TAGS', '') != '' and str(env.get(
'MLC_DOCKER_ADD_DEPENDENT_SCRIPTS_RUN_COMMANDS', '')).lower() in ["yes", "1", "true"]:
if env.get('MLC_DOCKER_RUN_SCRIPT_TAGS', '') != '' and is_true(env.get(
'MLC_DOCKER_ADD_DEPENDENT_SCRIPTS_RUN_COMMANDS', '')):
mlc_input = {'action': 'run',
'automation': 'script',
'tags': f"""{env['MLC_DOCKER_RUN_SCRIPT_TAGS']}""",
Expand All @@ -417,8 +417,8 @@ def preprocess(i):
f.write(x + EOL)

# fake_run to install the dependent scripts and caching them
if not "run" in env['MLC_DOCKER_RUN_CMD'] and str(
env.get('MLC_REAL_RUN', False)).lower() in ["false", "0", "no"]:
if not "run" in env['MLC_DOCKER_RUN_CMD'] and is_false(
env.get('MLC_REAL_RUN', False)):
fake_run = dockerfile_env_input_string

x = 'RUN ' + env['MLC_DOCKER_RUN_CMD'] + fake_run + run_cmd_extra
Expand Down
2 changes: 0 additions & 2 deletions script/build-dockerfile/dockerinfo.json
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,9 @@
"GID": "",
"GROUP": "mlc",
"SHELL": "[\"/bin/bash\", \"-c\"]",
"WORKDIR": "/home/mlcuser",
"distros": {
"ubuntu": {
"USER": "ubuntu",
"WORKDIR": "/home/ubuntu",
"package-manager-update-cmd": "apt-get update -y",
"package-manager-get-cmd": "apt-get install -y",
"packages": [
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,11 @@ def run(self):
print("Running custom post-install command...")
commit_hash = get_commit_hash()
import mlc
branch = os.environ.get('MLC_REPO_BRANCH', 'mlc')
branch = os.environ.get('MLC_REPO_BRANCH', 'dev')

res = mlc.access({'action': 'pull',
'automation': 'repo',
'url': 'mlcommons@mlperf-automations',
'target': 'repo',
'repo': 'mlcommons@mlperf-automations',
'branch': branch,
'checkout': commit_hash
})
Expand Down
Loading