
Fixes for podman run, GitHub Actions #95


Merged
merged 15 commits on Jan 2, 2025
Changes from all commits
2 changes: 1 addition & 1 deletion .github/workflows/test-mlperf-inference-mixtral.yml
@@ -31,5 +31,5 @@ jobs:
git config --global credential.helper store
huggingface-cli login --token ${{ secrets.HF_TOKEN }} --add-to-git-credential
cm pull repo
- cm run script --tags=run-mlperf,inference,_submission,_short --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1
+ cm run script --tags=run-mlperf,inference,_submission,_short --adr.inference-src.tags=_branch.dev --submitter="MLCommons" --pull_changes=yes --pull_inference_changes=yes --model=mixtral-8x7b --implementation=reference --batch_size=1 --precision=${{ matrix.precision }} --backend=${{ matrix.backend }} --category=datacenter --scenario=Offline --execution_mode=test --device=${{ matrix.device }} --docker_it=no --docker_cm_repo=gateoverflow@mlperf-automations --adr.compiler.tags=gcc --hw_name=gh_action --docker_dt=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --docker --quiet --test_query_count=3 --target_qps=0.001 --clean --env.CM_MLPERF_MODEL_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --env.CM_MLPERF_DATASET_MIXTRAL_8X7B_DOWNLOAD_TO_HOST=yes --adr.openorca-mbxp-gsm8k-combined-preprocessed.tags=_size.1
cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_test_submissions_v5.0 --repo_branch=dev --commit_message="Results from self hosted Github actions - GO-phoenix" --quiet --submission_dir=$HOME/gh_action_submissions
2 changes: 1 addition & 1 deletion .github/workflows/test-mlperf-inference-rgat.yml
@@ -31,7 +31,7 @@ jobs:
cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
- name: Test MLPerf Inference R-GAT using ${{ matrix.backend }} on ${{ matrix.os }}
run: |
- cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --pull_changes=yes --pull_inference_changes=yes --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1
+ cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --adr.inference-src.tags=_branch.dev --pull_changes=yes --pull_inference_changes=yes --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --category=datacenter --quiet -v --target_qps=1
- name: Push Results
if: github.repository_owner == 'gateoverflow'
env:
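A note on the change common to both workflows above: --adr.inference-src.tags=_branch.dev appends the _branch.dev variation to the inference-src dependency, so the MLPerf inference sources are checked out from the dev branch rather than the default one. Below is a minimal sketch of the same run through the CM Python API, assuming the cmind package is installed and the automation repo has been pulled; the 'adr' input key is assumed to mirror the CLI's --adr.<name>.tags flags.

import cmind

# Hedged sketch: short R-GAT submission run with inference sources pinned to dev.
r = cmind.access({
    'action': 'run',
    'automation': 'script',
    'tags': 'run,mlperf,inference,generate-run-cmds,_submission,_short',
    # equivalent of --adr.inference-src.tags=_branch.dev on the command line
    'adr': {'inference-src': {'tags': '_branch.dev'}},
    'quiet': True,
})
if r['return'] > 0:
    raise RuntimeError(r.get('error', 'cm script run failed'))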
@@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations

on:
schedule:
- - cron: "08 01 * * */3" #to be adjusted
+ - cron: "58 10 * * *" #to be adjusted

jobs:
run_nvidia:
@@ -17,20 +17,31 @@ jobs:
strategy:
fail-fast: false
matrix:
system: [ "GO-spr", "phoenix-Amd-Am5", "GO-i9" ]
# system: [ "GO-spr", "phoenix-Amd-Am5", "GO-i9", "mlc-server" ]
system: [ "mlc-server" ]
python-version: [ "3.12" ]
model: [ "resnet50", "retinanet", "bert-99", "bert-99.9", "gptj-99.9", "3d-unet-99.9", "sdxl" ]
exclude:
- model: gptj-99.9

steps:
- name: Test MLPerf Inference NVIDIA ${{ matrix.model }}
env:
gpu_name: rtx_4090
run: |
# Set hw_name based on matrix.system
if [ "${{ matrix.system }}" = "GO-spr" ]; then
hw_name="RTX4090x2"
gpu_name=rtx_4090
docker_string=" --docker"
elif [ "${{ matrix.system }}" = "mlc-server" ]; then
hw_name="H100x8"
gpu_name=h100
docker_string=" "
else
hw_name="RTX4090x1"
gpu_name=rtx_4090
docker_string=" --docker"
fi

if [ -f "gh_action/bin/deactivate" ]; then source gh_action/bin/deactivate; fi
@@ -40,6 +51,6 @@ jobs:
pip install --upgrade cm4mlops
cm pull repo

- cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=rtx_4090 --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt=yes --docker_it=no --docker_cm_repo=mlcommons@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean --docker --quiet
+ cm run script --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r4.1-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=$gpu_name --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=datacenter,edge --division=closed --docker_dt=yes --docker_it=no --docker_cm_repo=mlcommons@mlperf-automations --docker_cm_repo_branch=dev --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean $docker_string --quiet

cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from GH action on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name
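The shell block in this workflow keys everything off matrix.system: mlc-server (H100x8) runs natively, so docker_string stays empty, while the RTX 4090 hosts keep " --docker" and run inside a container. A hedged Python restatement of that mapping follows (system names and values are taken from the diff; the helper itself is illustrative):

# Illustrative mapping of the workflow's if/elif/else block.
SYSTEM_SETTINGS = {
    'GO-spr':     {'hw_name': 'RTX4090x2', 'gpu_name': 'rtx_4090', 'docker': True},
    'mlc-server': {'hw_name': 'H100x8',    'gpu_name': 'h100',     'docker': False},
}
DEFAULT = {'hw_name': 'RTX4090x1', 'gpu_name': 'rtx_4090', 'docker': True}

def settings(system):
    # Unknown systems fall through to the single-RTX4090 defaults, as in the script.
    return SYSTEM_SETTINGS.get(system, DEFAULT)

s = settings('mlc-server')
docker_string = ' --docker' if s['docker'] else ' '  # spliced into the cm command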
7 changes: 6 additions & 1 deletion script/app-mlperf-inference/_cm.yaml
@@ -1793,7 +1793,12 @@ update_meta_if_env:
use_host_group_id: True
use_host_user_id: True
pass_user_group: True #useful if docker is run by a different user from the one who built it and under the same group

+ - enable_if_env:
+ CM_HOST_OS_TYPE:
+ - linux
+ adr:
+ compiler:
+ tags: gcc

docker:
deps:
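The added block extends update_meta_if_env so that, on Linux hosts, an adr entry pins the compiler dependency to gcc. Below is a hedged sketch of the conditional-merge idea behind update_meta_if_env (not CM's actual implementation; the rule and environment values come from the diff):

def update_meta_if_env(meta, rules, env):
    # Merge each rule's extra meta keys only when its enable_if_env condition matches.
    for rule in rules:
        cond = rule.get('enable_if_env', {})
        if all(env.get(key) in allowed for key, allowed in cond.items()):
            for key, value in rule.items():
                if key != 'enable_if_env':
                    meta.setdefault(key, {}).update(value)
    return meta

rules = [{'enable_if_env': {'CM_HOST_OS_TYPE': ['linux']},
          'adr': {'compiler': {'tags': 'gcc'}}}]
meta = update_meta_if_env({}, rules, {'CM_HOST_OS_TYPE': 'linux'})
assert meta == {'adr': {'compiler': {'tags': 'gcc'}}}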
3 changes: 3 additions & 0 deletions script/build-docker-image/_cm.yaml
@@ -38,6 +38,9 @@ input_mapping:
new_env_keys:
- CM_DOCKER_*

+ deps:
+ - tags: get,docker

prehook_deps:
- enable_if_env:
CM_BUILD_DOCKERFILE:
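The new get,docker dependency is what resolves the container CLI before the image is built; the customize.py change below then reads the exported CM_CONTAINER_TOOL. A minimal sketch of what such a detection step might do (an assumption about the dependency's behaviour, not its actual code):

import shutil

def detect_container_tool():
    # Prefer docker, fall back to podman; either one can satisfy CM_CONTAINER_TOOL.
    for tool in ('docker', 'podman'):
        if shutil.which(tool):
            return tool
    raise RuntimeError('neither docker nor podman found on PATH')

env = {'CM_CONTAINER_TOOL': detect_container_tool()}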
3 changes: 2 additions & 1 deletion script/build-docker-image/customize.py
@@ -66,7 +66,8 @@ def preprocess(i):

# Prepare CMD to build image
XCMD = [
- 'docker build ' + env.get('CM_DOCKER_CACHE_ARG', ''),
+ f'{env["CM_CONTAINER_TOOL"]} build ' +
+ env.get('CM_DOCKER_CACHE_ARG', ''),
' ' + build_args,
' -f "' + dockerfile_path + '"',
' -t "' + image_name,
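With CM_CONTAINER_TOOL in the environment, podman build and docker build now share one code path. A hedged illustration of the string this assembles (the build args, paths, and closing quote are simplified assumptions):

env = {'CM_CONTAINER_TOOL': 'podman', 'CM_DOCKER_CACHE_ARG': '--no-cache'}
build_args = '--build-arg CM_VERSION=dev'   # hypothetical
dockerfile_path = '/tmp/Dockerfile'         # hypothetical
image_name = 'local/cm-script-app-mlperf-inference:ubuntu-22.04-latest'

XCMD = [
    f'{env["CM_CONTAINER_TOOL"]} build ' + env.get('CM_DOCKER_CACHE_ARG', ''),
    ' ' + build_args,
    ' -f "' + dockerfile_path + '"',
    ' -t "' + image_name + '"',  # the real script appends the tag and quote later
]
print(''.join(XCMD))
# podman build --no-cache --build-arg CM_VERSION=dev -f "/tmp/Dockerfile" -t "local/..."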
4 changes: 3 additions & 1 deletion script/run-docker-container/_cm.yaml
@@ -58,6 +58,9 @@ input_mapping:
new_env_keys:
- 'CM_DOCKER_CONTAINER_ID'

+ deps:
+ - tags: get,docker

prehook_deps:
- names:
- build-docker-image
@@ -69,4 +72,3 @@ prehook_deps:
CM_DOCKER_CONTAINER_ID:
- on
tags: build,docker,image
- - tags: get,docker
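Moving get,docker from prehook_deps into deps is an ordering fix: in the CM script flow, deps resolve before the script's preprocess() runs, while prehook_deps only run afterwards, too late for the customize.py code below that reads CM_CONTAINER_TOOL. A toy illustration of that ordering (stubbed, not CM's engine):

def resolve(dep, env):
    if dep['tags'] == 'get,docker':
        env['CM_CONTAINER_TOOL'] = 'podman'  # illustrative result of detection

def preprocess(env):
    # run-docker-container/customize.py assembles its commands here
    print('container tool at preprocess:', env.get('CM_CONTAINER_TOOL', 'MISSING'))

meta = {'deps': [{'tags': 'get,docker'}], 'prehook_deps': []}
env = {}
for dep in meta['deps']:
    resolve(dep, env)   # runs first, so CM_CONTAINER_TOOL is set in time
preprocess(env)         # prints 'podman'
for dep in meta['prehook_deps']:
    resolve(dep, env)   # would only run after preprocess(), hence the move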
30 changes: 15 additions & 15 deletions script/run-docker-container/customize.py
@@ -51,7 +51,7 @@ def preprocess(i):
print('')
print('Checking existing Docker container:')
print('')
CMD = f"""docker ps --filter "ancestor={DOCKER_CONTAINER}" """
CMD = f"""{env['CM_CONTAINER_TOOL']} ps --filter "ancestor={DOCKER_CONTAINER}" """
if os_info['platform'] == 'windows':
CMD += " 2> nul"
else:
@@ -78,7 +78,7 @@ def preprocess(i):
if env.get('CM_DOCKER_CONTAINER_ID', '') != '':
del (env['CM_DOCKER_CONTAINER_ID']) # not valid ID

CMD = "docker images -q " + DOCKER_CONTAINER
CMD = f"""{env['CM_CONTAINER_TOOL']} images -q """ + DOCKER_CONTAINER

if os_info['platform'] == 'windows':
CMD += " 2> nul"
@@ -196,11 +191,6 @@ def postprocess(i):
return {'return': 1, 'error': 'Can\'t find separator : in a mount string: {}'.format(
mount_cmd)}

- # mount_parts = mount_cmd.split(":")
- # if len(mount_parts) != 2:
- # return {'return': 1, 'error': 'Invalid mount {}
- # specified'.format(mount_parts)}

host_mount = mount_parts[0]

if not os.path.exists(host_mount):
@@ -240,14 +235,14 @@

existing_container_id = env.get('CM_DOCKER_CONTAINER_ID', '')
if existing_container_id:
CMD = f"ID={existing_container_id} && docker exec $ID bash -c '" + run_cmd + "'"
CMD = f"""ID={existing_container_id} && {env['CM_CONTAINER_TOOL']} exec $ID bash -c '""" + run_cmd + "'"
else:
CONTAINER = f"docker run -dt {run_opts} --rm {docker_image_repo}/{docker_image_name}:{docker_image_tag} bash"
CMD = f"ID=`{CONTAINER}` && docker exec $ID bash -c '{run_cmd}'"
CONTAINER = f"""{env['CM_CONTAINER_TOOL']} run -dt {run_opts} --rm {docker_image_repo}/{docker_image_name}:{docker_image_tag} bash"""
CMD = f"""ID=`{CONTAINER}` && {env['CM_CONTAINER_TOOL']} exec $ID bash -c '{run_cmd}'"""

if False and str(env.get('CM_KEEP_DETACHED_CONTAINER', '')).lower() not in [
'yes', "1", 'true']:
CMD += " && docker kill $ID >/dev/null"
CMD += f""" && {env['CM_CONTAINER_TOOL']} kill $ID >/dev/null"""

CMD += ' && echo "ID=$ID"'

@@ -256,7 +251,7 @@
print('')
print(CMD)
print('')
print("Running " + run_cmd + " inside docker container")
print(
"Running " +
run_cmd +
f""" inside {env['CM_CONTAINER_TOOL']} container""")

record_script({'cmd': CMD, 'env': env})

@@ -280,7 +278,8 @@

docker_out = result.stdout
# if docker_out != 0:
- # return {'return': docker_out, 'error': 'docker run failed'}
+ # return {'return': docker_out, 'error': f""{env['CM_CONTAINER_TOOL']}
+ # run failed""}

lines = docker_out.split("\n")

@@ -304,7 +303,7 @@
x1 = '-it'
x2 = " && bash ) || bash"

CONTAINER = "docker run " + x1 + " --entrypoint " + x + x + " " + run_opts + \
CONTAINER = f"{env['CM_CONTAINER_TOOL']} run " + x1 + " --entrypoint " + x + x + " " + run_opts + \
" " + docker_image_repo + "/" + docker_image_name + ":" + docker_image_tag
CMD = CONTAINER + " bash -c " + x + run_cmd_prefix + run_cmd + x2 + x

@@ -320,7 +319,8 @@
if docker_out != 0:
if docker_out % 256 == 0:
docker_out = 1
- return {'return': docker_out, 'error': 'docker run failed'}
+ return {'return': docker_out,
+ 'error': f"""{env['CM_CONTAINER_TOOL']} run failed"""}

return {'return': 0}

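Taken together, the postprocess changes emit the detached-run-then-exec pattern with whichever CLI CM_CONTAINER_TOOL names. A hedged reconstruction of the resulting command string (variable names mirror the diff; the concrete values are illustrative):

env = {'CM_CONTAINER_TOOL': 'podman'}
run_opts = '-v /home/user/results:/cm-mount/results'            # hypothetical
docker_image_repo = 'localhost/local'                           # hypothetical
docker_image_name = 'cm-script-app-mlperf-inference'            # hypothetical
docker_image_tag = 'ubuntu-22.04-latest'                        # hypothetical
run_cmd = 'cm run script --tags=run-mlperf,inference --quiet'   # hypothetical

CONTAINER = (f"{env['CM_CONTAINER_TOOL']} run -dt {run_opts} --rm "
             f"{docker_image_repo}/{docker_image_name}:{docker_image_tag} bash")
CMD = f"ID=`{CONTAINER}` && {env['CM_CONTAINER_TOOL']} exec $ID bash -c '{run_cmd}'"
CMD += ' && echo "ID=$ID"'
print(CMD)  # starts with: podman run -dt ... --rm ... bash, then podman exec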
1 change: 1 addition & 0 deletions script/run-mlperf-inference-app/_cm.yaml
@@ -34,6 +34,7 @@ default_env:
CM_MLPERF_RUN_STYLE: test
CM_MLPERF_SKIP_SUBMISSION_GENERATION: no
CM_DOCKER_PRIVILEGED_MODE: yes
+ CM_MLPERF_SUBMISSION_DIVISION: open

input_mapping:
api_server: CM_MLPERF_INFERENCE_API_SERVER
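The new CM_MLPERF_SUBMISSION_DIVISION: open default means generated submissions fall into the open division unless a division is passed explicitly, as the Nvidia workflow above does with --division=closed. A sketch of an explicit override through the Python API, assuming cmind is installed and that the 'division' input key mirrors the CLI's --division flag:

import cmind

r = cmind.access({
    'action': 'run',
    'automation': 'script',
    'tags': 'run-mlperf,inference,_submission,_short',
    'division': 'closed',  # override the new open-division default
    'quiet': True,
})
if r['return'] > 0:
    raise RuntimeError(r.get('error', 'cm script run failed'))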