
Code cleanup and GitHub Action added for MLPerf inference R-GAT #49


Merged (16 commits, Dec 12, 2024)
48 changes: 48 additions & 0 deletions .github/workflows/test-mlperf-inference-rgat.yml
@@ -0,0 +1,48 @@
name: MLPerf inference rgat

on:
pull_request_target:
branches: [ "main", "dev" ]
paths:
- '.github/workflows/test-mlperf-inference-rgat.yml'
- '**'
- '!**.md'

jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest]
python-version: [ "3.12" ]
backend: [ "pytorch" ]
implementation: [ "python" ]

steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python3 -m pip install "cmind @ git+https://git@github.com/mlcommons/ck.git@mlperf-inference#subdirectory=cm"
cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }}
- name: Test MLPerf Inference R-GAT using ${{ matrix.backend }} on ${{ matrix.os }}
run: |
cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="MLCommons" --hw_name=gh_${{ matrix.os }}_x86 --model=rgat --implementation=${{ matrix.implementation }} --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --quiet -v --target_qps=1
- name: Push Results
if: github.repository_owner == 'gateoverflow'
env:
USER: "GitHub Action"
EMAIL: "admin@gateoverflow.com"
GITHUB_TOKEN: ${{ secrets.TEST_RESULTS_GITHUB_TOKEN }}
run: |
git config --global user.name "${{ env.USER }}"
git config --global user.email "${{ env.EMAIL }}"
git config --global credential.https://github.com.helper ""
git config --global credential.https://github.com.helper "!gh auth git-credential"
git config --global credential.https://gist.github.com.helper ""
git config --global credential.https://gist.github.com.helper "!gh auth git-credential"
cm run script --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/gateoverflow/mlperf_inference_test_submissions_v5.0 --repo_branch=main --commit_message="Results from R-GAT GH action on ${{ matrix.os }}" --quiet
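For context, the `strategy.matrix` block above makes GitHub Actions run one job per element of the cartesian product of the axes (here, a single combination). A minimal sketch of that expansion in plain Python, separate from the workflow itself:

```python
from itertools import product

# The matrix axes from the workflow above
matrix = {
    "os": ["ubuntu-latest"],
    "python-version": ["3.12"],
    "backend": ["pytorch"],
    "implementation": ["python"],
}

# One job is generated per combination of axis values
keys = list(matrix)
jobs = [dict(zip(keys, combo)) for combo in product(*matrix.values())]
print(len(jobs))  # 1 job for this matrix
```

Adding a second backend, e.g. `backend: ["pytorch", "onnxruntime"]`, would double the job count without touching the steps.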
6 changes: 3 additions & 3 deletions automation/script/module_misc.py
@@ -1634,12 +1634,12 @@ def dockerfile(i):
'docker_cm_repo',
docker_settings.get(
'cm_repo',
- 'mlcommons@cm4mlops'))
+ 'mlcommons@mlperf-automations'))
cm_repo_branch = i.get(
'docker_cm_repo_branch',
docker_settings.get(
'cm_repo_branch',
- 'mlperf-inference'))
+ 'main'))

cm_repo_flags = i.get(
'docker_cm_repo_flags',
@@ -2295,7 +2295,7 @@ def docker(i):
'docker_cm_repo',
docker_settings.get(
'cm_repo',
- 'mlcommons@cm4mlops'))
+ 'mlcommons@mlperf-automations'))

docker_path = i.get('docker_path', '').strip()
if docker_path == '':
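The changed defaults in `module_misc.py` sit at the bottom of a layered lookup: an explicit input key wins, then the script's docker settings, then the hardcoded fallback (which is what this PR updates). A sketch of that pattern, using a hypothetical `resolve_setting` helper:

```python
def resolve_setting(i, docker_settings, input_key, settings_key, default):
    """Per-call input wins, then script-level docker settings, then the default."""
    return i.get(input_key, docker_settings.get(settings_key, default))

# No overrides anywhere: the hardcoded default applies
repo = resolve_setting({}, {}, "docker_cm_repo", "cm_repo",
                       "mlcommons@mlperf-automations")

# A script-level docker setting overrides the default
branch = resolve_setting({}, {"cm_repo_branch": "stable"},
                         "docker_cm_repo_branch", "cm_repo_branch", "main")
```

Because only the innermost default changed, any script or caller that already pins `cm_repo`/`cm_repo_branch` is unaffected by this PR.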
66 changes: 40 additions & 26 deletions script/app-mlperf-inference-mlcommons-python/_cm.yaml
@@ -222,6 +222,7 @@ deps:
CM_MODEL:
- dlrm-v2-99
- dlrm-v2-99.9
- rgat
enable_if_env:
CM_MLPERF_BACKEND:
- pytorch
@@ -234,6 +235,11 @@ deps:
names:
- ml-engine-torchvision
- torchvision
skip_if_env:
CM_MODEL:
- dlrm-v2-99
- dlrm-v2-99.9
- rgat
enable_if_env:
CM_MLPERF_BACKEND:
- pytorch
@@ -487,7 +493,7 @@ deps:
CM_MODEL:
- rgat
skip_if_env:
- CM_ML_MODEL_RGAT_CHECKPOINT_PATH:
+ RGAT_CHECKPOINT_PATH:
- 'on'

########################################################################
@@ -619,9 +625,14 @@ deps:
enable_if_env:
CM_MODEL:
- rgat
- skip_if_env:
+ skip_if_any_env:
CM_DATASET_IGBH_PATH:
- "on"
skip_if_env:
CM_RUN_STATE_DOCKER:
- 'yes'
CM_USE_DATASET_FROM_HOST:
- 'yes'

########################################################################
# Install MLPerf inference dependencies
@@ -1226,45 +1237,48 @@ variations:
group: models
env:
CM_MODEL: rgat
adr:
add_deps_recursive:
pytorch:
version: 2.1.0
version_max: "2.4.1"
version_max_usable: "2.4.1"
deps:
- tags: get,generic-python-lib,_package.colorama
- tags: get,generic-python-lib,_package.tqdm
- tags: get,generic-python-lib,_package.requests
- tags: get,generic-python-lib,_package.torchdata
version: 0.7.0
- tags: get,generic-python-lib,_package.torchvision
version: 0.16.0
- tags: get,generic-python-lib,_package.pybind11
- tags: get,generic-python-lib,_package.PyYAML
- tags: get,generic-python-lib,_package.numpy
version: 1.26.4
version_max: "1.26.4"
version_max_usable: "1.26.4"
- tags: get,generic-python-lib,_package.pydantic
- tags: get,generic-python-lib,_package.igb,_url.git+https://github.com/IllinoisGraphBenchmark/IGB-Datasets.git

rgat,cuda:
deps:
- tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/cu121/repo.html
- tags: get,generic-python-lib,_package.torch-geometric
update_tags_from_env_with_prefix:
_find_links_url.:
- CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL
- tags: get,generic-python-lib,_package.torch-scatter
update_tags_from_env_with_prefix:
_find_links_url.:
- CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL
- tags: get,generic-python-lib,_package.torch-sparse
- tags: get,generic-python-lib,_package.torch-geometric
env:
CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<<CM_TORCH_VERSION>>>.html"
update_tags_from_env_with_prefix:
_find_links_url.:
- CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL
- tags: get,generic-python-lib,_package.dgl
update_tags_from_env_with_prefix:
_find_links_url.:
- CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL

rgat,cuda:
env:
CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<<CM_TORCH_VERSION>>>.html"
CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL: "https://data.dgl.ai/wheels/torch-<<<CM_TORCH_VERSION_MAJOR_MINOR>>>/cu121/repo.html"

rgat,cpu:
deps:
- tags: get,generic-python-lib,_package.torch-geometric
env:
CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<<CM_TORCH_VERSION>>>+cpu.html"
- tags: get,generic-python-lib,_package.torch-scatter
env:
CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<<CM_TORCH_VERSION>>>+cpu.html"
- tags: get,generic-python-lib,_package.torch-sparse
env:
CM_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<<CM_TORCH_VERSION>>>+cpu.html"
- tags: get,generic-python-lib,_package.dgl,_find_links_url.https://data.dgl.ai/wheels/torch-2.1/repo.html
env:
CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL: "https://data.pyg.org/whl/torch-<<<CM_TORCH_VERSION>>>+cpu.html"
CM_TMP_GENERIC_PYTHON_PIP_EXTRA_FIND_LINKS_URL_DGL: "https://data.dgl.ai/wheels/torch-<<<CM_TORCH_VERSION_MAJOR_MINOR>>>/repo.html"

# Target devices
cpu:
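The `_cm.yaml` values above use `<<<CM_TORCH_VERSION>>>`-style placeholders so the pip `--find-links` URLs track the resolved torch version instead of a hardcoded `torch-2.1`. A rough sketch of that substitution (the exact semantics for unknown names are an assumption; here they expand to an empty string):

```python
import re

def expand(template, env):
    # Replace each <<<NAME>>> with env[NAME]; unknown names become "" in this sketch
    return re.sub(r"<<<(\w+)>>>", lambda m: env.get(m.group(1), ""), template)

url = expand("https://data.pyg.org/whl/torch-<<<CM_TORCH_VERSION>>>+cpu.html",
             {"CM_TORCH_VERSION": "2.4.0"})
```

This is why the PR can drop the pinned `torch-2.1/cu121` wheel index: the same template serves any installed torch version.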
6 changes: 4 additions & 2 deletions script/app-mlperf-inference-mlcommons-python/customize.py
@@ -388,7 +388,9 @@ def get_run_cmd_reference(
env['CM_VLLM_SERVER_MODEL_NAME'] = env.get(
"CM_VLLM_SERVER_MODEL_NAME") or "NousResearch/Meta-Llama-3-8B-Instruct"
# env['CM_MLPERF_INFERENCE_API_SERVER'] = "http://localhost:8000"
- cmd += f" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} --model-path {env['CM_VLLM_SERVER_MODEL_NAME']} --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm "
+ cmd += f""" --api-server {env['CM_MLPERF_INFERENCE_API_SERVER']} \
+ --model-path {env['CM_VLLM_SERVER_MODEL_NAME']} \
+ --api-model-name {env['CM_VLLM_SERVER_MODEL_NAME']} --vllm """
else:
cmd += f" --model-path {env['LLAMA2_CHECKPOINT_PATH']}"

@@ -493,7 +495,7 @@ def get_run_cmd_reference(
scenario_extra_options + mode_extra_options + \
" --output " + env['CM_MLPERF_OUTPUT_DIR'] + \
' --dtype ' + dtype_rgat + \
- " --model-path " + env['CM_ML_MODEL_RGAT_CHECKPOINT_PATH']
+ " --model-path " + env['RGAT_CHECKPOINT_PATH']

if env.get('CM_ACTIVATE_RGAT_IN_MEMORY', '') == "yes":
cmd += " --in-memory "
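The R-GAT branch of `get_run_cmd_reference` builds the benchmark command by string concatenation from the environment; the PR only renames the checkpoint variable to `RGAT_CHECKPOINT_PATH`. A self-contained sketch of the assembly (all paths are hypothetical):

```python
env = {
    "CM_MLPERF_OUTPUT_DIR": "/tmp/mlperf-results",   # hypothetical path
    "RGAT_CHECKPOINT_PATH": "/models/RGAT/RGAT.pt",  # hypothetical path
    "CM_ACTIVATE_RGAT_IN_MEMORY": "yes",
}
dtype_rgat = "fp32"

cmd = "python3 main.py"
cmd += (" --output " + env["CM_MLPERF_OUTPUT_DIR"]
        + " --dtype " + dtype_rgat
        + " --model-path " + env["RGAT_CHECKPOINT_PATH"])
# Optional flag, gated on an env toggle exactly as in customize.py
if env.get("CM_ACTIVATE_RGAT_IN_MEMORY", "") == "yes":
    cmd += " --in-memory "
```

Because the checkpoint path is read at command-build time, a stale `CM_ML_MODEL_RGAT_CHECKPOINT_PATH` set by older scripts would now be ignored.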
13 changes: 11 additions & 2 deletions script/app-mlperf-inference/_cm.yaml
@@ -781,6 +781,15 @@ variations:
- mlperf-accuracy-script
- 3d-unet-accuracy-script
tags: run,accuracy,mlperf,_igbh
docker:
deps:
- tags: get,dataset,igbh
enable_if_env:
CM_USE_DATASET_FROM_HOST:
- 'yes'
names:
- igbh-original
- igbh-dataset

sdxl:
group:
@@ -1808,8 +1817,8 @@ docker:
interactive: True
extra_run_args: ' --dns 8.8.8.8 --dns 8.8.4.4 --ulimit memlock=-1 --cap-add SYS_ADMIN --cap-add SYS_TIME --security-opt apparmor=unconfined --security-opt seccomp=unconfined'
os: ubuntu
- cm_repo: mlcommons@cm4mlops
- cm_repo_branch: mlperf-inference
+ cm_repo: mlcommons@mlperf-automations
+ cm_repo_branch: dev
real_run: False
os_version: '22.04'
docker_input_mapping:
2 changes: 1 addition & 1 deletion script/app-mlperf-inference/customize.py
@@ -356,7 +356,7 @@ def postprocess(i):
host_info['system_name'] = env['CM_HOST_SYSTEM_NAME']

# Check CM automation repository
- repo_name = 'mlcommons@cm4mlops'
+ repo_name = 'mlcommons@mlperf-automations'
repo_hash = ''
r = cm.access({'action': 'find', 'automation': 'repo',
'artifact': 'mlcommons@cm4mlops,9e97bb72b0474657'})
2 changes: 1 addition & 1 deletion script/build-dockerfile/_cm.yaml
@@ -19,7 +19,7 @@ default_env:
'
CM_DOCKER_OS: ubuntu
CM_DOCKER_NOT_PULL_UPDATE: False
- CM_MLOPS_REPO_BRANCH: mlperf-inference
+ CM_MLOPS_REPO_BRANCH: dev

input_mapping:
build: CM_BUILD_DOCKER_IMAGE
4 changes: 2 additions & 2 deletions script/build-dockerfile/customize.py
@@ -131,7 +131,7 @@ def preprocess(i):
print(
f"Converted repo format from {env['CM_MLOPS_REPO']} to {cm_mlops_repo}")
else:
- cm_mlops_repo = "mlcommons@cm4mlops"
+ cm_mlops_repo = "mlcommons@mlperf-automations"

cm_mlops_repo_branch_string = f" --branch={env['CM_MLOPS_REPO_BRANCH']}"

@@ -299,7 +299,7 @@ def preprocess(i):
f.write(EOL + '# Download CM repo for scripts' + EOL)

if use_copy_repo:
- docker_repo_dest = "/home/cmuser/CM/repos/mlcommons@cm4mlops"
+ docker_repo_dest = "/home/cmuser/CM/repos/mlcommons@mlperf-automations"
f.write(
f'COPY --chown=cmuser:cm {relative_repo_path} {docker_repo_dest}' +
EOL)
4 changes: 2 additions & 2 deletions script/generate-mlperf-inference-submission/_cm.yaml
@@ -31,8 +31,8 @@ deps:
- 'on'
tags: get,mlperf,submission,dir
docker:
- cm_repo: mlcommons@cm4mlops
- cm_repo_branch: mlperf-inference
+ cm_repo: mlcommons@mlperf-automations
+ cm_repo_branch: dev
deps:
- names: get-mlperf-inference-results-dir
skip_if_env: