From a7bd3149e552d17c3df55622b4ff350ebebca14f Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 14 Feb 2025 09:19:42 +0000
Subject: [PATCH 1/4] Update test-nvidia-mlperf-inference-implementations.yml

---
 .../workflows/test-nvidia-mlperf-inference-implementations.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml
index 0cab69c89..415d697b7 100644
--- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml
+++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml
@@ -58,7 +58,7 @@ jobs:
           pip install --upgrade mlcflow
           mlc pull repo mlcommons@mlperf-automations --branch=dev
 
-          mlcr --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r5.0-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=$gpu_name --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="GATEOverflow " --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=$category --division=closed --docker_dt --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean $docker_string $submission_preprocessor_args --quiet
+          mlcr --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r5.0-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=$gpu_name --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="GATEOverflow" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=$category --division=closed --docker_dt --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean $docker_string $submission_preprocessor_args --quiet
           #mlcr --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from GH action on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name
           mlcr --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/GATEOverflow/mlperf_inference_submissions_v5.0 --repo_branch=main --commit_message="Results from GH actions on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name

From 968f1a67f9a57254b6cef9a30f410644d462f261 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 14 Feb 2025 14:53:25 +0530
Subject: [PATCH 2/4] Strip mlperf inference submitter name

---
 script/generate-mlperf-inference-submission/customize.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/script/generate-mlperf-inference-submission/customize.py b/script/generate-mlperf-inference-submission/customize.py
index 0d1ac5a69..9bbe9b2d0 100644
--- a/script/generate-mlperf-inference-submission/customize.py
+++ b/script/generate-mlperf-inference-submission/customize.py
@@ -150,7 +150,7 @@ def generate_submission(env, state, inp, submission_division):
 
     # Check submitter
     if env.get('MLC_MLPERF_SUBMITTER'):
-        submitter = env['MLC_MLPERF_SUBMITTER']
+        submitter = env['MLC_MLPERF_SUBMITTER'].strip()
         system_meta_tmp['submitter'] = submitter
     else:
         submitter = system_meta_default['submitter']

From 403724930876a6d5cbf1808b1c72137c6ddfb777 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 14 Feb 2025 11:20:39 +0000
Subject: [PATCH 3/4] Update test-nvidia-mlperf-inference-implementations.yml

---
 .../workflows/test-nvidia-mlperf-inference-implementations.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml
index 415d697b7..2fac2632c 100644
--- a/.github/workflows/test-nvidia-mlperf-inference-implementations.yml
+++ b/.github/workflows/test-nvidia-mlperf-inference-implementations.yml
@@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations
 
 on:
   schedule:
-    - cron: "05 01 * * *"
+    - cron: "27 11 * * *"
 
 jobs:
   run_nvidia:

From 124c3dd7ca024977b100cae21e4b9e090184a32b Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 14 Feb 2025 18:32:44 +0530
Subject: [PATCH 4/4] Fix llama2 mlperf variants

---
 script/get-ml-model-llama2/meta.yaml     | 29 ++++++++++++++++++++++++++++---
 script/get-ml-model-llama2/run-rclone.sh |  5 +++-
 2 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/script/get-ml-model-llama2/meta.yaml b/script/get-ml-model-llama2/meta.yaml
index 0b0819678..a816ab604 100644
--- a/script/get-ml-model-llama2/meta.yaml
+++ b/script/get-ml-model-llama2/meta.yaml
@@ -93,23 +93,46 @@ variations:
     group: download-source
     env:
       MLC_DOWNLOAD_SRC: huggingface
+  70b:
+    env:
+      MLC_GIT_CHECKOUT_FOLDER: Llama-2-70b-chat-hf
+    group: model-size
+    default: true
+    default_variations:
+      huggingface-stub: meta-llama/Llama-2-70b-chat-hf
+  7b:
+    env:
+      MLC_GIT_CHECKOUT_FOLDER: Llama-2-7b-chat-hf
+    group: model-size
+    default_variations:
+      huggingface-stub: meta-llama/Llama-2-7b-chat-hf
+
+  70b-fused-qkv:
+    env:
+      MLC_GIT_CHECKOUT_FOLDER: Llama-2-70b-fused-qkv-mlperf
+    group: model-size
+
   meta-llama/Llama-2-70b-chat-hf:
+    base:
+    - 70b
     adr:
       hf-zoo:
         tags: _model-stub.meta-llama/Llama-2-70b-chat-hf
-    default: true
     env:
-      MLC_GIT_CHECKOUT_FOLDER: Llama-2-70b-chat-hf
       MLC_MODEL_ZOO_ENV_KEY: LLAMA2
     group: huggingface-stub
   meta-llama/Llama-2-7b-chat-hf:
+    base:
+    - 7b
     adr:
       hf-zoo:
         tags: _model-stub.meta-llama/Llama-2-7b-chat-hf
     env:
-      MLC_GIT_CHECKOUT_FOLDER: Llama-2-7b-chat-hf
       MLC_MODEL_ZOO_ENV_KEY: LLAMA2
     group: huggingface-stub
+
+
+
   nvidia:
     default_variations:
       framework: pytorch
diff --git a/script/get-ml-model-llama2/run-rclone.sh b/script/get-ml-model-llama2/run-rclone.sh
index 0d56e5eeb..7daee2da8 100644
--- a/script/get-ml-model-llama2/run-rclone.sh
+++ b/script/get-ml-model-llama2/run-rclone.sh
@@ -1,3 +1,6 @@
 rclone config create mlc-llama2 drive config_is_local=false scope=drive.readonly root_folder_id=11tBZvvrh0FCm3XuR5E849K42TqftYdUF
 rclone config reconnect mlc-llama2:
-rclone sync mlc-llama2:${MLC_GIT_CHECKOUT_FOLDER} ${LLAMA2_CHECKPOINT_PATH}/${MLC_GIT_CHECKOUT_FOLDER} -P
+cmd="rclone sync mlc-llama2:${MLC_GIT_CHECKOUT_FOLDER} ${LLAMA2_CHECKPOINT_PATH}/${MLC_GIT_CHECKOUT_FOLDER} -P"
+echo $cmd
+eval $cmd
+test $? -eq 0 || exit $?