
Commit 532dd21

Support different llama2 variants (#222)
* Strip mlperf inference submitter name
* Fix llama2 mlperf variants
1 parent 43fba7d commit 532dd21

File tree: 4 files changed (+33, −7 lines)


.github/workflows/test-nvidia-mlperf-inference-implementations.yml

Lines changed: 2 additions & 2 deletions

@@ -2,7 +2,7 @@ name: MLPerf Inference Nvidia implementations
 
 on:
   schedule:
-    - cron: "05 01 * * *"
+    - cron: "27 11 * * *"
 
 jobs:
   run_nvidia:
@@ -58,7 +58,7 @@ jobs:
       pip install --upgrade mlcflow
       mlc pull repo mlcommons@mlperf-automations --branch=dev
 
-      mlcr --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r5.0-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=$gpu_name --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="GATEOverflow " --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=$category --division=closed --docker_dt --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean $docker_string $submission_preprocessor_args --quiet
+      mlcr --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r5.0-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=$gpu_name --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="GATEOverflow" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=$category --division=closed --docker_dt --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean $docker_string $submission_preprocessor_args --quiet
       #mlcr --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from GH action on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name
       mlcr --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/GATEOverflow/mlperf_inference_submissions_v5.0 --repo_branch=main --commit_message="Results from GH actions on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name
 
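The new schedule fires daily at 11:27 UTC (cron fields are minute, hour, day-of-month, month, day-of-week). A quick way to sanity-check such an expression, using the third-party croniter package; this snippet is an illustration only and is not part of the workflow, which simply hands the string to GitHub Actions:

# Sanity-check the new cron expression with the third-party `croniter`
# package (pip install croniter). Illustrative only.
from datetime import datetime, timezone
from croniter import croniter

it = croniter("27 11 * * *", datetime(2025, 1, 1, tzinfo=timezone.utc))
print(it.get_next(datetime))  # 2025-01-01 11:27:00+00:00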

script/generate-mlperf-inference-submission/customize.py

Lines changed: 1 addition & 1 deletion

@@ -150,7 +150,7 @@ def generate_submission(env, state, inp, submission_division):
 
     # Check submitter
     if env.get('MLC_MLPERF_SUBMITTER'):
-        submitter = env['MLC_MLPERF_SUBMITTER']
+        submitter = env['MLC_MLPERF_SUBMITTER'].strip()
         system_meta_tmp['submitter'] = submitter
     else:
         submitter = system_meta_default['submitter']
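For context on the `.strip()`: the submission tree and system metadata are keyed by submitter name, so a stray trailing space (as in the old `--submitter="GATEOverflow "` above) would otherwise leak into generated paths. A minimal sketch of the failure mode, not taken from the script itself; the `closed/` directory prefix is illustrative:

# Minimal sketch (not the actual script) of why the submitter name is stripped:
# a trailing space from the CLI would otherwise end up in generated paths.
import os

env = {'MLC_MLPERF_SUBMITTER': 'GATEOverflow '}  # value as passed before this fix

raw = env['MLC_MLPERF_SUBMITTER']
clean = env['MLC_MLPERF_SUBMITTER'].strip()

print(repr(os.path.join('closed', raw)))    # 'closed/GATEOverflow ' -- trailing space
print(repr(os.path.join('closed', clean)))  # 'closed/GATEOverflow'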

script/get-ml-model-llama2/meta.yaml

Lines changed: 26 additions & 3 deletions

@@ -93,23 +93,46 @@ variations:
     group: download-source
     env:
       MLC_DOWNLOAD_SRC: huggingface
+  70b:
+    env:
+      MLC_GIT_CHECKOUT_FOLDER: Llama-2-70b-chat-hf
+    group: model-size
+    default: true
+    default_variations:
+      huggingface-stub: meta-llama/Llama-2-70b-chat-hf
+  7b:
+    env:
+      MLC_GIT_CHECKOUT_FOLDER: Llama-2-7b-chat-hf
+    group: model-size
+    default_variations:
+      huggingface-stub: meta-llama/Llama-2-7b-chat-hf
+
+  70b-fused-qkv:
+    env:
+      MLC_GIT_CHECKOUT_FOLDER: Llama-2-70b-fused-qkv-mlperf
+    group: model-size
+
   meta-llama/Llama-2-70b-chat-hf:
+    base:
+    - 70b
     adr:
       hf-zoo:
         tags: _model-stub.meta-llama/Llama-2-70b-chat-hf
-    default: true
     env:
-      MLC_GIT_CHECKOUT_FOLDER: Llama-2-70b-chat-hf
       MLC_MODEL_ZOO_ENV_KEY: LLAMA2
     group: huggingface-stub
+
   meta-llama/Llama-2-7b-chat-hf:
+    base:
+    - 7b
     adr:
       hf-zoo:
         tags: _model-stub.meta-llama/Llama-2-7b-chat-hf
     env:
-      MLC_GIT_CHECKOUT_FOLDER: Llama-2-7b-chat-hf
       MLC_MODEL_ZOO_ENV_KEY: LLAMA2
     group: huggingface-stub
+
+
   nvidia:
     default_variations:
       framework: pytorch
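How these pieces fit together, assuming the usual MLC variation semantics (variations in a group are mutually exclusive, `base` pulls in another variation, `default_variations` selects a group member): picking a model size such as `7b` now sets the checkout folder and drags in the matching `huggingface-stub`, and requesting a stub pulls in its `base` model size, so the Llama-2 variants stay consistent however they are selected. A toy resolver over the structure above, not mlcflow's real implementation:

# Toy resolver illustrating the group/base/default_variations wiring above.
# Not mlcflow's implementation; semantics assumed from the meta.yaml structure.
variations = {
    '70b': {'group': 'model-size', 'default': True,
            'env': {'MLC_GIT_CHECKOUT_FOLDER': 'Llama-2-70b-chat-hf'},
            'default_variations': {'huggingface-stub': 'meta-llama/Llama-2-70b-chat-hf'}},
    '7b': {'group': 'model-size',
           'env': {'MLC_GIT_CHECKOUT_FOLDER': 'Llama-2-7b-chat-hf'},
           'default_variations': {'huggingface-stub': 'meta-llama/Llama-2-7b-chat-hf'}},
    '70b-fused-qkv': {'group': 'model-size',
                      'env': {'MLC_GIT_CHECKOUT_FOLDER': 'Llama-2-70b-fused-qkv-mlperf'}},
    'meta-llama/Llama-2-70b-chat-hf': {'group': 'huggingface-stub', 'base': ['70b'],
                                       'env': {'MLC_MODEL_ZOO_ENV_KEY': 'LLAMA2'}},
    'meta-llama/Llama-2-7b-chat-hf': {'group': 'huggingface-stub', 'base': ['7b'],
                                      'env': {'MLC_MODEL_ZOO_ENV_KEY': 'LLAMA2'}},
}

def resolve(selected):
    """Expand base/default_variations so one tag yields a consistent set + env."""
    chosen, env = set(selected), {}
    queue = list(selected)
    while queue:
        v = variations[queue.pop()]
        env.update(v.get('env', {}))
        for dep in v.get('base', []) + list(v.get('default_variations', {}).values()):
            if dep not in chosen:
                chosen.add(dep)
                queue.append(dep)
    return chosen, env

print(resolve(['7b']))
# ({'7b', 'meta-llama/Llama-2-7b-chat-hf'},          (set order may vary)
#  {'MLC_GIT_CHECKOUT_FOLDER': 'Llama-2-7b-chat-hf',
#   'MLC_MODEL_ZOO_ENV_KEY': 'LLAMA2'})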
Lines changed: 4 additions & 1 deletion

@@ -1,3 +1,6 @@
 rclone config create mlc-llama2 drive config_is_local=false scope=drive.readonly root_folder_id=11tBZvvrh0FCm3XuR5E849K42TqftYdUF
 rclone config reconnect mlc-llama2:
-rclone sync mlc-llama2:${MLC_GIT_CHECKOUT_FOLDER} ${LLAMA2_CHECKPOINT_PATH}/${MLC_GIT_CHECKOUT_FOLDER} -P
+cmd="rclone sync mlc-llama2:${MLC_GIT_CHECKOUT_FOLDER} ${LLAMA2_CHECKPOINT_PATH}/${MLC_GIT_CHECKOUT_FOLDER} -P"
+echo $cmd
+eval $cmd
+test $? -eq 0 || exit $?
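The rewritten step echoes the exact rclone invocation before running it and exits nonzero when the sync fails, so a broken download now fails the calling job instead of being silently swallowed. The same log-then-run-then-propagate pattern in Python, purely to illustrate the idea (the real step is the shell script above):

# Illustration of the log/run/propagate pattern; not part of this commit.
import os, shlex, subprocess, sys

folder = os.environ['MLC_GIT_CHECKOUT_FOLDER']
dest = os.path.join(os.environ['LLAMA2_CHECKPOINT_PATH'], folder)

cmd = ['rclone', 'sync', f'mlc-llama2:{folder}', dest, '-P']
print(shlex.join(cmd))               # echo the command before running it
ret = subprocess.run(cmd).returncode
sys.exit(ret)                        # propagate rclone's exit status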
