Skip to content

Commit c586ee9

Browse files
authored
Support mlcommons checkpoint for llama2 (#220)
* Update test-nvidia-mlperf-inference-implementations.yml * Support mlperf checkpoint for llama2
1 parent cfb2958 commit c586ee9

File tree

4 files changed

+22
-2
lines changed

4 files changed

+22
-2
lines changed

.github/workflows/test-nvidia-mlperf-inference-implementations.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ jobs:
5858
pip install --upgrade mlcflow
5959
mlc pull repo mlcommons@mlperf-automations --branch=dev
6060
61-
mlcr --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r5.0-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=$gpu_name --pull_changes=yes --pull_inference_changes=yes --model=${{ matrix.model }} --submitter="MLCommons" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=$category --division=closed --docker_dt --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean $docker_string $submission_preprocessor_args --quiet
61+
mlcr --tags=run-mlperf,inference,_all-scenarios,_submission,_full,_r5.0-dev --preprocess_submission=yes --pull_changes=yes --pull_inference_changes=yes --execution_mode=valid --gpu_name=$gpu_name --model=${{ matrix.model }} --submitter="GATEOverflow" --hw_name=$hw_name --implementation=nvidia --backend=tensorrt --category=$category --division=closed --docker_dt --docker_mlc_repo=mlcommons@mlperf-automations --docker_mlc_repo_branch=dev --adr.compiler.tags=gcc --device=cuda --use_model_from_host=yes --use_dataset_from_host=yes --results_dir=$HOME/gh_action_results --submission_dir=$HOME/gh_action_submissions --clean $docker_string $submission_preprocessor_args --quiet
6262
#mlcr --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/mlcommons/mlperf_inference_unofficial_submissions_v5.0 --repo_branch=auto-update --commit_message="Results from GH action on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name
63-
mlcr --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/GATEOverflow/mlperf_inference_submissions_v5.0 --repo_branch=main --commit_message="Results from GH action on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name
63+
mlcr --tags=push,github,mlperf,inference,submission --repo_url=https://github.com/GATEOverflow/mlperf_inference_submissions_v5.0 --repo_branch=main --commit_message="Results from GH actions on NVIDIA_$hw_name" --quiet --submission_dir=$HOME/gh_action_submissions --hw_name=$hw_name
6464

script/get-ml-model-llama2/customize.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@ def preprocess(i):
2929
else:
3030
if path == '' or not os.path.exists(path):
3131
env['MLC_TMP_REQUIRE_DOWNLOAD'] = 'yes'
32+
if env['MLC_DOWNLOAD_SRC'] == "mlcommons":
33+
i['run_script_input']['script_name'] = 'run-rclone'
34+
if env.get('MLC_OUTDIRNAME', '') != '':
35+
env['LLAMA2_CHECKPOINT_PATH'] = env['MLC_OUTDIRNAME']
36+
else:
37+
env['LLAMA2_CHECKPOINT_PATH'] = os.getcwd()
3238

3339
return {'return': 0}
3440

script/get-ml-model-llama2/meta.yaml

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ prehook_deps:
1919
- enable_if_env:
2020
MLC_TMP_REQUIRE_DOWNLOAD:
2121
- 'yes'
22+
MLC_DOWNLOAD_SRC:
23+
- 'huggingface'
2224
env: {}
2325
extra_cache_tags: llama2,llama-2
2426
force_env_keys:
@@ -82,6 +84,15 @@ variations:
8284
MLC_ML_MODEL_PRECISION: int8
8385
MLC_ML_MODEL_WEIGHT_DATA_TYPES: int8
8486
group: precision
87+
mlc:
88+
group: download-source
89+
default: true
90+
env:
91+
MLC_DOWNLOAD_SRC: mlcommons
92+
hf:
93+
group: download-source
94+
env:
95+
MLC_DOWNLOAD_SRC: huggingface
8596
meta-llama/Llama-2-70b-chat-hf:
8697
adr:
8798
hf-zoo:
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
rclone config create mlc-llama2 drive config_is_local=false scope=drive.readonly root_folder_id=11tBZvvrh0FCm3XuR5E849K42TqftYdUF
2+
rclone config reconnect mlc-llama2:
3+
rclone copy mlc-llama2:Llama-2-7b-chat-hf ${LLAMA2_CHECKPOINT_PATH}/Llama-2-7b-chat-hf -P

0 commit comments

Comments (0)