
Code changes for supporting llama3_1-405b reference implementation #111

Merged: 9 commits on Jan 5, 2025
778 changes: 410 additions & 368 deletions script/app-mlperf-inference-mlcommons-python/_cm.yaml

Large diffs are not rendered by default.

30 changes: 28 additions & 2 deletions script/app-mlperf-inference-mlcommons-python/customize.py
@@ -68,7 +68,7 @@ def preprocess(i):
            str(env['CM_MLPERF_LOADGEN_BATCH_SIZE'])

    if env.get('CM_MLPERF_LOADGEN_QUERY_COUNT', '') != '' and not env.get('CM_TMP_IGNORE_MLPERF_QUERY_COUNT', False) and (
            env['CM_MLPERF_LOADGEN_MODE'] == 'accuracy' or 'gptj' in env['CM_MODEL'] or 'llama2' in env['CM_MODEL'] or 'mixtral' in env['CM_MODEL']) and env.get('CM_MLPERF_RUN_STYLE', '') != "valid":
            env['CM_MLPERF_LOADGEN_MODE'] == 'accuracy' or 'gptj' in env['CM_MODEL'] or 'llama2' in env['CM_MODEL'] or 'mixtral' in env['CM_MODEL'] or 'llama3' in env['CM_MODEL']) and env.get('CM_MLPERF_RUN_STYLE', '') != "valid":
        env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --count " + \
            env['CM_MLPERF_LOADGEN_QUERY_COUNT']
@@ -127,7 +127,7 @@ def preprocess(i):
    if 'CM_MLPERF_USER_CONF' in env:
        user_conf_path = env['CM_MLPERF_USER_CONF']
        x = "" if os_info['platform'] == 'windows' else "'"
        if 'llama2-70b' in env['CM_MODEL'] or "mixtral-8x7b" in env["CM_MODEL"]:
        if 'llama2-70b' in env['CM_MODEL'] or "mixtral-8x7b" in env["CM_MODEL"] or "llama3" in env["CM_MODEL"]:
            scenario_extra_options += " --user-conf " + x + user_conf_path + x
        else:
            scenario_extra_options += " --user_conf " + x + user_conf_path + x
@@ -499,6 +499,32 @@ def get_run_cmd_reference(

        if env.get('CM_ACTIVATE_RGAT_IN_MEMORY', '') == "yes":
            cmd += " --in-memory "

elif "llama3" in env['CM_MODEL']:
env['RUN_DIR'] = os.path.join(
env['CM_MLPERF_INFERENCE_SOURCE'],
"language",
"llama3.1-405b")

if int(env.get('CM_MLPERF_INFERENCE_TP_SIZE', '')) > 1:
env['VLLM_WORKER_MULTIPROC_METHOD'] = "spawn"

cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " main.py " \
" --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + \
" --dataset-path " + env['CM_DATASET_LLAMA3_PATH'] + \
" --output-log-dir " + env['CM_MLPERF_OUTPUT_DIR'] + \
' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \
" --model-path " + env['CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] + \
" --tensor-parallel-size " + env['CM_MLPERF_INFERENCE_TP_SIZE'] + \
" --vllm "

if env.get('CM_MLPERF_INFERENCE_NUM_WORKERS', '') != '':
cmd += f" --num-workers {env['CM_MLPERF_INFERENCE_NUM_WORKERS']}"


cmd = cmd.replace("--count", "--total-sample-count")
cmd = cmd.replace("--max-batchsize", "--batch-size")


if env.get('CM_NETWORK_LOADGEN', '') in ["lon", "sut"]:
cmd = cmd + " " + "--network " + env['CM_NETWORK_LOADGEN']
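
For orientation, here is a sketch of the command this branch assembles, with hypothetical paths and an assumed Offline scenario; the flag names come from the code above, all values are placeholders:

    python3 main.py \
        --scenario Offline \
        --dataset-path /data/llama3/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl \
        --output-log-dir /tmp/mlperf-results \
        --dtype bfloat16 \
        --model-path /models/Llama-3.1-405B-Instruct \
        --tensor-parallel-size 8 \
        --vllm --num-workers 1

The two replace() calls then rename any --count and --max-batchsize options injected elsewhere to --total-sample-count and --batch-size.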
42 changes: 42 additions & 0 deletions script/app-mlperf-inference/_cm.yaml
@@ -221,6 +221,8 @@ variations:
        tags: _int32
      cnndm-accuracy-script:
        tags: _int32
      llama3_1-405b-accuracy-script:
        tags: _int32
    env:
      CM_MLPERF_PYTHON: 'yes'
      CM_MLPERF_IMPLEMENTATION: mlcommons_python
@@ -272,6 +274,10 @@ variations:
    default_variations:
      backend: pytorch

  reference,llama3_1-405b:
    default_variations:
      backend: pytorch

  reference,mixtral-8x7b:
    default_variations:
      backend: pytorch
@@ -795,6 +801,40 @@ variations:
        - igbh-original
        - igbh-dataset

  llama3_1-405b:
    group: model
    add_deps_recursive:
      mlperf-inference-implementation:
        tags: _llama3_1-405b
    env:
      CM_MODEL: llama3_1-405b
    posthook_deps:
      - enable_if_env:
          CM_MLPERF_LOADGEN_MODE:
          - accuracy
          - all
          CM_MLPERF_ACCURACY_RESULTS_DIR:
          - 'on'
        skip_if_env:
          CM_MLPERF_IMPLEMENTATION:
          - nvidia
        names:
        - mlperf-accuracy-script
        - llama3_1-405b-accuracy-script
        tags: run,accuracy,mlperf,_dataset_llama3
    docker:
      deps:
      - tags: get,ml-model,llama3
        enable_if_env:
          CM_USE_DATASET_FROM_HOST:
          - 'yes'
        names:
        - llama3_1-405b
        - llama3-405b

  sdxl:
    group: model
@@ -1682,6 +1722,8 @@ variations:
        tags: _mlcommons
      intel-harness:
        tags: _v4.1
      inference-src:
        version: r5.0
    default_env:
      CM_SKIP_SYS_UTILS: 'yes'
      CM_REGENERATE_MEASURE_FILES: 'yes'
56 changes: 56 additions & 0 deletions script/get-dataset-mlperf-inference-llama3/_cm.yaml
@@ -0,0 +1,56 @@
alias: get-dataset-mlperf-inference-llama3
automation_alias: script
automation_uid: 5b4e0237da074764
cache: true
tags:
- get
- dataset
- mlperf
- llama3
- inference
uid: c3bc69599cbc4db7
new_env_keys:
- CM_DATASET_LLAMA3_PATH
input_mapping:
  outdirname: CM_OUTDIRNAME
prehook_deps:
- env:
    CM_DOWNLOAD_FINAL_ENV_NAME: CM_DATASET_LLAMA3_PATH
    CM_EXTRACT_TO_FOLDER: llama-3-dataset
  extra_cache_tags: dataset,llama3
  force_cache: true
  enable_if_env:
    CM_TMP_REQUIRE_DOWNLOAD:
    - 'yes'
  names:
  - dae
  tags: download-and-extract
  update_tags_from_env_with_prefix:
    _url.:
    - CM_DOWNLOAD_URL
variations:
  validation:
    default: true
    group: dataset-type
    env:
      CM_RCLONE_URL: mlc-inference:mlcommons-inference-wg-public/llama3_405b/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl
      CM_DATASET_TYPE: validation
      CM_DATASET_FILE_NAME: mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl
  calibration:
    group: dataset-type
    env:
      CM_RCLONE_URL: mlc-inference:mlcommons-inference-wg-public/llama3_405b/mlperf_llama3.1_405b_calibration_dataset_512_processed_fp16_eval.pkl
      CM_DATASET_TYPE: calibration
      CM_DATASET_FILE_NAME: mlperf_llama3.1_405b_calibration_dataset_512_processed_fp16_eval.pkl
  rclone:
    add_deps_recursive:
      dae:
        tags: _rclone
    default: true
    env:
      CM_DOWNLOAD_FILENAME: checkpoint
      CM_DOWNLOAD_URL: <<<CM_RCLONE_URL>>>
      CM_RCLONE_CONFIG_NAME: mlc-inference
    group: download-tool
print_env_at_the_end:
  CM_DATASET_LLAMA3_PATH: Path to the dataset
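
As a usage sketch (the tags come from this file, the output path is a placeholder, and the standard cm run script front end is assumed), the validation split could be fetched with:

    cm run script --tags=get,dataset,mlperf,llama3,inference,_validation --outdirname=/data/llama3

The _calibration variation selects the 512-sample calibration file instead.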
31 changes: 31 additions & 0 deletions script/get-dataset-mlperf-inference-llama3/customize.py
@@ -0,0 +1,31 @@
from cmind import utils
import os


def preprocess(i):

    os_info = i['os_info']

    env = i['env']

    if os_info['platform'] == "windows":
        return {'return': 1, 'error': 'Script not supported in windows yet!'}

    if env.get('CM_DATASET_LLAMA3_PATH', '') == '':
        env['CM_TMP_REQUIRE_DOWNLOAD'] = "yes"

    if env.get('CM_OUTDIRNAME', '') != '':
        env['CM_DOWNLOAD_PATH'] = env['CM_OUTDIRNAME']

    return {'return': 0}


def postprocess(i):

    env = i['env']

    if env.get('CM_TMP_REQUIRE_DOWNLOAD', '') == "yes":
        env['CM_DATASET_LLAMA3_PATH'] = os.path.join(
            env['CM_DATASET_LLAMA3_PATH'], env['CM_DATASET_FILE_NAME'])

    return {'return': 0}
68 changes: 68 additions & 0 deletions script/get-ml-model-llama3/_cm.yaml
@@ -0,0 +1,68 @@
alias: get-ml-model-llama3
automation_alias: script
automation_uid: 5b4e0237da074764
cache: true
category: AI/ML models
input_mapping:
  outdirname: CM_OUTDIRNAME
new_env_keys:
- CM_ML_MODEL_*
- LLAMA3_CHECKPOINT_PATH
prehook_deps:
- enable_if_env:
    CM_TMP_REQUIRE_DOWNLOAD:
    - 'yes'
  env: {}
  extra_cache_tags: llama3,llama-3
  force_env_keys:
  - CM_GIT_CHECKOUT_FOLDER
  names:
  - hf-zoo
  tags: get,ml-model,huggingface,zoo,_clone-repo
print_env_at_the_end:
  LLAMA3_CHECKPOINT_PATH: LLAMA3 checkpoint path
tags:
- get
- raw
- ml-model
- language-processing
- llama3
- llama3-405b
uid: 2f8cef2acc334e80
variations:
  fp16:
    default: true
    env:
      CM_ML_MODEL_INPUT_DATA_TYPES: fp16
      CM_ML_MODEL_PRECISION: fp16
      CM_ML_MODEL_WEIGHT_DATA_TYPES: fp16
    group: precision
  meta-llama/Llama-3.1-405B-Instruct:
    adr:
      hf-zoo:
        tags: _model-stub.meta-llama/Llama-3.1-405B-Instruct
    default: true
    env:
      CM_ML_MODEL_NAME: Llama-3-405b-instruct
      CM_MODEL_ZOO_ENV_KEY: LLAMA3
    group: huggingface-stub
  meta-llama/Llama-3.1-8B-Instruct:
    adr:
      hf-zoo:
        tags: _model-stub.meta-llama/Llama-3.1-8B-Instruct
    env:
      CM_ML_MODEL_NAME: Llama-3-8b-instruct
      CM_MODEL_ZOO_ENV_KEY: LLAMA3
    group: huggingface-stub
  vllm:
    default: true
    env:
      CM_ML_MODEL_FRAMEWORK: vllm
    group: framework
  stub.#:
    adr:
      hf-zoo:
        tags: _model-stub.#
    env:
      CM_MODEL_ZOO_ENV_KEY: LLAMA3
    group: huggingface-stub
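
A hypothetical invocation, assuming the usual cm run script front end and that access to the gated meta-llama repository is already configured for the Hugging Face clone step:

    cm run script --tags=get,raw,ml-model,language-processing,llama3,llama3-405b --outdirname=/models

With the defaults above this resolves to the meta-llama/Llama-3.1-405B-Instruct stub in fp16; the stub.# variation can point at any other Hugging Face stub.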
35 changes: 35 additions & 0 deletions script/get-ml-model-llama3/customize.py
@@ -0,0 +1,35 @@
from cmind import utils
import os


def preprocess(i):

    os_info = i['os_info']
    env = i['env']

    # skip download and register in cache if the llama3 checkpoint path is
    # already defined by the user
    if env.get('CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH', '') != '':
        env['LLAMA3_CHECKPOINT_PATH'] = env['CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH']
        return {'return': 0}

    path = env.get('CM_OUTDIRNAME', '').strip()

    if path != "":
        os.makedirs(path, exist_ok=True)
        env['CM_GIT_CHECKOUT_FOLDER'] = os.path.join(
            path, env['CM_ML_MODEL_NAME'])

    env['CM_TMP_REQUIRE_DOWNLOAD'] = 'yes'

    return {'return': 0}


def postprocess(i):

    env = i['env']

    env['CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] = env['LLAMA3_CHECKPOINT_PATH']
    env['CM_GET_DEPENDENT_CACHED_PATH'] = env['CM_ML_MODEL_PATH']

    return {'return': 0}
3 changes: 3 additions & 0 deletions script/get-mlperf-inference-src/_cm.yaml
@@ -179,6 +179,9 @@ versions:
        tags: _tag.v4.1
    env:
      CM_MLPERF_LAST_RELEASE: v4.1
  r5.0:
    env:
      CM_MLPERF_LAST_RELEASE: v5.0
  tvm:
    env:
      CM_MLPERF_LAST_RELEASE: v3.1
4 changes: 4 additions & 0 deletions script/process-mlperf-accuracy/_cm.yaml
@@ -265,3 +265,7 @@ variations:
    env:
      CM_DATASET: igbh
    group: dataset
  dataset_llama3:
    env:
      CM_DATASET: dataset_llama3
    group: dataset
4 changes: 4 additions & 0 deletions script/process-mlperf-accuracy/customize.py
@@ -199,6 +199,10 @@ def preprocess(i):
        CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "graph", "R-GAT", "tools", "accuracy_igbh.py") + "' --mlperf-accuracy-file '" + os.path.join(
            result_dir, "mlperf_log_accuracy.json") + "' --dataset-path '" + env['CM_DATASET_IGBH_PATH'] + "' --dataset-size '" + env['CM_DATASET_IGBH_SIZE'] + "' --output-file '" + out_file + "'"

    elif dataset == "dataset_llama3":
        CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "llama3.1-405b", "evaluate-accuracy.py") + "' --checkpoint-path '" + env['CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] + "' --mlperf-accuracy-file '" + os.path.join(
            result_dir, "mlperf_log_accuracy.json") + "' --dtype '" + env['CM_ACCURACY_DTYPE'] + "' --dataset-file '" + env['CM_DATASET_LLAMA3_PATH'] + "' > '" + out_file + "'"

    else:
        return {'return': 1, 'error': 'Unsupported dataset'}

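For illustration, with placeholder paths the new branch expands to roughly the following (int32 matching the _int32 tag wired to llama3_1-405b-accuracy-script earlier in this PR):

    python3 '<inference-src>/language/llama3.1-405b/evaluate-accuracy.py' \
        --checkpoint-path '/models/Llama-3.1-405B-Instruct' \
        --mlperf-accuracy-file '<result-dir>/mlperf_log_accuracy.json' \
        --dtype 'int32' \
        --dataset-file '/data/llama3/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl' > '<out-file>'
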
3 changes: 3 additions & 0 deletions script/run-mlperf-inference-app/_cm.yaml
@@ -35,6 +35,7 @@ default_env:
  CM_MLPERF_SKIP_SUBMISSION_GENERATION: no
  CM_DOCKER_PRIVILEGED_MODE: yes
  CM_MLPERF_SUBMISSION_DIVISION: open
  CM_MLPERF_INFERENCE_TP_SIZE: 1

input_mapping:
  api_server: CM_MLPERF_INFERENCE_API_SERVER
@@ -109,6 +110,7 @@ input_mapping:
  sut: CM_MLPERF_INFERENCE_SUT_VARIATION
  nvidia_llama2_dataset_file_path: CM_NVIDIA_LLAMA_DATASET_FILE_PATH
  tp_size: CM_NVIDIA_TP_SIZE
  vllm_tp_size: CM_MLPERF_INFERENCE_TP_SIZE
  vllm_model_name: CM_VLLM_SERVER_MODEL_NAME
  num_workers: CM_MLPERF_INFERENCE_NUM_WORKERS
  max_test_duration: CM_MLPERF_MAX_DURATION_TEST
@@ -463,6 +465,7 @@ input_description:
    - mobilenet
    - efficientnet
    - rgat
    - llama3_1-405b
    default: resnet50
    desc: MLPerf model
    sort: 200
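
Taken together, a hypothetical end-to-end run through this app (assuming its usual run-mlperf,inference tags; only the model, vllm_tp_size and num_workers inputs are defined by the mappings added above, everything else is illustrative) could look like:

    cm run script --tags=run-mlperf,inference --model=llama3_1-405b --vllm_tp_size=8 --num_workers=1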