Skip to content

Commit c32d3a5

Browse files
authored
Code changes for supporting llama3_1-405b reference implementation (#111)
* llama3_1 reference implementation code changes
1 parent 4ac4fc4 commit c32d3a5

File tree

11 files changed

+684
-370
lines changed

11 files changed

+684
-370
lines changed

script/app-mlperf-inference-mlcommons-python/_cm.yaml

Lines changed: 410 additions & 368 deletions
Large diffs are not rendered by default.

script/app-mlperf-inference-mlcommons-python/customize.py

Lines changed: 28 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ def preprocess(i):
6868
str(env['CM_MLPERF_LOADGEN_BATCH_SIZE'])
6969

7070
if env.get('CM_MLPERF_LOADGEN_QUERY_COUNT', '') != '' and not env.get('CM_TMP_IGNORE_MLPERF_QUERY_COUNT', False) and (
71-
env['CM_MLPERF_LOADGEN_MODE'] == 'accuracy' or 'gptj' in env['CM_MODEL'] or 'llama2' in env['CM_MODEL'] or 'mixtral' in env['CM_MODEL']) and env.get('CM_MLPERF_RUN_STYLE', '') != "valid":
71+
env['CM_MLPERF_LOADGEN_MODE'] == 'accuracy' or 'gptj' in env['CM_MODEL'] or 'llama2' in env['CM_MODEL'] or 'mixtral' in env['CM_MODEL'] or 'llama3' in env['CM_MODEL']) and env.get('CM_MLPERF_RUN_STYLE', '') != "valid":
7272
env['CM_MLPERF_LOADGEN_EXTRA_OPTIONS'] += " --count " + \
7373
env['CM_MLPERF_LOADGEN_QUERY_COUNT']
7474

@@ -127,7 +127,7 @@ def preprocess(i):
127127
if 'CM_MLPERF_USER_CONF' in env:
128128
user_conf_path = env['CM_MLPERF_USER_CONF']
129129
x = "" if os_info['platform'] == 'windows' else "'"
130-
if 'llama2-70b' in env['CM_MODEL'] or "mixtral-8x7b" in env["CM_MODEL"]:
130+
if 'llama2-70b' in env['CM_MODEL'] or "mixtral-8x7b" in env["CM_MODEL"] or "llama3" in env["CM_MODEL"]:
131131
scenario_extra_options += " --user-conf " + x + user_conf_path + x
132132
else:
133133
scenario_extra_options += " --user_conf " + x + user_conf_path + x
@@ -499,6 +499,32 @@ def get_run_cmd_reference(
499499

500500
if env.get('CM_ACTIVATE_RGAT_IN_MEMORY', '') == "yes":
501501
cmd += " --in-memory "
502+
503+
elif "llama3" in env['CM_MODEL']:
504+
env['RUN_DIR'] = os.path.join(
505+
env['CM_MLPERF_INFERENCE_SOURCE'],
506+
"language",
507+
"llama3.1-405b")
508+
509+
if int(env.get('CM_MLPERF_INFERENCE_TP_SIZE', '')) > 1:
510+
env['VLLM_WORKER_MULTIPROC_METHOD'] = "spawn"
511+
512+
cmd = env['CM_PYTHON_BIN_WITH_PATH'] + " main.py " \
513+
" --scenario " + env['CM_MLPERF_LOADGEN_SCENARIO'] + \
514+
" --dataset-path " + env['CM_DATASET_LLAMA3_PATH'] + \
515+
" --output-log-dir " + env['CM_MLPERF_OUTPUT_DIR'] + \
516+
' --dtype ' + env['CM_MLPERF_MODEL_PRECISION'] + \
517+
" --model-path " + env['CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] + \
518+
" --tensor-parallel-size " + env['CM_MLPERF_INFERENCE_TP_SIZE'] + \
519+
" --vllm "
520+
521+
if env.get('CM_MLPERF_INFERENCE_NUM_WORKERS', '') != '':
522+
cmd += f" --num-workers {env['CM_MLPERF_INFERENCE_NUM_WORKERS']}"
523+
524+
525+
cmd = cmd.replace("--count", "--total-sample-count")
526+
cmd = cmd.replace("--max-batchsize", "--batch-size")
527+
502528

503529
if env.get('CM_NETWORK_LOADGEN', '') in ["lon", "sut"]:
504530
cmd = cmd + " " + "--network " + env['CM_NETWORK_LOADGEN']

script/app-mlperf-inference/_cm.yaml

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -221,6 +221,8 @@ variations:
221221
tags: _int32
222222
cnndm-accuracy-script:
223223
tags: _int32
224+
llama3_1-405b-accuracy-script:
225+
tags: _int32
224226
env:
225227
CM_MLPERF_PYTHON: 'yes'
226228
CM_MLPERF_IMPLEMENTATION: mlcommons_python
@@ -272,6 +274,10 @@ variations:
272274
default_variations:
273275
backend: pytorch
274276

277+
reference,llama3_1-405b:
278+
default_variations:
279+
backend: pytorch
280+
275281
reference,mixtral-8x7b:
276282
default_variations:
277283
backend: pytorch
@@ -795,6 +801,40 @@ variations:
795801
- igbh-original
796802
- igbh-dataset
797803

804+
llama3_1-405b:
805+
group:
806+
model
807+
add_deps_recursive:
808+
mlperf-inference-implementation:
809+
tags: _llama3_1-405b
810+
env:
811+
CM_MODEL:
812+
llama3_1-405b
813+
posthook_deps:
814+
- enable_if_env:
815+
CM_MLPERF_LOADGEN_MODE:
816+
- accuracy
817+
- all
818+
CM_MLPERF_ACCURACY_RESULTS_DIR:
819+
- 'on'
820+
skip_if_env:
821+
CM_MLPERF_IMPLEMENTATION:
822+
- nvidia
823+
names:
824+
- mlperf-accuracy-script
825+
- llama3_1-405b-accuracy-script
826+
tags: run,accuracy,mlperf,_dataset_llama3
827+
docker:
828+
deps:
829+
- tags: get,ml-model,llama3
830+
enable_if_env:
831+
CM_USE_DATASET_FROM_HOST:
832+
- 'yes'
833+
names:
834+
- llama3_1-405b
835+
- llama3-405b
836+
837+
798838
sdxl:
799839
group:
800840
model
@@ -1682,6 +1722,8 @@ variations:
16821722
tags: _mlcommons
16831723
intel-harness:
16841724
tags: _v4.1
1725+
inference-src:
1726+
version: r5.0
16851727
default_env:
16861728
CM_SKIP_SYS_UTILS: 'yes'
16871729
CM_REGENERATE_MEASURE_FILES: 'yes'
Lines changed: 56 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,56 @@
1+
alias: get-dataset-mlperf-inference-llama3
2+
automation_alias: script
3+
automation_uid: 5b4e0237da074764
4+
cache: true
5+
tags:
6+
- get
7+
- dataset
8+
- mlperf
9+
- llama3
10+
- inference
11+
uid: c3bc69599cbc4db7
12+
new_env_keys:
13+
- CM_DATASET_LLAMA3_PATH
14+
input_mapping:
15+
outdirname: CM_OUTDIRNAME
16+
prehook_deps:
17+
- env:
18+
CM_DOWNLOAD_FINAL_ENV_NAME: CM_DATASET_LLAMA3_PATH
19+
CM_EXTRACT_TO_FOLDER: llama-3-dataset
20+
extra_cache_tags: dataset,llama3
21+
force_cache: true
22+
enable_if_env:
23+
CM_TMP_REQUIRE_DOWNLOAD:
24+
- 'yes'
25+
names:
26+
- dae
27+
tags: download-and-extract
28+
update_tags_from_env_with_prefix:
29+
_url.:
30+
- CM_DOWNLOAD_URL
31+
variations:
32+
validation:
33+
default: true
34+
group: dataset-type
35+
env:
36+
CM_RCLONE_URL: mlc-inference:mlcommons-inference-wg-public/llama3_405b/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl
37+
CM_DATASET_TYPE: validation
38+
CM_DATASET_FILE_NAME: mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl
39+
calibration:
40+
group: dataset-type
41+
env:
42+
CM_RCLONE_URL: mlc-inference:mlcommons-inference-wg-public/llama3_405b/mlperf_llama3.1_405b_calibration_dataset_512_processed_fp16_eval.pkl
43+
CM_DATASET_TYPE: calibration
44+
CM_DATASET_FILE_NAME: mlperf_llama3.1_405b_calibration_dataset_512_processed_fp16_eval.pkl
45+
rclone:
46+
add_deps_recursive:
47+
dae:
48+
tags: _rclone
49+
default: true
50+
env:
51+
CM_DOWNLOAD_FILENAME: checkpoint
52+
CM_DOWNLOAD_URL: <<<CM_RCLONE_URL>>>
53+
CM_RCLONE_CONFIG_NAME: mlc-inference
54+
group: download-tool
55+
print_env_at_the_end:
56+
CM_DATASET_LLAMA3_PATH: Path to the dataset
Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,31 @@
1+
from cmind import utils
2+
import os
3+
4+
5+
def preprocess(i):
    """Prepare the environment before the llama3 dataset dependency runs.

    Args:
        i: Script input dict; ``i['os_info']['platform']`` and ``i['env']``
            are read, and ``i['env']`` is updated in place.

    Returns:
        ``{'return': 0}`` on success, or ``{'return': 1, 'error': ...}`` when
        the platform is reported as ``"windows"``.
    """
    host = i['os_info']
    env = i['env']

    # Bail out early on the one platform this script does not handle.
    if host['platform'] == "windows":
        return {'return': 1, 'error': 'Script not supported in windows yet!'}

    # No dataset path supplied: flag that a download is required.
    dataset_path = env.get('CM_DATASET_LLAMA3_PATH', '')
    if dataset_path == '':
        env['CM_TMP_REQUIRE_DOWNLOAD'] = "yes"

    # Forward a requested output directory as the download location.
    out_dir = env.get('CM_OUTDIRNAME', '')
    if out_dir != '':
        env['CM_DOWNLOAD_PATH'] = out_dir

    return {'return': 0}
21+
22+
23+
def postprocess(i):
    """Point ``CM_DATASET_LLAMA3_PATH`` at the dataset file after a download.

    When ``CM_TMP_REQUIRE_DOWNLOAD`` is ``"yes"``, the current value of
    ``CM_DATASET_LLAMA3_PATH`` is joined with ``CM_DATASET_FILE_NAME`` and
    written back. Otherwise the environment is left untouched.

    Args:
        i: Script input dict; ``i['env']`` is updated in place.

    Returns:
        ``{'return': 0}``.
    """
    env = i['env']

    if env.get('CM_TMP_REQUIRE_DOWNLOAD', '') != "yes":
        # Nothing was downloaded, so the path is used as given.
        return {'return': 0}

    # NOTE(review): presumably the download step has stored the extraction
    # folder in CM_DATASET_LLAMA3_PATH by now - confirm against the dependency.
    download_dir = env['CM_DATASET_LLAMA3_PATH']
    file_name = env['CM_DATASET_FILE_NAME']
    env['CM_DATASET_LLAMA3_PATH'] = os.path.join(download_dir, file_name)

    return {'return': 0}

script/get-ml-model-llama3/_cm.yaml

Lines changed: 68 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,68 @@
1+
alias: get-ml-model-llama3
2+
automation_alias: script
3+
automation_uid: 5b4e0237da074764
4+
cache: true
5+
category: AI/ML models
6+
input_mapping:
7+
outdirname: CM_OUTDIRNAME
8+
new_env_keys:
9+
- CM_ML_MODEL_*
10+
- LLAMA3_CHECKPOINT_PATH
11+
prehook_deps:
12+
- enable_if_env:
13+
CM_TMP_REQUIRE_DOWNLOAD:
14+
- 'yes'
15+
env: {}
16+
extra_cache_tags: llama3,llama-3
17+
force_env_keys:
18+
- CM_GIT_CHECKOUT_FOLDER
19+
names:
20+
- hf-zoo
21+
tags: get,ml-model,huggingface,zoo,_clone-repo
22+
print_env_at_the_end:
23+
LLAMA3_CHECKPOINT_PATH: LLAMA3 checkpoint path
24+
tags:
25+
- get
26+
- raw
27+
- ml-model
28+
- language-processing
29+
- llama3
30+
- llama3-405b
31+
uid: 2f8cef2acc334e80
32+
variations:
33+
fp16:
34+
default: true
35+
env:
36+
CM_ML_MODEL_INPUT_DATA_TYPES: fp16
37+
CM_ML_MODEL_PRECISION: fp16
38+
CM_ML_MODEL_WEIGHT_DATA_TYPES: fp16
39+
group: precision
40+
meta-llama/Llama-3.1-405B-Instruct:
41+
adr:
42+
hf-zoo:
43+
tags: _model-stub.meta-llama/Llama-3.1-405B-Instruct
44+
default: true
45+
env:
46+
CM_ML_MODEL_NAME: Llama-3-405b-instruct
47+
CM_MODEL_ZOO_ENV_KEY: LLAMA3
48+
group: huggingface-stub
49+
meta-llama/Llama-3.1-8B-Instruct:
50+
adr:
51+
hf-zoo:
52+
tags: _model-stub.meta-llama/Llama-3.1-8B-Instruct
53+
env:
54+
CM_ML_MODEL_NAME: Llama-3-8b-instruct
55+
CM_MODEL_ZOO_ENV_KEY: LLAMA3
56+
group: huggingface-stub
57+
vllm:
58+
default: true
59+
env:
60+
CM_ML_MODEL_FRAMEWORK: vllm
61+
group: framework
62+
stub.#:
63+
adr:
64+
hf-zoo:
65+
tags: _model-stub.#
66+
env:
67+
CM_MODEL_ZOO_ENV_KEY: LLAMA3
68+
group: huggingface-stub
Lines changed: 35 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,35 @@
1+
from cmind import utils
2+
import os
3+
4+
5+
def preprocess(i):
    """Decide whether the llama3 checkpoint has to be downloaded.

    If ``CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH`` is already set, it is copied to
    ``LLAMA3_CHECKPOINT_PATH`` and no download is requested. Otherwise
    ``CM_TMP_REQUIRE_DOWNLOAD`` is set to ``'yes'``; when ``CM_OUTDIRNAME`` is
    given, that directory is created and ``CM_GIT_CHECKOUT_FOLDER`` is set to
    ``<CM_OUTDIRNAME>/<CM_ML_MODEL_NAME>``.

    Args:
        i: Script input dict; ``i['env']`` is updated in place.

    Returns:
        ``{'return': 0}`` on success, or ``{'return': 1, 'error': ...}`` when
        an output directory is requested but ``CM_ML_MODEL_NAME`` is not set.
    """
    env = i['env']

    # Skip the download and register in cache if the llama3 checkpoint path
    # is already defined by the user.
    user_checkpoint = env.get('CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH', '')
    if user_checkpoint != '':
        env['LLAMA3_CHECKPOINT_PATH'] = user_checkpoint
        return {'return': 0}

    out_dir = env.get('CM_OUTDIRNAME', '').strip()

    if out_dir != "":
        # The checkout folder is named after the model. The name may be
        # missing (e.g. a custom model stub), so report that as a regular
        # script error instead of raising KeyError, and do it before any
        # directory is created.
        model_name = env.get('CM_ML_MODEL_NAME', '')
        if model_name == '':
            return {
                'return': 1,
                'error': 'CM_ML_MODEL_NAME must be set when CM_OUTDIRNAME is used'}

        os.makedirs(out_dir, exist_ok=True)
        env['CM_GIT_CHECKOUT_FOLDER'] = os.path.join(out_dir, model_name)

    env['CM_TMP_REQUIRE_DOWNLOAD'] = 'yes'

    return {'return': 0}
26+
27+
28+
def postprocess(i):
    """Export the final llama3 checkpoint path and the cached path.

    Sets ``CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH`` from
    ``LLAMA3_CHECKPOINT_PATH`` and ``CM_GET_DEPENDENT_CACHED_PATH`` from
    ``CM_ML_MODEL_PATH``. When only one of the two source values is present,
    it is used for both outputs.

    Args:
        i: Script input dict; ``i['env']`` is updated in place.

    Returns:
        ``{'return': 0}`` on success, or ``{'return': 1, 'error': ...}`` when
        neither ``LLAMA3_CHECKPOINT_PATH`` nor ``CM_ML_MODEL_PATH`` is set.
    """
    env = i['env']

    checkpoint_path = env.get('LLAMA3_CHECKPOINT_PATH', '')
    model_path = env.get('CM_ML_MODEL_PATH', '')

    # When the user supplies the checkpoint path up front, the download is
    # skipped, so CM_ML_MODEL_PATH may never be set. Fall back between the
    # two values instead of raising KeyError.
    # NOTE(review): presumably the download dependency sets both values on
    # the normal path - confirm against that script.
    if checkpoint_path == '':
        checkpoint_path = model_path
    if model_path == '':
        model_path = checkpoint_path

    if checkpoint_path == '':
        return {
            'return': 1,
            'error': 'Neither LLAMA3_CHECKPOINT_PATH nor CM_ML_MODEL_PATH is set'}

    env['CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] = checkpoint_path
    env['CM_GET_DEPENDENT_CACHED_PATH'] = model_path

    return {'return': 0}

script/get-mlperf-inference-src/_cm.yaml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,6 +179,9 @@ versions:
179179
tags: _tag.v4.1
180180
env:
181181
CM_MLPERF_LAST_RELEASE: v4.1
182+
r5.0:
183+
env:
184+
CM_MLPERF_LAST_RELEASE: v5.0
182185
tvm:
183186
env:
184187
CM_MLPERF_LAST_RELEASE: v3.1

script/process-mlperf-accuracy/_cm.yaml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -265,3 +265,7 @@ variations:
265265
env:
266266
CM_DATASET: igbh
267267
group: dataset
268+
dataset_llama3:
269+
env:
270+
CM_DATASET: dataset_llama3
271+
group: dataset

script/process-mlperf-accuracy/customize.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -199,6 +199,10 @@ def preprocess(i):
199199
CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "graph", "R-GAT", "tools", "accuracy_igbh.py") + "' --mlperf-accuracy-file '" + os.path.join(
200200
result_dir, "mlperf_log_accuracy.json") + "' --dataset-path '" + env['CM_DATASET_IGBH_PATH'] + "' --dataset-size '" + env['CM_DATASET_IGBH_SIZE'] + "' --output-file '" + out_file + "'"
201201

202+
elif dataset == "dataset_llama3":
203+
CMD = env['CM_PYTHON_BIN_WITH_PATH'] + " '" + os.path.join(env['CM_MLPERF_INFERENCE_SOURCE'], "language", "llama3.1-405b", "evaluate-accuracy.py") + "' --checkpoint-path '" + env['CM_ML_MODEL_LLAMA3_CHECKPOINT_PATH'] + "' --mlperf-accuracy-file '" + os.path.join(
204+
result_dir, "mlperf_log_accuracy.json") + "' --dtype '" + env['CM_ACCURACY_DTYPE'] + "' --dataset-file '" + env['CM_DATASET_LLAMA3_PATH'] + "' > '" + out_file + "'"
205+
202206
else:
203207
return {'return': 1, 'error': 'Unsupported dataset'}
204208

0 commit comments

Comments
 (0)