diff --git a/automation/script/doc.py b/automation/script/doc.py
index fd64a5900..d3c08ed7b 100644
--- a/automation/script/doc.py
+++ b/automation/script/doc.py
@@ -80,12 +80,14 @@ def generate_docs(metadata, script_path, generic_inputs):
     tags_string = script_tags_help

     script_input_mapping = metadata.get('input_mapping', {})
+    script_default_env = metadata.get('default_env', {})
     script_input_description = metadata.get('input_description', {})

     r = get_run_readme(
         tags_string,
         script_input_mapping,
         script_input_description,
+        script_default_env,
         generic_inputs)
     if r['return'] > 0:
         return r
@@ -102,7 +104,8 @@ def generate_docs(metadata, script_path, generic_inputs):
     return {'return': 0}


-def get_run_readme(tags, input_mapping, input_description, generic_inputs):
+def get_run_readme(tags, input_mapping, input_description,
+                   default_env, generic_inputs):
     run_readme = f"""## Run Commands

 ```bash
@@ -112,6 +115,10 @@ def get_run_readme(tags, input_mapping, input_description, generic_inputs):
 """

     if input_description:
+        for i in input_description:
+            if i in input_mapping and input_mapping[i] in default_env:
+                input_description[i]['default'] = default_env[input_mapping[i]]
+
         input_description_string = generate_markdown(
             "Script Inputs", input_description)
     else:
@@ -141,13 +148,14 @@ def infer_type(field):
 def generate_markdown(heading, input_dict):
     lines = [
         f"### {heading}\n",
-        "| Name | Description | Default | Type |",
+        "| Name | Description | Choices | Default |",
         "|------|-------------|---------|------|"]
     for key in sorted(
             input_dict, key=lambda k: input_dict[k].get("sort", 9999)):
         field = input_dict[key]
         desc = field.get("desc", "")
+        choices = field.get("choices", "")
         default = field.get("default", "")
         dtype = infer_type(field)
-        lines.append(f"| `--{key}` | {desc} | `{default}` | {dtype} |")
+        lines.append(f"| `--{key}` | {desc} | {choices} | `{default}` |")
     return "\n".join(lines)
diff --git a/script/run-mlperf-inference-app/docs/README.md b/script/run-mlperf-inference-app/docs/README.md
index a32cd5e2d..6e2100673 100644
--- a/script/run-mlperf-inference-app/docs/README.md
+++ b/script/run-mlperf-inference-app/docs/README.md
@@ -9,64 +9,61 @@ mlcr run-mlperf,inference
 ### Script Inputs

-| Name | Description | Default | Type |
+| Name | Description | Choices | Default |
 |------|-------------|---------|------|
-| `--division` | MLPerf division | `open` | str |
-| `--category` | MLPerf category | `edge` | str |
-| `--device` | MLPerf device | `cpu` | str |
-| `--model` | MLPerf model | `resnet50` | str |
-| `--precision` | MLPerf model precision | `` | str |
-| `--implementation` | MLPerf implementation | `mlcommons-python` | str |
-| `--backend` | MLPerf framework (backend) | `onnxruntime` | str |
-| `--scenario` | MLPerf scenario | `Offline` | str |
-| `--mode` | MLPerf benchmark mode | `` | str |
-| `--execution_mode` | MLPerf execution mode | `test` | str |
-| `--sut` | SUT configuration (if known) | `` | str |
-| `--submitter` | Submitter name (without space) | `CTuning` | str |
-| `--results_dir` | Folder path to store results (defaults to the current working directory) | `` | str |
-| `--submission_dir` | Folder path to store MLPerf submission tree | `` | str |
-| `--power` | Measure power | `no` | str |
-| `--adr.mlperf-power-client.power_server` | MLPerf Power server IP address | `192.168.0.15` | str |
-| `--adr.mlperf-power-client.port` | MLPerf Power server port | `4950` | int |
-| `--adr.compiler.tags` | Compiler for loadgen and any C/C++ part of implementation | `` | str |
-| `--adr.inference-src-loadgen.env.MLC_GIT_URL` | Git URL for MLPerf inference sources to build LoadGen (to enable non-reference implementations) | `` | str |
-| `--adr.inference-src.env.MLC_GIT_URL` | Git URL for MLPerf inference sources to run benchmarks (to enable non-reference implementations) | `` | str |
-| `--adr.mlperf-inference-implementation.max_batchsize` | Maximum batchsize to be used | `` | str |
-| `--adr.mlperf-inference-implementation.num_threads` | Number of threads (reference & C++ implementation only) | `` | str |
-| `--adr.python.name` | Python virtual environment name (optional) | `` | str |
-| `--adr.python.version` | Force Python version (must have all system deps) | `` | str |
-| `--adr.python.version_min` | Minimal Python version | `3.8` | str |
-| `--clean` | Clean run | `False` | bool |
-| `--compliance` | Whether to run compliance tests (applicable only for closed division) | `no` | str |
-| `--dashboard_wb_project` | W&B dashboard project | `` | str |
-| `--dashboard_wb_user` | W&B dashboard user | `` | str |
-| `--hw_name` | MLPerf hardware name (for example "gcp.c3_standard_8", "nvidia_orin", "lenovo_p14s_gen_4_windows_11", "macbook_pro_m1_2", "thundercomm_rb6" ...) | `` | str |
-| `--multistream_target_latency` | Set MultiStream target latency | `` | str |
-| `--offline_target_qps` | Set LoadGen Offline target QPS | `` | str |
-| `--quiet` | Quiet run (select default values for all questions) | `True` | bool |
-| `--server_target_qps` | Set Server target QPS | `` | str |
-| `--singlestream_target_latency` | Set SingleStream target latency | `` | str |
-| `--target_latency` | Set Target latency | `` | str |
-| `--target_qps` | Set LoadGen target QPS | `` | str |
-| `--j` | Print results dictionary to console at the end of the run | `False` | bool |
-| `--repro` | Record input/output/state/info files to make it easier to reproduce results | `False` | bool |
-| `--time` | Print script execution time at the end of the run | `True` | bool |
-| `--debug` | Debug this script | `False` | bool |
+| `--division` | MLPerf division | ['open', 'closed'] | `open` |
+| `--category` | MLPerf category | ['edge', 'datacenter', 'network'] | `edge` |
+| `--device` | MLPerf device | ['cpu', 'cuda', 'rocm', 'qaic'] | `cpu` |
+| `--model` | MLPerf model | ['resnet50', 'retinanet', 'bert-99', 'bert-99.9', '3d-unet-99', '3d-unet-99.9', 'rnnt', 'dlrm-v2-99', 'dlrm-v2-99.9', 'gptj-99', 'gptj-99.9', 'sdxl', 'llama2-70b-99', 'llama2-70b-99.9', 'mixtral-8x7b', 'mobilenet', 'efficientnet', 'rgat', 'llama3_1-405b', 'pointpainting'] | `resnet50` |
+| `--precision` | MLPerf model precision | ['float32', 'float16', 'bfloat16', 'int8', 'uint8'] | `` |
+| `--implementation` | MLPerf implementation | ['mlcommons-python', 'mlcommons-cpp', 'nvidia', 'intel', 'qualcomm', 'ctuning-cpp-tflite'] | `reference` |
+| `--backend` | MLPerf framework (backend) | ['onnxruntime', 'tf', 'pytorch', 'deepsparse', 'tensorrt', 'glow', 'tvm-onnx'] | `onnxruntime` |
+| `--scenario` | MLPerf scenario | ['Offline', 'Server', 'SingleStream', 'MultiStream'] | `Offline` |
+| `--mode` | MLPerf benchmark mode | ['', 'accuracy', 'performance'] | `` |
+| `--execution_mode` | MLPerf execution mode | ['test', 'fast', 'valid'] | `test` |
+| `--sut` | SUT configuration (if known) | | `` |
+| `--submitter` | Submitter name (without space) | | `MLCommons` |
+| `--results_dir` | Folder path to store results (defaults to the current working directory) | | `` |
+| `--submission_dir` | Folder path to store MLPerf submission tree | | `` |
+| `--power` | Measure power | ['yes', 'no'] | `no` |
+| `--adr.mlperf-power-client.power_server` | MLPerf Power server IP address | | `192.168.0.15` |
+| `--adr.mlperf-power-client.port` | MLPerf Power server port | | `4950` |
+| `--adr.compiler.tags` | Compiler for loadgen and any C/C++ part of implementation | | `` |
+| `--adr.inference-src-loadgen.env.MLC_GIT_URL` | Git URL for MLPerf inference sources to build LoadGen (to enable non-reference implementations) | | `` |
+| `--adr.inference-src.env.MLC_GIT_URL` | Git URL for MLPerf inference sources to run benchmarks (to enable non-reference implementations) | | `` |
+| `--adr.mlperf-inference-implementation.max_batchsize` | Maximum batchsize to be used | | `` |
+| `--adr.mlperf-inference-implementation.num_threads` | Number of threads (reference & C++ implementation only) | | `` |
+| `--adr.python.name` | Python virtual environment name (optional) | | `` |
+| `--adr.python.version` | Force Python version (must have all system deps) | | `` |
+| `--adr.python.version_min` | Minimal Python version | | `3.8` |
+| `--clean` | Clean run | | `False` |
+| `--compliance` | Whether to run compliance tests (applicable only for closed division) | ['yes', 'no'] | `no` |
+| `--hw_name` | MLPerf hardware name (for example "gcp.c3_standard_8", "nvidia_orin", "lenovo_p14s_gen_4_windows_11", "macbook_pro_m1_2", "thundercomm_rb6" ...) | | `` |
+| `--multistream_target_latency` | Set MultiStream target latency | | `` |
+| `--offline_target_qps` | Set LoadGen Offline target QPS | | `` |
+| `--quiet` | Quiet run (select default values for all questions) | | `True` |
+| `--server_target_qps` | Set Server target QPS | | `` |
+| `--singlestream_target_latency` | Set SingleStream target latency | | `` |
+| `--target_latency` | Set Target latency | | `` |
+| `--target_qps` | Set LoadGen target QPS | | `` |
+| `--repro` | Record input/output/state/info files to make it easier to reproduce results | | `False` |
+| `--time` | Print script execution time at the end of the run | | `True` |
+| `--debug` | Debug this script | | `False` |

 ### Generic Script Inputs

-| Name | Description | Default | Type |
+| Name | Description | Choices | Default |
 |------|-------------|---------|------|
-| `--input` | Input to the script passed using the env key `MLC_INPUT` | `` | str |
-| `--output` | Output from the script passed using the env key `MLC_OUTPUT` | `` | str |
-| `--outdirname` | The directory to store the script output | `cache directory ($HOME/MLC/repos/local/cache/<>) if the script is cacheable or else the current directory` | str |
-| `--outbasename` | The output file/folder name | `` | str |
-| `--name` | | `` | str |
-| `--extra_cache_tags` | Extra cache tags to be added to the cached entry when the script results are saved | `` | str |
-| `--skip_compile` | Skip compilation | `False` | bool |
-| `--skip_run` | Skip run | `False` | bool |
-| `--accept_license` | Accept the required license requirement to run the script | `False` | bool |
-| `--skip_system_deps` | Skip installing any system dependencies | `False` | bool |
-| `--git_ssh` | Use SSH for git repos | `False` | bool |
-| `--gh_token` | Github Token | `` | str |
-| `--hf_token` | Huggingface Token | `` | str |
-| `--verify_ssl` | Verify SSL | `False` | bool |
+| `--input` | Input to the script passed using the env key `MLC_INPUT` | | `` |
+| `--output` | Output from the script passed using the env key `MLC_OUTPUT` | | `` |
+| `--outdirname` | The directory to store the script output | | `cache directory ($HOME/MLC/repos/local/cache/<>) if the script is cacheable or else the current directory` |
+| `--outbasename` | The output file/folder name | | `` |
+| `--name` | | | `` |
+| `--extra_cache_tags` | Extra cache tags to be added to the cached entry when the script results are saved | | `` |
+| `--skip_compile` | Skip compilation | | `False` |
+| `--skip_run` | Skip run | | `False` |
+| `--accept_license` | Accept the required license requirement to run the script | | `False` |
+| `--skip_system_deps` | Skip installing any system dependencies | | `False` |
+| `--git_ssh` | Use SSH for git repos | | `False` |
+| `--gh_token` | Github Token | | `` |
+| `--hf_token` | Huggingface Token | | `` |
+| `--verify_ssl` | Verify SSL | | `False` |
diff --git a/script/run-mlperf-inference-app/meta.yaml b/script/run-mlperf-inference-app/meta.yaml
index ad5ba5d82..be45c1d0a 100644
--- a/script/run-mlperf-inference-app/meta.yaml
+++ b/script/run-mlperf-inference-app/meta.yaml
@@ -31,6 +31,9 @@ predeps: False
 default_env:
   MLC_MLPERF_IMPLEMENTATION: reference
   MLC_MLPERF_MODEL: resnet50
+  MLC_MLPERF_BACKEND: onnxruntime
+  MLC_MLPERF_DEVICE: cpu
+  MLC_MLPERF_LOADGEN_SCENARIO: Offline
   MLC_MLPERF_RUN_STYLE: test
   MLC_MLPERF_SKIP_SUBMISSION_GENERATION: no
   MLC_DOCKER_PRIVILEGED_MODE: yes
@@ -444,7 +447,6 @@ input_description:
     choices:
     - 'open'
     - 'closed'
-    default: 'open'
     desc: MLPerf division
     sort: 50
   category:
@@ -461,7 +463,6 @@ input_description:
    - cuda
     - rocm
     - qaic
-    default: cpu
     desc: MLPerf device
     sort: 100
   model:
@@ -486,7 +487,6 @@ input_description:
     - rgat
     - llama3_1-405b
     - pointpainting
-    default: resnet50
     desc: MLPerf model
     sort: 200
   precision:
@@ -496,7 +496,6 @@ input_description:
     - bfloat16
     - int8
     - uint8
-    default: ''
     desc: MLPerf model precision
     sort: 250
   implementation:
@@ -507,7 +506,6 @@ input_description:
     - intel
     - qualcomm
     - ctuning-cpp-tflite
-    default: mlcommons-python
     desc: MLPerf implementation
     sort: 300
   backend:
@@ -519,7 +517,6 @@ input_description:
     - tensorrt
     - glow
     - tvm-onnx
-    default: onnxruntime
     desc: MLPerf framework (backend)
     sort: 400
   scenario:
@@ -528,7 +525,6 @@ input_description:
     - Server
     - SingleStream
     - MultiStream
-    default: Offline
     desc: MLPerf scenario
     sort: 500
   mode:
@@ -536,7 +532,6 @@ input_description:
     - ''
     - accuracy
     - performance
-    default: ''
     desc: MLPerf benchmark mode
     sort: 600
   execution_mode:
@@ -544,24 +539,20 @@ input_description:
     - test
     - fast
     - valid
-    default: test
     desc: MLPerf execution mode
     sort: 700
   sut:
-    default: ''
     desc: SUT configuration (if known)
     sort: 750
   submitter:
-    default: CTuning
+    default: MLCommons
     desc: Submitter name (without space)
     sort: 800
   results_dir:
     desc: Folder path to store results (defaults to the current working directory)
-    default: ''
     sort: 900
   submission_dir:
     desc: Folder path to store MLPerf submission tree
-    default: ''
     sort: 1000

   adr.compiler.tags:
@@ -610,10 +601,6 @@ input_description:
     - 'no'
     default: 'no'
     desc: Whether to run compliance tests (applicable only for closed division)
-  dashboard_wb_project:
-    desc: W&B dashboard project
-  dashboard_wb_user:
-    desc: W&B dashboard user
   hw_name:
     desc: MLPerf hardware name (for example "gcp.c3_standard_8", "nvidia_orin", "lenovo_p14s_gen_4_windows_11", "macbook_pro_m1_2", "thundercomm_rb6" ...)
  multistream_target_latency:
@@ -632,10 +619,6 @@ input_description:
     desc: Set Target latency
   target_qps:
     desc: Set LoadGen target QPS
-  j:
-    desc: Print results dictionary to console at the end of the run
-    boolean: true
-    default: false
   repro:
     desc: Record input/output/state/info files to make it easier to reproduce results
     boolean: true
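
For reviewers, here is a minimal self-contained sketch of the behavior this patch introduces in `get_run_readme` and `generate_markdown`: defaults are no longer duplicated under each `input_description` entry but are resolved from `default_env` through `input_mapping`, and the rendered table gains a Choices column in place of the old Type column. The metadata below is an illustrative stand-in (a trimmed subset in the spirit of `meta.yaml`), not the real script metadata.

```python
# Illustrative stand-in metadata (trimmed; not the real meta.yaml contents).
input_mapping = {
    "device": "MLC_MLPERF_DEVICE",
    "scenario": "MLC_MLPERF_LOADGEN_SCENARIO",
}
default_env = {
    "MLC_MLPERF_DEVICE": "cpu",
    "MLC_MLPERF_LOADGEN_SCENARIO": "Offline",
}
input_description = {
    "device": {"desc": "MLPerf device",
               "choices": ["cpu", "cuda", "rocm", "qaic"], "sort": 100},
    "scenario": {"desc": "MLPerf scenario",
                 "choices": ["Offline", "Server", "SingleStream", "MultiStream"],
                 "sort": 500},
    "sut": {"desc": "SUT configuration (if known)", "sort": 750},
}

# Resolve defaults from default_env via input_mapping, as the patched
# get_run_readme now does before rendering the table.
for name, field in input_description.items():
    env_key = input_mapping.get(name)
    if env_key in default_env:
        field["default"] = default_env[env_key]

# Render rows the way the patched generate_markdown does
# (Choices column added, Type column dropped).
rows = ["| Name | Description | Choices | Default |",
        "|------|-------------|---------|------|"]
for name in sorted(input_description,
                   key=lambda k: input_description[k].get("sort", 9999)):
    field = input_description[name]
    rows.append(f"| `--{name}` | {field.get('desc', '')} "
                f"| {field.get('choices', '')} | `{field.get('default', '')}` |")
print("\n".join(rows))
```

One side effect worth noting: as in the patch, the resolution loop assigns into `input_description[...]['default']`, so the parsed metadata dict is mutated in place, and any later consumer of the same metadata will see the injected defaults.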