Improve docs gen #475

Merged: 5 commits, Jun 25, 2025
automation/script/doc.py (14 changes: 11 additions & 3 deletions)
````diff
@@ -80,12 +80,14 @@ def generate_docs(metadata, script_path, generic_inputs):
     tags_string = script_tags_help
 
     script_input_mapping = metadata.get('input_mapping', {})
+    script_default_env = metadata.get('default_env', {})
     script_input_description = metadata.get('input_description', {})
 
     r = get_run_readme(
         tags_string,
         script_input_mapping,
         script_input_description,
+        script_default_env,
         generic_inputs)
     if r['return'] > 0:
         return r
@@ -102,7 +104,8 @@ def generate_docs(metadata, script_path, generic_inputs):
     return {'return': 0}
 
 
-def get_run_readme(tags, input_mapping, input_description, generic_inputs):
+def get_run_readme(tags, input_mapping, input_description,
+                   default_env, generic_inputs):
     run_readme = f"""## Run Commands
 
 ```bash
@@ -112,6 +115,10 @@ def get_run_readme(tags, input_mapping, input_description, generic_inputs):
 """
 
     if input_description:
+        for i in input_description:
+            if i in input_mapping and input_mapping[i] in default_env:
+                input_description[i]['default'] = default_env[input_mapping[i]]
+
         input_description_string = generate_markdown(
             "Script Inputs", input_description)
     else:
@@ -141,13 +148,14 @@ def infer_type(field):
 def generate_markdown(heading, input_dict):
     lines = [
         f"### {heading}\n",
-        "| Name | Description | Default | Type |",
+        "| Name | Description | Choices | Default |",
         "|------|-------------|---------|------|"]
     for key in sorted(
             input_dict, key=lambda k: input_dict[k].get("sort", 9999)):
         field = input_dict[key]
         desc = field.get("desc", "")
+        choices = field.get("choices", "")
         default = field.get("default", "")
         dtype = infer_type(field)
-        lines.append(f"| `--{key}` | {desc} | `{default}` | {dtype} |")
+        lines.append(f"| `--{key}` | {desc} | {choices} | `{default}` |")
     return "\n".join(lines)
````
script/run-mlperf-inference-app/docs/README.md (111 changes: 54 additions & 57 deletions)
````diff
@@ -9,64 +9,61 @@ mlcr run-mlperf,inference
 
 ### Script Inputs
 
-| Name | Description | Default | Type |
+| Name | Description | Choices | Default |
 |------|-------------|---------|------|
-| `--division` | MLPerf division | `open` | str |
-| `--category` | MLPerf category | `edge` | str |
-| `--device` | MLPerf device | `cpu` | str |
-| `--model` | MLPerf model | `resnet50` | str |
-| `--precision` | MLPerf model precision | `` | str |
-| `--implementation` | MLPerf implementation | `mlcommons-python` | str |
-| `--backend` | MLPerf framework (backend) | `onnxruntime` | str |
-| `--scenario` | MLPerf scenario | `Offline` | str |
-| `--mode` | MLPerf benchmark mode | `` | str |
-| `--execution_mode` | MLPerf execution mode | `test` | str |
-| `--sut` | SUT configuration (if known) | `` | str |
-| `--submitter` | Submitter name (without space) | `CTuning` | str |
-| `--results_dir` | Folder path to store results (defaults to the current working directory) | `` | str |
-| `--submission_dir` | Folder path to store MLPerf submission tree | `` | str |
-| `--power` | Measure power | `no` | str |
-| `--adr.mlperf-power-client.power_server` | MLPerf Power server IP address | `192.168.0.15` | str |
-| `--adr.mlperf-power-client.port` | MLPerf Power server port | `4950` | int |
-| `--adr.compiler.tags` | Compiler for loadgen and any C/C++ part of implementation | `` | str |
-| `--adr.inference-src-loadgen.env.MLC_GIT_URL` | Git URL for MLPerf inference sources to build LoadGen (to enable non-reference implementations) | `` | str |
-| `--adr.inference-src.env.MLC_GIT_URL` | Git URL for MLPerf inference sources to run benchmarks (to enable non-reference implementations) | `` | str |
-| `--adr.mlperf-inference-implementation.max_batchsize` | Maximum batchsize to be used | `` | str |
-| `--adr.mlperf-inference-implementation.num_threads` | Number of threads (reference & C++ implementation only) | `` | str |
-| `--adr.python.name` | Python virtual environment name (optional) | `` | str |
-| `--adr.python.version` | Force Python version (must have all system deps) | `` | str |
-| `--adr.python.version_min` | Minimal Python version | `3.8` | str |
-| `--clean` | Clean run | `False` | bool |
-| `--compliance` | Whether to run compliance tests (applicable only for closed division) | `no` | str |
-| `--dashboard_wb_project` | W&B dashboard project | `` | str |
-| `--dashboard_wb_user` | W&B dashboard user | `` | str |
-| `--hw_name` | MLPerf hardware name (for example "gcp.c3_standard_8", "nvidia_orin", "lenovo_p14s_gen_4_windows_11", "macbook_pro_m1_2", "thundercomm_rb6" ...) | `` | str |
-| `--multistream_target_latency` | Set MultiStream target latency | `` | str |
-| `--offline_target_qps` | Set LoadGen Offline target QPS | `` | str |
-| `--quiet` | Quiet run (select default values for all questions) | `True` | bool |
-| `--server_target_qps` | Set Server target QPS | `` | str |
-| `--singlestream_target_latency` | Set SingleStream target latency | `` | str |
-| `--target_latency` | Set Target latency | `` | str |
-| `--target_qps` | Set LoadGen target QPS | `` | str |
-| `--j` | Print results dictionary to console at the end of the run | `False` | bool |
-| `--repro` | Record input/output/state/info files to make it easier to reproduce results | `False` | bool |
-| `--time` | Print script execution time at the end of the run | `True` | bool |
-| `--debug` | Debug this script | `False` | bool |
+| `--division` | MLPerf division | ['open', 'closed'] | `open` |
+| `--category` | MLPerf category | ['edge', 'datacenter', 'network'] | `edge` |
+| `--device` | MLPerf device | ['cpu', 'cuda', 'rocm', 'qaic'] | `cpu` |
+| `--model` | MLPerf model | ['resnet50', 'retinanet', 'bert-99', 'bert-99.9', '3d-unet-99', '3d-unet-99.9', 'rnnt', 'dlrm-v2-99', 'dlrm-v2-99.9', 'gptj-99', 'gptj-99.9', 'sdxl', 'llama2-70b-99', 'llama2-70b-99.9', 'mixtral-8x7b', 'mobilenet', 'efficientnet', 'rgat', 'llama3_1-405b', 'pointpainting'] | `resnet50` |
+| `--precision` | MLPerf model precision | ['float32', 'float16', 'bfloat16', 'int8', 'uint8'] | `` |
+| `--implementation` | MLPerf implementation | ['mlcommons-python', 'mlcommons-cpp', 'nvidia', 'intel', 'qualcomm', 'ctuning-cpp-tflite'] | `reference` |
+| `--backend` | MLPerf framework (backend) | ['onnxruntime', 'tf', 'pytorch', 'deepsparse', 'tensorrt', 'glow', 'tvm-onnx'] | `onnxruntime` |
+| `--scenario` | MLPerf scenario | ['Offline', 'Server', 'SingleStream', 'MultiStream'] | `Offline` |
+| `--mode` | MLPerf benchmark mode | ['', 'accuracy', 'performance'] | `` |
+| `--execution_mode` | MLPerf execution mode | ['test', 'fast', 'valid'] | `test` |
+| `--sut` | SUT configuration (if known) | | `` |
+| `--submitter` | Submitter name (without space) | | `MLCommons` |
+| `--results_dir` | Folder path to store results (defaults to the current working directory) | | `` |
+| `--submission_dir` | Folder path to store MLPerf submission tree | | `` |
+| `--power` | Measure power | ['yes', 'no'] | `no` |
+| `--adr.mlperf-power-client.power_server` | MLPerf Power server IP address | | `192.168.0.15` |
+| `--adr.mlperf-power-client.port` | MLPerf Power server port | | `4950` |
+| `--adr.compiler.tags` | Compiler for loadgen and any C/C++ part of implementation | | `` |
+| `--adr.inference-src-loadgen.env.MLC_GIT_URL` | Git URL for MLPerf inference sources to build LoadGen (to enable non-reference implementations) | | `` |
+| `--adr.inference-src.env.MLC_GIT_URL` | Git URL for MLPerf inference sources to run benchmarks (to enable non-reference implementations) | | `` |
+| `--adr.mlperf-inference-implementation.max_batchsize` | Maximum batchsize to be used | | `` |
+| `--adr.mlperf-inference-implementation.num_threads` | Number of threads (reference & C++ implementation only) | | `` |
+| `--adr.python.name` | Python virtual environment name (optional) | | `` |
+| `--adr.python.version` | Force Python version (must have all system deps) | | `` |
+| `--adr.python.version_min` | Minimal Python version | | `3.8` |
+| `--clean` | Clean run | | `False` |
+| `--compliance` | Whether to run compliance tests (applicable only for closed division) | ['yes', 'no'] | `no` |
+| `--hw_name` | MLPerf hardware name (for example "gcp.c3_standard_8", "nvidia_orin", "lenovo_p14s_gen_4_windows_11", "macbook_pro_m1_2", "thundercomm_rb6" ...) | | `` |
+| `--multistream_target_latency` | Set MultiStream target latency | | `` |
+| `--offline_target_qps` | Set LoadGen Offline target QPS | | `` |
+| `--quiet` | Quiet run (select default values for all questions) | | `True` |
+| `--server_target_qps` | Set Server target QPS | | `` |
+| `--singlestream_target_latency` | Set SingleStream target latency | | `` |
+| `--target_latency` | Set Target latency | | `` |
+| `--target_qps` | Set LoadGen target QPS | | `` |
+| `--repro` | Record input/output/state/info files to make it easier to reproduce results | | `False` |
+| `--time` | Print script execution time at the end of the run | | `True` |
+| `--debug` | Debug this script | | `False` |
 ### Generic Script Inputs
 
-| Name | Description | Default | Type |
+| Name | Description | Choices | Default |
 |------|-------------|---------|------|
-| `--input` | Input to the script passed using the env key `MLC_INPUT` | `` | str |
-| `--output` | Output from the script passed using the env key `MLC_OUTPUT` | `` | str |
-| `--outdirname` | The directory to store the script output | `cache directory ($HOME/MLC/repos/local/cache/<>) if the script is cacheable or else the current directory` | str |
-| `--outbasename` | The output file/folder name | `` | str |
-| `--name` | | `` | str |
-| `--extra_cache_tags` | Extra cache tags to be added to the cached entry when the script results are saved | `` | str |
-| `--skip_compile` | Skip compilation | `False` | bool |
-| `--skip_run` | Skip run | `False` | bool |
-| `--accept_license` | Accept the required license requirement to run the script | `False` | bool |
-| `--skip_system_deps` | Skip installing any system dependencies | `False` | bool |
-| `--git_ssh` | Use SSH for git repos | `False` | bool |
-| `--gh_token` | Github Token | `` | str |
-| `--hf_token` | Huggingface Token | `` | str |
-| `--verify_ssl` | Verify SSL | `False` | bool |
+| `--input` | Input to the script passed using the env key `MLC_INPUT` | | `` |
+| `--output` | Output from the script passed using the env key `MLC_OUTPUT` | | `` |
+| `--outdirname` | The directory to store the script output | | `cache directory ($HOME/MLC/repos/local/cache/<>) if the script is cacheable or else the current directory` |
+| `--outbasename` | The output file/folder name | | `` |
+| `--name` | | | `` |
+| `--extra_cache_tags` | Extra cache tags to be added to the cached entry when the script results are saved | | `` |
+| `--skip_compile` | Skip compilation | | `False` |
+| `--skip_run` | Skip run | | `False` |
+| `--accept_license` | Accept the required license requirement to run the script | | `False` |
+| `--skip_system_deps` | Skip installing any system dependencies | | `False` |
+| `--git_ssh` | Use SSH for git repos | | `False` |
+| `--gh_token` | Github Token | | `` |
+| `--hf_token` | Huggingface Token | | `` |
+| `--verify_ssl` | Verify SSL | | `False` |
````
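The regenerated rows above come straight from the new `generate_markdown`. For reference, a self-contained sketch of the post-PR behavior; the demo entry is a made-up input, and the unused `infer_type` call is dropped for brevity:

```python
# Standalone sketch mirroring the post-PR generate_markdown() from doc.py.
def generate_markdown(heading, input_dict):
    lines = [
        f"### {heading}\n",
        "| Name | Description | Choices | Default |",
        "|------|-------------|---------|------|"]
    # Rows are ordered by each input's "sort" key, unsorted inputs last.
    for key in sorted(
            input_dict, key=lambda k: input_dict[k].get("sort", 9999)):
        field = input_dict[key]
        desc = field.get("desc", "")
        choices = field.get("choices", "")
        default = field.get("default", "")
        lines.append(f"| `--{key}` | {desc} | {choices} | `{default}` |")
    return "\n".join(lines)

demo = {"device": {"desc": "MLPerf device",
                   "choices": ["cpu", "cuda", "rocm", "qaic"],
                   "default": "cpu", "sort": 100}}
print(generate_markdown("Script Inputs", demo))
# Last row: | `--device` | MLPerf device | ['cpu', 'cuda', 'rocm', 'qaic'] | `cpu` |
```

Interpolating `choices` directly into the f-string is why the README cells render as Python list literals.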
script/run-mlperf-inference-app/meta.yaml (25 changes: 4 additions & 21 deletions)
````diff
@@ -31,6 +31,9 @@ predeps: False
 default_env:
   MLC_MLPERF_IMPLEMENTATION: reference
   MLC_MLPERF_MODEL: resnet50
+  MLC_MLPERF_BACKEND: onnxruntime
+  MLC_MLPERF_DEVICE: cpu
+  MLC_MLPERF_LOADGEN_SCENARIO: Offline
   MLC_MLPERF_RUN_STYLE: test
   MLC_MLPERF_SKIP_SUBMISSION_GENERATION: no
   MLC_DOCKER_PRIVILEGED_MODE: yes
@@ -444,7 +447,6 @@ input_description:
     choices:
     - 'open'
     - 'closed'
-    default: 'open'
     desc: MLPerf division
     sort: 50
   category:
@@ -461,7 +463,6 @@ input_description:
     - cuda
     - rocm
     - qaic
-    default: cpu
     desc: MLPerf device
     sort: 100
   model:
@@ -486,7 +487,6 @@ input_description:
     - rgat
     - llama3_1-405b
     - pointpainting
-    default: resnet50
     desc: MLPerf model
     sort: 200
   precision:
@@ -496,7 +496,6 @@ input_description:
     - bfloat16
     - int8
     - uint8
-    default: ''
     desc: MLPerf model precision
     sort: 250
   implementation:
@@ -507,7 +506,6 @@ input_description:
     - intel
     - qualcomm
     - ctuning-cpp-tflite
-    default: mlcommons-python
     desc: MLPerf implementation
     sort: 300
   backend:
@@ -519,7 +517,6 @@ input_description:
     - tensorrt
     - glow
     - tvm-onnx
-    default: onnxruntime
     desc: MLPerf framework (backend)
     sort: 400
   scenario:
@@ -528,40 +525,34 @@ input_description:
     - Server
     - SingleStream
     - MultiStream
-    default: Offline
     desc: MLPerf scenario
     sort: 500
   mode:
     choices:
     - ''
     - accuracy
     - performance
-    default: ''
     desc: MLPerf benchmark mode
     sort: 600
   execution_mode:
     choices:
     - test
     - fast
     - valid
-    default: test
     desc: MLPerf execution mode
     sort: 700
   sut:
-    default: ''
     desc: SUT configuration (if known)
     sort: 750
   submitter:
-    default: CTuning
+    default: MLCommons
     desc: Submitter name (without space)
     sort: 800
   results_dir:
     desc: Folder path to store results (defaults to the current working directory)
-    default: ''
     sort: 900
   submission_dir:
     desc: Folder path to store MLPerf submission tree
-    default: ''
     sort: 1000
 
   adr.compiler.tags:
@@ -610,10 +601,6 @@ input_description:
     - 'no'
     default: 'no'
     desc: Whether to run compliance tests (applicable only for closed division)
-  dashboard_wb_project:
-    desc: W&B dashboard project
-  dashboard_wb_user:
-    desc: W&B dashboard user
   hw_name:
     desc: MLPerf hardware name (for example "gcp.c3_standard_8", "nvidia_orin", "lenovo_p14s_gen_4_windows_11", "macbook_pro_m1_2", "thundercomm_rb6" ...)
   multistream_target_latency:
@@ -632,10 +619,6 @@ input_description:
     desc: Set Target latency
   target_qps:
     desc: Set LoadGen target QPS
-  j:
-    desc: Print results dictionary to console at the end of the run
-    boolean: true
-    default: false
   repro:
     desc: Record input/output/state/info files to make it easier to reproduce results
     boolean: true
````
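Since defaults now live in `default_env`, a quick check can catch any input whose default is still duplicated in `input_description`. A hypothetical sketch, not part of this PR; it assumes PyYAML is installed, a checkout where the path below exists, and that `input_mapping` maps these inputs to the `MLC_MLPERF_*` keys as in this script's meta.yaml:

```python
# Hypothetical consistency check: after this PR, an input should get its
# default from exactly one place (default_env via input_mapping, or a
# deliberate local default like compliance's 'no'), not both.
import yaml  # assumes PyYAML

with open("script/run-mlperf-inference-app/meta.yaml") as f:
    meta = yaml.safe_load(f)

default_env = meta.get("default_env", {})
input_mapping = meta.get("input_mapping", {})

for name, field in meta.get("input_description", {}).items():
    env_key = input_mapping.get(name)
    resolved = default_env.get(env_key) if env_key else None
    local = field.get("default")
    if local is not None and resolved is not None:
        print(f"{name}: default duplicated in input_description and default_env")
```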