diff --git a/.ci/scripts/test_llama.sh b/.ci/scripts/test_llama.sh
index 9f183528719..f92a983a340 100644
--- a/.ci/scripts/test_llama.sh
+++ b/.ci/scripts/test_llama.sh
@@ -54,10 +54,7 @@ PT2E_QUANTIZE="${PT2E_QUANTIZE:-}"
 # Default CMake Build Type to release mode
 CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release}
 
-if [[ $# -lt 4 ]]; then # Assuming 4 mandatory args
-  echo "Expecting atleast 4 positional arguments"
-  echo "Usage: [...]"
-fi
+# Argument validation is done individually below for each required parameter
 if [[ -z "${MODEL_NAME:-}" ]]; then
   echo "Missing model name, exiting..."
   exit 1
@@ -224,34 +221,34 @@ fi
 
 # Export model.
 EXPORTED_MODEL_NAME="${EXPORTED_MODEL_NAME}.pte"
 echo "Exporting ${EXPORTED_MODEL_NAME}"
-EXPORT_ARGS="-c ${CHECKPOINT_FILE_NAME} -p ${PARAMS} -d ${DTYPE} -n ${EXPORTED_MODEL_NAME} -kv"
+EXPORT_ARGS="base.checkpoint=${CHECKPOINT_FILE_NAME} base.params=${PARAMS} model.dtype_override=${DTYPE} export.output_name=${EXPORTED_MODEL_NAME} model.use_kv_cache=true"
 if [[ "${XNNPACK}" == "ON" ]]; then
-  EXPORT_ARGS="${EXPORT_ARGS} -X --xnnpack-extended-ops -qmode 8da4w -G 128"
+  EXPORT_ARGS="${EXPORT_ARGS} backend.xnnpack.enabled=true backend.xnnpack.extended_ops=true quantization.qmode=8da4w quantization.group_size=128"
 fi
 if [[ "${CUSTOM}" == "ON" ]]; then
-  EXPORT_ARGS="${EXPORT_ARGS} --use_sdpa_with_kv_cache"
+  EXPORT_ARGS="${EXPORT_ARGS} model.use_sdpa_with_kv_cache=true"
 fi
 if [[ "${QE}" == "ON" ]]; then
-  EXPORT_ARGS="${EXPORT_ARGS} --embedding-quantize 8,1024"
+  EXPORT_ARGS="${EXPORT_ARGS} quantization.embedding_quantize=\"8,1024\""
 fi
 if [[ "${MPS}" == "ON" ]]; then
-  EXPORT_ARGS="${EXPORT_ARGS} -kv -v --mps --disable_dynamic_shape"
+  EXPORT_ARGS="${EXPORT_ARGS} backend.mps.enabled=true model.enable_dynamic_shape=false debug.verbose=true"
 fi
 if [[ "${COREML}" == "ON" ]]; then
-  EXPORT_ARGS="${EXPORT_ARGS} -kv -v --coreml --disable_dynamic_shape"
+  EXPORT_ARGS="${EXPORT_ARGS} backend.coreml.enabled=true model.enable_dynamic_shape=false debug.verbose=true"
 fi
 if [[ "${QNN}" == "ON" ]]; then
-  EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
+  EXPORT_ARGS="${EXPORT_ARGS} backend.qnn.enabled=true model.enable_dynamic_shape=false debug.verbose=true"
   echo "PT2E_QUANTIZE is ${PT2E_QUANTIZE}"
   if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
-    EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
+    EXPORT_ARGS+=" base.tokenizer_path=tokenizer.model quantization.pt2e_quantize=qnn_16a16w quantization.calibration_tasks=[\"wikitext\"] quantization.calibration_limit=1 quantization.calibration_seq_length=128 quantization.calibration_data=\"Once\""
   fi
 fi
 if [[ "${QUANTIZE_KV_CACHE}" == "ON" ]]; then
-  EXPORT_ARGS="${EXPORT_ARGS} --quantize_kv_cache"
+  EXPORT_ARGS="${EXPORT_ARGS} model.quantize_kv_cache=true"
 fi
 
 # Add dynamically linked library location
-$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
+$PYTHON_EXECUTABLE -m extension.llm.export.export_llm ${EXPORT_ARGS}
 # Create tokenizer.bin.
 echo "Creating tokenizer.bin"
diff --git a/.ci/scripts/test_llama_torchao_lowbit.sh b/.ci/scripts/test_llama_torchao_lowbit.sh
index ac603cc5e83..21989d26770 100644
--- a/.ci/scripts/test_llama_torchao_lowbit.sh
+++ b/.ci/scripts/test_llama_torchao_lowbit.sh
@@ -70,16 +70,16 @@ QLINEAR_GROUP_SIZE=128 # Must be multiple of 16
 QEMBEDDING_BITWIDTH=4 # Can be 1-8
 QEMBEDDING_GROUP_SIZE=32 # Must be multiple of 16
 
-${PYTHON_EXECUTABLE} -m examples.models.llama.export_llama \
-    --checkpoint "${LLAMA_CHECKPOINT:?}" \
-    --params "${LLAMA_PARAMS:?}" \
-    -kv \
-    --use_sdpa_with_kv_cache \
-    --output_name=${MODEL_OUT} \
-    -qmode "torchao:8da${QLINEAR_BITWIDTH}w" \
-    --group_size ${QLINEAR_GROUP_SIZE} \
-    -E "torchao:${QEMBEDDING_BITWIDTH},${QEMBEDDING_GROUP_SIZE}" \
-    -d fp32
+${PYTHON_EXECUTABLE} -m extension.llm.export.export_llm \
+    base.checkpoint="${LLAMA_CHECKPOINT:?}" \
+    base.params="${LLAMA_PARAMS:?}" \
+    model.use_kv_cache=true \
+    model.use_sdpa_with_kv_cache=true \
+    export.output_name="${MODEL_OUT}" \
+    quantization.qmode="torchao:8da${QLINEAR_BITWIDTH}w" \
+    quantization.group_size=${QLINEAR_GROUP_SIZE} \
+    quantization.embedding_quantize=\"torchao:${QEMBEDDING_BITWIDTH},${QEMBEDDING_GROUP_SIZE}\" \
+    model.dtype_override=fp32
 
 # Test run
 ./cmake-out/examples/models/llama/llama_main --model_path=$MODEL_OUT --tokenizer_path=$TOKENIZER --prompt="Once upon a time,"
diff --git a/.ci/scripts/test_model.sh b/.ci/scripts/test_model.sh
index 4f8dc7a30e5..bbf879295ae 100755
--- a/.ci/scripts/test_model.sh
+++ b/.ci/scripts/test_model.sh
@@ -86,8 +86,8 @@ test_model() {
   if [[ "${MODEL_NAME}" == "llama2" ]]; then
     # Install requirements for export_llama
     bash examples/models/llama/install_requirements.sh
-    # Test export_llama script: python3 -m examples.models.llama.export_llama
-    "${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -c examples/models/llama/params/demo_rand_params.pth -p examples/models/llama/params/demo_config.json
+    # Test export_llm script: python3 -m extension.llm.export.export_llm
+    "${PYTHON_EXECUTABLE}" -m extension.llm.export.export_llm base.model_class="${MODEL_NAME}" base.checkpoint=examples/models/llama/params/demo_rand_params.pth base.params=examples/models/llama/params/demo_config.json
     run_portable_executor_runner
     rm "./${MODEL_NAME}.pte"
   fi
@@ -100,17 +100,17 @@ test_model() {
   if [[ "${MODEL_NAME}" == "qwen2_5" ]]; then
     # Install requirements for export_llama
     bash examples/models/llama/install_requirements.sh
-    # Test export_llama script: python3 -m examples.models.llama.export_llama.
+    # Test export_llm script: python3 -m extension.llm.export.export_llm.
     # Use Llama random checkpoint with Qwen 2.5 1.5b model configuration.
-    "${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -p examples/models/qwen2_5/1_5b_config.json
+    "${PYTHON_EXECUTABLE}" -m extension.llm.export.export_llm base.model_class="${MODEL_NAME}" base.params=examples/models/qwen2_5/1_5b_config.json
     rm "./${MODEL_NAME}.pte"
     return # Skip running with portable executor runnner since portable doesn't support Qwen's biased linears.
   fi
   if [[ "${MODEL_NAME}" == "phi_4_mini" ]]; then
     # Install requirements for export_llama
     bash examples/models/llama/install_requirements.sh
-    # Test export_llama script: python3 -m examples.models.llama.export_llama.
-    "${PYTHON_EXECUTABLE}" -m examples.models.llama.export_llama --model "${MODEL_NAME}" -p examples/models/phi_4_mini/config.json
+    # Test export_llm script: python3 -m extension.llm.export.export_llm.
+    "${PYTHON_EXECUTABLE}" -m extension.llm.export.export_llm base.model_class="${MODEL_NAME}" base.params=examples/models/phi_4_mini/config.json
     run_portable_executor_runner
     rm "./${MODEL_NAME}.pte"
     return
diff --git a/.github/workflows/android-perf.yml b/.github/workflows/android-perf.yml
index 1a6d63f1bd1..a7c2b9ca14c 100644
--- a/.github/workflows/android-perf.yml
+++ b/.github/workflows/android-perf.yml
@@ -214,23 +214,23 @@ jobs:
             --files "tokenizer.model" "params.json" "consolidated.00.pth"
           )
           # Export using ExecuTorch's model definition
-          python -m examples.models.llama.export_llama \
-            --model "llama3_2" \
-            --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
-            --params "${DOWNLOADED_PATH}/params.json" \
-            --use_sdpa_with_kv_cache \
-            -X \
-            --xnnpack-extended-ops \
-            --preq_mode 8da4w_output_8da8w \
-            --preq_group_size 32 \
-            --max_seq_length 2048 \
-            --max_context_length 2048 \
-            --output_name "${OUT_ET_MODEL_NAME}.pte" \
-            -kv \
-            -d fp32 \
-            --preq_embedding_quantize 8,0 \
-            --use_spin_quant native \
-            --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
+          python -m extension.llm.export.export_llm \
+            base.model_class="llama3_2" \
+            base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+            base.params="${DOWNLOADED_PATH}/params.json" \
+            model.use_sdpa_with_kv_cache=true \
+            backend.xnnpack.enabled=true \
+            backend.xnnpack.extended_ops=true \
+            base.preq_mode="8da4w_output_8da8w" \
+            base.preq_group_size=32 \
+            export.max_seq_length=2048 \
+            export.max_context_length=2048 \
+            export.output_name="${OUT_ET_MODEL_NAME}.pte" \
+            model.use_kv_cache=true \
+            model.dtype_override=fp32 \
+            base.preq_embedding_quantize=\'8,0\' \
+            quantization.use_spin_quant=native \
+            base.metadata='"{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"'
           ls -lh "${OUT_ET_MODEL_NAME}.pte"
         elif [[ ${{ matrix.config }} == "llama3_qlora" ]]; then
           # QAT + LoRA
@@ -241,53 +241,55 @@ jobs:
             --files "tokenizer.model" "params.json" "consolidated.00.pth"
           )
          # Export using ExecuTorch's model definition
-          python -m examples.models.llama.export_llama \
-            --model "llama3_2" \
-            --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
-            --params "${DOWNLOADED_PATH}/params.json" \
-            -qat \
-            -lora 16 \
-            --preq_mode 8da4w_output_8da8w \
-            --preq_group_size 32 \
-            --preq_embedding_quantize 8,0 \
-            --use_sdpa_with_kv_cache \
-            -kv \
-            -X \
-            --xnnpack-extended-ops \
-            -d fp32 \
-            --max_seq_length 2048 \
-            --max_context_length 2048 \
-            --output_name "${OUT_ET_MODEL_NAME}.pte" \
-            --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
+          python -m extension.llm.export.export_llm \
+            base.model_class="llama3_2" \
+            base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+            base.params="${DOWNLOADED_PATH}/params.json" \
+            quantization.use_qat=true \
+            base.use_lora=16 \
+            base.preq_mode="8da4w_output_8da8w" \
+            base.preq_group_size=32 \
+            base.preq_embedding_quantize=\'8,0\' \
+            model.use_sdpa_with_kv_cache=true \
+            model.use_kv_cache=true \
+            backend.xnnpack.enabled=true \
+            backend.xnnpack.extended_ops=true \
+            model.dtype_override=fp32 \
+            export.max_seq_length=2048 \
+            export.max_context_length=2048 \
+            export.output_name="${OUT_ET_MODEL_NAME}.pte" \
+            base.metadata='"{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"'
           ls -lh "${OUT_ET_MODEL_NAME}.pte"
         elif [[ ${{ matrix.config }} == "llama3_fb16" ]]; then
           # Original BF16 version, without any quantization
           DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth")
-          python -m examples.models.llama.export_llama \
-            --model "llama3_2" \
-            --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
-            --params "${DOWNLOADED_PATH}/params.json" \
-            -kv \
-            --use_sdpa_with_kv_cache \
-            -X \
-            -d bf16 \
-            --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}' \
-            --output_name="${OUT_ET_MODEL_NAME}.pte"
+          python -m extension.llm.export.export_llm \
+            base.model_class="llama3_2" \
+            base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+            base.params="${DOWNLOADED_PATH}/params.json" \
+            model.use_kv_cache=true \
+            model.use_sdpa_with_kv_cache=true \
+            backend.xnnpack.enabled=true \
+            model.dtype_override=bf16 \
+            base.metadata='"{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"' \
+            export.output_name="${OUT_ET_MODEL_NAME}.pte"
           ls -lh "${OUT_ET_MODEL_NAME}.pte"
         elif [[ ${{ matrix.config }} == "et_xnnpack_custom_spda_kv_cache_8da4w" ]]; then
           DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth")
-          python -m examples.models.llama.export_llama \
-            --model llama3_2 \
-            --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
-            --params "${DOWNLOADED_PATH}/params.json" \
-            -kv \
-            --use_sdpa_with_kv_cache \
-            -d fp32 \
-            -X \
-            --xnnpack-extended-ops \
-            -qmode 8da4w -G 32 -E 8,0 \
-            --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}' \
-            --output_name="${OUT_ET_MODEL_NAME}.pte"
+          python -m extension.llm.export.export_llm \
+            base.model_class=llama3_2 \
+            base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+            base.params="${DOWNLOADED_PATH}/params.json" \
+            model.use_kv_cache=true \
+            model.use_sdpa_with_kv_cache=true \
+            model.dtype_override=fp32 \
+            backend.xnnpack.enabled=true \
+            backend.xnnpack.extended_ops=true \
+            quantization.qmode=8da4w \
+            quantization.group_size=32 \
+            quantization.embedding_quantize=\'8,0\' \
+            base.metadata='"{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"' \
+            export.output_name="${OUT_ET_MODEL_NAME}.pte"
           ls -lh "${OUT_ET_MODEL_NAME}.pte"
         elif [[ ${{ matrix.config }} == "llama3_qnn_htp" ]]; then
           export QNN_SDK_ROOT=/tmp/qnn/2.28.0.241029
@@ -313,19 +315,19 @@ jobs:
       elif [[ "$HF_MODEL_REPO" == "Qwen/Qwen3-0.6B" ]]; then
        if [[ ${{ matrix.config }} == "et_xnnpack_custom_spda_kv_cache_8da4w" ]]; then
           DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "." --files "tokenizer.json")
-          python -m examples.models.llama.export_llama \
-            --model qwen3-0_6b \
-            --params examples/models/qwen3/0_6b_config.json \
-            -kv \
-            --use_sdpa_with_kv_cache \
-            -d fp32 \
-            -X \
-            --xnnpack-extended-ops \
-            -qmode 8da4w \
-            -G 32 \
-            -E 8,0 \
-            --metadata '{"get_bos_id": 151644, "get_eos_ids":[151645]}' \
-            --output_name="${OUT_ET_MODEL_NAME}.pte"
+          python -m extension.llm.export.export_llm \
+            base.model_class=qwen3_0_6b \
+            base.params=examples/models/qwen3/0_6b_config.json \
+            model.use_kv_cache=true \
+            model.use_sdpa_with_kv_cache=true \
+            model.dtype_override=fp32 \
+            backend.xnnpack.enabled=true \
+            backend.xnnpack.extended_ops=true \
+            quantization.qmode=8da4w \
+            quantization.group_size=32 \
+            quantization.embedding_quantize=\'8,0\' \
+            base.metadata='"{\"get_bos_id\":151644,\"get_eos_ids\":[151645]}"' \
+            export.output_name="${OUT_ET_MODEL_NAME}.pte"
           ls -lh "${OUT_ET_MODEL_NAME}.pte"
         fi
       fi
diff --git a/.github/workflows/apple-perf.yml b/.github/workflows/apple-perf.yml
index 0c03f55f82e..6b1666da642 100644
--- a/.github/workflows/apple-perf.yml
+++ b/.github/workflows/apple-perf.yml
@@ -223,23 +223,23 @@ jobs:
             --files "tokenizer.model" "params.json" "consolidated.00.pth"
           )
           # Export using ExecuTorch's model definition
-          ${CONDA_RUN} python -m examples.models.llama.export_llama \
-            --model "llama3_2" \
-            --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
-            --params "${DOWNLOADED_PATH}/params.json" \
-            --use_sdpa_with_kv_cache \
-            -X \
-            --xnnpack-extended-ops \
-            --preq_mode 8da4w_output_8da8w \
-            --preq_group_size 32 \
-            --max_seq_length 2048 \
-            --max_context_length 2048 \
-            --output_name "${OUT_ET_MODEL_NAME}.pte" \
-            -kv \
-            -d fp32 \
-            --preq_embedding_quantize 8,0 \
-            --use_spin_quant native \
-            --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
+          ${CONDA_RUN} python -m extension.llm.export.export_llm \
+            base.model_class="llama3_2" \
+            base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+            base.params="${DOWNLOADED_PATH}/params.json" \
+            model.use_sdpa_with_kv_cache=true \
+            backend.xnnpack.enabled=true \
+            backend.xnnpack.extended_ops=true \
+            base.preq_mode="8da4w_output_8da8w" \
+            base.preq_group_size=32 \
+            export.max_seq_length=2048 \
+            export.max_context_length=2048 \
+            export.output_name="${OUT_ET_MODEL_NAME}.pte" \
+            model.use_kv_cache=true \
+            model.dtype_override=fp32 \
+            base.preq_embedding_quantize=\'8,0\' \
+            quantization.use_spin_quant=native \
+            base.metadata='"{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"'
           ls -lh "${OUT_ET_MODEL_NAME}.pte"
         elif [[ ${{ matrix.config }} == "llama3_qlora" ]]; then
           # QAT + LoRA
@@ -250,87 +250,89 @@ jobs:
             --files "tokenizer.model" "params.json" "consolidated.00.pth"
           )
           # Export using ExecuTorch's model definition
-          ${CONDA_RUN} python -m examples.models.llama.export_llama \
-            --model "llama3_2" \
-            --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
-            --params "${DOWNLOADED_PATH}/params.json" \
-            -qat \
-            -lora 16 \
-            --preq_mode 8da4w_output_8da8w \
-            --preq_group_size 32 \
-            --preq_embedding_quantize 8,0 \
-            --use_sdpa_with_kv_cache \
-            -kv \
-            -X \
-            --xnnpack-extended-ops \
-            -d fp32 \
-            --max_seq_length 2048 \
-            --max_context_length 2048 \
-            --output_name "${OUT_ET_MODEL_NAME}.pte" \
-            --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
+          ${CONDA_RUN} python -m extension.llm.export.export_llm \
+            base.model_class="llama3_2" \
+            base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+            base.params="${DOWNLOADED_PATH}/params.json" \
+            quantization.use_qat=true \
+            base.use_lora=16 \
+            base.preq_mode="8da4w_output_8da8w" \
+            base.preq_group_size=32 \
+            base.preq_embedding_quantize=\'8,0\' \
+            model.use_sdpa_with_kv_cache=true \
+            model.use_kv_cache=true \
+            backend.xnnpack.enabled=true \
+            backend.xnnpack.extended_ops=true \
+            model.dtype_override=fp32 \
+            export.max_seq_length=2048 \
+            export.max_context_length=2048 \
+            export.output_name="${OUT_ET_MODEL_NAME}.pte" \
+            base.metadata='"{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"'
           ls -lh "${OUT_ET_MODEL_NAME}.pte"
         elif [[ ${{ matrix.config }} == "llama3_fb16" ]]; then
           # Original BF16 version, without any quantization
           DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth")
-          ${CONDA_RUN} python -m examples.models.llama.export_llama \
-            --model "llama3_2" \
-            --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
-            --params "${DOWNLOADED_PATH}/params.json" \
-            -kv \
-            --use_sdpa_with_kv_cache \
-            -X \
-            -d bf16 \
-            --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}' \
-            --output_name="${OUT_ET_MODEL_NAME}.pte"
+          ${CONDA_RUN} python -m extension.llm.export.export_llm \
+            base.model_class="llama3_2" \
+            base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+            base.params="${DOWNLOADED_PATH}/params.json" \
+            model.use_kv_cache=true \
+            model.use_sdpa_with_kv_cache=true \
+            backend.xnnpack.enabled=true \
+            model.dtype_override=bf16 \
+            base.metadata='"{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"' \
+            export.output_name="${OUT_ET_MODEL_NAME}.pte"
           ls -lh "${OUT_ET_MODEL_NAME}.pte"
         elif [[ ${{ matrix.config }} == "et_xnnpack_custom_spda_kv_cache_8da4w" ]]; then
           DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth")
-          ${CONDA_RUN} python -m examples.models.llama.export_llama \
-            --model llama3_2 \
-            --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
-            --params "${DOWNLOADED_PATH}/params.json" \
-            -kv \
-            --use_sdpa_with_kv_cache \
-            -d fp32 \
-            -X \
-            --xnnpack-extended-ops \
-            -qmode 8da4w -G 32 -E 8,0 \
-            --metadata '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}' \
-            --output_name="${OUT_ET_MODEL_NAME}.pte"
+          ${CONDA_RUN} python -m extension.llm.export.export_llm \
+            base.model_class=llama3_2 \
+            base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+            base.params="${DOWNLOADED_PATH}/params.json" \
+            model.use_kv_cache=true \
+            model.use_sdpa_with_kv_cache=true \
+            model.dtype_override=fp32 \
+            backend.xnnpack.enabled=true \
+            backend.xnnpack.extended_ops=true \
+            quantization.qmode=8da4w \
+            quantization.group_size=32 \
+            quantization.embedding_quantize=\'8,0\' \
+            base.metadata='"{\"get_bos_id\":128000,\"get_eos_ids\":[128009,128001]}"' \
+            export.output_name="${OUT_ET_MODEL_NAME}.pte"
           ls -lh "${OUT_ET_MODEL_NAME}.pte"
         elif [[ ${{ matrix.config }} == "llama3_coreml_ane" ]]; then
           # ANE
           DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "original" --files "tokenizer.model" "params.json" "consolidated.00.pth")
-          ${CONDA_RUN} python -m examples.models.llama.export_llama \
-            --checkpoint "${DOWNLOADED_PATH}/consolidated.00.pth" \
-            --params "${DOWNLOADED_PATH}/params.json" \
-            -E "4,32" \
-            -kv \
-            --disable_dynamic_shape \
-            --coreml \
-            --coreml-ios 18 \
-            --coreml-quantize c4w \
-            --coreml-compute-units cpu_and_ne \
-            --output_name="${OUT_ET_MODEL_NAME}.pte"
+          ${CONDA_RUN} python -m extension.llm.export.export_llm \
+            base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
+            base.params="${DOWNLOADED_PATH}/params.json" \
+            quantization.embedding_quantize=\'4,32\' \
+            model.use_kv_cache=true \
+            model.enable_dynamic_shape=false \
+            backend.coreml.enabled=true \
+            backend.coreml.ios=18 \
+            backend.coreml.quantize=c4w \
+            backend.coreml.compute_units=cpu_and_ne \
+            export.output_name="${OUT_ET_MODEL_NAME}.pte"
           ls -lh "${OUT_ET_MODEL_NAME}.pte"
         fi
       elif [[ "$HF_MODEL_REPO" == "Qwen/Qwen3-0.6B" ]]; then
         OUT_ET_MODEL_NAME=${OUT_ET_MODEL_NAME}_llm
         if [[ ${{ matrix.config }} == "et_xnnpack_custom_spda_kv_cache_8da4w" ]]; then
           DOWNLOADED_PATH=$(bash .ci/scripts/download_hf_hub.sh --model_id "${HF_MODEL_REPO}" --subdir "." --files "tokenizer.json")
-          ${CONDA_RUN} python -m examples.models.llama.export_llama \
-            --model qwen3-0_6b \
-            --params examples/models/qwen3/0_6b_config.json \
-            -kv \
-            --use_sdpa_with_kv_cache \
-            -d fp32 \
-            -X \
-            --xnnpack-extended-ops \
-            -qmode 8da4w \
-            -G 32 \
-            -E 8,0 \
-            --metadata '{"get_bos_id": 151644, "get_eos_ids":[151645]}' \
-            --output_name="${OUT_ET_MODEL_NAME}.pte"
+          ${CONDA_RUN} python -m extension.llm.export.export_llm \
+            base.model_class=qwen3_0_6b \
+            base.params=examples/models/qwen3/0_6b_config.json \
+            model.use_kv_cache=true \
+            model.use_sdpa_with_kv_cache=true \
+            model.dtype_override=fp32 \
+            backend.xnnpack.enabled=true \
+            backend.xnnpack.extended_ops=true \
+            quantization.qmode=8da4w \
+            quantization.group_size=32 \
+            quantization.embedding_quantize=\'8,0\' \
+            base.metadata='"{\"get_bos_id\":151644,\"get_eos_ids\":[151645]}"' \
+            export.output_name="${OUT_ET_MODEL_NAME}.pte"
           ls -lh "${OUT_ET_MODEL_NAME}.pte"
         fi
       fi
diff --git a/examples/demo-apps/android/LlamaDemo/run_instrumentation_test.sh b/examples/demo-apps/android/LlamaDemo/run_instrumentation_test.sh
index ff59fc56b2c..8c1ad52ef8b 100644
--- a/examples/demo-apps/android/LlamaDemo/run_instrumentation_test.sh
+++ b/examples/demo-apps/android/LlamaDemo/run_instrumentation_test.sh
@@ -14,7 +14,7 @@ curl -C - -Ls "https://raw.githubusercontent.com/karpathy/llama2.c/master/tokeni
 # Create params.json file
 touch params.json
 echo '{"dim": 768, "multiple_of": 32, "n_heads": 12, "n_layers": 12, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
-python -m examples.models.llama.export_llama -c stories110M.pt -p params.json -d fp16 -n stories110m_h.pte -kv
+python -m extension.llm.export.export_llm base.checkpoint=stories110M.pt base.params=params.json model.dtype_override=fp16 export.output_name=stories110m_h.pte model.use_kv_cache=true
 python -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin
 
 adb mkdir -p /data/local/tmp/llama
diff --git a/examples/models/llama/config/llm_config.py b/examples/models/llama/config/llm_config.py
index 9acd633fb21..72342199dfd 100644
--- a/examples/models/llama/config/llm_config.py
+++ b/examples/models/llama/config/llm_config.py
@@ -10,6 +10,11 @@
 Configurations for exporting Llama.
 Uses dataclasses, which integrate with OmegaConf and Hydra.
+
+Note:
+- Hydra is a bit finicky with string values that include quotation marks; please
+refer to https://hydra.cc/docs/1.2/advanced/override_grammar/basic/#quoted-values
+for more information.
 """
 
 import argparse
@@ -34,9 +39,9 @@ class ModelType(str, Enum):
     llama3_2_vision = "llama3_2_vision"
     static_llama = "static_llama"
     qwen2_5 = "qwen2_5"
-    qwen3_0_6b = "qwen3-0_6b"
-    qwen3_1_7b = "qwen3-1_7b"
-    qwen3_4b = "qwen3-4b"
+    qwen3_0_6b = "qwen3_0_6b"
+    qwen3_1_7b = "qwen3_1_7b"
+    qwen3_4b = "qwen3_4b"
     phi_4_mini = "phi_4_mini"
     smollm2 = "smollm2"
 
@@ -71,7 +76,7 @@ class BaseConfig:
         checkpoint_dir: Path to directory containing sharded checkpoint files.
         tokenizer_path: Path to the tokenizer file.
         metadata: Json string containing metadata information.
-            e.g. '{"get_bos_id":128000, "get_eos_ids":[128009, 128001]}'
+            e.g. '"{\"get_bos_id\":128000, \"get_eos_ids\":[128009, 128001]}"'
         use_lora: Rank of the LoRA, if set to 0 then this means no LoRA. For use with QAT.
         fairseq2: For legacy internal use cases, this is safe to ignore.
         preq_mode: Legacy option to specify how prequantized weights are loaded.
diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py
index 334f3ace712..685e9de9a2e 100644
--- a/examples/models/llama/export_llama_lib.py
+++ b/examples/models/llama/export_llama_lib.py
@@ -104,9 +104,9 @@
     "llama3_2",
     "static_llama",
     "qwen2_5",
-    "qwen3-0_6b",
-    "qwen3-1_7b",
-    "qwen3-4b",
+    "qwen3_0_6b",
+    "qwen3_1_7b",
+    "qwen3_4b",
     "phi_4_mini",
     "smollm2",
 ]
@@ -115,9 +115,9 @@
     "qwen2_5": "Qwen/Qwen2.5-1.5B",
     "phi_4_mini": "microsoft/Phi-4-mini-instruct",
     "smollm2": "HuggingFaceTB/SmolLM-135M",
-    "qwen3-0_6b": "Qwen/Qwen3-0.6B",
-    "qwen3-1_7b": "Qwen/Qwen3-1.7B",
-    "qwen3-4b": "Qwen/Qwen3-4B",
+    "qwen3_0_6b": "Qwen/Qwen3-0.6B",
+    "qwen3_1_7b": "Qwen/Qwen3-1.7B",
+    "qwen3_4b": "Qwen/Qwen3-4B",
 }
diff --git a/examples/models/qwen3/README.md b/examples/models/qwen3/README.md
index d31d491adf2..e24d8da2637 100644
--- a/examples/models/qwen3/README.md
+++ b/examples/models/qwen3/README.md
@@ -7,7 +7,7 @@ Qwen 3 uses the same example code as our optimized Llama model, while the checkp
 All commands for exporting and running Llama on various backends should also be applicable to Qwen 3, by swapping the following args:
 ```
-base.model_class=[qwen3-0_6b,qwen3-1_7b,qwen3-4b]
+base.model_class=[qwen3_0_6b,qwen3_1_7b,qwen3_4b]
 base.params=[examples/models/qwen3/0_6b_config.json,examples/models/qwen3/1_7b_config.json,examples/models/qwen3/4b_config.json]
 ```
 
@@ -17,7 +17,7 @@ Here is a basic example for exporting Qwen 3, although please refer to the Llama
 Export 0.6b to XNNPack, quantized with 8da4w:
 ```
 python -m extension.llm.export.export_llm \
-    base.model_class="qwen3-0_6b" \
+    base.model_class="qwen3_0_6b" \
     base.params="examples/models/qwen3/0_6b_config.json" \
     model.use_kv_cache=True \
     model.use_sdpa_with_kv_cache=True \
@@ -26,14 +26,14 @@ python -m extension.llm.export.export_llm \
     backend.xnnpack.extended_ops=True \
     quantization.qmode="8da4w" \
     base.metadata='"{\"get_bos_id\": 151644, \"get_eos_ids\":[151645]}"' \
-    export.output_name="qwen3-0_6b.pte" \
+    export.output_name="qwen3_0_6b.pte" \
     debug.verbose=True
 ```
 
 Export 1.7b to XNNPack, quantized with 8da4w:
 ```
 python -m extension.llm.export.export_llm \
-    base.model_class="qwen3-1_7b" \
+    base.model_class="qwen3_1_7b" \
     base.params="examples/models/qwen3/1_7b_config.json" \
     model.use_kv_cache=True \
     model.use_sdpa_with_kv_cache=True \
@@ -42,14 +42,14 @@ python -m extension.llm.export.export_llm \
     backend.xnnpack.extended_ops=True \
     quantization.qmode="8da4w" \
     base.metadata='"{\"get_bos_id\": 151644, \"get_eos_ids\":[151645]}"' \
-    export.output_name="qwen3-1_7b.pte" \
+    export.output_name="qwen3_1_7b.pte" \
     debug.verbose=True
 ```
 
 Export 4b to XNNPack, quantized with 8da4w:
 ```
 python -m extension.llm.export.export_llm \
-    base.model_class="qwen3-4b" \
+    base.model_class="qwen3_4b" \
     base.params="examples/models/qwen3/4b_config.json" \
     model.use_kv_cache=True \
     model.use_sdpa_with_kv_cache=True \
@@ -58,7 +58,7 @@ python -m extension.llm.export.export_llm \
     backend.xnnpack.extended_ops=True \
     quantization.qmode="8da4w" \
     base.metadata='"{\"get_bos_id\": 151644, \"get_eos_ids\":[151645]}"' \
-    export.output_name="qwen3-4b.pte" \
+    export.output_name="qwen3_4b.pte" \
     debug.verbose=True
 ```
 
@@ -66,8 +66,8 @@ python -m extension.llm.export.export_llm \
 With ExecuTorch pybindings:
 ```
 python -m examples.models.llama.runner.native
-    --model qwen3-0_6b \
-    --pte qwen3-0_6b.pte \
+    --model qwen3_0_6b \
+    --pte qwen3_0_6b.pte \
     --tokenizer ~/.cache/huggingface/hub/models--Qwen--Qwen3-0.6B/snapshots/a9c98e602b9d36d2a2f7ba1eb0f5f31e4e8e5143/tokenizer.json \
     --tokenizer_config ~/.cache/huggingface/hub/models--Qwen--Qwen3-0.6B/snapshots/a9c98e602b9d36d2a2f7ba1eb0f5f31e4e8e5143/tokenizer_config.json \
     --prompt "Who is the president of the US?" \
@@ -80,7 +80,7 @@ python -m examples.models.llama.runner.native
 With ExecuTorch's sample c++ runner (see the Llama README's [Step 3: Run on your computer to validate](../llama/README.md#step-3-run-on-your-computer-to-validate) to build the runner):
 ```
 cmake-out/examples/models/llama/llama_main
-    --model_path qwen3-0_6b.pte
+    --model_path qwen3_0_6b.pte
     --tokenizer_path ~/.cache/huggingface/hub/models--Qwen--Qwen3-0.6B/snapshots/a9c98e602b9d36d2a2f7ba1eb0f5f31e4e8e5143/tokenizer.json
     --prompt="Who is the president of the US?"
 ```
diff --git a/extension/android/executorch_android/android_test_setup.sh b/extension/android/executorch_android/android_test_setup.sh
index 682a1d16787..f521dac30c5 100644
--- a/extension/android/executorch_android/android_test_setup.sh
+++ b/extension/android/executorch_android/android_test_setup.sh
@@ -25,7 +25,7 @@ prepare_tinyllama() {
   # Create params.json file
   touch params.json
   echo '{"dim": 288, "multiple_of": 32, "n_heads": 6, "n_layers": 6, "norm_eps": 1e-05, "vocab_size": 32000}' > params.json
-  python -m examples.models.llama.export_llama -c stories15M.pt -p params.json -d fp16 -n stories15m_h.pte -kv
+  python -m extension.llm.export.export_llm base.checkpoint=stories15M.pt base.params=params.json model.dtype_override=fp16 export.output_name=stories15m_h.pte model.use_kv_cache=true
   python -m pytorch_tokenizers.tools.llama2c.convert -t tokenizer.model -o tokenizer.bin
 
   cp stories15m_h.pte "${BASEDIR}/src/androidTest/resources/stories.pte"
diff --git a/extension/llm/export/README.md b/extension/llm/export/README.md
index 1ac27306c86..96f36acc1b4 100644
--- a/extension/llm/export/README.md
+++ b/extension/llm/export/README.md
@@ -85,7 +85,7 @@ debug:
 ### Export Qwen3 0.6B with XNNPACK backend and quantization
 ```bash
 python -m extension.llm.export.export_llm \
-  base.model_class=qwen3-0_6b \
+  base.model_class=qwen3_0_6b \
   base.params=examples/models/qwen3/0_6b_config.json \
   base.metadata='{"get_bos_id": 151644, "get_eos_ids":[151645]}' \
   model.use_kv_cache=true \