diff --git a/examples/models/llama/config/llm_config.py b/examples/models/llama/config/llm_config.py index 034d8af7562..201e3a5414a 100644 --- a/examples/models/llama/config/llm_config.py +++ b/examples/models/llama/config/llm_config.py @@ -65,7 +65,9 @@ class BaseConfig: params: Model parameters, such as n_layers, hidden_size, etc. If left empty will use defaults specified in model_args.py. checkpoint: Path to the checkpoint file. - If left empty, the model will be initialized with random weights. + If left empty, the model will either be initialized with random weights + if it is a Llama model or the weights will be downloaded from HuggingFace + if it is a non-Llama model. checkpoint_dir: Path to directory containing sharded checkpoint files. tokenizer_path: Path to the tokenizer file. metadata: Json string containing metadata information. diff --git a/examples/models/llama/export_llama_lib.py b/examples/models/llama/export_llama_lib.py index 9d5fcfdba25..88b79d30eb2 100644 --- a/examples/models/llama/export_llama_lib.py +++ b/examples/models/llama/export_llama_lib.py @@ -53,6 +53,8 @@ ) from executorch.util.activation_memory_profiler import generate_memory_trace +from omegaconf import DictConfig + from ..model_factory import EagerModelFactory from .source_transformation.apply_spin_quant_r1_r2 import ( fuse_layer_norms, @@ -571,12 +573,14 @@ def canonical_path(path: Union[str, Path], *, dir: bool = False) -> str: def export_llama( - export_options: Union[argparse.Namespace, LlmConfig], + export_options: Union[argparse.Namespace, LlmConfig, DictConfig], ) -> str: if isinstance(export_options, argparse.Namespace): # Legacy CLI. llm_config = LlmConfig.from_args(export_options) - elif isinstance(export_options, LlmConfig): + elif isinstance(export_options, LlmConfig) or isinstance( + export_options, DictConfig + ): # Hydra CLI. llm_config = export_options else: diff --git a/extension/llm/export/README.md b/extension/llm/export/README.md new file mode 100644 index 00000000000..1ac27306c86 --- /dev/null +++ b/extension/llm/export/README.md @@ -0,0 +1,137 @@ +# LLM Export API + +This directory contains the unified API for exporting Large Language Models (LLMs) to ExecuTorch. The `export_llm` module provides a streamlined interface to convert various LLM architectures to optimized `.pte` files for on-device inference. + +## Overview + +The LLM export process transforms a model from its original format to an optimized representation suitable for mobile and edge devices. This involves several key steps: + +1. **Model Instantiation**: Load the model architecture and weights from sources like Hugging Face +2. **Source Transformations**: Apply model-specific optimizations and quantization +3. **IR Export**: Convert to intermediate representations (EXIR, Edge dialect) +4. **Graph Transformations**: Apply backend-specific optimizations and PT2E quantization +5. **Backend Delegation**: Partition operations to hardware-specific backends (XNNPACK, CoreML, QNN, etc.) +6. 
**Serialization**: Export to final ExecuTorch `.pte` format + +## Supported Models + +- **Llama**: Llama 2, Llama 3, Llama 3.1, Llama 3.2 (1B, 3B, 8B variants) +- **Qwen**: Qwen 2.5, Qwen 3 (0.6B, 1.7B, 4B variants) +- **Phi**: Phi-3-Mini, Phi-4-Mini +- **Stories**: Stories110M (educational model) +- **SmolLM**: SmolLM2 + +## Usage + +The export API supports two configuration approaches: + +### Option 1: Hydra CLI Arguments + +Use structured configuration arguments directly on the command line: + +```bash +python -m extension.llm.export.export_llm \ + base.model_class=llama3 \ + model.use_sdpa_with_kv_cache=True \ + model.use_kv_cache=True \ + export.max_seq_length=128 \ + debug.verbose=True \ + backend.xnnpack.enabled=True \ + backend.xnnpack.extended_ops=True \ + quantization.qmode=8da4w +``` + +### Option 2: Configuration File + +Create a YAML configuration file and reference it: + +```bash +python -m extension.llm.export.export_llm --config my_config.yaml +``` + +Example `my_config.yaml`: +```yaml +base: + model_class: llama3 + tokenizer_path: /path/to/tokenizer.json + +model: + use_kv_cache: true + use_sdpa_with_kv_cache: true + enable_dynamic_shape: true + +export: + max_seq_length: 512 + output_dir: ./exported_models + output_name: llama3_optimized.pte + +quantization: + qmode: 8da4w + group_size: 32 + +backend: + xnnpack: + enabled: true + extended_ops: true + +debug: + verbose: true +``` + +**Important**: You cannot mix both approaches. Use either CLI arguments OR a config file, not both. + +## Example Commands + +### Export Qwen3 0.6B with XNNPACK backend and quantization +```bash +python -m extension.llm.export.export_llm \ + base.model_class=qwen3-0_6b \ + base.params=examples/models/qwen3/0_6b_config.json \ + base.metadata='{"get_bos_id": 151644, "get_eos_ids":[151645]}' \ + model.use_kv_cache=true \ + model.use_sdpa_with_kv_cache=true \ + model.dtype_override=FP32 \ + export.max_seq_length=512 \ + export.output_name=qwen3_0_6b.pte \ + quantization.qmode=8da4w \ + backend.xnnpack.enabled=true \ + backend.xnnpack.extended_ops=true \ + debug.verbose=true +``` + +### Export Phi-4-Mini with custom checkpoint +```bash +python -m extension.llm.export.export_llm \ + base.model_class=phi_4_mini \ + base.checkpoint=/path/to/phi4_checkpoint.pth \ + base.params=examples/models/phi-4-mini/config.json \ + base.metadata='{"get_bos_id":151643, "get_eos_ids":[151643]}' \ + model.use_kv_cache=true \ + model.use_sdpa_with_kv_cache=true \ + export.max_seq_length=256 \ + export.output_name=phi4_mini.pte \ + backend.xnnpack.enabled=true \ + debug.verbose=true +``` + +### Export with CoreML backend (iOS optimization) +```bash +python -m extension.llm.export.export_llm \ + base.model_class=llama3 \ + model.use_kv_cache=true \ + export.max_seq_length=128 \ + backend.coreml.enabled=true \ + backend.coreml.compute_units=ALL \ + quantization.pt2e_quantize=coreml_c4w \ + debug.verbose=true +``` + +## Configuration Options + +For a complete reference of all available configuration options, see the [LlmConfig class definition](../../../examples/models/llama/config/llm_config.py) which documents all supported parameters for base, model, export, quantization, backend, and debug configurations. 
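+
+## Programmatic Usage
+
+`export_llama` also accepts an `LlmConfig` constructed directly in Python, which can be
+convenient from scripts or tests. The sketch below is illustrative only: the field names
+mirror the YAML keys above, but check the `LlmConfig` definition for the authoritative
+schema and defaults.
+
+```python
+from executorch.examples.models.llama.config.llm_config import LlmConfig
+from executorch.examples.models.llama.export_llama_lib import export_llama
+
+# Start from the defaults and override only what you need.
+llm_config = LlmConfig()
+llm_config.base.model_class = "llama3"
+llm_config.model.use_kv_cache = True
+llm_config.export.max_seq_length = 128
+llm_config.backend.xnnpack.enabled = True
+
+# export_llama returns a string and writes the exported .pte artifact.
+export_llama(llm_config)
+```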
+ +## Further Reading + +- [Llama Examples](../../../examples/models/llama/README.md) - Comprehensive Llama export guide +- [LLM Runner](../runner/) - Running exported models +- [ExecuTorch Documentation](https://pytorch.org/executorch/) - Framework overview \ No newline at end of file diff --git a/extension/llm/export/export_llm.py b/extension/llm/export/export_llm.py index 09a15d6ab58..e995b329f30 100644 --- a/extension/llm/export/export_llm.py +++ b/extension/llm/export/export_llm.py @@ -23,8 +23,16 @@ backend.xnnpack.enabled=True \ backend.xnnpack.extended_ops=True \ quantization.qmode="8da4w" + +Example usage using config file: +python -m extension.llm.export.export_llm \ + --config example_llm_config.yaml """ +import argparse +import sys +from typing import Any, List, Tuple + import hydra from executorch.examples.models.llama.config.llm_config import LlmConfig @@ -36,10 +44,50 @@ cs.store(name="llm_config", node=LlmConfig) -@hydra.main(version_base=None, config_path=None, config_name="llm_config") -def main(llm_config: LlmConfig) -> None: +def parse_config_arg() -> Tuple[str, List[Any]]: + """First parse out the arg for whether to use Hydra or the old CLI.""" + parser = argparse.ArgumentParser(add_help=True) + parser.add_argument("--config", type=str, help="Path to the LlmConfig file") + args, remaining = parser.parse_known_args() + return args.config, remaining + + +def pop_config_arg() -> str: + """ + Removes '--config' and its value from sys.argv. + Assumes --config is specified and argparse has already validated the args. + """ + idx = sys.argv.index("--config") + value = sys.argv[idx + 1] + del sys.argv[idx : idx + 2] + return value + + +@hydra.main(version_base=None, config_name="llm_config") +def hydra_main(llm_config: LlmConfig) -> None: export_llama(OmegaConf.to_object(llm_config)) +def main() -> None: + config, remaining_args = parse_config_arg() + if config: + # Check if there are any remaining hydra CLI args when --config is specified + # This might change in the future to allow overriding config file values + if remaining_args: + raise ValueError( + "Cannot specify additional CLI arguments when using --config. " + f"Found: {remaining_args}. Use either --config file or hydra CLI args, not both." + ) + + config_file_path = pop_config_arg() + default_llm_config = LlmConfig() + llm_config_from_file = OmegaConf.load(config_file_path) + # Override defaults with values specified in the .yaml provided by --config. + merged_llm_config = OmegaConf.merge(default_llm_config, llm_config_from_file) + export_llama(merged_llm_config) + else: + hydra_main() + + if __name__ == "__main__": main() diff --git a/extension/llm/export/test/test_export_llm.py b/extension/llm/export/test/test_export_llm.py new file mode 100644 index 00000000000..1f230233867 --- /dev/null +++ b/extension/llm/export/test/test_export_llm.py @@ -0,0 +1,129 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. 
+ +import os +import sys +import tempfile +import unittest +from unittest.mock import MagicMock, patch + +from executorch.extension.llm.export.export_llm import ( + main, + parse_config_arg, + pop_config_arg, +) + + +class TestExportLlm(unittest.TestCase): + def test_parse_config_arg_with_config(self) -> None: + """Test parse_config_arg when --config is provided.""" + # Mock sys.argv to include --config + test_argv = ["script.py", "--config", "test_config.yaml", "extra", "args"] + with patch.object(sys, "argv", test_argv): + config_path, remaining = parse_config_arg() + self.assertEqual(config_path, "test_config.yaml") + self.assertEqual(remaining, ["extra", "args"]) + + def test_parse_config_arg_without_config(self) -> None: + """Test parse_config_arg when --config is not provided.""" + test_argv = ["script.py", "debug.verbose=True"] + with patch.object(sys, "argv", test_argv): + config_path, remaining = parse_config_arg() + self.assertIsNone(config_path) + self.assertEqual(remaining, ["debug.verbose=True"]) + + def test_pop_config_arg(self) -> None: + """Test pop_config_arg removes --config and its value from sys.argv.""" + test_argv = ["script.py", "--config", "test_config.yaml", "other", "args"] + with patch.object(sys, "argv", test_argv): + config_path = pop_config_arg() + self.assertEqual(config_path, "test_config.yaml") + self.assertEqual(sys.argv, ["script.py", "other", "args"]) + + @patch("executorch.extension.llm.export.export_llm.export_llama") + def test_with_config(self, mock_export_llama: MagicMock) -> None: + """Test main function with --config file and no hydra args.""" + # Create a temporary config file + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + f.write( + """ +base: + tokenizer_path: /path/to/tokenizer.json +export: + max_seq_length: 256 +""" + ) + config_file = f.name + + try: + test_argv = ["script.py", "--config", config_file] + with patch.object(sys, "argv", test_argv): + main() + + # Verify export_llama was called with config + mock_export_llama.assert_called_once() + called_config = mock_export_llama.call_args[0][0] + self.assertEqual( + called_config["base"]["tokenizer_path"], "/path/to/tokenizer.json" + ) + self.assertEqual(called_config["export"]["max_seq_length"], 256) + finally: + os.unlink(config_file) + + def test_with_cli_args(self) -> None: + """Test main function with only hydra CLI args.""" + test_argv = ["script.py", "debug.verbose=True"] + with patch.object(sys, "argv", test_argv): + with patch( + "executorch.extension.llm.export.export_llm.hydra_main" + ) as mock_hydra: + main() + mock_hydra.assert_called_once() + + def test_config_with_cli_args_error(self) -> None: + """Test that --config rejects additional CLI arguments to prevent mixing approaches.""" + # Create a temporary config file + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + f.write("base:\n checkpoint: /path/to/checkpoint.pth") + config_file = f.name + + try: + test_argv = ["script.py", "--config", config_file, "debug.verbose=True"] + with patch.object(sys, "argv", test_argv): + with self.assertRaises(ValueError) as cm: + main() + + error_msg = str(cm.exception) + self.assertIn( + "Cannot specify additional CLI arguments when using --config", + error_msg, + ) + finally: + os.unlink(config_file) + + def test_config_rejects_multiple_cli_args(self) -> None: + """Test that --config rejects multiple CLI arguments (not just single ones).""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f: + 
f.write("export:\n max_seq_length: 128") + config_file = f.name + + try: + test_argv = [ + "script.py", + "--config", + config_file, + "debug.verbose=True", + "export.output_dir=/tmp", + ] + with patch.object(sys, "argv", test_argv): + with self.assertRaises(ValueError): + main() + finally: + os.unlink(config_file) + + +if __name__ == "__main__": + unittest.main() diff --git a/requirements-dev.txt b/requirements-dev.txt index a4ed212fb65..07c63101eb8 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -9,3 +9,5 @@ wheel # For building the pip package archive. zstd # Imported by resolve_buck.py. lintrunner==0.12.7 lintrunner-adapters==0.12.4 +hydra-core>=1.3.0 +omegaconf>=2.3.0