Merged
Changes from 2 commits
65 changes: 51 additions & 14 deletions .ci/scripts/test_llama_lora.sh
@@ -48,8 +48,17 @@ DOWNLOADED_PATH=$(
--model_id "${HF_MODEL_REPO}" \
--files "adapter_config.json" "adapter_model.pt" "consolidated.00.pth" "params.json" "tokenizer.model"
)
EXPORTED_MODEL_NAME="llama_3_2_1B_lora.pte"
# Export model.
# Build llama runner.
cmake_install_executorch_libraries
cmake_build_llama_runner

# Constants.
RUNTIME_ARGS="--tokenizer_path=${DOWNLOADED_PATH}/tokenizer.model --temperature=0 --seq_len=20 --warmup=1"
PROMPT="What happens if you eat watermelon seeds?"
EXPECTED_PREFIX="What happens if you eat watermelon seeds? Watermelon seeds are a good source of vitamin C,"

# Export LoRA PTE file.
MODEL_NAME="llama_3_2_1B_lora"
$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
base.params="${DOWNLOADED_PATH}/params.json" \
@@ -61,26 +70,17 @@ $PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
model.dtype_override="fp32" \
backend.xnnpack.enabled=true \
backend.xnnpack.extended_ops=true \
export.output_name="${EXPORTED_MODEL_NAME}"

# Build llama runner.
cmake_install_executorch_libraries
cmake_build_llama_runner
export.output_name="${MODEL_NAME}.pte"

PROMPT="What happens if you eat watermelon seeds?"
# Run llama runner
RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --tokenizer_path=${DOWNLOADED_PATH}/tokenizer.model --temperature=0 --seq_len=20 --warmup=1"

NOW=$(date +"%H:%M:%S")
echo "Starting to run llama runner at ${NOW}"
# shellcheck source=/dev/null
cmake-out/examples/models/llama/llama_main --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
cmake-out/examples/models/llama/llama_main --model_path=${MODEL_NAME}.pte --prompt="${PROMPT}" ${RUNTIME_ARGS} > result.txt
NOW=$(date +"%H:%M:%S")
echo "Finished at ${NOW}"

RESULT=$(cat result.txt)
EXPECTED_PREFIX="What happens if you eat watermelon seeds? Watermelon seeds are a good source of vitamin C,"

RESULT=$(cat result.txt)
if [[ "${RESULT}" == "${EXPECTED_PREFIX}"* ]]; then
echo "Expected result prefix: ${EXPECTED_PREFIX}"
echo "Actual result: ${RESULT}"
@@ -90,7 +90,44 @@ else
echo "Expected result prefix: ${EXPECTED_PREFIX}"
echo "Actual result: ${RESULT}"
echo "Failure; results not the same"
cleanup_files
exit 1
fi

# Export LoRA PTE, PTD file.
MODEL_SEPARATE="${MODEL_NAME}_separate"
$PYTHON_EXECUTABLE -m extension.llm.export.export_llm \
base.checkpoint="${DOWNLOADED_PATH}/consolidated.00.pth" \
base.params="${DOWNLOADED_PATH}/params.json" \
base.adapter_checkpoint="${DOWNLOADED_PATH}/adapter_model.pt" \
base.adapter_config="${DOWNLOADED_PATH}/adapter_config.json" \
base.tokenizer_path="${DOWNLOADED_PATH}/tokenizer.model" \
model.use_kv_cache=true \
model.use_sdpa_with_kv_cache=true \
model.dtype_override="fp32" \
backend.xnnpack.enabled=true \
backend.xnnpack.extended_ops=true \
export.output_name="${MODEL_SEPARATE}.pte" \
serialization.foundation_weights_file="${MODEL_SEPARATE}.ptd"

# Run llama runner.
NOW=$(date +"%H:%M:%S")
echo "Starting to run llama runner at ${NOW}"
# shellcheck source=/dev/null
cmake-out/examples/models/llama/llama_main --model_path=${MODEL_SEPARATE}.pte --data_path=${MODEL_SEPARATE}.ptd --prompt="${PROMPT}" ${RUNTIME_ARGS} > result2.txt
NOW=$(date +"%H:%M:%S")
echo "Finished at ${NOW}"

RESULT2=$(cat result2.txt)
if [[ "${RESULT2}" == "${EXPECTED_PREFIX}"* ]]; then
echo "Expected result prefix: ${EXPECTED_PREFIX}"
echo "Actual result: ${RESULT2}"
echo "Success"
cleanup_files
else
echo "Expected result prefix: ${EXPECTED_PREFIX}"
echo "Actual result: ${RESULT2}"
echo "Failure; results not the same"
cleanup_files
exit 1
fi
4 changes: 3 additions & 1 deletion backends/xnnpack/operators/node_visitor.py
@@ -621,8 +621,10 @@ def get_serialized_buffer_index(
ConstantDataOffset(offset=UINT64_MAX, size=size, named_key=named_key)
)

external_tag = tensor.meta.get("delegate_constant_tag", None)
custom_meta = tensor.meta.get("custom", None)
external_tag = custom_meta.get("delegate_constant_tag", None) if custom_meta else None
if external_tag is not None:
external_tag = custom_meta.get("delegate_constant_tag", None)
logging.info(
f"Adding constant data with name {tensor.name}, key {named_key} and external_tag {external_tag} to named_data_store"
)
2 changes: 1 addition & 1 deletion examples/models/llama/TARGETS
@@ -153,10 +153,10 @@ runtime.python_library(
"//caffe2:torch",
"//executorch/extension/llm/export/config:llm_config",
"//executorch/backends/vulkan/_passes:vulkan_passes",
"//executorch/exir/passes:external_constants_pass",
"//executorch/exir/passes:init_mutable_pass",
"//executorch/examples/models:model_base",
"//executorch/examples/models:models",
"//executorch/exir/passes:init_mutable_pass",
"//executorch/extension/llm/custom_ops:custom_ops_aot_py",
"//executorch/extension/llm/export:export_lib",
# one definition has to be included in the user of the library
16 changes: 16 additions & 0 deletions examples/models/llama/export_llama_lib.py
@@ -1078,6 +1078,22 @@ def _export_llama(llm_config: LlmConfig) -> LLMEdgeManager: # noqa: C901
llm_config.backend.xnnpack.enabled = True

if llm_config.backend.xnnpack.enabled:
if llm_config.serialization.foundation_weights_file is not None:
gen_tag_fn: Callable[[torch.fx.Node], str] = lambda x: (
llm_config.serialization.foundation_weights_file
if "lora" not in x.name
else None
)

from executorch.exir.passes.external_constants_pass import (
delegate_external_constants_pass_unlifted,
)

delegate_external_constants_pass_unlifted(
gm=builder_exported.pre_autograd_graph_module,
gen_tag_fn=gen_tag_fn,
)

builder = _to_edge_and_lower_llama_xnnpack(
builder_exported,
modelname,
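To make the tagging rule in this hunk concrete, here is a small sketch (not part of the diff; node names are made up for illustration) of how the gen_tag_fn behaves: constants whose names do not contain "lora" are tagged with the foundation weights file and end up in the external .ptd file, while LoRA adapter constants return None and stay inside the PTE.

# Illustrative sketch only; mirrors the lambda added in _export_llama above.
from typing import Optional

def make_gen_tag_fn(foundation_weights_file: str):
    # Non-LoRA constants get the external file name; LoRA adapter
    # constants return None and therefore remain in the PTE.
    def gen_tag_fn(node) -> Optional[str]:
        return None if "lora" in node.name else foundation_weights_file
    return gen_tag_fn

class FakeNode:
    # Stand-in for torch.fx.Node in this sketch; only .name is used.
    def __init__(self, name: str):
        self.name = name

tag = make_gen_tag_fn("llama_3_2_1B_lora_separate.ptd")
print(tag(FakeNode("layers_0_attention_wq_weight")))         # tagged -> external file
print(tag(FakeNode("layers_0_attention_wq_lora_a_weight")))  # None   -> stays in PTE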
24 changes: 23 additions & 1 deletion exir/passes/external_constants_pass.py
@@ -113,6 +113,28 @@ def delegate_external_constants_pass(
for node in module.graph.nodes:
if node.op == "placeholder" and is_param_node(ep, node):
if gen_tag_fn is not None:
node.meta["delegate_constant_tag"] = gen_tag_fn(node)
node.meta.setdefault("custom", {})
node.meta["custom"]["delegate_constant_tag"] = gen_tag_fn(node)
mutated = True
return PassResult(gm, mutated)


# Note: this pass must be run on an unlifted graph, e.g. ep.module(),
# and not on a lifted graph, e.g. ep.graph_module.
# This is using 'get_attr' to tag constants, which only appears in
# unlifted graphs.
def delegate_external_constants_pass_unlifted(
gm: GraphModule,
gen_tag_fn: Optional[Callable[[torch.fx.Node], str]] = None,
) -> PassResult:
mutated = False
for module in gm.modules():
if not isinstance(module, torch.fx.GraphModule):
continue
for node in module.graph.nodes:
if node.op == "get_attr":
if gen_tag_fn is not None:
node.meta.setdefault("custom", {})
node.meta["custom"]["delegate_constant_tag"] = gen_tag_fn(node)
mutated = True
return PassResult(gm, mutated)
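The note above is worth illustrating: the new pass has to see get_attr nodes, which only exist on the unlifted module. A minimal usage sketch, assuming an ExportedProgram named ep produced earlier in the export flow:

# Sketch, not part of the diff. `ep` is assumed to be a torch.export.ExportedProgram.
from executorch.exir.passes.external_constants_pass import (
    delegate_external_constants_pass_unlifted,
)

gm = ep.module()  # unlifted GraphModule: constants appear as get_attr nodes
pass_result = delegate_external_constants_pass_unlifted(
    gm=gm,
    # Tag every constant into one external file for this example.
    gen_tag_fn=lambda node: "foundation.ptd",
)
# pass_result is a PassResult indicating whether any constant node was tagged.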
3 changes: 3 additions & 0 deletions extension/llm/export/builder.py
@@ -541,6 +541,9 @@ def save_to_pte(self, output_name: str) -> None:
filename = save_pte_program(self.export_program, output_name, self.output_dir)
self._saved_pte_filename = filename

# Write any external tensor data (.ptd files) alongside the PTE, if present.
self.export_program.write_tensor_data_to_file(self.output_dir)

def get_saved_pte_filename(self) -> Optional[str]:
"""
Return the filename of the most recent saved .pte file. Return None if the model is not saved.
21 changes: 21 additions & 0 deletions extension/llm/export/config/llm_config.py
@@ -227,6 +227,20 @@ def __post_init__(self):
)


@dataclass
class SerializationConfig:
"""
Configures properties relevant to the serialization process.

Attributes:
foundation_weights_file: configure the foundation weights of a model
to be placed in a separate file, external to the PTE. Pass the
intended file name here.
"""

foundation_weights_file: Optional[str] = None


################################################################################
################################# DebugConfig ##################################
################################################################################
@@ -466,6 +480,7 @@ class LlmConfig:
base: BaseConfig = field(default_factory=BaseConfig)
model: ModelConfig = field(default_factory=ModelConfig)
export: ExportConfig = field(default_factory=ExportConfig)
serialization: SerializationConfig = field(default_factory=SerializationConfig)
debug: DebugConfig = field(default_factory=DebugConfig)
quantization: QuantizationConfig = field(default_factory=QuantizationConfig)
backend: BackendConfig = field(default_factory=BackendConfig)
@@ -546,6 +561,12 @@ def from_args(cls, args: argparse.Namespace) -> "LlmConfig": # noqa: C901
if hasattr(args, "export_only"):
llm_config.export.export_only = args.export_only

# SerializationConfig
if hasattr(args, "foundation_weights_file"):
llm_config.serialization.foundation_weights_file = (
args.foundation_weights_file
)

# QuantizationConfig
if hasattr(args, "quantization_mode"):
llm_config.quantization.qmode = args.quantization_mode
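For completeness, a hedged sketch of setting the new serialization option programmatically rather than through the CLI override used in the test script (the import path is assumed from the Buck target //executorch/extension/llm/export/config:llm_config):

# Sketch only; field names follow the diff above, module path is an assumption.
from executorch.extension.llm.export.config.llm_config import LlmConfig

llm_config = LlmConfig()
# Put foundation (non-LoRA) weights into a separate .ptd file next to the PTE;
# LoRA adapter weights remain serialized inside the PTE.
llm_config.serialization.foundation_weights_file = "llama_3_2_1B_lora_separate.ptd"

# Equivalent CLI override when invoking export_llm:
#   serialization.foundation_weights_file="llama_3_2_1B_lora_separate.ptd"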