
Commit 8f43162

feat: Add multi LoRA support to internal model

1 parent 7403e00

1 file changed: +54 −0 lines changed

llama_cpp/_internals.py

Lines changed: 54 additions & 0 deletions
@@ -285,6 +285,18 @@ def kv_cache_seq_keep(self, seq_id: int):
     def kv_cache_seq_shift(self, seq_id: int, p0: int, p1: int, shift: int):
         llama_cpp.llama_kv_cache_seq_add(self.ctx, seq_id, p0, p1, shift)

+    def lora_adapter_set(self, adapter: LlamaLoraAdapter, scale: float):
+        return_code = llama_cpp.llama_lora_adapter_set(self.ctx, adapter.lora_adapter, scale)
+        if return_code != 0:
+            raise RuntimeError(f"lora_adapter_set returned {return_code}")
+
+    def lora_adapter_remove(self, adapter: LlamaLoraAdapter) -> bool:
+        return_code = llama_cpp.llama_lora_adapter_remove(self.ctx, adapter.lora_adapter)
+        return return_code != 0
+
+    def lora_adapter_clear(self):
+        llama_cpp.llama_lora_adapter_clear(self.ctx)
+
     def get_state_size(self) -> int:
         return llama_cpp.llama_get_state_size(self.ctx)
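
These three context-level methods are what makes the commit "multi" LoRA: adapters are attached per context, each with its own scale, so several can be active at the same time. A minimal usage sketch follows, assuming `ctx` is an existing LlamaContext and `style_adapter` / `domain_adapter` are already-initialized LlamaLoraAdapter instances (placeholder names, not part of this commit):

    # Hypothetical usage; ctx, style_adapter, and domain_adapter are assumed
    # to exist and are not defined in this commit.
    ctx.lora_adapter_set(style_adapter, 0.7)    # attach first adapter at 70% strength
    ctx.lora_adapter_set(domain_adapter, 1.0)   # stack a second adapter at full strength

    # ... run decoding with both adapters applied ...

    if not ctx.lora_adapter_remove(style_adapter):
        # the wrapper returns False when the adapter was not attached to this context
        print("style adapter was not attached")
    ctx.lora_adapter_clear()                    # detach all remaining adapters at once

Note the asymmetry in error handling: lora_adapter_set raises on a nonzero return code, while lora_adapter_remove reports "was it attached?" as a bool the caller can check.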

@@ -875,3 +887,45 @@ def close(self):

     def __del__(self):
         self.close()
+
+class LlamaLoraAdapter:
+    """Intermediate Python wrapper for a llama.cpp llama_lora_adapter.
+    NOTE: For stability it's recommended you use the Llama class instead."""
+
+    def __init__(
+        self,
+        model: LlamaModel,
+        lora_path: str,
+        *,
+        verbose: bool = True,
+    ):
+        self.model = model
+        self.lora_path = lora_path
+
+        lora_adapter = None
+
+        if not os.path.exists(lora_path):
+            raise ValueError(f"LoRA adapter path does not exist: {lora_path}")
+
+        with suppress_stdout_stderr(disable=verbose):
+            lora_adapter = llama_cpp.llama_lora_adapter_init(
+                self.model.model,
+                self.lora_path.encode("utf-8"),
+            )
+
+        if lora_adapter is None:
+            raise RuntimeError(
+                f"Failed to initialize LoRA adapter from lora path: {self.lora_path}"
+            )
+
+        # The llama_lora_adapter will be freed by the llama_model as part of its
+        # lifecycle: the llama_model destructor destroys each llama_lora_adapter,
+        # and the llama_lora_adapter destructor calls llama_lora_adapter_free.
+        # All we do here is register a callback that clears the wrapped reference
+        # when the LlamaModel wrapper is closed, so this LlamaLoraAdapter wrapper
+        # does not keep a dangling pointer once the adapters are freed.
+        def clear_lora_adapter():
+            self.lora_adapter = None
+        self.model._exit_stack.callback(clear_lora_adapter)
+
+        self.lora_adapter = lora_adapter
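
Putting the two halves together, a plausible end-to-end flow with these internals would look like the sketch below, assuming `model` is an initialized LlamaModel and `ctx` a LlamaContext created from it (both constructed elsewhere in `_internals.py` as usual); the adapter path is a placeholder:

    from llama_cpp._internals import LlamaLoraAdapter

    # model: an existing LlamaModel; ctx: a LlamaContext built from it.
    # "adapters/sql.gguf" is a placeholder path for illustration only.
    adapter = LlamaLoraAdapter(model, "adapters/sql.gguf", verbose=False)
    ctx.lora_adapter_set(adapter, 1.0)

Because the underlying llama_lora_adapter is owned by the llama_model, the wrapper deliberately exposes no free method of its own; the exit-stack callback only nulls the Python-side reference when the model is closed.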
