@@ -285,6 +285,18 @@ def kv_cache_seq_keep(self, seq_id: int):
     def kv_cache_seq_shift(self, seq_id: int, p0: int, p1: int, shift: int):
         llama_cpp.llama_kv_cache_seq_add(self.ctx, seq_id, p0, p1, shift)
 
+    def lora_adapter_set(self, adapter: LlamaLoraAdapter, scale: float):
+        return_code = llama_cpp.llama_lora_adapter_set(self.ctx, adapter.lora_adapter, scale)
+        if return_code != 0:
+            raise RuntimeError(f"lora_adapter_set returned {return_code}")
+
+    def lora_adapter_remove(self, adapter: LlamaLoraAdapter) -> bool:
+        return_code = llama_cpp.llama_lora_adapter_remove(self.ctx, adapter.lora_adapter)
+        return return_code != 0
+
+    def lora_adapter_clear(self):
+        llama_cpp.llama_lora_adapter_clear(self.ctx)
+
     def get_state_size(self) -> int:
         return llama_cpp.llama_get_state_size(self.ctx)
 
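The three methods added above are thin wrappers over the llama.cpp C API, presumably on the internal context wrapper (the same class that carries the kv_cache_seq_* methods). A minimal usage sketch follows; it assumes the LlamaModel/LlamaContext wrappers from llama_cpp._internals, the llama_model_default_params/llama_context_default_params helpers from the bindings, the LlamaLoraAdapter class added later in this commit, and placeholder GGUF paths.

# Sketch only: the LlamaModel/LlamaContext constructor signatures are assumptions
# based on the surrounding internals; "model.gguf" and "adapter.gguf" are
# placeholder paths, not files shipped with the library.
import llama_cpp
from llama_cpp._internals import LlamaModel, LlamaContext, LlamaLoraAdapter

model = LlamaModel(path_model="model.gguf", params=llama_cpp.llama_model_default_params())
ctx = LlamaContext(model=model, params=llama_cpp.llama_context_default_params())
adapter = LlamaLoraAdapter(model, "adapter.gguf")

ctx.lora_adapter_set(adapter, 1.0)          # attach the adapter at full strength; raises on failure
removed = ctx.lora_adapter_remove(adapter)  # True if the adapter was actually attached
ctx.lora_adapter_clear()                    # detach every adapter from this context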
@@ -875,3 +887,45 @@ def close(self):
 
     def __del__(self):
         self.close()
+
+class LlamaLoraAdapter:
+    """Intermediate Python wrapper for a llama.cpp llama_lora_adapter.
+    NOTE: For stability it's recommended you use the Llama class instead."""
+
+    def __init__(
+        self,
+        model: LlamaModel,
+        lora_path: str,
+        *,
+        verbose: bool = True,
+    ):
+        self.model = model
+        self.lora_path = lora_path
+
+        lora_adapter = None
+
+        if not os.path.exists(lora_path):
+            raise ValueError(f"LoRA adapter path does not exist: {lora_path}")
+
+        with suppress_stdout_stderr(disable=verbose):
+            lora_adapter = llama_cpp.llama_lora_adapter_init(
+                self.model.model,
+                self.lora_path.encode("utf-8"),
+            )
+
+        if lora_adapter is None:
+            raise RuntimeError(
+                f"Failed to initialize LoRA adapter from lora path: {self.lora_path}"
+            )
+
+        # The llama_lora_adapter will be freed by the llama_model as part of its
+        # lifecycle. The llama_model destructor destroys each llama_lora_adapter,
+        # and the destructor for llama_lora_adapter calls llama_lora_adapter_free.
+        # All we do here is clear the wrapped reference when the LlamaModel wrapper
+        # is closed, so that the LlamaLoraAdapter wrapper reference is cleared
+        # when the llama_lora_adapters are freed.
+        def clear_lora_adapter():
+            self.lora_adapter = None
+        self.model._exit_stack.callback(clear_lora_adapter)
+
+        self.lora_adapter = lora_adapter