@@ -27,6 +27,12 @@ class LlamaCpp(LLM):
     model_path: str
     """The path to the Llama model file."""
 
+    lora_base: Optional[str] = None
+    """The path to the Llama LoRA base model."""
+
+    lora_path: Optional[str] = None
+    """The path to the Llama LoRA. If None, no LoRa is loaded."""
+
     n_ctx: int = Field(512, alias="n_ctx")
     """Token context window."""
 
@@ -87,13 +93,18 @@ class LlamaCpp(LLM):
     last_n_tokens_size: Optional[int] = 64
     """The number of tokens to look back when applying the repeat_penalty."""
 
+    use_mmap: Optional[bool] = True
+    """Whether to keep the model loaded in RAM"""
+
     streaming: bool = True
     """Whether to stream the results, token by token."""
 
     @root_validator()
     def validate_environment(cls, values: Dict) -> Dict:
         """Validate that llama-cpp-python library is installed."""
         model_path = values["model_path"]
+        lora_path = values["lora_path"]
+        lora_base = values["lora_base"]
         n_ctx = values["n_ctx"]
         n_parts = values["n_parts"]
         seed = values["seed"]
@@ -103,13 +114,16 @@ def validate_environment(cls, values: Dict) -> Dict:
         use_mlock = values["use_mlock"]
         n_threads = values["n_threads"]
         n_batch = values["n_batch"]
+        use_mmap = values["use_mmap"]
         last_n_tokens_size = values["last_n_tokens_size"]
 
         try:
             from llama_cpp import Llama
 
             values["client"] = Llama(
                 model_path=model_path,
+                lora_base=lora_base,
+                lora_path=lora_path,
                 n_ctx=n_ctx,
                 n_parts=n_parts,
                 seed=seed,
@@ -119,6 +133,7 @@ def validate_environment(cls, values: Dict) -> Dict:
                 use_mlock=use_mlock,
                 n_threads=n_threads,
                 n_batch=n_batch,
+                use_mmap=use_mmap,
                 last_n_tokens_size=last_n_tokens_size,
             )
         except ImportError:
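
For illustration, a minimal usage sketch of the new fields added by this diff. The import path reflects the LangChain layout of that era, and the model/adapter file paths are hypothetical placeholders, not files referenced by the PR:

from langchain.llms import LlamaCpp

llm = LlamaCpp(
    model_path="./models/ggml-model-q4_0.bin",   # hypothetical quantized base model
    lora_base="./models/ggml-model-f16.bin",     # hypothetical f16 base for the LoRA
    lora_path="./lora/ggml-adapter-model.bin",   # hypothetical LoRA adapter
    use_mmap=True,                               # new flag passed through to llama_cpp.Llama
)
print(llm("Q: What does a LoRA adapter do? A:"))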