
Commit a3766dd
feat: Multi-LoRA changes to match Llama wrapper for server
1 parent 2877e6e

2 files changed (+3, -8 lines)

llama_cpp/server/model.py

Lines changed: 1 addition & 2 deletions
@@ -268,8 +268,7 @@ def load_llama_from_model_settings(settings: ModelSettings) -> llama_cpp.Llama:
         # Sampling Params
         last_n_tokens_size=settings.last_n_tokens_size,
         # LoRA Params
-        lora_base=settings.lora_base,
-        lora_path=settings.lora_path,
+        lora_adapters=settings.lora_adapters,
         # Backend Params
         numa=settings.numa,
         # Chat Format Params
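
For context, a minimal sketch of what this change enables on the wrapper side: the server now forwards a single lora_adapters mapping of adapter path to scale, replacing the old lora_base/lora_path pair. The model and adapter paths below are hypothetical placeholders; only the lora_adapters keyword comes from this commit.

import llama_cpp

llm = llama_cpp.Llama(
    model_path="models/base.gguf",   # hypothetical base model path
    lora_adapters={
        "loras/style.gguf": 1.0,     # applied at full scale
        "loras/domain.gguf": 0.5,    # applied at half scale
        "loras/inactive.gguf": 0.0,  # scale 0.0: not used during inference
    },
)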

llama_cpp/server/settings.py

Lines changed: 2 additions & 6 deletions
@@ -113,13 +113,9 @@ class ModelSettings(BaseSettings):
         description="Last n tokens to keep for repeat penalty calculation.",
     )
     # LoRA Params
-    lora_base: Optional[str] = Field(
+    lora_adapters: Optional[Dict[str, float]] = Field(
         default=None,
-        description="Optional path to base model, useful if using a quantized base model and you want to apply LoRA to an f16 model.",
-    )
-    lora_path: Optional[str] = Field(
-        default=None,
-        description="Path to a LoRA file to apply to the model.",
+        description="Paths to LoRA adapter files and the scale to apply them at (adapters with a scale of 0.0 are not used during inference).",
     )
     # Backend Params
     numa: Union[bool, int] = Field(
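
A hedged usage sketch of the new field: ModelSettings is a pydantic BaseSettings, so the dict can be supplied programmatically as shown here (or through whatever config/environment mechanism the server already uses, which this commit does not touch). The paths and scales are illustrative.

from llama_cpp.server.settings import ModelSettings

settings = ModelSettings(
    model="models/base.gguf",         # required base model path (placeholder)
    lora_adapters={
        "loras/summarize.gguf": 0.8,  # path -> scale
        "loras/classify.gguf": 0.0,   # listed but inactive (scale 0.0)
    },
)
# load_llama_from_model_settings(settings) then passes this mapping
# straight through to llama_cpp.Llama, as shown in model.py above.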
