Skip to content

Commit 53ebcc8

Browse files
authored
feat(server): Provide ability to dynamically allocate all threads if desired using -1 (#1364)
1 parent 507c1da commit 53ebcc8

File tree

1 file changed

+13
-3
lines changed

1 file changed

+13
-3
lines changed

llama_cpp/server/settings.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import multiprocessing
44

55
from typing import Optional, List, Literal, Union
6-
from pydantic import Field
6+
from pydantic import Field, root_validator
77
from pydantic_settings import BaseSettings
88

99
import llama_cpp
@@ -67,12 +67,12 @@ class ModelSettings(BaseSettings):
6767
n_threads: int = Field(
6868
default=max(multiprocessing.cpu_count() // 2, 1),
6969
ge=1,
70-
description="The number of threads to use.",
70+
description="The number of threads to use. Use -1 for max cpu threads",
7171
)
7272
n_threads_batch: int = Field(
7373
default=max(multiprocessing.cpu_count(), 1),
7474
ge=0,
75-
description="The number of threads to use when batch processing.",
75+
description="The number of threads to use when batch processing. Use -1 for max cpu threads",
7676
)
7777
rope_scaling_type: int = Field(
7878
default=llama_cpp.LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED
@@ -173,6 +173,16 @@ class ModelSettings(BaseSettings):
173173
default=True, description="Whether to print debug information."
174174
)
175175

176+
@root_validator(pre=True)  # pre=True: resolve the -1 sentinel before the fields' ge= bounds are checked
def set_dynamic_defaults(cls, values):
    """Replace the ``-1`` sentinel of the thread-count settings with the CPU count.

    Runs on the raw, not-yet-validated input so that ``-1`` never reaches the
    ``ge=`` lower-bound checks declared on ``n_threads`` and ``n_threads_batch``.

    Args:
        values: Mapping of raw setting names to their unvalidated values.
            It is updated in place.

    Returns:
        The same mapping, with ``n_threads`` / ``n_threads_batch`` set to
        ``multiprocessing.cpu_count()`` wherever they were given as ``-1``.
    """
    cpu_count = multiprocessing.cpu_count()
    for name in ("n_threads", "n_threads_batch"):
        raw = values.get(name)
        # Raw input may still be text at this stage (e.g. a value read from an
        # environment variable), so accept the string form of the sentinel
        # instead of letting it fail the lower-bound check later.
        if isinstance(raw, str):
            raw = raw.strip()
        if raw == -1 or raw == "-1":
            values[name] = cpu_count
    return values
185+
176186

177187
class ServerSettings(BaseSettings):
178188
"""Server settings used to configure the FastAPI and Uvicorn server."""

0 commit comments

Comments
 (0)