Skip to content

Commit a7a7076

Browse files
authored
Pass additional config settings to the server (#51)
* GPU split config settings
* Speculative decoding config settings
* Reasoning parsing config settings
1 parent e3a59c9 commit a7a7076

File tree

2 files changed

+81
-20
lines changed

2 files changed

+81
-20
lines changed

src/lmstudio/_kv_config.py

Lines changed: 43 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -77,28 +77,49 @@ def _gpu_offload_fields(
7777
offload_settings: DictObject,
7878
) -> Sequence[KvConfigFieldDict]:
7979
fields: list[KvConfigFieldDict] = []
80-
gpu_keys = (
81-
("ratio", f"{endpoint}.load.llama.acceleration.offloadRatio"),
82-
("mainGpu", "llama.load.mainGpu"),
83-
("splitStrategy", "llama.load.splitStrategy"),
84-
)
85-
for key, mapped_key in gpu_keys:
80+
remaining_keys = set(offload_settings.keys())
81+
simple_gpu_keys = (("ratio", f"{endpoint}.load.llama.acceleration.offloadRatio"),)
82+
for key, mapped_key in simple_gpu_keys:
8683
if key in offload_settings:
84+
remaining_keys.remove(key)
8785
fields.append({"key": mapped_key, "value": offload_settings[key]})
86+
split_config_keys = ("mainGpu", "splitStrategy", "disabledGpus")
87+
split_config_settings: dict[str, Any] = {}
88+
for key in split_config_keys:
89+
if key in offload_settings:
90+
remaining_keys.remove(key)
91+
split_config_settings[key] = offload_settings[key]
92+
if split_config_settings:
93+
fields.append({"key": "load.gpuSplitConfig", "value": split_config_settings})
94+
if remaining_keys:
95+
raise LMStudioValueError(
96+
f"Unknown GPU offload settings: {sorted(remaining_keys)}"
97+
)
8898
return fields
8999

90100

91101
# Some fields have different names in the client and server configs
102+
# (this map has also been used to avoid adding new key categories for new setting scopes)
92103
_CLIENT_TO_SERVER_KEYMAP = {
93104
"maxTokens": "maxPredictedTokens",
94105
"rawTools": "tools",
106+
# "reasoning" scope
107+
"reasoningParsing": "reasoning.parsing",
108+
# "speculativeDecoding" scope
109+
"draftModel": "speculativeDecoding.draftModel",
110+
"speculativeDecodingNumDraftTokensExact": "speculativeDecoding.numDraftTokensExact",
111+
"speculativeDecodingMinDraftLengthToConsider": "speculativeDecoding.minDraftLengthToConsider",
112+
"speculativeDecodingMinContinueDraftingProbability": "speculativeDecoding.minContinueDraftingProbability",
95113
}
96114

97115

98116
def _to_server_key(key: str) -> str:
99117
return _CLIENT_TO_SERVER_KEYMAP.get(key, key)
100118

101119

120+
_NOT_YET_SUPPORTED_KEYS: set[str] = set()
121+
122+
102123
def _to_kv_config_stack_base(
103124
config: DictObject,
104125
namespace: str,
@@ -114,9 +135,12 @@ def _to_kv_config_stack_base(
114135
# TODO: Define a JSON or TOML data file for mapping prediction config
115136
# fields to config stack entries (preferably JSON exported by
116137
# lmstudio-js rather than something maintained in the Python SDK)
138+
# https://github.com/lmstudio-ai/lmstudio-js/issues/253
139+
remaining_keys = set(config.keys() - _NOT_YET_SUPPORTED_KEYS)
117140

118141
for client_key in checkbox_keys:
119142
if client_key in config:
143+
remaining_keys.remove(client_key)
120144
server_key = _to_server_key(client_key)
121145
fields.append(
122146
_to_checkbox_kv(
@@ -125,12 +149,14 @@ def _to_kv_config_stack_base(
125149
)
126150
for client_key in simple_keys:
127151
if client_key in config:
152+
remaining_keys.remove(client_key)
128153
server_key = _to_server_key(client_key)
129154
fields.append(
130155
_to_simple_kv(f"{namespace}.{request}", server_key, config[client_key])
131156
)
132157
for client_key in llama_keys:
133158
if client_key in config:
159+
remaining_keys.remove(client_key)
134160
server_key = _to_server_key(client_key)
135161
fields.append(
136162
_to_simple_kv(
@@ -139,6 +165,7 @@ def _to_kv_config_stack_base(
139165
)
140166
for client_key in llama_checkbox_keys:
141167
if client_key in config:
168+
remaining_keys.remove(client_key)
142169
server_key = _to_server_key(client_key)
143170
fields.append(
144171
_to_checkbox_kv(
@@ -149,8 +176,12 @@ def _to_kv_config_stack_base(
149176
)
150177
for gpu_offload_key in gpu_offload_keys:
151178
if gpu_offload_key in config:
179+
remaining_keys.remove(gpu_offload_key)
152180
fields.extend(_gpu_offload_fields(namespace, config[gpu_offload_key]))
153181

182+
if remaining_keys:
183+
raise LMStudioValueError(f"Unknown config settings: {sorted(remaining_keys)}")
184+
154185
return fields
155186

156187

@@ -180,6 +211,7 @@ def _to_kv_config_stack_base(
180211
],
181212
}
182213

214+
183215
_EMBEDDING_LOAD_CONFIG_KEYS = {
184216
"checkbox_keys": [],
185217
"simple_keys": [
@@ -253,6 +285,11 @@ def load_config_to_kv_config_stack(
253285
"topKSampling",
254286
"toolCallStopStrings",
255287
"rawTools",
288+
"reasoningParsing",
289+
"draftModel",
290+
"speculativeDecodingNumDraftTokensExact",
291+
"speculativeDecodingMinDraftLengthToConsider",
292+
"speculativeDecodingMinContinueDraftingProbability",
256293
],
257294
"llama_keys": [
258295
"cpuThreads",

tests/test_kv_config.py

Lines changed: 38 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -247,15 +247,7 @@ def test_snake_case_conversion(
247247
config_type._from_api_dict(input_dict)
248248

249249

250-
_NOT_YET_SUPPORTED_KEYS = {
251-
"disabledGpus",
252-
"reasoningParsing",
253-
# "speculativeDecoding" scope
254-
"draftModel",
255-
"speculativeDecodingNumDraftTokensExact",
256-
"speculativeDecodingMinDraftLengthToConsider",
257-
"speculativeDecodingMinContinueDraftingProbability",
258-
}
250+
_NOT_YET_SUPPORTED_KEYS: set[str] = set()
259251

260252

261253
@pytest.mark.parametrize("keymap_dict,config_type", zip(KEYMAP_DICTS, KEYMAP_TYPES))
@@ -292,8 +284,14 @@ def test_kv_stack_field_coverage(
292284
"key": "embedding.load.llama.acceleration.offloadRatio",
293285
"value": 0.5,
294286
},
295-
{"key": "llama.load.mainGpu", "value": 0},
296-
{"key": "llama.load.splitStrategy", "value": "evenly"},
287+
{
288+
"key": "load.gpuSplitConfig",
289+
"value": {
290+
"mainGpu": 0,
291+
"splitStrategy": "evenly",
292+
"disabledGpus": [1, 2],
293+
},
294+
},
297295
],
298296
},
299297
"layerName": "apiOverride",
@@ -332,8 +330,14 @@ def test_kv_stack_field_coverage(
332330
"value": {"checked": True, "value": "f32"},
333331
},
334332
{"key": "llm.load.llama.acceleration.offloadRatio", "value": 0.5},
335-
{"key": "llama.load.mainGpu", "value": 0},
336-
{"key": "llama.load.splitStrategy", "value": "evenly"},
333+
{
334+
"key": "load.gpuSplitConfig",
335+
"value": {
336+
"mainGpu": 0,
337+
"splitStrategy": "evenly",
338+
"disabledGpus": [1, 2],
339+
},
340+
},
337341
]
338342
},
339343
}
@@ -392,7 +396,27 @@ def test_kv_stack_field_coverage(
392396
"value": ["yellow"],
393397
},
394398
{"key": "llm.prediction.tools", "value": {"type": "none"}},
395-
{"key": "llm.prediction.llama.cpuThreads", "value": 7.0},
399+
{
400+
"key": "llm.prediction.reasoning.parsing",
401+
"value": {"enabled": False, "startString": "", "endString": ""},
402+
},
403+
{
404+
"key": "llm.prediction.speculativeDecoding.draftModel",
405+
"value": "some-model-key",
406+
},
407+
{
408+
"key": "llm.prediction.speculativeDecoding.numDraftTokensExact",
409+
"value": 2,
410+
},
411+
{
412+
"key": "llm.prediction.speculativeDecoding.minDraftLengthToConsider",
413+
"value": 5,
414+
},
415+
{
416+
"key": "llm.prediction.speculativeDecoding.minContinueDraftingProbability",
417+
"value": 0.1,
418+
},
419+
{"key": "llm.prediction.llama.cpuThreads", "value": 7},
396420
],
397421
},
398422
"layerName": "apiOverride",

0 commit comments

Comments (0)