Commit cd2dc28

Merge pull request #14 from esolithe/concedo_experimental
Concedo experimental

2 parents: c2454e5 + d4316aa

File tree

7 files changed: +208, -63 lines

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 1 addition & 1 deletion

@@ -221,7 +221,7 @@ static ggml_cuda_device_info ggml_cuda_init() {
     //#else
     //    GGML_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: no\n", __func__);
     //#endif // GGML_CUDA_FORCE_CUBLAS
-    GGML_LOG_INFO("---\nInitializing CUDA/HIP, please wait, the following step may take a few minutes (only for first launch)...\nJust a moment, Please Be Patient...\n---\n");
+    GGML_LOG_INFO("---\nInitializing CUDA/HIP, please wait, the following step may take a few minutes (only for first launch)...\n---\n");
     GGML_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count);
     for (int id = 0; id < info.device_count; ++id) {
         int device_vmm = 0;

gpttype_adapter.cpp

Lines changed: 3 additions & 2 deletions

@@ -1942,8 +1942,8 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
         {
             printf("GLM-4 is broken on larger batch sizes in Vulkan. Clamp ignored in debug.\n");
         } else {
-            printf("GLM-4 is broken on larger batch sizes in Vulkan. Clamping ubatch size to 16.\n");
-            kcpp_data->n_ubatch = 16;
+            printf("GLM-4 is broken on larger batch sizes in Vulkan. Clamping ubatch size to 8.\n");
+            kcpp_data->n_ubatch = 8;
         }
     }
     #endif
@@ -2440,6 +2440,7 @@ ModelLoadResult gpttype_load_model(const load_model_inputs inputs, FileFormat in
             add_bos_token = false;
         }
     }
+    printf("Starting model warm up, please wait a moment...\n");

     //warmup at least 33 tokens to trigger batch
     std::vector<int> tmp;
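Context for the clamp above: n_ubatch caps how many tokens are processed per micro-batch during prompt ingestion, so lowering it from 16 to 8 adds prefill steps but does not shrink the usable context. A minimal standalone sketch of that chunking (an illustration of the general llama.cpp-style behavior, not KoboldCpp's actual decode loop):

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
    const size_t n_ubatch = 8;    // clamped value from this commit
    std::vector<int> prompt(33);  // 33 tokens, mirroring the warmup comment above
    // Walk the prompt in chunks of at most n_ubatch tokens.
    for (size_t i = 0; i < prompt.size(); i += n_ubatch) {
        const size_t n = std::min(n_ubatch, prompt.size() - i);
        std::printf("micro-batch: tokens [%zu, %zu)\n", i, i + n);
    }
    return 0;  // 33 tokens -> 5 micro-batches (8+8+8+8+1) instead of 3 at ubatch 16
}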

kcpp_adapters/AutoGuess.json

Lines changed: 24 additions & 24 deletions

@@ -14,25 +14,25 @@
     "search": ["<|im_start|>assistant", "<|im_end|>", "You are provided with function signatures within <tools>"],
     "name": "ChatML (Qwen 2.5 based).",
     "adapter": {
-        "system_start": "<|im_start|>system\n\n",
-        "system_end": "<|im_end|>\n\n",
-        "user_start": "<|im_start|>user\n\n",
-        "user_end": "<|im_end|>\n\n",
-        "assistant_start": "<|im_start|>assistant\n\n",
-        "assistant_end": "<|im_end|>\n\n",
+        "system_start": "<|im_start|>system\n",
+        "system_end": "<|im_end|>\n",
+        "user_start": "<|im_start|>user\n",
+        "user_end": "<|im_end|>\n",
+        "assistant_start": "<|im_start|>assistant\n",
+        "assistant_end": "<|im_end|>\n",
         "tools_start": "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n\n<tools>\n",
         "tools_end": "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n"
     }
 }, {
     "search": ["<|im_start|>assistant", "<|im_end|>"],
     "name": "ChatML (Generic).",
     "adapter": {
-        "system_start": "<|im_start|>system\n\n",
-        "system_end": "<|im_end|>\n\n",
-        "user_start": "<|im_start|>user\n\n",
-        "user_end": "<|im_end|>\n\n",
-        "assistant_start": "<|im_start|>assistant\n\n",
-        "assistant_end": "<|im_end|>\n\n"
+        "system_start": "<|im_start|>system\n",
+        "system_end": "<|im_end|>\n",
+        "user_start": "<|im_start|>user\n",
+        "user_end": "<|im_end|>\n",
+        "assistant_start": "<|im_start|>assistant\n",
+        "assistant_end": "<|im_end|>\n"
     }
 }, {
     "search": ["System role not supported", "<start_of_turn>"],
@@ -61,11 +61,11 @@
     "name": "Llama 3.x.",
     "adapter": {
         "system_start": "<|start_header_id|>system<|end_header_id|>\n\n",
-        "system_end": "<|eot_id|>\n\n",
+        "system_end": "<|eot_id|>",
         "user_start": "<|start_header_id|>user<|end_header_id|>\n\n",
-        "user_end": "<|eot_id|>\n\n",
+        "user_end": "<|eot_id|>",
         "assistant_start": "<|start_header_id|>assistant<|end_header_id|>\n\n",
-        "assistant_end": "<|eot_id|>\n\n"
+        "assistant_end": "<|eot_id|>"
     }
 }, {
     "search": ["<|header_start|>assistant<|header_end|>"],
@@ -82,22 +82,22 @@
     "search": ["[/INST]", "[SYSTEM_PROMPT]"],
     "name": "Mistral V7 (with system prompt)",
     "adapter": {
-        "system_start": "[SYSTEM_PROMPT] ",
+        "system_start": "[SYSTEM_PROMPT]",
         "system_end": "[/SYSTEM_PROMPT]",
-        "user_start": "[INST] ",
-        "user_end": "[/INST]",
-        "assistant_start": " ",
+        "user_start": "[INST]",
+        "user_end": "",
+        "assistant_start": "[/INST]",
         "assistant_end": "</s>"
     }
 }, {
     "search": ["[/INST]", "\"[INST] \" + system_message"],
     "name": "Mistral V3",
     "adapter": {
         "system_start": "[INST] ",
-        "system_end": "[/INST] ",
+        "system_end": "[/INST]",
         "user_start": "[INST] ",
-        "user_end": "[/INST] ",
-        "assistant_start": "",
+        "user_end": "",
+        "assistant_start": "[/INST]",
         "assistant_end": "</s>"
     }
 }, {
@@ -107,8 +107,8 @@
         "system_start": "[INST]",
         "system_end": "[/INST]\n",
         "user_start": "[INST]",
-        "user_end": "[/INST]\n",
-        "assistant_start": "",
+        "user_end": "",
+        "assistant_start": "[/INST]\n",
         "assistant_end": "</s>"
     }
 }, {
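The AutoGuess edits above do two things: ChatML role headers now end with a single "\n" (the doubled newline was not canonical ChatML), and the Mistral templates move "[/INST]" from user_end to assistant_start, so the closing tag is emitted only when an assistant turn actually begins. A minimal sketch of the assumed rendering semantics (each turn rendered as start + text + end; illustration only, not KoboldCpp's actual template code):

#include <cstdio>
#include <string>

int main() {
    // Fields from the corrected "ChatML (Generic)" adapter above.
    const std::string user_start      = "<|im_start|>user\n";
    const std::string user_end        = "<|im_end|>\n";
    const std::string assistant_start = "<|im_start|>assistant\n";

    // A one-turn exchange, leaving the prompt open for the assistant reply.
    const std::string prompt = user_start + "Hello!" + user_end + assistant_start;
    std::printf("%s", prompt.c_str());
    // Output:
    // <|im_start|>user
    // Hello!
    // <|im_end|>
    // <|im_start|>assistant
    return 0;
}

Under the same assumption, the new Mistral V3 fields render "[INST] Hello![/INST]" with the assistant text following immediately, rather than appending "[/INST] " to every user message regardless of whether an assistant turn comes next.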
