File tree Expand file tree Collapse file tree 1 file changed +5
-1
lines changed
src/llama_cpp_agent/providers Expand file tree Collapse file tree 1 file changed +5
-1
lines changed Original file line number Diff line number Diff line change @@ -200,7 +200,7 @@ def create_completion(
200
200
)
201
201
data = response .json ()
202
202
203
- returned_data = { "choices" : [{ "text" : data [ "content" ]}]}
203
+ returned_data = data # This follows the same structure used by agent
204
204
return returned_data
205
205
206
206
def create_chat_completion (
@@ -309,7 +309,11 @@ def prepare_generation_settings(
309
309
if not self .llama_cpp_python_server :
310
310
settings_dictionary ["mirostat" ] = settings_dictionary .pop ("mirostat_mode" )
311
311
if self .llama_cpp_python_server :
312
+ # Max tokens shouldn't be -1
312
313
settings_dictionary ["max_tokens" ] = settings_dictionary .pop ("n_predict" )
314
+ if settings_dictionary ["max_tokens" ] == - 1 :
315
+ settings_dictionary ["max_tokens" ] = 8192 # A good value for non-limited responses
316
+ # But tests can be done in case this value stops structured output generation early
313
317
314
318
settings_dictionary ["stop" ] = settings_dictionary .pop (
315
319
"additional_stop_sequences"
You can’t perform that action at this time.
0 commit comments