Commit f944cc3

Merge pull request #85 from orangethewell/patch-1
Update `llama_cpp_server.py` fixing bugs with non-streaming response
2 parents b4f2e52 + 574c2c0 commit f944cc3

File tree: 1 file changed (+5, -1)

src/llama_cpp_agent/providers/llama_cpp_server.py

Lines changed: 5 additions & 1 deletion
@@ -200,7 +200,7 @@ def create_completion(
         )
         data = response.json()
 
-        returned_data = {"choices": [{"text": data["content"]}]}
+        returned_data = data  # This follows the same structure used by the agent
         return returned_data
 
     def create_chat_completion(
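For context on this hunk: the llama.cpp server returns a JSON object whose generated text sits under the "content" key, and the old code re-wrapped that object in an OpenAI-style "choices" envelope before returning it, while the agent expects the server's structure unchanged, which is what broke non-streaming responses. A minimal sketch of the old versus new behavior (the URL and request payload are illustrative assumptions, not taken from this file):

import requests

# Assumption: a llama.cpp server listening locally; the URL and payload
# here are illustrative, not the values used by llama_cpp_server.py.
response = requests.post(
    "http://localhost:8080/completion",
    json={"prompt": "Hello", "n_predict": 16, "stream": False},
)
data = response.json()

# Old behavior: re-wrap the payload, keeping only the generated text.
wrapped = {"choices": [{"text": data["content"]}]}

# New behavior: return the server's JSON unchanged, matching what the
# agent consumes downstream.
returned_data = data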
@@ -309,7 +309,11 @@ def prepare_generation_settings(
         if not self.llama_cpp_python_server:
             settings_dictionary["mirostat"] = settings_dictionary.pop("mirostat_mode")
         if self.llama_cpp_python_server:
+            # Max tokens shouldn't be -1
             settings_dictionary["max_tokens"] = settings_dictionary.pop("n_predict")
+            if settings_dictionary["max_tokens"] == -1:
+                settings_dictionary["max_tokens"] = 8192  # A good value for non-limited responses
+                # But tests can be done in case this value stops structured output generation
 
         settings_dictionary["stop"] = settings_dictionary.pop(
             "additional_stop_sequences"
