File tree Expand file tree Collapse file tree 1 file changed +9
-2
lines changed Expand file tree Collapse file tree 1 file changed +9
-2
lines changed Original file line number Diff line number Diff line change @@ -87,21 +87,26 @@ async def create_completion_with_chunkwise_beam(
87
87
"""
88
88
Chunkwise beam search hack
89
89
"""
90
+
90
91
async def _process_prefix(request: CompletionRequest):
    """Run a prompt-only completion pass over *request* and return it.

    The request is temporarily switched to ``max_tokens=0``, ``n=1``,
    ``echo=True`` and ``stream=False`` and handed to
    ``self.create_completion``. Afterwards ``max_tokens`` and ``n`` are
    restored to the values they had on entry, and ``echo`` / ``stream``
    are reset to ``False`` / ``True``.

    ``self`` and ``raw_request`` come from the enclosing scope.
    """
    og_max_tokens = request.max_tokens
    og_n = request.n

    request.max_tokens = 0
    request.n = 1
    request.echo = True
    request.stream = False
    try:
        return await self.create_completion(
            request,
            raw_request=raw_request,
        )
    finally:
        # Undo the overrides even when the completion call raises, so the
        # caller's request object is never left in its prefix-only state.
        request.max_tokens = og_max_tokens
        request.n = og_n
        # NOTE(review): echo/stream are reset to fixed values instead of
        # the values the request arrived with; a request sent with
        # echo=True or stream=False is silently changed. Confirm this is
        # intended before switching to saved originals.
        request.echo = False
        request.stream = True
102
107
103
108
res = await _process_prefix (request )
104
- input_str_len = len (request . prompt )
109
+ input_str_len = len (res . choices [ 0 ]. text )
105
110
106
111
async def _should_stop(final):
    """Tell whether generation should end after the response *final*.

    Only the first choice is inspected: the result is truthy when its
    ``finish_reason`` equals ``"stop"`` or its ``is_filtered`` flag is set.
    """
    first_choice = final.choices[0]
    return first_choice.finish_reason == "stop" or first_choice.is_filtered
@@ -121,6 +126,8 @@ async def _chunk_generator():
121
126
should_stop = await _should_stop (final )
122
127
final .choices [0 ].text = final .choices [0 ].text [input_str_len :]
123
128
output = final .choices [0 ].text
129
+ if self .request_logger :
130
+ logger .info (f"yielding chunk { num_chunks } text: { final .choices [0 ].text } " )
124
131
yield f"data: { final .model_dump_json ()} \n \n "
125
132
126
133
if should_stop :
You can’t perform that action at this time.
0 commit comments