File tree Expand file tree Collapse file tree 1 file changed +9
-2
lines changed Expand file tree Collapse file tree 1 file changed +9
-2
lines changed Original file line number Diff line number Diff line change @@ -84,21 +84,26 @@ async def create_completion_with_chunkwise_beam(
84
84
"""
85
85
Chunkwise beam search hack
86
86
"""
87
+
87
88
async def _process_prefix (request : CompletionRequest ):
88
89
og_max_tokens = request .max_tokens
89
90
og_n = request .n
90
- request .max_tokens = 1
91
+ request .max_tokens = 0
91
92
request .n = 1
93
+ request .echo = True
94
+ request .stream = False
92
95
res = await self .create_completion (
93
96
request ,
94
97
raw_request = raw_request ,
95
98
)
96
99
request .max_tokens = og_max_tokens
97
100
request .n = og_n
101
+ request .echo = False
102
+ request .stream = True
98
103
return res
99
104
100
105
res = await _process_prefix (request )
101
- input_str_len = len (request . prompt )
106
+ input_str_len = len (res . choices [ 0 ]. text )
102
107
103
108
async def _should_stop (final ):
104
109
return final .choices [0 ].finish_reason == "stop" or final .choices [0 ].is_filtered
@@ -118,6 +123,8 @@ async def _chunk_generator():
118
123
should_stop = await _should_stop (final )
119
124
final .choices [0 ].text = final .choices [0 ].text [input_str_len :]
120
125
output = final .choices [0 ].text
126
+ if self .request_logger :
127
+ logger .info (f"yielding chunk { num_chunks } text: { final .choices [0 ].text } " )
121
128
yield f"data: { final .model_dump_json ()} \n \n "
122
129
123
130
if should_stop :
You can’t perform that action at this time.
0 commit comments