@@ -669,14 +669,16 @@ async def chat_completion_stream_generator(
669
669
and not reasoning_parser .is_reasoning_end (
670
670
previous_token_ids )):
671
671
assert reasoning_parser is not None
672
- delta_message = reasoning_parser .extract_reasoning_content_streaming (
673
- previous_text ,
674
- current_text ,
675
- delta_text ,
676
- previous_token_ids ,
677
- current_token_ids ,
678
- output .token_ids ,
679
- )
672
+ delta_message = (
673
+ reasoning_parser .
674
+ extract_reasoning_content_streaming (
675
+ previous_text ,
676
+ current_text ,
677
+ delta_text ,
678
+ previous_token_ids ,
679
+ current_token_ids ,
680
+ output .token_ids ,
681
+ ))
680
682
# When encountering think end id in delta_token_ids,
681
683
# process the `content`. Only keep 'content',
682
684
# remove 'reasoning_content'
@@ -747,14 +749,16 @@ async def chat_completion_stream_generator(
747
749
assert added_content_delta_arr is not None
748
750
assert reasoning_end_arr is not None
749
751
if not reasoning_end_arr [i ]:
750
- delta_message = reasoning_parser .extract_reasoning_content_streaming (
751
- previous_text ,
752
- current_text ,
753
- delta_text ,
754
- previous_token_ids ,
755
- current_token_ids ,
756
- output .token_ids ,
757
- )
752
+ delta_message = (
753
+ reasoning_parser .
754
+ extract_reasoning_content_streaming (
755
+ previous_text ,
756
+ current_text ,
757
+ delta_text ,
758
+ previous_token_ids ,
759
+ current_token_ids ,
760
+ output .token_ids ,
761
+ ))
758
762
# When encountering think end id in prompt_token_ids
759
763
# i.e {"enable_thinking": False},
760
764
# set reasoning status to end.
@@ -824,14 +828,15 @@ async def chat_completion_stream_generator(
824
828
))
825
829
# when only reasoning
826
830
elif self .reasoning_parser :
827
- delta_message = reasoning_parser .extract_reasoning_content_streaming (
828
- previous_text ,
829
- current_text ,
830
- delta_text ,
831
- previous_token_ids ,
832
- current_token_ids ,
833
- output .token_ids ,
834
- )
831
+ delta_message = (reasoning_parser .
832
+ extract_reasoning_content_streaming (
833
+ previous_text ,
834
+ current_text ,
835
+ delta_text ,
836
+ previous_token_ids ,
837
+ current_token_ids ,
838
+ output .token_ids ,
839
+ ))
835
840
# handle streaming just a content delta
836
841
else :
837
842
delta_message = DeltaMessage (content = delta_text )
0 commit comments