@@ -126,10 +126,10 @@ def mocked_requests_post(url: str, **kwargs: Any) -> MockResponse:
 def test_invoke_vllm(*args: Any) -> None:
     """Tests invoking vLLM endpoint."""
     llm = ChatOCIModelDeploymentVLLM(endpoint=CONST_ENDPOINT, model=CONST_MODEL_NAME)
+    assert llm.headers == {"route": DEFAULT_INFERENCE_ENDPOINT_CHAT}
     output = llm.invoke(CONST_PROMPT)
     assert isinstance(output, AIMessage)
     assert output.content == CONST_COMPLETION
-    assert llm.headers == {"route": DEFAULT_INFERENCE_ENDPOINT_CHAT}
 
 
 @pytest.mark.requires("ads")
@@ -139,10 +139,10 @@ def test_invoke_vllm(*args: Any) -> None:
 def test_invoke_tgi(*args: Any) -> None:
     """Tests invoking TGI endpoint using OpenAI Spec."""
     llm = ChatOCIModelDeploymentTGI(endpoint=CONST_ENDPOINT, model=CONST_MODEL_NAME)
+    assert llm.headers == {"route": DEFAULT_INFERENCE_ENDPOINT_CHAT}
     output = llm.invoke(CONST_PROMPT)
     assert isinstance(output, AIMessage)
     assert output.content == CONST_COMPLETION
-    assert llm.headers == {"route": DEFAULT_INFERENCE_ENDPOINT_CHAT}
 
 
 @pytest.mark.requires("ads")
@@ -154,6 +154,7 @@ def test_stream_vllm(*args: Any) -> None:
     llm = ChatOCIModelDeploymentVLLM(
         endpoint=CONST_ENDPOINT, model=CONST_MODEL_NAME, streaming=True
     )
+    assert llm.headers == {"route": DEFAULT_INFERENCE_ENDPOINT_CHAT}
     output = None
     count = 0
     for chunk in llm.stream(CONST_PROMPT):
@@ -167,7 +168,6 @@ def test_stream_vllm(*args: Any) -> None:
     assert output is not None
     if output is not None:
         assert str(output.content).strip() == CONST_COMPLETION
-    assert llm.headers == {"route": DEFAULT_INFERENCE_ENDPOINT_CHAT}
 
 
 async def mocked_async_streaming_response(
@@ -193,11 +193,11 @@ async def test_stream_async(*args: Any) -> None:
     llm = ChatOCIModelDeploymentVLLM(
         endpoint=CONST_ENDPOINT, model=CONST_MODEL_NAME, streaming=True
     )
+    assert llm.headers == {"route": DEFAULT_INFERENCE_ENDPOINT_CHAT}
     with mock.patch.object(
         llm,
         "_aiter_sse",
         mock.MagicMock(return_value=mocked_async_streaming_response()),
     ):
         chunks = [str(chunk.content) async for chunk in llm.astream(CONST_PROMPT)]
         assert "".join(chunks).strip() == CONST_COMPLETION
-        assert llm.headers == {"route": DEFAULT_INFERENCE_ENDPOINT_CHAT}