@@ -16,12 +16,10 @@
     Callable,
     Generic,
     Iterable,
-    Literal,
     Sequence,
     Type,
     TypeAlias,
     TypeVar,
-    overload,
 )
 from typing_extensions import (
     # Native in 3.11+
@@ -89,7 +87,6 @@
     RemoteCallHandler,
     ResponseSchema,
     TModelInfo,
-    TPrediction,
     check_model_namespace,
     load_struct,
     _model_spec_to_api_dict,
@@ -902,24 +899,23 @@ async def _fetch_file_handle(self, file_data: _LocalFileData) -> FileHandle:
         return await self._files_session._fetch_file_handle(file_data)
 
 
-AsyncPredictionChannel: TypeAlias = AsyncChannel[PredictionResult[T]]
-AsyncPredictionCM: TypeAlias = AsyncContextManager[AsyncPredictionChannel[T]]
+AsyncPredictionChannel: TypeAlias = AsyncChannel[PredictionResult]
+AsyncPredictionCM: TypeAlias = AsyncContextManager[AsyncPredictionChannel]
 
 
-class AsyncPredictionStream(PredictionStreamBase[TPrediction]):
+class AsyncPredictionStream(PredictionStreamBase):
     """Async context manager for an ongoing prediction process."""
 
     def __init__(
         self,
-        channel_cm: AsyncPredictionCM[TPrediction],
-        endpoint: PredictionEndpoint[TPrediction],
+        channel_cm: AsyncPredictionCM,
+        endpoint: PredictionEndpoint,
     ) -> None:
         """Initialize a prediction process representation."""
         self._resource_manager = AsyncExitStack()
-        self._channel_cm: AsyncPredictionCM[TPrediction] = channel_cm
-        self._channel: AsyncPredictionChannel[TPrediction] | None = None
-        # See comments in BasePrediction regarding not calling super().__init__() here
-        self._init_prediction(endpoint)
+        self._channel_cm: AsyncPredictionCM = channel_cm
+        self._channel: AsyncPredictionChannel | None = None
+        super().__init__(endpoint)
 
     @sdk_public_api_async()
     async def start(self) -> None:
@@ -976,7 +972,7 @@ async def __aiter__(self) -> AsyncIterator[LlmPredictionFragment]:
         self._mark_finished()
 
     @sdk_public_api_async()
-    async def wait_for_result(self) -> PredictionResult[TPrediction]:
+    async def wait_for_result(self) -> PredictionResult:
         """Wait for the result of the prediction."""
         async for _ in self:
             pass
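
(Note: taken together, the hunks above make AsyncPredictionStream non-generic, so caller annotations that previously parameterized the stream need a matching update. A minimal caller-side sketch follows; only AsyncPredictionStream, PredictionResult, and wait_for_result() come from the hunks above, and the import path is an assumption, not part of this diff.)

    from lmstudio import AsyncPredictionStream, PredictionResult  # import path assumed

    async def collect(stream: AsyncPredictionStream) -> PredictionResult:
        # Previously this parameter would have been AsyncPredictionStream[str] or
        # AsyncPredictionStream[DictObject]; after this change the bare name is used.
        return await stream.wait_for_result()
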
@@ -1011,34 +1007,6 @@ def _create_handle(self, model_identifier: str) -> "AsyncLLM":
         """Create a symbolic handle to the specified LLM model."""
         return AsyncLLM(model_identifier, self)
 
-    @overload
-    async def _complete_stream(
-        self,
-        model_specifier: AnyModelSpecifier,
-        prompt: str,
-        *,
-        response_format: Literal[None] = ...,
-        config: LlmPredictionConfig | LlmPredictionConfigDict | None = ...,
-        preset: str | None = ...,
-        on_message: PredictionMessageCallback | None = ...,
-        on_first_token: PredictionFirstTokenCallback | None = ...,
-        on_prediction_fragment: PredictionFragmentCallback | None = ...,
-        on_prompt_processing_progress: PromptProcessingCallback | None = ...,
-    ) -> AsyncPredictionStream[str]: ...
-    @overload
-    async def _complete_stream(
-        self,
-        model_specifier: AnyModelSpecifier,
-        prompt: str,
-        *,
-        response_format: ResponseSchema = ...,
-        config: LlmPredictionConfig | LlmPredictionConfigDict | None = ...,
-        preset: str | None = ...,
-        on_message: PredictionMessageCallback | None = ...,
-        on_first_token: PredictionFirstTokenCallback | None = ...,
-        on_prediction_fragment: PredictionFragmentCallback | None = ...,
-        on_prompt_processing_progress: PromptProcessingCallback | None = ...,
-    ) -> AsyncPredictionStream[DictObject]: ...
     async def _complete_stream(
         self,
         model_specifier: AnyModelSpecifier,
@@ -1051,7 +1019,7 @@ async def _complete_stream(
         on_first_token: PredictionFirstTokenCallback | None = None,
         on_prediction_fragment: PredictionFragmentCallback | None = None,
         on_prompt_processing_progress: PromptProcessingCallback | None = None,
-    ) -> AsyncPredictionStream[str] | AsyncPredictionStream[DictObject]:
+    ) -> AsyncPredictionStream:
         """Request a one-off prediction without any context and stream the generated tokens.
 
         Note: details of configuration fields may change in SDK feature releases.
@@ -1071,34 +1039,6 @@ async def _complete_stream(
         prediction_stream = AsyncPredictionStream(channel_cm, endpoint)
         return prediction_stream
 
-    @overload
-    async def _respond_stream(
-        self,
-        model_specifier: AnyModelSpecifier,
-        history: Chat | ChatHistoryDataDict | str,
-        *,
-        response_format: Literal[None] = ...,
-        config: LlmPredictionConfig | LlmPredictionConfigDict | None = ...,
-        preset: str | None = ...,
-        on_message: PredictionMessageCallback | None = ...,
-        on_first_token: PredictionFirstTokenCallback | None = ...,
-        on_prediction_fragment: PredictionFragmentCallback | None = ...,
-        on_prompt_processing_progress: PromptProcessingCallback | None = ...,
-    ) -> AsyncPredictionStream[str]: ...
-    @overload
-    async def _respond_stream(
-        self,
-        model_specifier: AnyModelSpecifier,
-        history: Chat | ChatHistoryDataDict | str,
-        *,
-        response_format: ResponseSchema = ...,
-        config: LlmPredictionConfig | LlmPredictionConfigDict | None = ...,
-        preset: str | None = ...,
-        on_message: PredictionMessageCallback | None = ...,
-        on_first_token: PredictionFirstTokenCallback | None = ...,
-        on_prediction_fragment: PredictionFragmentCallback | None = ...,
-        on_prompt_processing_progress: PromptProcessingCallback | None = ...,
-    ) -> AsyncPredictionStream[DictObject]: ...
     async def _respond_stream(
         self,
         model_specifier: AnyModelSpecifier,
@@ -1111,7 +1051,7 @@ async def _respond_stream(
         on_first_token: PredictionFirstTokenCallback | None = None,
         on_prediction_fragment: PredictionFragmentCallback | None = None,
         on_prompt_processing_progress: PromptProcessingCallback | None = None,
-    ) -> AsyncPredictionStream[str] | AsyncPredictionStream[DictObject]:
+    ) -> AsyncPredictionStream:
         """Request a response in an ongoing assistant chat session and stream the generated tokens.
 
         Note: details of configuration fields may change in SDK feature releases.
@@ -1250,32 +1190,6 @@ async def get_context_length(self) -> int:
 class AsyncLLM(AsyncModelHandle[AsyncSessionLlm]):
     """Reference to a loaded LLM model."""
 
-    @overload
-    async def complete_stream(
-        self,
-        prompt: str,
-        *,
-        response_format: Literal[None] = ...,
-        config: LlmPredictionConfig | LlmPredictionConfigDict | None = ...,
-        preset: str | None = ...,
-        on_message: PredictionMessageCallback | None = ...,
-        on_first_token: PredictionFirstTokenCallback | None = ...,
-        on_prediction_fragment: PredictionFragmentCallback | None = ...,
-        on_prompt_processing_progress: PromptProcessingCallback | None = ...,
-    ) -> AsyncPredictionStream[str]: ...
-    @overload
-    async def complete_stream(
-        self,
-        prompt: str,
-        *,
-        response_format: ResponseSchema = ...,
-        config: LlmPredictionConfig | LlmPredictionConfigDict | None = ...,
-        preset: str | None = ...,
-        on_message: PredictionMessageCallback | None = ...,
-        on_first_token: PredictionFirstTokenCallback | None = ...,
-        on_prediction_fragment: PredictionFragmentCallback | None = ...,
-        on_prompt_processing_progress: PromptProcessingCallback | None = ...,
-    ) -> AsyncPredictionStream[DictObject]: ...
     @sdk_public_api_async()
     async def complete_stream(
         self,
@@ -1288,7 +1202,7 @@ async def complete_stream(
         on_first_token: PredictionFirstTokenCallback | None = None,
         on_prediction_fragment: PredictionFragmentCallback | None = None,
         on_prompt_processing_progress: PromptProcessingCallback | None = None,
-    ) -> AsyncPredictionStream[str] | AsyncPredictionStream[DictObject]:
+    ) -> AsyncPredictionStream:
         """Request a one-off prediction without any context and stream the generated tokens.
 
         Note: details of configuration fields may change in SDK feature releases.
@@ -1305,32 +1219,6 @@ async def complete_stream(
             on_prompt_processing_progress=on_prompt_processing_progress,
         )
 
-    @overload
-    async def complete(
-        self,
-        prompt: str,
-        *,
-        response_format: Literal[None] = ...,
-        config: LlmPredictionConfig | LlmPredictionConfigDict | None = ...,
-        preset: str | None = ...,
-        on_message: PredictionMessageCallback | None = ...,
-        on_first_token: PredictionFirstTokenCallback | None = ...,
-        on_prediction_fragment: PredictionFragmentCallback | None = ...,
-        on_prompt_processing_progress: PromptProcessingCallback | None = ...,
-    ) -> PredictionResult[str]: ...
-    @overload
-    async def complete(
-        self,
-        prompt: str,
-        *,
-        response_format: ResponseSchema = ...,
-        config: LlmPredictionConfig | LlmPredictionConfigDict | None = ...,
-        preset: str | None = ...,
-        on_message: PredictionMessageCallback | None = ...,
-        on_first_token: PredictionFirstTokenCallback | None = ...,
-        on_prediction_fragment: PredictionFragmentCallback | None = ...,
-        on_prompt_processing_progress: PromptProcessingCallback | None = ...,
-    ) -> PredictionResult[DictObject]: ...
     @sdk_public_api_async()
     async def complete(
         self,
@@ -1343,7 +1231,7 @@ async def complete(
         on_first_token: PredictionFirstTokenCallback | None = None,
         on_prediction_fragment: PredictionFragmentCallback | None = None,
         on_prompt_processing_progress: PromptProcessingCallback | None = None,
-    ) -> PredictionResult[str] | PredictionResult[DictObject]:
+    ) -> PredictionResult:
         """Request a one-off prediction without any context.
 
         Note: details of configuration fields may change in SDK feature releases.
@@ -1365,32 +1253,6 @@ async def complete(
             pass
         return prediction_stream.result()
 
-    @overload
-    async def respond_stream(
-        self,
-        history: Chat | ChatHistoryDataDict | str,
-        *,
-        response_format: Literal[None] = ...,
-        config: LlmPredictionConfig | LlmPredictionConfigDict | None = ...,
-        preset: str | None = ...,
-        on_message: PredictionMessageCallback | None = ...,
-        on_first_token: PredictionFirstTokenCallback | None = ...,
-        on_prediction_fragment: PredictionFragmentCallback | None = ...,
-        on_prompt_processing_progress: PromptProcessingCallback | None = ...,
-    ) -> AsyncPredictionStream[str]: ...
-    @overload
-    async def respond_stream(
-        self,
-        history: Chat | ChatHistoryDataDict | str,
-        *,
-        response_format: ResponseSchema = ...,
-        config: LlmPredictionConfig | LlmPredictionConfigDict | None = ...,
-        preset: str | None = ...,
-        on_message: PredictionMessageCallback | None = ...,
-        on_first_token: PredictionFirstTokenCallback | None = ...,
-        on_prediction_fragment: PredictionFragmentCallback | None = ...,
-        on_prompt_processing_progress: PromptProcessingCallback | None = ...,
-    ) -> AsyncPredictionStream[DictObject]: ...
     @sdk_public_api_async()
     async def respond_stream(
         self,
@@ -1403,7 +1265,7 @@ async def respond_stream(
         on_first_token: PredictionFirstTokenCallback | None = None,
         on_prediction_fragment: PredictionFragmentCallback | None = None,
         on_prompt_processing_progress: PromptProcessingCallback | None = None,
-    ) -> AsyncPredictionStream[str] | AsyncPredictionStream[DictObject]:
+    ) -> AsyncPredictionStream:
         """Request a response in an ongoing assistant chat session and stream the generated tokens.
 
         Note: details of configuration fields may change in SDK feature releases.
@@ -1420,32 +1282,6 @@ async def respond_stream(
             on_prompt_processing_progress=on_prompt_processing_progress,
         )
 
-    @overload
-    async def respond(
-        self,
-        history: Chat | ChatHistoryDataDict | str,
-        *,
-        response_format: Literal[None] = ...,
-        config: LlmPredictionConfig | LlmPredictionConfigDict | None = ...,
-        preset: str | None = ...,
-        on_message: PredictionMessageCallback | None = ...,
-        on_first_token: PredictionFirstTokenCallback | None = ...,
-        on_prediction_fragment: PredictionFragmentCallback | None = ...,
-        on_prompt_processing_progress: PromptProcessingCallback | None = ...,
-    ) -> PredictionResult[str]: ...
-    @overload
-    async def respond(
-        self,
-        history: Chat | ChatHistoryDataDict | str,
-        *,
-        response_format: ResponseSchema = ...,
-        config: LlmPredictionConfig | LlmPredictionConfigDict | None = ...,
-        preset: str | None = ...,
-        on_message: PredictionMessageCallback | None = ...,
-        on_first_token: PredictionFirstTokenCallback | None = ...,
-        on_prediction_fragment: PredictionFragmentCallback | None = ...,
-        on_prompt_processing_progress: PromptProcessingCallback | None = ...,
-    ) -> PredictionResult[DictObject]: ...
     @sdk_public_api_async()
     async def respond(
         self,
@@ -1458,7 +1294,7 @@ async def respond(
         on_first_token: PredictionFirstTokenCallback | None = None,
         on_prediction_fragment: PredictionFragmentCallback | None = None,
         on_prompt_processing_progress: PromptProcessingCallback | None = None,
-    ) -> PredictionResult[str] | PredictionResult[DictObject]:
+    ) -> PredictionResult:
         """Request a response in an ongoing assistant chat session.
 
         Note: details of configuration fields may change in SDK feature releases.
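
(For context, a minimal caller-side sketch of the simplified surface after this change. Only complete() and the bare PredictionResult return type are taken from the hunks above; the import path, the result's content attribute, and how the AsyncLLM handle is obtained are assumptions, not part of this diff.)

    from lmstudio import AsyncLLM, PredictionResult  # import path assumed

    async def ask(llm: AsyncLLM, prompt: str) -> str:
        # complete() now returns a plain PredictionResult rather than
        # PredictionResult[str] / PredictionResult[DictObject], so the
        # structured-vs-text distinction is no longer visible to type checkers.
        result: PredictionResult = await llm.complete(prompt)
        content = result.content  # attribute name assumed
        # Callers that relied on the overloads for narrowing now check at runtime.
        return content if isinstance(content, str) else str(content)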