19
19
from dataclasses import dataclass
20
20
from typing import (
21
21
Any ,
22
+ Dict ,
22
23
Iterable ,
23
24
Iterator ,
24
25
List ,
25
26
Optional ,
26
27
Sequence ,
28
+ Tuple ,
27
29
Type ,
30
+ Union ,
28
31
TYPE_CHECKING ,
29
32
)
30
33
46
49
"DataSourceStreamWriter" ,
47
50
"DataSourceRegistration" ,
48
51
"InputPartition" ,
52
+ "SimpleDataSourceStreamReader" ,
49
53
"WriterCommitMessage" ,
50
54
"Filter" ,
51
55
"EqualTo" ,
@@ -80,7 +84,7 @@ class DataSource(ABC):
80
84
.. versionadded: 4.0.0
81
85
"""
82
86
83
- def __init__ (self , options : dict [str , str ]) -> None :
87
+ def __init__ (self , options : Dict [str , str ]) -> None :
84
88
"""
85
89
Initializes the data source with user-provided options.
86
90
@@ -110,7 +114,7 @@ def name(cls) -> str:
110
114
"""
111
115
return cls .__name__
112
116
113
- def schema (self ) -> StructType | str :
117
+ def schema (self ) -> Union [ StructType , str ] :
114
118
"""
115
119
Returns the schema of the data source.
116
120
@@ -257,7 +261,7 @@ def streamReader(self, schema: StructType) -> "DataSourceStreamReader":
257
261
)
258
262
259
263
260
- ColumnPath = tuple [str , ...]
264
+ ColumnPath = Tuple [str , ...]
261
265
"""
262
266
A tuple of strings representing a column reference.
263
267
@@ -403,7 +407,7 @@ class In(Filter):
403
407
"""
404
408
405
409
attribute : ColumnPath
406
- value : tuple [Any , ...]
410
+ value : Tuple [Any , ...]
407
411
408
412
409
413
@dataclass (frozen = True )
@@ -627,7 +631,7 @@ def partitions(self) -> Sequence[InputPartition]:
627
631
)
628
632
629
633
@abstractmethod
630
- def read (self , partition : InputPartition ) -> Iterator [tuple ] | Iterator ["RecordBatch" ]:
634
+ def read (self , partition : InputPartition ) -> Union [ Iterator [Tuple ], Iterator ["RecordBatch" ] ]:
631
635
"""
632
636
Generates data for a given partition and returns an iterator of tuples or rows.
633
637
@@ -756,7 +760,7 @@ def partitions(self, start: dict, end: dict) -> Sequence[InputPartition]:
756
760
)
757
761
758
762
@abstractmethod
759
- def read (self , partition : InputPartition ) -> Iterator [tuple ] | Iterator ["RecordBatch" ]:
763
+ def read (self , partition : InputPartition ) -> Union [ Iterator [Tuple ], Iterator ["RecordBatch" ] ]:
760
764
"""
761
765
Generates data for a given partition and returns an iterator of tuples or rows.
762
766
@@ -848,7 +852,7 @@ def initialOffset(self) -> dict:
848
852
messageParameters = {"feature" : "initialOffset" },
849
853
)
850
854
851
- def read (self , start : dict ) -> tuple [Iterator [tuple ], dict ]:
855
+ def read (self , start : dict ) -> Tuple [Iterator [Tuple ], dict ]:
852
856
"""
853
857
Read all available data from start offset and return the offset that next read attempt
854
858
starts from.
@@ -860,7 +864,7 @@ def read(self, start: dict) -> tuple[Iterator[tuple], dict]:
860
864
861
865
Returns
862
866
-------
863
- A :class:`tuple ` of an iterator of :class:`tuple ` and a dict\\ s
867
+ A :class:`Tuple ` of an iterator of :class:`Tuple ` and a dict\\ s
864
868
The iterator contains all the available records after start offset.
865
869
The dict is the end offset of this read attempt and the start of next read attempt.
866
870
"""
@@ -869,7 +873,7 @@ def read(self, start: dict) -> tuple[Iterator[tuple], dict]:
869
873
messageParameters = {"feature" : "read" },
870
874
)
871
875
872
- def readBetweenOffsets (self , start : dict , end : dict ) -> Iterator [tuple ]:
876
+ def readBetweenOffsets (self , start : dict , end : dict ) -> Iterator [Tuple ]:
873
877
"""
874
878
Read all available data from specific start offset and end offset.
875
879
This is invoked during failure recovery to re-read a batch deterministically.
@@ -884,7 +888,7 @@ def readBetweenOffsets(self, start: dict, end: dict) -> Iterator[tuple]:
884
888
885
889
Returns
886
890
-------
887
- iterator of :class:`tuple `\\ s
891
+ iterator of :class:`Tuple `\\ s
888
892
All the records between start offset and end offset.
889
893
"""
890
894
raise PySparkNotImplementedError (
0 commit comments