4
4
from rocksdict import ReadOptions
5
5
6
6
from quixstreams .state .base .transaction import PartitionTransaction
7
- from quixstreams .state .exceptions import InvalidChangelogOffset
8
7
from quixstreams .state .metadata import DEFAULT_PREFIX , PREFIX_SEPARATOR
9
8
from quixstreams .state .recovery import ChangelogProducer
10
- from quixstreams .state .serialization import (
11
- DumpsFunc ,
12
- LoadsFunc ,
13
- serialize ,
14
- )
9
+ from quixstreams .state .serialization import DumpsFunc , LoadsFunc , serialize
15
10
16
- from .metadata import LATEST_EXPIRED_WINDOW_CF_NAME , LATEST_EXPIRED_WINDOW_TIMESTAMP_KEY
11
+ from .metadata import (
12
+ LATEST_EXPIRED_WINDOW_CF_NAME ,
13
+ LATEST_EXPIRED_WINDOW_TIMESTAMP_KEY ,
14
+ LATEST_TIMESTAMP_KEY ,
15
+ LATEST_TIMESTAMPS_CF_NAME ,
16
+ )
17
17
from .serialization import encode_window_key , encode_window_prefix , parse_window_key
18
18
from .state import WindowedTransactionState
19
19
22
22
23
23
24
24
class WindowedRocksDBPartitionTransaction (PartitionTransaction ):
25
- __slots__ = ("_latest_timestamp_ms" ,)
26
-
27
25
def __init__ (
28
26
self ,
29
27
partition : "WindowedRocksDBStorePartition" ,
30
28
dumps : DumpsFunc ,
31
29
loads : LoadsFunc ,
32
- latest_timestamp_ms : int ,
33
30
changelog_producer : Optional [ChangelogProducer ] = None ,
34
31
):
35
32
super ().__init__ (
@@ -39,7 +36,11 @@ def __init__(
39
36
changelog_producer = changelog_producer ,
40
37
)
41
38
self ._partition = cast ("WindowedRocksDBStorePartition" , self ._partition )
42
- self ._latest_timestamp_ms = latest_timestamp_ms
39
+ # Cache the metadata separately to avoid serdes on each access
40
+ # (we are 100% sure that the underlying types are immutable, while windows'
41
+ # values are not)
42
+ self ._latest_timestamps : dict [bytes , int ] = {}
43
+ self ._last_expired_timestamps : dict [bytes , int ] = {}
43
44
44
45
def as_state (self , prefix : Any = DEFAULT_PREFIX ) -> WindowedTransactionState :
45
46
return WindowedTransactionState (
@@ -51,15 +52,19 @@ def as_state(self, prefix: Any = DEFAULT_PREFIX) -> WindowedTransactionState:
51
52
),
52
53
)
53
54
54
- def get_latest_timestamp (self ) -> int :
55
- return self ._latest_timestamp_ms
55
def get_latest_timestamp(self, prefix: bytes) -> int:
    """
    Return the latest timestamp recorded for the given key prefix.

    The per-transaction cache `_latest_timestamps` is checked first. On a
    miss, the value is read through `self.get()` from the
    `LATEST_TIMESTAMPS_CF_NAME` column family (with `default=0`), stored in
    the cache and returned.

    :param prefix: key prefix the timestamp belongs to
    :return: the cached or stored timestamp for the prefix
    """
    latest_ts = self._latest_timestamps.get(prefix)
    if latest_ts is None:
        # Cache miss: fall back to the state lookup and remember the result
        # so that the next call for this prefix skips deserialization.
        latest_ts = self.get(
            key=LATEST_TIMESTAMP_KEY,
            prefix=prefix,
            cf_name=LATEST_TIMESTAMPS_CF_NAME,
            default=0,
        )
        self._latest_timestamps[prefix] = latest_ts
    return latest_ts
63
68
64
69
def get_window (
65
70
self ,
@@ -81,34 +86,16 @@ def update_window(
81
86
82
87
key = encode_window_key (start_ms , end_ms )
83
88
self .set (key = key , value = value , prefix = prefix )
84
- self ._latest_timestamp_ms = max (self ._latest_timestamp_ms , timestamp_ms )
89
+ latest_timestamp_ms = self .get_latest_timestamp (prefix = prefix )
90
+ self ._set_latest_timestamp (
91
+ prefix = prefix , timestamp_ms = max (latest_timestamp_ms , timestamp_ms )
92
+ )
85
93
86
94
def delete_window(self, start_ms: int, end_ms: int, prefix: bytes):
    """
    Delete a single window from the transaction.

    The window bounds are validated first (`_validate_duration` raises
    `ValueError` when `end_ms <= start_ms`), then the encoded window key is
    deleted under the given prefix.

    :param start_ms: window start
    :param end_ms: window end
    :param prefix: key prefix the window belongs to
    """
    self._validate_duration(start_ms=start_ms, end_ms=end_ms)
    window_key = encode_window_key(start_ms, end_ms)
    self.delete(prefix=prefix, key=window_key)
90
98
91
- def _flush (self , processed_offset : Optional [int ], changelog_offset : Optional [int ]):
92
- if self ._update_cache .is_empty ():
93
- return
94
-
95
- if changelog_offset is not None :
96
- current_changelog_offset = self ._partition .get_changelog_offset ()
97
- if (
98
- current_changelog_offset is not None
99
- and changelog_offset < current_changelog_offset
100
- ):
101
- raise InvalidChangelogOffset (
102
- "Cannot set changelog offset lower than already saved one"
103
- )
104
-
105
- self ._partition .write (
106
- cache = self ._update_cache ,
107
- processed_offset = processed_offset ,
108
- changelog_offset = changelog_offset ,
109
- latest_timestamp_ms = self ._latest_timestamp_ms ,
110
- )
111
-
112
99
def expire_windows (
113
100
self , duration_ms : int , prefix : bytes , grace_ms : int = 0
114
101
) -> list [tuple [tuple [int , int ], Any ]]:
@@ -134,16 +121,12 @@ def expire_windows(
134
121
Defaults to 0, meaning no grace period is applied.
135
122
:return: A sorted list of tuples in the format `((start, end), value)`.
136
123
"""
137
- latest_timestamp = self ._latest_timestamp_ms
124
+ latest_timestamp = self .get_latest_timestamp ( prefix = prefix )
138
125
start_to = latest_timestamp - duration_ms - grace_ms
139
126
start_from = - 1
140
127
141
128
# Find the latest start timestamp of the expired windows for the given key
142
- last_expired = self .get (
143
- key = LATEST_EXPIRED_WINDOW_TIMESTAMP_KEY ,
144
- prefix = prefix ,
145
- cf_name = LATEST_EXPIRED_WINDOW_CF_NAME ,
146
- )
129
+ last_expired = self ._get_last_expired_timestamp (prefix = prefix )
147
130
if last_expired is not None :
148
131
start_from = max (start_from , last_expired )
149
132
@@ -160,22 +143,15 @@ def expire_windows(
160
143
# Save the start of the latest expired window to the expiration index
161
144
latest_window = expired_windows [- 1 ]
162
145
last_expired__gt = latest_window [0 ][0 ]
163
- self .set (
164
- key = LATEST_EXPIRED_WINDOW_TIMESTAMP_KEY ,
165
- value = last_expired__gt ,
166
- prefix = prefix ,
167
- cf_name = LATEST_EXPIRED_WINDOW_CF_NAME ,
146
+
147
+ self ._set_last_expired_timestamp (
148
+ prefix = prefix , timestamp_ms = last_expired__gt
168
149
)
169
150
# Delete expired windows from the state
170
151
for (start , end ), _ in expired_windows :
171
152
self .delete_window (start , end , prefix = prefix )
172
153
return expired_windows
173
154
174
- def _serialize_key (self , key : Any , prefix : bytes ) -> bytes :
175
- # Allow bytes keys in WindowedStore
176
- key_bytes = key if isinstance (key , bytes ) else serialize (key , dumps = self ._dumps )
177
- return prefix + PREFIX_SEPARATOR + key_bytes
178
-
179
155
def get_windows (
180
156
self ,
181
157
start_from_ms : int ,
@@ -240,3 +216,46 @@ def get_windows(
240
216
result .append (((start , end ), value ))
241
217
242
218
return result
219
+
220
def _set_latest_timestamp(self, prefix: bytes, timestamp_ms: int):
    """
    Record the latest timestamp for the given key prefix.

    The value is written to the per-transaction cache first and then passed
    to `self.set()` under `LATEST_TIMESTAMP_KEY` in the
    `LATEST_TIMESTAMPS_CF_NAME` column family.

    :param prefix: key prefix the timestamp belongs to
    :param timestamp_ms: timestamp value to store
    """
    # Keep the cache in sync so later reads for this prefix are served
    # without going through `self.get()`.
    self._latest_timestamps[prefix] = timestamp_ms
    self.set(
        prefix=prefix,
        cf_name=LATEST_TIMESTAMPS_CF_NAME,
        key=LATEST_TIMESTAMP_KEY,
        value=timestamp_ms,
    )
228
+
229
+ def _get_last_expired_timestamp (self , prefix : bytes ) -> Optional [int ]:
230
+ cached_ts = self ._last_expired_timestamps .get (prefix )
231
+ if cached_ts is not None :
232
+ return cached_ts
233
+
234
+ stored_ts = self .get (
235
+ key = LATEST_EXPIRED_WINDOW_TIMESTAMP_KEY ,
236
+ prefix = prefix ,
237
+ cf_name = LATEST_EXPIRED_WINDOW_CF_NAME ,
238
+ )
239
+ self ._last_expired_timestamps [prefix ] = stored_ts
240
+ return stored_ts
241
+
242
def _set_last_expired_timestamp(self, prefix: bytes, timestamp_ms: int):
    """
    Record the start timestamp of the latest expired window for the prefix.

    The value is written to the per-transaction cache first and then passed
    to `self.set()` under `LATEST_EXPIRED_WINDOW_TIMESTAMP_KEY` in the
    `LATEST_EXPIRED_WINDOW_CF_NAME` column family.

    :param prefix: key prefix the timestamp belongs to
    :param timestamp_ms: timestamp value to store
    """
    # Keep the cache in sync so later reads for this prefix are served
    # without going through `self.get()`.
    self._last_expired_timestamps[prefix] = timestamp_ms
    self.set(
        prefix=prefix,
        cf_name=LATEST_EXPIRED_WINDOW_CF_NAME,
        key=LATEST_EXPIRED_WINDOW_TIMESTAMP_KEY,
        value=timestamp_ms,
    )
250
+
251
+ def _validate_duration (self , start_ms : int , end_ms : int ):
252
+ if end_ms <= start_ms :
253
+ raise ValueError (
254
+ f"Invalid window duration: window end { end_ms } is smaller or equal "
255
+ f"than window start { start_ms } "
256
+ )
257
+
258
def _serialize_key(self, key: Any, prefix: bytes) -> bytes:
    """
    Build the full storage key for `key` under the given prefix.

    Keys that are already `bytes` are used as-is; any other key is
    serialized with `serialize()` using the transaction's `_dumps` function.

    :param key: the key to serialize
    :param prefix: key prefix placed before `PREFIX_SEPARATOR`
    :return: `prefix + PREFIX_SEPARATOR + <key bytes>`
    """
    if isinstance(key, bytes):
        # Allow bytes keys in WindowedStore
        raw_key = key
    else:
        raw_key = serialize(key, dumps=self._dumps)
    return prefix + PREFIX_SEPARATOR + raw_key
0 commit comments