Skip to content

Commit 4084bf4

Browse files
authored
Refactor WindowedRocksDBPartitionTransaction.get_windows (#558)
* Create store and transaction_state fixtures for TestWindowedRocksDBPartitionTransactionState
  Moved some common parts to fixtures.
* Remove TestWindowedRocksDBPartitionTransactionState namespace
  This namespace is redundant, there are no other classes in this module.
* Add test case for update cache precedence over db
* Make WindowedRocksDBPartitionTransaction.get_windows a public method
* Refactor WindowedRocksDBPartitionTransaction.get_windows into a generator
  This change brings ~35% performance improvement, tested under these conditions:
    * 100k windows in RocksDB, of which
    * 50k windows are in the update cache, of which
    * 25k are marked as deleted
* Add missing prefix param in docstring
* Enable backwards iteration from get_windows
* Introduce pytest.mark.timeit and the --timeit argument
  Performance testing is crucial for our work. These tests help future-proof our efforts to stay ahead in terms of performance. However, we don't want these tests to run alongside the rest of the test suite by default.
* Add or correct docstrings and comments
* Rollback get_windows to return list
* Correct typing and docstring of expire_windows
1 parent a83a792 commit 4084bf4

File tree

6 files changed

+454
-130
lines changed

6 files changed

+454
-130
lines changed

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,3 +81,5 @@ addopts = "--log-disable=urllib3.connectionpool --log-disable=parso --log-disabl
8181
log_cli = true
8282
log_cli_level = "INFO"
8383
log_cli_format = "[%(levelname)s] %(name)s: %(message)s"
84+
# Custom markers
85+
markers = ["timeit"]

quixstreams/state/rocksdb/windowed/state.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from typing import Any, Optional, List, Tuple, TYPE_CHECKING
1+
from typing import Any, Optional, TYPE_CHECKING
22

33
from quixstreams.state.types import WindowedState
44

@@ -71,15 +71,37 @@ def get_latest_timestamp(self) -> int:
7171

7272
def expire_windows(
7373
self, duration_ms: int, grace_ms: int = 0
74-
) -> List[Tuple[Tuple[int, int], Any]]:
74+
) -> list[tuple[tuple[int, int], Any]]:
7575
"""
76-
Get a list of expired windows from RocksDB considering the current
77-
latest timestamp, window duration and grace period.
76+
Get all expired windows from RocksDB based on the latest timestamp,
77+
window duration, and an optional grace period.
7878
79-
It also marks the latest found window as expired in the expiration index, so
80-
calling this method multiple times will yield different results for the same
81-
"latest timestamp".
79+
This method marks the latest found window as expired in the expiration index,
80+
so consecutive calls may yield different results for the same "latest timestamp".
81+
82+
:param duration_ms: The duration of each window in milliseconds.
83+
:param grace_ms: An optional grace period in milliseconds to delay expiration.
84+
Defaults to 0, meaning no grace period is applied.
85+
:return: A sorted list of tuples in the format `((start, end), value)`.
8286
"""
8387
return self._transaction.expire_windows(
8488
duration_ms=duration_ms, grace_ms=grace_ms, prefix=self._prefix
8589
)
90+
91+
def get_windows(
92+
self, start_from_ms: int, start_to_ms: int, backwards: bool = False
93+
) -> list[tuple[tuple[int, int], Any]]:
94+
"""
95+
Get all windows that start between "start_from_ms" and "start_to_ms".
96+
97+
:param start_from_ms: The minimal window start time, exclusive.
98+
:param start_to_ms: The maximum window start time, inclusive.
99+
:param backwards: If True, windows are returned in reverse order.
100+
:return: A sorted list of tuples in the format `((start, end), value)`.
101+
"""
102+
return self._transaction.get_windows(
103+
start_from_ms=start_from_ms,
104+
start_to_ms=start_to_ms,
105+
prefix=self._prefix,
106+
backwards=backwards,
107+
)

quixstreams/state/rocksdb/windowed/transaction.py

Lines changed: 73 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
from typing import Any, Optional, List, Tuple, TYPE_CHECKING, cast
1+
from itertools import chain
2+
from typing import Any, Optional, TYPE_CHECKING, cast
23

34
from rocksdict import ReadOptions
45

@@ -110,26 +111,29 @@ def _flush(self, processed_offset: Optional[int], changelog_offset: Optional[int
110111

111112
def expire_windows(
112113
self, duration_ms: int, prefix: bytes, grace_ms: int = 0
113-
) -> List[Tuple[Tuple[int, int], Any]]:
114+
) -> list[tuple[tuple[int, int], Any]]:
114115
"""
115-
Get a list of expired windows from RocksDB considering latest timestamp,
116-
window size and grace period.
117-
It marks the latest found window as expired in the expiration index, so
118-
calling this method multiple times will yield different results for the same
119-
"latest timestamp".
116+
Get all expired windows from RocksDB based on the latest timestamp,
117+
window duration, and an optional grace period.
118+
119+
This method marks the latest found window as expired in the expiration index,
120+
so consecutive calls may yield different results for the same "latest timestamp".
120121
121122
How it works:
122-
- First, it looks for the start time of the last expired window for the current
123-
prefix using expiration cache. If it's found, it will be used to reduce
124-
the search space and to avoid returning already expired windows.
125-
- Then it goes over window segments and fetches the windows
126-
that should be expired.
127-
- At last, it updates the expiration cache with the start time of the latest
128-
found windows
129-
130-
:return: sorted list of tuples in format `((start, end), value)`
123+
- First, it checks the expiration cache for the start time of the last expired
124+
window for the current prefix. If found, this value helps reduce the search
125+
space and prevents returning previously expired windows.
126+
- Next, it iterates over window segments and identifies the windows that should
127+
be marked as expired.
128+
- Finally, it updates the expiration cache with the start time of the latest
129+
windows found.
130+
131+
:param duration_ms: The duration of each window in milliseconds.
132+
:param prefix: The key prefix for filtering windows.
133+
:param grace_ms: An optional grace period in milliseconds to delay expiration.
134+
Defaults to 0, meaning no grace period is applied.
135+
:return: A sorted list of tuples in the format `((start, end), value)`.
131136
"""
132-
133137
latest_timestamp = self._latest_timestamp_ms
134138
start_to = latest_timestamp - duration_ms - grace_ms
135139
start_from = -1
@@ -145,10 +149,12 @@ def expire_windows(
145149

146150
# Use the latest expired timestamp to limit the iteration over
147151
# only those windows that have not been expired before
148-
expired_windows = self._get_windows(
149-
start_from_ms=start_from,
150-
start_to_ms=start_to,
151-
prefix=prefix,
152+
expired_windows = list(
153+
self.get_windows(
154+
start_from_ms=start_from,
155+
start_to_ms=start_to,
156+
prefix=prefix,
157+
)
152158
)
153159
if expired_windows:
154160
# Save the start of the latest expired window to the expiration index
@@ -170,52 +176,65 @@ def _serialize_key(self, key: Any, prefix: bytes) -> bytes:
170176
key_bytes = key if isinstance(key, bytes) else serialize(key, dumps=self._dumps)
171177
return prefix + PREFIX_SEPARATOR + key_bytes
172178

173-
def _get_windows(
174-
self, start_from_ms: int, start_to_ms: int, prefix: bytes
175-
) -> List[Tuple[Tuple[int, int], Any]]:
179+
def get_windows(
180+
self,
181+
start_from_ms: int,
182+
start_to_ms: int,
183+
prefix: bytes,
184+
backwards: bool = False,
185+
) -> list[tuple[tuple[int, int], Any]]:
176186
"""
177-
Get all windows starting between "start_from" and "start_to"
178-
within the given prefix.
179-
187+
Get all windows that start between "start_from_ms" and "start_to_ms"
188+
within the specified prefix.
180189
181-
This function also checks the update cache in case some updates have not
182-
been committed to RocksDB yet.
190+
This function also checks the update cache for any updates not yet
191+
committed to RocksDB.
183192
184-
:param start_from_ms: minimal window start time, exclusive
185-
:param start_to_ms: maximum window start time, inclusive
186-
:return: sorted list of tuples in format `((start, end), value)`
193+
:param start_from_ms: The minimal window start time, exclusive.
194+
:param start_to_ms: The maximum window start time, inclusive.
195+
:param prefix: The key prefix for filtering windows.
196+
:param backwards: If True, windows are returned in reverse order.
197+
:return: A sorted list of tuples in the format `((start, end), value)`.
187198
"""
188-
189-
# Iterate over rocksdb within the given prefix and (start_form, start_to)
190-
# timestamps
191199
seek_from = max(start_from_ms, 0)
192200
seek_from_key = encode_window_prefix(prefix=prefix, start_ms=seek_from)
193201

194-
# Add +1 to make the "start_to" inclusive
202+
# Add +1 to make the upper bound inclusive
195203
seek_to = start_to_ms + 1
196204
seek_to_key = encode_window_prefix(prefix=prefix, start_ms=seek_to)
197205

198-
# Set iterator bounds to reduce the potential IO
206+
# Set iterator bounds to reduce IO by limiting the range of keys fetched
199207
read_opt = ReadOptions()
200208
read_opt.set_iterate_lower_bound(seek_from_key)
201209
read_opt.set_iterate_upper_bound(seek_to_key)
202210

203-
windows = {}
204-
for key, value in self._partition.iter_items(
211+
# Create an iterator over the state store
212+
db_windows = self._partition.iter_items(
205213
read_opt=read_opt, from_key=seek_from_key
206-
):
207-
message_key, start, end = parse_window_key(key)
214+
)
215+
216+
# Get cached updates with matching keys
217+
cached_windows = [
218+
(k, v)
219+
for k, v in self._update_cache.get("default", {}).get(prefix, {}).items()
220+
if seek_from_key < k <= seek_to_key
221+
]
222+
223+
# Iterate over stored and cached windows (cached come first) and
224+
# merge them in a single dict
225+
deleted_windows = set()
226+
merged_windows = {}
227+
for key, value in chain(cached_windows, db_windows):
228+
if value is DELETED:
229+
deleted_windows.add(key)
230+
elif key not in merged_windows and key not in deleted_windows:
231+
merged_windows[key] = value
232+
233+
final_windows = []
234+
for key in sorted(merged_windows, reverse=backwards):
235+
_, start, end = parse_window_key(key)
208236
if start_from_ms < start <= start_to_ms:
209-
windows[(start, end)] = self._deserialize_value(value)
210-
211-
for window_key, window_value in (
212-
self._update_cache.get("default", {}).get(prefix, {}).items()
213-
):
214-
message_key, start, end = parse_window_key(window_key)
215-
if window_value is DELETED:
216-
windows.pop((start, end), None)
217-
continue
218-
elif start_from_ms < start <= start_to_ms:
219-
windows[(start, end)] = self._deserialize_value(window_value)
220-
221-
return sorted(windows.items())
237+
value = self._deserialize_value(merged_windows[key])
238+
final_windows.append(((start, end), value))
239+
240+
return final_windows

quixstreams/state/types.py

Lines changed: 54 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import logging
2-
from typing import Protocol, Any, Optional, Tuple, List
2+
from typing import Any, Optional, Protocol, Tuple
33

44
logger = logging.getLogger(__name__)
55

@@ -50,17 +50,31 @@ def get_latest_timestamp(self) -> int:
5050

5151
def expire_windows(
5252
self, duration_ms: int, grace_ms: int = 0
53-
) -> List[Tuple[Tuple[int, int], Any]]:
53+
) -> list[tuple[tuple[int, int], Any]]:
5454
"""
55-
Get a list of expired windows from RocksDB considering the current
56-
latest timestamp, window duration and grace period.
55+
Get all expired windows from RocksDB based on the latest timestamp,
56+
window duration, and an optional grace period.
5757
58-
It also marks the latest found window as expired in the expiration index, so
59-
calling this method multiple times will yield different results for the same
60-
"latest timestamp".
58+
This method marks the latest found window as expired in the expiration index,
59+
so consecutive calls may yield different results for the same "latest timestamp".
6160
62-
:param duration_ms: duration of the windows in milliseconds
63-
:param grace_ms: grace period in milliseconds. Default - "0"
61+
:param duration_ms: The duration of each window in milliseconds.
62+
:param grace_ms: An optional grace period in milliseconds to delay expiration.
63+
Defaults to 0, meaning no grace period is applied.
64+
:return: A sorted list of tuples in the format `((start, end), value)`.
65+
"""
66+
...
67+
68+
def get_windows(
69+
self, start_from_ms: int, start_to_ms: int, backwards: bool = False
70+
) -> list[tuple[tuple[int, int], Any]]:
71+
"""
72+
Get all windows that start between "start_from_ms" and "start_to_ms".
73+
74+
:param start_from_ms: The minimal window start time, exclusive.
75+
:param start_to_ms: The maximum window start time, inclusive.
76+
:param backwards: If True, windows are returned in reverse order.
77+
:return: A sorted list of tuples in the format `((start, end), value)`.
6478
"""
6579
...
6680

@@ -160,18 +174,40 @@ def get_latest_timestamp(self) -> int:
160174
"""
161175
...
162176

163-
def expire_windows(self, duration_ms: int, prefix: bytes, grace_ms: int = 0):
177+
def expire_windows(
178+
self, duration_ms: int, prefix: bytes, grace_ms: int = 0
179+
) -> list[tuple[tuple[int, int], Any]]:
164180
"""
165-
Get a list of expired windows from RocksDB considering the current
166-
latest timestamp, window duration and grace period.
181+
Get all expired windows from RocksDB based on the latest timestamp,
182+
window duration, and an optional grace period.
167183
168-
It also marks the latest found window as expired in the expiration index, so
169-
calling this method multiple times will yield different results for the same
170-
"latest timestamp".
184+
This method marks the latest found window as expired in the expiration index,
185+
so consecutive calls may yield different results for the same "latest timestamp".
171186
172-
:param duration_ms: duration of the windows in milliseconds
173-
:param prefix: a key prefix
174-
:param grace_ms: grace period in milliseconds. Default - "0"
187+
:param duration_ms: The duration of each window in milliseconds.
188+
:param prefix: The key prefix for filtering windows.
189+
:param grace_ms: An optional grace period in milliseconds to delay expiration.
190+
Defaults to 0, meaning no grace period is applied.
191+
:return: A sorted list of tuples in the format `((start, end), value)`.
192+
"""
193+
...
194+
195+
def get_windows(
196+
self,
197+
start_from_ms: int,
198+
start_to_ms: int,
199+
prefix: bytes,
200+
backwards: bool = False,
201+
) -> list[tuple[tuple[int, int], Any]]:
202+
"""
203+
Get all windows that start between "start_from_ms" and "start_to_ms"
204+
within the specified prefix.
205+
206+
:param start_from_ms: The minimal window start time, exclusive.
207+
:param start_to_ms: The maximum window start time, inclusive.
208+
:param prefix: The key prefix for filtering windows.
209+
:param backwards: If True, windows are returned in reverse order.
210+
:return: A sorted list of tuples in the format `((start, end), value)`.
175211
"""
176212
...
177213

tests/conftest.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,19 @@
3030
test_logger = logging.getLogger("quixstreams.tests")
3131

3232

33+
def pytest_addoption(parser):
34+
# Adds the --timeit argument to pytest, enabling tests that measure execution times.
35+
# Usage example:
36+
# pytest -k test_get_windows --timeit
37+
parser.addoption("--timeit", action="store_true", default=False)
38+
39+
40+
def pytest_runtest_setup(item):
41+
# Skips `timeit` tests by default to avoid inflating overall test suite run times.
42+
if "timeit" in item.keywords and not item.config.option.timeit:
43+
pytest.skip("Skipping timeit test; use --timeit to include it")
44+
45+
3346
@pytest.fixture(autouse=True, scope="session")
3447
def configure_logging():
3548
logging.config.dictConfig(LOGGING_CONFIG)

0 commit comments

Comments
 (0)