2
2
from abc import ABC , abstractmethod
3
3
from typing import TYPE_CHECKING , Literal , Optional , Union
4
4
5
- from quixstreams .models import SuccessfulConfluentKafkaMessageProto
6
- from quixstreams .state .exceptions import ColumnFamilyHeaderMissing
7
5
from quixstreams .state .metadata import (
8
- CHANGELOG_CF_MESSAGE_HEADER ,
9
- CHANGELOG_PROCESSED_OFFSET_MESSAGE_HEADER ,
10
6
Marker ,
11
7
)
12
8
from quixstreams .state .serialization import DumpsFunc , LoadsFunc
13
- from quixstreams .utils .json import loads as json_loads
14
9
15
10
from .transaction import PartitionTransaction , PartitionTransactionCache
16
11
@@ -45,42 +40,34 @@ def __init__(
45
40
def close (self ): ...
46
41
47
42
@abstractmethod
48
- def _recover_from_changelog_message (
49
- self ,
50
- changelog_message : SuccessfulConfluentKafkaMessageProto ,
51
- cf_name : str ,
52
- processed_offset : Optional [int ],
53
- committed_offset : int ,
54
- ): ...
55
-
56
- @abstractmethod
57
- def get_processed_offset (self ) -> Optional [int ]:
43
+ def get_changelog_offset (self ) -> Optional [int ]:
58
44
"""
59
- Get last processed offset for the given partition
45
+ Get the changelog offset that the state is up-to-date with.
60
46
:return: offset or `None` if there's no changelog offset yet
61
47
"""
62
48
...
63
49
64
50
@abstractmethod
65
- def get_changelog_offset (self ) -> Optional [ int ] :
51
+ def write_changelog_offset (self , offset : int ) :
66
52
"""
67
- Get offset that the changelog is up-to-date with.
68
- :return: offset or `None` if there's no processed offset yet
53
+ Write a new changelog offset to the db.
54
+
55
+ To be used when we simply need to update the changelog offset without touching
56
+ the actual data.
57
+
58
+ :param offset: new changelog offset
69
59
"""
70
- ...
71
60
72
61
@abstractmethod
73
62
def write (
74
63
self ,
75
64
cache : PartitionTransactionCache ,
76
- processed_offset : Optional [int ],
77
65
changelog_offset : Optional [int ],
78
66
):
79
67
"""
80
68
Update the state with data from the update cache
81
69
82
70
:param cache: The modified data
83
- :param processed_offset: The offset processed to generate the data.
84
71
:param changelog_offset: The changelog message offset of the data.
85
72
"""
86
73
@@ -92,7 +79,6 @@ def get(
92
79
Get a key from the store
93
80
94
81
:param key: a key encoded to `bytes`
95
- :param default: a default value to return if the key is not found.
96
82
:param cf_name: rocksdb column family name. Default - "default"
97
83
:return: a value if the key is present in the store. Otherwise, `default`
98
84
"""
@@ -107,6 +93,19 @@ def exists(self, key: bytes, cf_name: str = "default") -> bool:
107
93
:return: `True` if the key is present, `False` otherwise.
108
94
"""
109
95
96
+ @abstractmethod
97
+ def recover_from_changelog_message (
98
+ self , key : bytes , value : Optional [bytes ], cf_name : str , offset : int
99
+ ):
100
+ """
101
+ Updates state from a given changelog message.
102
+
103
+ :param key: changelog message key
104
+ :param value: changelog message value
105
+ :param cf_name: column family name
106
+ :param offset: changelog message offset
107
+ """
108
+
110
109
def begin (self ) -> PartitionTransaction :
111
110
"""
112
111
Start a new `PartitionTransaction`
@@ -120,58 +119,6 @@ def begin(self) -> PartitionTransaction:
120
119
changelog_producer = self ._changelog_producer ,
121
120
)
122
121
123
- def recover_from_changelog_message (
124
- self ,
125
- changelog_message : SuccessfulConfluentKafkaMessageProto ,
126
- committed_offset : int ,
127
- ) -> None :
128
- """
129
- Updates state from a given changelog message.
130
-
131
- :param changelog_message: A raw Confluent message read from a changelog topic.
132
- :param committed_offset: latest committed offset for the partition
133
- """
134
- headers = dict (changelog_message .headers () or ())
135
- # Parse the column family name from message headers
136
- cf_name = headers .get (CHANGELOG_CF_MESSAGE_HEADER , b"" ).decode ()
137
- if not cf_name :
138
- raise ColumnFamilyHeaderMissing (
139
- f"Header '{ CHANGELOG_CF_MESSAGE_HEADER } ' missing from changelog message"
140
- )
141
-
142
- # Parse the processed topic-partition-offset info from the changelog message
143
- # headers to determine whether the update should be applied or skipped.
144
- # It can be empty if the message was produced by the older version of the lib.
145
- processed_offset = json_loads (
146
- headers .get (CHANGELOG_PROCESSED_OFFSET_MESSAGE_HEADER , b"null" )
147
- )
148
-
149
- self ._recover_from_changelog_message (
150
- changelog_message ,
151
- cf_name ,
152
- processed_offset ,
153
- committed_offset ,
154
- )
155
-
156
- def _should_apply_changelog (
157
- self , processed_offset : Optional [int ], committed_offset : int
158
- ) -> bool :
159
- """
160
- Determine whether the changelog update should be skipped.
161
-
162
- :param processed_offset: changelog message processed offset.
163
- :param committed_offset: latest committed offset of the source topic partition
164
- :return: True if update should be applied, else False.
165
- """
166
- if processed_offset is not None :
167
- # Skip recovering from the message if its processed offset is ahead of the
168
- # current committed offset.
169
- # This way it will recover to a consistent state if the checkpointing code
170
- # produced the changelog messages but failed to commit
171
- # the source topic offset.
172
- return processed_offset < committed_offset
173
- return True
174
-
175
122
def __enter__ (self ):
176
123
return self
177
124
0 commit comments