 import logging
 import sys
 import time
-from typing import Dict, Tuple
 
 from confluent_kafka import TopicPartition
 
 from quixstreams.kafka import BaseConsumer
+from quixstreams.models import TopicManager
 
 
 logger = logging.getLogger(__name__)
@@ -18,91 +18,79 @@ class PausingManager:
     the timeout is elapsed.
     """
 
-    _paused_tps: Dict[Tuple[str, int], float]
+    _resume_at: float
 
-    def __init__(self, consumer: BaseConsumer):
+    def __init__(self, consumer: BaseConsumer, topic_manager: TopicManager):
         self._consumer = consumer
-        self._paused_tps = {}
-        self._next_resume_at = _MAX_FLOAT
+        self._topic_manager = topic_manager
+        self.reset()
 
     def pause(
         self,
-        topic: str,
-        partition: int,
-        offset_to_seek: int,
+        offsets_to_seek: dict[tuple[str, int], int],
         resume_after: float,
     ):
         """
-        Pause the topic-partition for a certain period of time.
+        Pause all partitions for a certain period of time and seek the partitions
+        provided in the `offsets_to_seek` dict.
 
         This method is supposed to be called in case of backpressure from Sinks.
         """
-        if self.is_paused(topic=topic, partition=partition):
-            # Exit early if the TP is already paused
-            return
-
-        # Add a TP to the dict to avoid repetitive pausing
         resume_at = time.monotonic() + resume_after
-        self._paused_tps[(topic, partition)] = resume_at
-        # Remember when the next TP should be resumed to exit early
-        # in the resume_if_ready() calls.
-        # Partitions are rarely paused, but the resume checks can be done
-        # thousands times a sec.
-        self._next_resume_at = min(self._next_resume_at, resume_at)
-        tp = TopicPartition(topic=topic, partition=partition, offset=offset_to_seek)
-        position, *_ = self._consumer.position([tp])
-        logger.debug(
-            f'Pausing topic partition "{topic}[{partition}]" for {resume_after}s; '
-            f"current_offset={position.offset}"
-        )
-        self._consumer.pause(partitions=[tp])
-        # Seek the TP back to the "offset_to_seek" to start from it on resume.
-        # The "offset_to_seek" is provided by the Checkpoint and is expected to be the
-        # first offset processed in the checkpoint.
-        logger.debug(
-            f'Seek the paused partition "{topic}[{partition}]" back to '
-            f"offset {tp.offset}"
-        )
-        self._consumer.seek(partition=tp)
-
-    def is_paused(self, topic: str, partition: int) -> bool:
-        """
-        Check if the topic-partition is already paused
-        """
-        return (topic, partition) in self._paused_tps
+        self._resume_at = min(self._resume_at, resume_at)
+
+        # Pause only data TPs, excluding changelog TPs
+        non_changelog_tps = self._get_non_changelog_assigned_tps()
+
+        for tp in non_changelog_tps:
+            position, *_ = self._consumer.position([tp])
+            logger.debug(
+                f'Pausing topic partition "{tp.topic}[{tp.partition}]" for {resume_after}s; '
+                f"position={position.offset}"
+            )
+            self._consumer.pause(partitions=[tp])
+            # Seek the TP back to the "offset_to_seek" to start from it on resume.
+            # The "offset_to_seek" is provided by the Checkpoint and is expected to be
+            # the first offset processed in the checkpoint.
+            seek_offset = offsets_to_seek.get((tp.topic, tp.partition))
+            if seek_offset is not None:
+                logger.debug(
+                    f'Seek the paused partition "{tp.topic}[{tp.partition}]" back to '
+                    f"offset {seek_offset}"
+                )
+                self._consumer.seek(
+                    partition=TopicPartition(
+                        topic=tp.topic, partition=tp.partition, offset=seek_offset
+                    )
+                )
 
     def resume_if_ready(self):
         """
-        Resume consuming from topic-partitions after the wait period has elapsed.
+        Resume consuming from assigned data partitions after the wait period has elapsed.
         """
-        now = time.monotonic()
-        if self._next_resume_at > now:
-            # Nothing to resume yet, exit early
+        if self._resume_at > time.monotonic():
             return
 
-        tps_to_resume = [
-            tp for tp, resume_at in self._paused_tps.items() if resume_at <= now
-        ]
-        for topic, partition in tps_to_resume:
-            logger.debug(f'Resuming topic partition "{topic}[{partition}]"')
+        # Resume only data TPs, excluding changelog TPs
+        non_changelog_tps = self._get_non_changelog_assigned_tps()
+
+        for tp in non_changelog_tps:
+            logger.debug(f'Resuming topic partition "{tp.topic}[{tp.partition}]"')
             self._consumer.resume(
-                partitions=[TopicPartition(topic=topic, partition=partition)]
+                partitions=[TopicPartition(topic=tp.topic, partition=tp.partition)]
             )
-            self._paused_tps.pop((topic, partition))
-        self._reset_next_resume_at()
+        self.reset()
 
-    def revoke(self, topic: str, partition: int):
+    def reset(self):
+        # Reset the timeout back to its initial state
+        self._resume_at = _MAX_FLOAT
+
+    def _get_non_changelog_assigned_tps(self) -> list[TopicPartition]:
         """
-        Remove partition from the list of paused TPs if it's revoked
+        Get assigned topic partitions for non-changelog topics.
         """
-        tp = (topic, partition)
-        if tp not in self._paused_tps:
-            return
-        self._paused_tps.pop(tp)
-        self._reset_next_resume_at()
-
-    def _reset_next_resume_at(self):
-        if self._paused_tps:
-            self._next_resume_at = min(self._paused_tps.values())
-        else:
-            self._next_resume_at = _MAX_FLOAT
+        return [
+            tp
+            for tp in self._consumer.assignment()
+            if tp.topic in self._topic_manager.non_changelog_topics
+        ]
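
For context, a minimal sketch of how a processing loop might drive the reworked PausingManager. The surrounding names (`consumer`, `topic_manager`, `sink`, `BackpressureSignal`, `retry_after`) are assumptions for illustration only and are not part of this change:

# Illustrative sketch only; `consumer`, `topic_manager`, `sink`, and
# `BackpressureSignal` are hypothetical stand-ins, not APIs from this diff.
pausing_manager = PausingManager(consumer=consumer, topic_manager=topic_manager)

while True:
    # Re-enable the paused data partitions once the backpressure timeout elapses.
    pausing_manager.resume_if_ready()

    msg = consumer.poll(timeout=1.0)
    if msg is None:
        continue

    try:
        sink.write(msg)  # hypothetical sink call that may push back
    except BackpressureSignal as exc:  # hypothetical backpressure exception
        # Pause all assigned data partitions and seek them back to the first
        # offset of the current checkpoint so the batch is reprocessed on resume.
        pausing_manager.pause(
            offsets_to_seek={(msg.topic(), msg.partition()): msg.offset()},
            resume_after=exc.retry_after,
        )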