quixio
diff --git a/quixstreams/app.py
Lines changed: 3 additions & 1 deletion b/quixstreams/app.py
Lines changed: 3 additions & 1 deletion
diff --git a/quixstreams/checkpointing/checkpoint.py
Lines changed: 41 additions & 62 deletions b/quixstreams/checkpointing/checkpoint.py
Lines changed: 41 additions & 62 deletions
diff --git a/quixstreams/processing/context.py
Lines changed: 1 addition & 1 deletion b/quixstreams/processing/context.py
Lines changed: 1 addition & 1 deletion
diff --git a/quixstreams/processing/pausing.py
Lines changed: 54 additions & 66 deletions b/quixstreams/processing/pausing.py
Lines changed: 54 additions & 66 deletions
diff --git a/quixstreams/sinks/base/exceptions.py
Lines changed: 4 additions & 8 deletions b/quixstreams/sinks/base/exceptions.py
Lines changed: 4 additions & 8 deletions
@@ -337,7 +337,9 @@ def __init__(
 
         self._source_manager = SourceManager()
         self._sink_manager = SinkManager()
-        self._pausing_manager = PausingManager(consumer=self._consumer)
+        self._pausing_manager = PausingManager(
+            consumer=self._consumer, topic_manager=self._topic_manager
+        )
         self._processing_context = ProcessingContext(
             commit_interval=self._config.commit_interval,
             commit_every=self._config.commit_every,
 
@@ -181,13 +181,46 @@ def commit(self):
         Commit the checkpoint.
 
         This method will:
-         1. Produce the changelogs for each state store
-         2. Flush the producer to ensure everything is delivered.
-         3. Commit topic offsets.
-         4. Flush each state store partition to the disk.
+         1. Flush the registered sinks, if any; exit early if one is backpressured.
+         2. Produce the changelogs for each state store
+         3. Flush the producer to ensure everything is delivered.
+         4. Commit topic offsets.
+         5. Flush each state store partition to the disk.
         """
 
-        # Step 1. Produce the changelogs
+        # Step 1. Flush sinks
+        logger.debug("Checkpoint: flushing sinks")
+        backpressured = False
+        for sink in self._sink_manager.sinks:
+            if backpressured:
+                # Drop the accumulated data of the remaining sinks
+                # once one of them is backpressured, to limit the number of
+                # duplicates when the data is reprocessed.
+                sink.on_paused()
+                continue
+
+            try:
+                sink.flush()
+            except SinkBackpressureError as exc:
+                logger.warning(
+                    f'Backpressure for sink "{sink}" is detected, '
+                    f"all partitions will be paused and resumed again "
+                    f"in {exc.retry_after}s"
+                )
+                # Backpressure is detected from the sink.
+                # Pause the assignment to let it cool down and seek it back to
+                # the first processed offsets of this Checkpoint (they must be equal
+                # to the last committed offsets).
+                self._pausing_manager.pause(
+                    resume_after=exc.retry_after,
+                    offsets_to_seek=self._starting_tp_offsets.copy(),
+                )
+                backpressured = True
+        if backpressured:
+            # Exit early if backpressure is detected
+            return
+
+        # Step 2. Produce the changelogs
         for (
             topic,
             partition,
@@ -201,7 +234,7 @@ def commit(self):
                 )
             transaction.prepare(processed_offsets={topic: offset})
 
-        # Step 2. Flush producer to trigger all delivery callbacks and ensure that
+        # Step 3. Flush producer to trigger all delivery callbacks and ensure that
         # all messages are produced
         logger.debug("Checkpoint: flushing producer")
         unproduced_msg_count = self._producer.flush()
@@ -211,55 +244,10 @@ def commit(self):
                 f"the producer flush timeout"
             )
 
-        logger.debug("Checkpoint: flushing sinks")
-        sinks = self._sink_manager.sinks
-        # Step 3. Flush sinks
-        for (topic, partition), offset in self._tp_offsets.items():
-            for sink in sinks:
-                if self._pausing_manager.is_paused(topic=topic, partition=partition):
-                    # The topic-partition is paused, skip flushing other sinks for
-                    # this TP.
-                    # Note: when flushing multiple sinks for the same TP, some
-                    # of them can be flushed before one of the sinks is backpressured.
-                    sink.on_paused(topic=topic, partition=partition)
-                    continue
-
-                try:
-                    sink.flush(topic=topic, partition=partition)
-                except SinkBackpressureError as exc:
-                    logger.warning(
-                        f'Backpressure for sink "{sink}" is detected, '
-                        f"the partition will be paused and resumed again "
-                        f"in {exc.retry_after}s; "
-                        f'partition="{topic}[{partition}]" '
-                        f"processed_offset={offset}"
-                    )
-                    # The backpressure is detected from the sink
-                    # Pause the partition to let it cool down and seek it back to
-                    # the first processed offset of this Checkpoint (it must be equal
-                    # to the last committed offset).
-                    offset_to_seek = self._starting_tp_offsets[(topic, partition)]
-                    self._pausing_manager.pause(
-                        topic=topic,
-                        partition=partition,
-                        resume_after=exc.retry_after,
-                        offset_to_seek=offset_to_seek,
-                    )
-
         # Step 4. Commit offsets to Kafka
-        # First, filter out offsets of the paused topic partitions.
-        tp_offsets = {
-            (topic, partition): offset
-            for (topic, partition), offset in self._tp_offsets.items()
-            if not self._pausing_manager.is_paused(topic=topic, partition=partition)
-        }
-        if not tp_offsets:
-            # No offsets to commit because every partition is paused, exiting early
-            return
-
         offsets = [
             TopicPartition(topic=topic, partition=partition, offset=offset + 1)
-            for (topic, partition), offset in tp_offsets.items()
+            for (topic, partition), offset in self._tp_offsets.items()
         ]
 
         if self._exactly_once:
@@ -281,16 +269,7 @@ def commit(self):
         # offsets.
         # Get produced offsets after flushing the producer
         produced_offsets = self._producer.offsets
-        for (
-            topic,
-            partition,
-            store_name,
-        ), transaction in self._store_transactions.items():
-            offset = tp_offsets.get((topic, partition))
-            # Offset can be None if the partition is paused
-            if offset is None:
-                continue
-
+        for transaction in self._store_transactions.values():
             # Get the changelog topic-partition for the given transaction
             # It can be None if changelog topics are disabled in the app config
             changelog_tp = transaction.changelog_topic_partition
 
@@ -98,7 +98,7 @@ def resume_ready_partitions(self):
         self.pausing_manager.resume_if_ready()
 
     def on_partition_revoke(self, topic: str, partition: int):
-        self.pausing_manager.revoke(topic=topic, partition=partition)
+        self.pausing_manager.reset()
 
     def __enter__(self):
         self.sink_manager.start_sinks()
 
@@ -1,11 +1,11 @@
 import logging
 import sys
 import time
-from typing import Dict, Tuple
 
 from confluent_kafka import TopicPartition
 
 from quixstreams.kafka import BaseConsumer
+from quixstreams.models import TopicManager
 
 logger = logging.getLogger(__name__)
 
@@ -18,91 +18,79 @@ class PausingManager:
     the timeout is elapsed.
     """
 
-    _paused_tps: Dict[Tuple[str, int], float]
+    _resume_at: float
 
-    def __init__(self, consumer: BaseConsumer):
+    def __init__(self, consumer: BaseConsumer, topic_manager: TopicManager):
         self._consumer = consumer
-        self._paused_tps = {}
-        self._next_resume_at = _MAX_FLOAT
+        self._topic_manager = topic_manager
+        self.reset()
 
     def pause(
         self,
-        topic: str,
-        partition: int,
-        offset_to_seek: int,
+        offsets_to_seek: dict[tuple[str, int], int],
         resume_after: float,
     ):
         """
-        Pause the topic-partition for a certain period of time.
+        Pause all assigned non-changelog partitions for a certain period of time and
+        seek the partitions listed in the `offsets_to_seek` dict.
 
         This method is supposed to be called in case of backpressure from Sinks.
         """
-        if self.is_paused(topic=topic, partition=partition):
-            # Exit early if the TP is already paused
-            return
-
-        # Add a TP to the dict to avoid repetitive pausing
         resume_at = time.monotonic() + resume_after
-        self._paused_tps[(topic, partition)] = resume_at
-        # Remember when the next TP should be resumed to exit early
-        # in the resume_if_ready() calls.
-        # Partitions are rarely paused, but the resume checks can be done
-        # thousands times a sec.
-        self._next_resume_at = min(self._next_resume_at, resume_at)
-        tp = TopicPartition(topic=topic, partition=partition, offset=offset_to_seek)
-        position, *_ = self._consumer.position([tp])
-        logger.debug(
-            f'Pausing topic partition "{topic}[{partition}]" for {resume_after}s; '
-            f"current_offset={position.offset}"
-        )
-        self._consumer.pause(partitions=[tp])
-        # Seek the TP back to the "offset_to_seek" to start from it on resume.
-        # The "offset_to_seek" is provided by the Checkpoint and is expected to be the
-        # first offset processed in the checkpoint.
-        logger.debug(
-            f'Seek the paused partition "{topic}[{partition}]" back to '
-            f"offset {tp.offset}"
-        )
-        self._consumer.seek(partition=tp)
-
-    def is_paused(self, topic: str, partition: int) -> bool:
-        """
-        Check if the topic-partition is already paused
-        """
-        return (topic, partition) in self._paused_tps
+        self._resume_at = min(self._resume_at, resume_at)
+
+        # Pause only data TPs excluding changelog TPs
+        non_changelog_tps = self._get_non_changelog_assigned_tps()
+
+        for tp in non_changelog_tps:
+            position, *_ = self._consumer.position([tp])
+            logger.debug(
+                f'Pausing topic partition "{tp.topic}[{tp.partition}]" for {resume_after}s; '
+                f"position={position.offset}"
+            )
+            self._consumer.pause(partitions=[tp])
+            # Seek the TP back to its offset in "offsets_to_seek" to start from it
+            # on resume. The dict is provided by the Checkpoint and is expected to hold
+            # the first offset processed in the checkpoint for each partition.
+            seek_offset = offsets_to_seek.get((tp.topic, tp.partition))
+            if seek_offset is not None:
+                logger.debug(
+                    f'Seek the paused partition "{tp.topic}[{tp.partition}]" back to '
+                    f"offset {seek_offset}"
+                )
+                self._consumer.seek(
+                    partition=TopicPartition(
+                        topic=tp.topic, partition=tp.partition, offset=seek_offset
+                    )
+                )
 
     def resume_if_ready(self):
         """
-        Resume consuming from topic-partitions after the wait period has elapsed.
+        Resume consuming from assigned data partitions after the wait period has elapsed.
         """
-        now = time.monotonic()
-        if self._next_resume_at > now:
-            # Nothing to resume yet, exit early
+        if self._resume_at > time.monotonic():
             return
 
-        tps_to_resume = [
-            tp for tp, resume_at in self._paused_tps.items() if resume_at <= now
-        ]
-        for topic, partition in tps_to_resume:
-            logger.debug(f'Resuming topic partition "{topic}[{partition}]"')
+        # Resume only data TPs excluding changelog TPs
+        non_changelog_tps = self._get_non_changelog_assigned_tps()
+
+        for tp in non_changelog_tps:
+            logger.debug(f'Resuming topic partition "{tp.topic}[{tp.partition}]"')
             self._consumer.resume(
-                partitions=[TopicPartition(topic=topic, partition=partition)]
+                partitions=[TopicPartition(topic=tp.topic, partition=tp.partition)]
             )
-            self._paused_tps.pop((topic, partition))
-        self._reset_next_resume_at()
+        self.reset()
 
-    def revoke(self, topic: str, partition: int):
+    def reset(self):
+        # Reset the timeout back to its initial state
+        self._resume_at = _MAX_FLOAT
+
+    def _get_non_changelog_assigned_tps(self) -> list[TopicPartition]:
         """
-        Remove partition from the list of paused TPs if it's revoked
+        Get assigned topic partitions for non-changelog topics.
         """
-        tp = (topic, partition)
-        if tp not in self._paused_tps:
-            return
-        self._paused_tps.pop(tp)
-        self._reset_next_resume_at()
-
-    def _reset_next_resume_at(self):
-        if self._paused_tps:
-            self._next_resume_at = min(self._paused_tps.values())
-        else:
-            self._next_resume_at = _MAX_FLOAT
+        return [
+            tp
+            for tp in self._consumer.assignment()
+            if tp.topic in self._topic_manager.non_changelog_topics
+        ]
@@ -8,16 +8,12 @@ class SinkBackpressureError(QuixException):
     An exception to be raised by Sinks during flush() call
     to signal a backpressure event to the application.
 
-    When raised, the app will drop the accumulated sink batch,
-    pause the corresponding topic partition for
-    a timeout specified in `retry_after`, and resume it when it's elapsed.
+    When raised, the app will drop the accumulated sink batches,
+    pause all assigned topic partitions for
+    a timeout specified in `retry_after`, and resume them when it's elapsed.
 
     :param retry_after: a timeout in seconds to pause for
-    :param topic: a topic name to pause
-    :param partition: a partition number to pause
     """
 
-    def __init__(self, retry_after: float, topic: str, partition: int):
+    def __init__(self, retry_after: float):
         self.retry_after = retry_after
-        self.topic = topic
-        self.partition = partition