diff --git a/mountpoint-s3-fs/CHANGELOG.md b/mountpoint-s3-fs/CHANGELOG.md index cea9496ae..19bea3240 100644 --- a/mountpoint-s3-fs/CHANGELOG.md +++ b/mountpoint-s3-fs/CHANGELOG.md @@ -5,6 +5,7 @@ * Adopt a unified memory pool to reduce overall memory usage. ([#1511](https://github.com/awslabs/mountpoint-s3/pull/1511)) * Replace `S3Uri` with `S3Path` and consolidate related types like `Bucket` and `Prefix` into the `s3` module. ([#1535](https://github.com/awslabs/mountpoint-s3/pull/1535)) +* `PrefetchGetObject` now has an updated backpressure algorithm advancing the read window with each call to `PrefetchGetObject::read`, with the aim of higher sequential-read throughput. ([#1453](https://github.com/awslabs/mountpoint-s3/pull/1453)) ## v0.6.0 (July 23, 2025) diff --git a/mountpoint-s3-fs/src/prefetch/backpressure_controller.rs b/mountpoint-s3-fs/src/prefetch/backpressure_controller.rs index cb7a87d03..8274b7f4b 100644 --- a/mountpoint-s3-fs/src/prefetch/backpressure_controller.rs +++ b/mountpoint-s3-fs/src/prefetch/backpressure_controller.rs @@ -126,40 +126,33 @@ impl BackpressureController { BackpressureFeedbackEvent::DataRead { offset, length } => { self.next_read_offset = offset + length as u64; self.mem_limiter.release(BufferArea::Prefetch, length as u64); - let remaining_window = self.read_window_end_offset.saturating_sub(self.next_read_offset) as usize; - - // Increment the read window only if the remaining window reaches some threshold i.e. half of it left. - // When memory is low the `preferred_read_window_size` will be scaled down so we have to keep trying - // until we have enough read window. - while remaining_window < (self.preferred_read_window_size / 2) - && self.read_window_end_offset < self.request_end_offset - { + + loop { let new_read_window_end_offset = self .next_read_offset .saturating_add(self.preferred_read_window_size as u64) .min(self.request_end_offset); - // We can skip if the new `read_window_end_offset` is less than or equal to the current one, this - // could happen after the read window is scaled down. - if new_read_window_end_offset <= self.read_window_end_offset { - break; - } let to_increase = new_read_window_end_offset.saturating_sub(self.read_window_end_offset) as usize; - // Force incrementing read window regardless of available memory when we are already at minimum - // read window size. - if self.preferred_read_window_size <= self.min_read_window_size { - self.mem_limiter.reserve(BufferArea::Prefetch, to_increase as u64); - self.increment_read_window(to_increase).await; + if to_increase == 0 { + // There's nothing to increment, just accept the feedback. + // This can happen with random read patterns or prefetcher scale down. break; } - // Try to reserve the memory for the length we want to increase before sending the request, - // scale down the read window if it fails. - if self.mem_limiter.try_reserve(BufferArea::Prefetch, to_increase as u64) { + // Force incrementing window regardless of available memory when at minimum window size. + if self.preferred_read_window_size <= self.min_read_window_size { + self.mem_limiter.reserve(BufferArea::Prefetch, to_increase as u64); self.increment_read_window(to_increase).await; break; } else { - self.scale_down(); + // Try to reserve memory and inc. read window, otherwise scale down and try again. + if self.mem_limiter.try_reserve(BufferArea::Prefetch, to_increase as u64) { + self.increment_read_window(to_increase).await; + break; + } else { + self.scale_down(); + } } } } @@ -174,7 +167,10 @@ impl BackpressureController { let next_window_end_offset = prev_window_end_offset + len as u64; trace!( next_read_offset = self.next_read_offset, - prev_window_end_offset, next_window_end_offset, len, "incrementing read window", + prev_window_end_offset, + next_window_end_offset, + len, + "incrementing read window", ); // This should not block since the channel is unbounded diff --git a/mountpoint-s3/CHANGELOG.md b/mountpoint-s3/CHANGELOG.md index ac9d4241c..fd4ed3a3e 100644 --- a/mountpoint-s3/CHANGELOG.md +++ b/mountpoint-s3/CHANGELOG.md @@ -8,6 +8,10 @@ * Add support for renaming files using the RenameObject API when mounting directory buckets in S3 Express One Zone. ([#1468](https://github.com/awslabs/mountpoint-s3/pull/1468)) +### Other changes + +* Mountpoint's prefetcher has an updated backpressure algorithm which advances the amount of data prefetched with each read rather than waiting for half of the read window to be consumed. The aim of the change is to achieve higher sequential-read throughput. ([#1453](https://github.com/awslabs/mountpoint-s3/pull/1453)) + ## v1.18.0 (May 30, 2025) ### New features