Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 120 additions & 31 deletions mountpoint-s3-fs/src/superblock/readdir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ use std::collections::VecDeque;
use std::ffi::OsString;

use super::{InodeKindData, LookedUpInode, RemoteLookup, SuperblockInner};
use crate::metablock::{InodeError, InodeKind, InodeNo, InodeStat};
use crate::metablock::{InodeError, InodeKind, InodeNo, InodeStat, ValidName};
use crate::sync::atomic::{AtomicI64, Ordering};
use crate::sync::{AsyncMutex, Mutex};
use mountpoint_s3_client::ObjectClient;
Expand Down Expand Up @@ -114,9 +114,8 @@ impl ReaddirHandle {
})
}

/// Return the next inode for the directory stream. If the stream is finished, returns
/// `Ok(None)`. Does not increment the lookup count of the returned inodes: the caller
/// is responsible for calling [`remember()`] if required.
/// Get the next file or folder in the directory listing.
/// Returns None when no more items. Skips items with invalid names.
pub(super) async fn next<OC: ObjectClient + Send + Sync>(
&self,
inner: &SuperblockInner<OC>,
Expand All @@ -134,42 +133,42 @@ impl ReaddirHandle {
};

if let Some(next) = next {
let Ok(name) = next.name().try_into() else {
// Short-circuit the update if we know it'll fail because the name is invalid
warn!("{} has an invalid name and will be unavailable", next.description());
continue;
};
let remote_lookup = self.remote_lookup_from_entry(inner, &next);
let lookup = inner.update_from_remote(self.dir_ino, name, remote_lookup)?;
return Ok(Some(lookup));
let name: OsString = next.name().into();

if let Some(result) = self.lookup_from_entry(inner, &next, name)? {
return Ok(Some(result));
}
// Continue to next entry if None returned
} else {
return Ok(None);
}
}
}

/// Re-add an entry to the front of the queue if the consumer wasn't able to use it
pub fn readd(&self, entry: LookedUpInode) {
let old = self.readded.lock().unwrap().replace(entry);
assert!(old.is_none(), "cannot readd more than one entry");
}

/// Return the inode number of the parent directory of this directory handle
pub fn parent(&self) -> InodeNo {
self.parent_ino
}

/// Create a [RemoteLookup] for the given ReaddirEntry if appropriate.
fn remote_lookup_from_entry<OC: ObjectClient + Send + Sync>(
/// Process a ReaddirEntry and return the final LookedUpInode result.
/// Returns None if the entry should be skipped.
fn lookup_from_entry<OC: ObjectClient + Send + Sync>(
&self,
inner: &SuperblockInner<OC>,
entry: &ReaddirEntry,
) -> Option<RemoteLookup> {
match entry {
// If we made it this far with a local inode, we know there's nothing on the remote with
// the same name, because [LocalInode] is last in the ordering and so otherwise would
// have been deduplicated by now.
ReaddirEntry::LocalInode { .. } => None,
name: OsString,
) -> Result<Option<LookedUpInode>, InodeError> {
let remote_lookup = match entry {
ReaddirEntry::LocalInode { lookup } => {
// Check if local inode still exists
if let Ok(parent_inode) = inner.get(self.dir_ino)
&& let Ok(parent_state) = parent_inode.get_inode_state()
&& let InodeKindData::Directory { writing_children, .. } = &parent_state.kind_data
&& writing_children.contains(&lookup.inode.ino())
{
Some(RemoteLookup {
stat: lookup.stat.clone(),
kind: lookup.inode.kind(),
})
} else {
None // This will cause the entry to be skipped
}
}
ReaddirEntry::RemotePrefix { .. } => {
let stat = InodeStat::for_directory(inner.mount_time, inner.config.cache_config.dir_ttl);
Some(RemoteLookup {
Expand Down Expand Up @@ -198,8 +197,48 @@ impl ReaddirHandle {
kind: InodeKind::File,
})
}
};

// Handle the result based on entry type and lookup result
match (entry, remote_lookup) {
(ReaddirEntry::LocalInode { lookup }, Some(_)) => {
// Valid local inode, return it directly
Ok(Some(lookup.clone()))
}
(ReaddirEntry::LocalInode { .. }, None) => {
// Invalid local inode, skip it
Ok(None)
}
(_, Some(remote_lookup)) => {
// Remote entry, use update_from_remote
match ValidName::parse_os_str(&name) {
Ok(valid_name) => {
let lookup = inner.update_from_remote(self.dir_ino, valid_name, Some(remote_lookup))?;
Ok(Some(lookup))
}
Err(_) => {
// Invalid name, skip this entry
Ok(None)
}
}
}
(_, None) => {
// This shouldn't happen for remote entries
Ok(None)
}
}
}

/// Re-add an entry to the front of the queue if the consumer wasn't able to use it
pub fn readd(&self, entry: LookedUpInode) {
let old = self.readded.lock().unwrap().replace(entry);
assert!(old.is_none(), "cannot readd more than one entry");
}

/// Return the inode number of the parent directory of this directory handle
pub fn parent(&self) -> InodeNo {
self.parent_ino
}
}

/// A single entry in a readdir stream. Remote entries have not yet been converted to inodes -- that
Expand Down Expand Up @@ -638,3 +677,53 @@ impl DirHandle {
self.offset.store(0, Ordering::SeqCst);
}
}

#[cfg(test)]
mod tests {
use crate::fs::FUSE_ROOT_INODE;
use crate::metablock::{InodeKind, Metablock};
use crate::s3::S3Path;
use crate::superblock::Superblock;
use mountpoint_s3_client::mock_client::MockClient;
use std::sync::Arc;
/// Verifies readdir handles gracefully skip deleted local inodes.
/// Creates a file, obtains a readdir handle, deletes the file, then uses the handle.
/// Should complete successfully when local inodes are deleted concurrently.
#[tokio::test]
async fn test_readdir_race_condition() {
let bucket = crate::s3::Bucket::new("test-bucket").unwrap();
let client = Arc::new(MockClient::config().bucket(bucket.to_string()).build());
let superblock = Superblock::new(
client.clone(),
S3Path::new(bucket, Default::default()),
Default::default(),
);

let filename = "test_file.txt";

let lookup = superblock
.create(FUSE_ROOT_INODE, filename.as_ref(), InodeKind::File)
.await
.expect("Create failed");

superblock
.start_writing(lookup.ino(), &Default::default(), false)
.await
.expect("Start writing failed");

let handle_id = superblock
.new_readdir_handle(FUSE_ROOT_INODE)
.await
.expect("Failed to create readdir handle");

superblock
.finish_writing(lookup.ino(), None)
.await
.expect("Finish writing failed");

superblock
.readdir(FUSE_ROOT_INODE, handle_id, 0, false, Box::new(|_, _, _, _| false))
.await
.expect("Readdir failed");
}
}
1 change: 1 addition & 0 deletions mountpoint-s3/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
## Unreleased (v1.21.0)

* Change default logging level from WARN to INFO to improve visibility of important operational messages. ([#1605](https://github.com/awslabs/mountpoint-s3/pull/1605))
* Fixed "file does not exist" errors during concurrent directory listing operations ([#1648](https://github.com/awslabs/mountpoint-s3/pull/1648))

## v1.20.0 (Sep 12, 2025)

Expand Down
Loading