From 7b636bdb4230fc4ef01c81e696ed29db302e369e Mon Sep 17 00:00:00 2001 From: zyfjeff Date: Thu, 11 Apr 2024 14:45:57 +0800 Subject: [PATCH] fix request size too large when use direct io The fuse directio call path is as follows fuse_read_fill+0xa8/0xb0 fuse_send_read+0x3f/0xb0 fuse_direct_io+0x34a/0x5f0 __fuse_direct_read+0x4e/0x70 fuse_file_read_iter+0x9e/0x140 new_sync_read+0xde/0x120 __vfs_read+0x27/0x40 vfs_read+0x94/0x190 ksys_read+0x4e/0xd0 under the direct io path, fuse initiates a request whose request size is determined by the combination of the user-supplied buffer size and max_read mount parameters. size_t nmax = write ? fc->max_write : fc->max_read; size_t count = iov_iter_count(iter); size_t nbytes = min(count, nmax); nres = fuse_send_read(req, io, pos, nbytes, owner); so we have a problem with the checking of the request size in the code, we always compare the request size with MAX_BUFFER_SIZE, but in fact the maximum value of the request size depends on max_read, in virtiofs scenario max_read is UINT_MAX by default, and in fuse scenario it is possible to adjust max_read by the mounting parameter. the current implementation of fuse-backend-rs uses a fixed buffer to store the fuse response the default value of this buffer is as follows, but in fact, the kernel in the direct io path, the size of the request may be larger than the length of this buffer, which leads to the buffer is not enough to fill the read content, resulting in read failure. so here we limit the size of max_read to the length of our buffer so that the fuse kernel will not send requests that exceed the length of the buffer. in virtiofs scene max_read can't be adjusted, his default is UINT_MAX, but we don't have to worry about it, because the buffer is allocated by the kernel driver, we just use this buffer to fill the response, so we don't need to do any adjustment. Signed-off-by: zyfjeff --- src/api/server/async_io.rs | 6 ------ src/api/server/sync_io.rs | 19 ------------------- src/transport/fusedev/linux_session.rs | 15 ++++++++++++++- 3 files changed, 14 insertions(+), 26 deletions(-) diff --git a/src/api/server/async_io.rs b/src/api/server/async_io.rs index 463c292c2..87ce8b1cd 100644 --- a/src/api/server/async_io.rs +++ b/src/api/server/async_io.rs @@ -322,12 +322,6 @@ impl Server { .. } = ctx.r.read_obj().map_err(Error::DecodeMessage)?; - if size > MAX_BUFFER_SIZE { - return ctx - .async_reply_error_explicit(io::Error::from_raw_os_error(libc::ENOMEM)) - .await; - } - let owner = if read_flags & READ_LOCKOWNER != 0 { Some(lock_owner) } else { diff --git a/src/api/server/sync_io.rs b/src/api/server/sync_io.rs index 32a22ecff..3ce15b2f3 100644 --- a/src/api/server/sync_io.rs +++ b/src/api/server/sync_io.rs @@ -436,10 +436,6 @@ impl Server { .. } = ctx.r.read_obj().map_err(Error::DecodeMessage)?; - if size > MAX_BUFFER_SIZE { - return ctx.reply_error_explicit(io::Error::from_raw_os_error(libc::ENOMEM)); - } - let owner = if read_flags & READ_LOCKOWNER != 0 { Some(lock_owner) } else { @@ -495,10 +491,6 @@ impl Server { .. } = ctx.r.read_obj().map_err(Error::DecodeMessage)?; - if size > MAX_BUFFER_SIZE { - return ctx.reply_error_explicit(io::Error::from_raw_os_error(libc::ENOMEM)); - } - let owner = if fuse_flags & WRITE_LOCKOWNER != 0 { Some(lock_owner) } else { @@ -618,9 +610,6 @@ impl Server { pub(super) fn getxattr(&self, mut ctx: SrvContext<'_, F, S>) -> Result { let GetxattrIn { size, .. } = ctx.r.read_obj().map_err(Error::DecodeMessage)?; - if size > MAX_BUFFER_SIZE { - return ctx.reply_error_explicit(io::Error::from_raw_os_error(libc::ENOMEM)); - } let buf = ServerUtil::get_message_body(&mut ctx.r, &ctx.in_header, size_of::())?; @@ -647,10 +636,6 @@ impl Server { pub(super) fn listxattr(&self, mut ctx: SrvContext<'_, F, S>) -> Result { let GetxattrIn { size, .. } = ctx.r.read_obj().map_err(Error::DecodeMessage)?; - if size > MAX_BUFFER_SIZE { - return ctx.reply_error_explicit(io::Error::from_raw_os_error(libc::ENOMEM)); - } - match self.fs.listxattr(ctx.context(), ctx.nodeid(), size) { Ok(ListxattrReply::Names(val)) => ctx.reply_ok(None::, Some(&val)), Ok(ListxattrReply::Count(count)) => { @@ -820,10 +805,6 @@ impl Server { fh, offset, size, .. } = ctx.r.read_obj().map_err(Error::DecodeMessage)?; - if size > MAX_BUFFER_SIZE { - return ctx.reply_error_explicit(io::Error::from_raw_os_error(libc::ENOMEM)); - } - let available_bytes = ctx.w.available_bytes(); if available_bytes < size as usize { return ctx.reply_error_explicit(io::Error::from_raw_os_error(libc::ENOMEM)); diff --git a/src/transport/fusedev/linux_session.rs b/src/transport/fusedev/linux_session.rs index 3e880dff4..0d3ba8362 100644 --- a/src/transport/fusedev/linux_session.rs +++ b/src/transport/fusedev/linux_session.rs @@ -434,12 +434,25 @@ fn fuse_kern_mount( let meta = mountpoint .metadata() .map_err(|e| SessionFailure(format!("stat {mountpoint:?}: {e}")))?; + // the current implementation of fuse-backend-rs uses a fixed buffer to store the fuse response, + // the default value of this buffer is as follows, but in fact, the kernel in the direct io path, + // the size of the request may be larger than the length of this buffer (this is determined by + // the max_read option to determine the maximum size of kernel requests, the default value is + // a very large number), which leads to the buffer is not enough to fill the read content, + // resulting in read failure. so here we limit the size of max_read to the length of our buffer, + // so that the fuse kernel will not send requests that exceed the length of the buffer. + // in virtiofs scene max_read can't be adjusted, his default is UINT_MAX, but we don't have to + // worry about it, because the buffer is allocated by the kernel driver, we just use this buffer + // to fill the response, so we don't need to do any adjustment. + let max_read = FUSE_KERN_BUF_PAGES * pagesize() + FUSE_HEADER_SIZE; + let mut opts = format!( - "default_permissions,fd={},rootmode={:o},user_id={},group_id={}", + "default_permissions,fd={},rootmode={:o},user_id={},group_id={},max_read={}", file.as_raw_fd(), meta.permissions().mode() & libc::S_IFMT, getuid(), getgid(), + max_read ); if allow_other { opts.push_str(",allow_other");