Skip to content

Commit 40f45fe

Browse files
committed
userfaultfd: convert to ->read_iter()
Rather than use the older style ->read() hook, use ->read_iter() so that userfaultfd can support both O_NONBLOCK and IOCB_NOWAIT for non-blocking read attempts.

Split the fd setup into two parts, so that userfaultfd can mark the file mode with FMODE_NOWAIT before installing it into the process table. With that, we can also defer grabbing the mm until we know the rest will succeed, as the fd isn't visible before then.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent d949799 commit 40f45fe

File tree

1 file changed

+28
-16
lines changed

1 file changed

+28
-16
lines changed

fs/userfaultfd.c

Lines changed: 28 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
#include <linux/hugetlb.h>
3232
#include <linux/swapops.h>
3333
#include <linux/miscdevice.h>
34+
#include <linux/uio.h>
3435

3536
static int sysctl_unprivileged_userfaultfd __read_mostly;
3637

@@ -282,7 +283,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
282283
/*
283284
* Verify the pagetables are still not ok after having registered into
284285
* the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any
285-
* userfault that has already been resolved, if userfaultfd_read and
286+
* userfault that has already been resolved, if userfaultfd_read_iter and
286287
* UFFDIO_COPY|ZEROPAGE are being run simultaneously on two different
287288
* threads.
288289
*/
@@ -1177,34 +1178,34 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
11771178
return ret;
11781179
}
11791180

1180-
static ssize_t userfaultfd_read(struct file *file, char __user *buf,
1181-
size_t count, loff_t *ppos)
1181+
static ssize_t userfaultfd_read_iter(struct kiocb *iocb, struct iov_iter *to)
11821182
{
1183+
struct file *file = iocb->ki_filp;
11831184
struct userfaultfd_ctx *ctx = file->private_data;
11841185
ssize_t _ret, ret = 0;
11851186
struct uffd_msg msg;
1186-
int no_wait = file->f_flags & O_NONBLOCK;
11871187
struct inode *inode = file_inode(file);
1188+
bool no_wait;
11881189

11891190
if (!userfaultfd_is_initialized(ctx))
11901191
return -EINVAL;
11911192

1193+
no_wait = file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT;
11921194
for (;;) {
1193-
if (count < sizeof(msg))
1195+
if (iov_iter_count(to) < sizeof(msg))
11941196
return ret ? ret : -EINVAL;
11951197
_ret = userfaultfd_ctx_read(ctx, no_wait, &msg, inode);
11961198
if (_ret < 0)
11971199
return ret ? ret : _ret;
1198-
if (copy_to_user((__u64 __user *) buf, &msg, sizeof(msg)))
1200+
_ret = !copy_to_iter_full(&msg, sizeof(msg), to);
1201+
if (_ret)
11991202
return ret ? ret : -EFAULT;
12001203
ret += sizeof(msg);
1201-
buf += sizeof(msg);
1202-
count -= sizeof(msg);
12031204
/*
12041205
* Allow reading more than one fault at a time, but only
12051206
* block if waiting for the very first one.
12061207
*/
1207-
no_wait = O_NONBLOCK;
1208+
no_wait = true;
12081209
}
12091210
}
12101211

@@ -2172,7 +2173,7 @@ static const struct file_operations userfaultfd_fops = {
21722173
#endif
21732174
.release = userfaultfd_release,
21742175
.poll = userfaultfd_poll,
2175-
.read = userfaultfd_read,
2176+
.read_iter = userfaultfd_read_iter,
21762177
.unlocked_ioctl = userfaultfd_ioctl,
21772178
.compat_ioctl = compat_ptr_ioctl,
21782179
.llseek = noop_llseek,
@@ -2192,6 +2193,7 @@ static void init_once_userfaultfd_ctx(void *mem)
21922193
static int new_userfaultfd(int flags)
21932194
{
21942195
struct userfaultfd_ctx *ctx;
2196+
struct file *file;
21952197
int fd;
21962198

21972199
BUG_ON(!current->mm);
@@ -2215,16 +2217,26 @@ static int new_userfaultfd(int flags)
22152217
init_rwsem(&ctx->map_changing_lock);
22162218
atomic_set(&ctx->mmap_changing, 0);
22172219
ctx->mm = current->mm;
2218-
/* prevent the mm struct to be freed */
2219-
mmgrab(ctx->mm);
2220+
2221+
fd = get_unused_fd_flags(flags & UFFD_SHARED_FCNTL_FLAGS);
2222+
if (fd < 0)
2223+
goto err_out;
22202224

22212225
/* Create a new inode so that the LSM can block the creation. */
2222-
fd = anon_inode_create_getfd("[userfaultfd]", &userfaultfd_fops, ctx,
2226+
file = anon_inode_create_getfile("[userfaultfd]", &userfaultfd_fops, ctx,
22232227
O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
2224-
if (fd < 0) {
2225-
mmdrop(ctx->mm);
2226-
kmem_cache_free(userfaultfd_ctx_cachep, ctx);
2228+
if (IS_ERR(file)) {
2229+
put_unused_fd(fd);
2230+
fd = PTR_ERR(file);
2231+
goto err_out;
22272232
}
2233+
/* prevent the mm struct from being freed */
2234+
mmgrab(ctx->mm);
2235+
file->f_mode |= FMODE_NOWAIT;
2236+
fd_install(fd, file);
2237+
return fd;
2238+
err_out:
2239+
kmem_cache_free(userfaultfd_ctx_cachep, ctx);
22282240
return fd;
22292241
}
22302242

0 commit comments

Comments
 (0)