Skip to content

Commit 3a93dae

Browse files
committed
Merge branch 'read_iter' of git://git.kernel.dk/linux
Pull read_iter updates from Jens Axboe: There are still a few users of fops->read() in the core parts of the fs stack, which is a shame, since it'd be nice to get rid of the non-iterator parts down the line and reclaim that part of the file_operations struct. Outside of moving in that direction as a cleanup, using ->read_iter() enables us to mark these files with FMODE_NOWAIT. This is important for users like io_uring, where per-IO nonblocking hints make a difference in how efficiently IO can be done. Those two things are my main motivation for starting this work, with hopefully more to come down the line. All patches have been booted and tested, and the corresponding test cases from LTP have been run. * 'read_iter' of git://git.kernel.dk/linux (4 commits): signalfd: convert to ->read_iter(); userfaultfd: convert to ->read_iter(); timerfd: convert to ->read_iter(); new helper: copy_to_iter_full(). Signed-off-by: Christian Brauner <brauner@kernel.org>
2 parents fec50db + fbe3812 commit 3a93dae

File tree

5 files changed

+93
-50
lines changed

5 files changed

+93
-50
lines changed

fs/signalfd.c

Lines changed: 28 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -68,8 +68,7 @@ static __poll_t signalfd_poll(struct file *file, poll_table *wait)
6868
/*
6969
* Copied from copy_siginfo_to_user() in kernel/signal.c
7070
*/
71-
static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
72-
kernel_siginfo_t const *kinfo)
71+
static int signalfd_copyinfo(struct iov_iter *to, kernel_siginfo_t const *kinfo)
7372
{
7473
struct signalfd_siginfo new;
7574

@@ -146,10 +145,10 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
146145
break;
147146
}
148147

149-
if (copy_to_user(uinfo, &new, sizeof(struct signalfd_siginfo)))
148+
if (!copy_to_iter_full(&new, sizeof(struct signalfd_siginfo), to))
150149
return -EFAULT;
151150

152-
return sizeof(*uinfo);
151+
return sizeof(struct signalfd_siginfo);
153152
}
154153

155154
static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info,
@@ -199,28 +198,27 @@ static ssize_t signalfd_dequeue(struct signalfd_ctx *ctx, kernel_siginfo_t *info
199198
* error code. The "count" parameter must be at least the size of a
200199
* "struct signalfd_siginfo".
201200
*/
202-
static ssize_t signalfd_read(struct file *file, char __user *buf, size_t count,
203-
loff_t *ppos)
201+
static ssize_t signalfd_read_iter(struct kiocb *iocb, struct iov_iter *to)
204202
{
203+
struct file *file = iocb->ki_filp;
205204
struct signalfd_ctx *ctx = file->private_data;
206-
struct signalfd_siginfo __user *siginfo;
207-
int nonblock = file->f_flags & O_NONBLOCK;
205+
size_t count = iov_iter_count(to);
208206
ssize_t ret, total = 0;
209207
kernel_siginfo_t info;
208+
bool nonblock;
210209

211210
count /= sizeof(struct signalfd_siginfo);
212211
if (!count)
213212
return -EINVAL;
214213

215-
siginfo = (struct signalfd_siginfo __user *) buf;
214+
nonblock = file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT;
216215
do {
217216
ret = signalfd_dequeue(ctx, &info, nonblock);
218217
if (unlikely(ret <= 0))
219218
break;
220-
ret = signalfd_copyinfo(siginfo, &info);
219+
ret = signalfd_copyinfo(to, &info);
221220
if (ret < 0)
222221
break;
223-
siginfo++;
224222
total += ret;
225223
nonblock = 1;
226224
} while (--count);
@@ -246,7 +244,7 @@ static const struct file_operations signalfd_fops = {
246244
#endif
247245
.release = signalfd_release,
248246
.poll = signalfd_poll,
249-
.read = signalfd_read,
247+
.read_iter = signalfd_read_iter,
250248
.llseek = noop_llseek,
251249
};
252250

@@ -265,20 +263,34 @@ static int do_signalfd4(int ufd, sigset_t *mask, int flags)
265263
signotset(mask);
266264

267265
if (ufd == -1) {
266+
struct file *file;
267+
268268
ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
269269
if (!ctx)
270270
return -ENOMEM;
271271

272272
ctx->sigmask = *mask;
273273

274+
ufd = get_unused_fd_flags(flags & O_CLOEXEC);
275+
if (ufd < 0) {
276+
kfree(ctx);
277+
return ufd;
278+
}
279+
280+
file = anon_inode_getfile("[signalfd]", &signalfd_fops, ctx,
281+
O_RDWR | (flags & O_NONBLOCK));
282+
if (IS_ERR(file)) {
283+
put_unused_fd(ufd);
284+
kfree(ctx);
285+
return ufd;
286+
}
287+
file->f_mode |= FMODE_NOWAIT;
288+
274289
/*
275290
* When we call this, the initialization must be complete, since
276291
* anon_inode_getfd() will install the fd.
277292
*/
278-
ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
279-
O_RDWR | (flags & (O_CLOEXEC | O_NONBLOCK)));
280-
if (ufd < 0)
281-
kfree(ctx);
293+
fd_install(ufd, file);
282294
} else {
283295
struct fd f = fdget(ufd);
284296
if (!f.file)

fs/timerfd.c

Lines changed: 26 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -262,17 +262,18 @@ static __poll_t timerfd_poll(struct file *file, poll_table *wait)
262262
return events;
263263
}
264264

265-
static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
266-
loff_t *ppos)
265+
static ssize_t timerfd_read_iter(struct kiocb *iocb, struct iov_iter *to)
267266
{
267+
struct file *file = iocb->ki_filp;
268268
struct timerfd_ctx *ctx = file->private_data;
269269
ssize_t res;
270270
u64 ticks = 0;
271271

272-
if (count < sizeof(ticks))
272+
if (iov_iter_count(to) < sizeof(ticks))
273273
return -EINVAL;
274+
274275
spin_lock_irq(&ctx->wqh.lock);
275-
if (file->f_flags & O_NONBLOCK)
276+
if (file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT)
276277
res = -EAGAIN;
277278
else
278279
res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
@@ -312,8 +313,11 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
312313
ctx->ticks = 0;
313314
}
314315
spin_unlock_irq(&ctx->wqh.lock);
315-
if (ticks)
316-
res = put_user(ticks, (u64 __user *) buf) ? -EFAULT: sizeof(ticks);
316+
if (ticks) {
317+
res = copy_to_iter(&ticks, sizeof(ticks), to);
318+
if (!res)
319+
res = -EFAULT;
320+
}
317321
return res;
318322
}
319323

@@ -384,7 +388,7 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg
384388
static const struct file_operations timerfd_fops = {
385389
.release = timerfd_release,
386390
.poll = timerfd_poll,
387-
.read = timerfd_read,
391+
.read_iter = timerfd_read_iter,
388392
.llseek = noop_llseek,
389393
.show_fdinfo = timerfd_show,
390394
.unlocked_ioctl = timerfd_ioctl,
@@ -407,6 +411,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
407411
{
408412
int ufd;
409413
struct timerfd_ctx *ctx;
414+
struct file *file;
410415

411416
/* Check the TFD_* constants for consistency. */
412417
BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
@@ -443,11 +448,22 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
443448

444449
ctx->moffs = ktime_mono_to_real(0);
445450

446-
ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
447-
O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
448-
if (ufd < 0)
451+
ufd = get_unused_fd_flags(flags & TFD_SHARED_FCNTL_FLAGS);
452+
if (ufd < 0) {
453+
kfree(ctx);
454+
return ufd;
455+
}
456+
457+
file = anon_inode_getfile("[timerfd]", &timerfd_fops, ctx,
458+
O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
459+
if (IS_ERR(file)) {
460+
put_unused_fd(ufd);
449461
kfree(ctx);
462+
return PTR_ERR(file);
463+
}
450464

465+
file->f_mode |= FMODE_NOWAIT;
466+
fd_install(ufd, file);
451467
return ufd;
452468
}
453469

fs/userfaultfd.c

Lines changed: 28 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
#include <linux/hugetlb.h>
3232
#include <linux/swapops.h>
3333
#include <linux/miscdevice.h>
34+
#include <linux/uio.h>
3435

3536
static int sysctl_unprivileged_userfaultfd __read_mostly;
3637

@@ -282,7 +283,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx,
282283
/*
283284
* Verify the pagetables are still not ok after having reigstered into
284285
* the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any
285-
* userfault that has already been resolved, if userfaultfd_read and
286+
* userfault that has already been resolved, if userfaultfd_read_iter and
286287
* UFFDIO_COPY|ZEROPAGE are being run simultaneously on two different
287288
* threads.
288289
*/
@@ -1177,34 +1178,34 @@ static ssize_t userfaultfd_ctx_read(struct userfaultfd_ctx *ctx, int no_wait,
11771178
return ret;
11781179
}
11791180

1180-
static ssize_t userfaultfd_read(struct file *file, char __user *buf,
1181-
size_t count, loff_t *ppos)
1181+
static ssize_t userfaultfd_read_iter(struct kiocb *iocb, struct iov_iter *to)
11821182
{
1183+
struct file *file = iocb->ki_filp;
11831184
struct userfaultfd_ctx *ctx = file->private_data;
11841185
ssize_t _ret, ret = 0;
11851186
struct uffd_msg msg;
1186-
int no_wait = file->f_flags & O_NONBLOCK;
11871187
struct inode *inode = file_inode(file);
1188+
bool no_wait;
11881189

11891190
if (!userfaultfd_is_initialized(ctx))
11901191
return -EINVAL;
11911192

1193+
no_wait = file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT;
11921194
for (;;) {
1193-
if (count < sizeof(msg))
1195+
if (iov_iter_count(to) < sizeof(msg))
11941196
return ret ? ret : -EINVAL;
11951197
_ret = userfaultfd_ctx_read(ctx, no_wait, &msg, inode);
11961198
if (_ret < 0)
11971199
return ret ? ret : _ret;
1198-
if (copy_to_user((__u64 __user *) buf, &msg, sizeof(msg)))
1200+
_ret = !copy_to_iter_full(&msg, sizeof(msg), to);
1201+
if (_ret)
11991202
return ret ? ret : -EFAULT;
12001203
ret += sizeof(msg);
1201-
buf += sizeof(msg);
1202-
count -= sizeof(msg);
12031204
/*
12041205
* Allow to read more than one fault at time but only
12051206
* block if waiting for the very first one.
12061207
*/
1207-
no_wait = O_NONBLOCK;
1208+
no_wait = true;
12081209
}
12091210
}
12101211

@@ -2172,7 +2173,7 @@ static const struct file_operations userfaultfd_fops = {
21722173
#endif
21732174
.release = userfaultfd_release,
21742175
.poll = userfaultfd_poll,
2175-
.read = userfaultfd_read,
2176+
.read_iter = userfaultfd_read_iter,
21762177
.unlocked_ioctl = userfaultfd_ioctl,
21772178
.compat_ioctl = compat_ptr_ioctl,
21782179
.llseek = noop_llseek,
@@ -2192,6 +2193,7 @@ static void init_once_userfaultfd_ctx(void *mem)
21922193
static int new_userfaultfd(int flags)
21932194
{
21942195
struct userfaultfd_ctx *ctx;
2196+
struct file *file;
21952197
int fd;
21962198

21972199
BUG_ON(!current->mm);
@@ -2215,16 +2217,26 @@ static int new_userfaultfd(int flags)
22152217
init_rwsem(&ctx->map_changing_lock);
22162218
atomic_set(&ctx->mmap_changing, 0);
22172219
ctx->mm = current->mm;
2218-
/* prevent the mm struct to be freed */
2219-
mmgrab(ctx->mm);
2220+
2221+
fd = get_unused_fd_flags(flags & UFFD_SHARED_FCNTL_FLAGS);
2222+
if (fd < 0)
2223+
goto err_out;
22202224

22212225
/* Create a new inode so that the LSM can block the creation. */
2222-
fd = anon_inode_create_getfd("[userfaultfd]", &userfaultfd_fops, ctx,
2226+
file = anon_inode_create_getfile("[userfaultfd]", &userfaultfd_fops, ctx,
22232227
O_RDONLY | (flags & UFFD_SHARED_FCNTL_FLAGS), NULL);
2224-
if (fd < 0) {
2225-
mmdrop(ctx->mm);
2226-
kmem_cache_free(userfaultfd_ctx_cachep, ctx);
2228+
if (IS_ERR(file)) {
2229+
put_unused_fd(fd);
2230+
fd = PTR_ERR(file);
2231+
goto err_out;
22272232
}
2233+
/* prevent the mm struct to be freed */
2234+
mmgrab(ctx->mm);
2235+
file->f_mode |= FMODE_NOWAIT;
2236+
fd_install(fd, file);
2237+
return fd;
2238+
err_out:
2239+
kmem_cache_free(userfaultfd_ctx_cachep, ctx);
22282240
return fd;
22292241
}
22302242

include/linux/uio.h

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -205,6 +205,16 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
205205
return 0;
206206
}
207207

208+
static __always_inline __must_check
209+
bool copy_to_iter_full(const void *addr, size_t bytes, struct iov_iter *i)
210+
{
211+
size_t copied = copy_to_iter(addr, bytes, i);
212+
if (likely(copied == bytes))
213+
return true;
214+
iov_iter_revert(i, copied);
215+
return false;
216+
}
217+
208218
static __always_inline __must_check
209219
bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
210220
{

include/net/udp.h

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -379,14 +379,7 @@ static inline bool udp_skb_is_linear(struct sk_buff *skb)
379379
static inline int copy_linear_skb(struct sk_buff *skb, int len, int off,
380380
struct iov_iter *to)
381381
{
382-
int n;
383-
384-
n = copy_to_iter(skb->data + off, len, to);
385-
if (n == len)
386-
return 0;
387-
388-
iov_iter_revert(to, n);
389-
return -EFAULT;
382+
return copy_to_iter_full(skb->data + off, len, to) ? 0 : -EFAULT;
390383
}
391384

392385
/*

0 commit comments

Comments
 (0)