Skip to content

Commit 54126fa

Browse files
committed
Merge tag 'vfs-6.9.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull iomap updates from Christian Brauner:

 - Restore read-write hints in struct bio through the bi_write_hint member for the sake of UFS devices in mobile applications. This can result in up to 40% lower write amplification in UFS devices. The patch series that builds on this will be coming in via the SCSI maintainers (Bart)

 - Overhaul the iomap writeback code. Afterwards ->map_blocks() is able to map multiple blocks at once as long as they're in the same folio. This reduces CPU usage for buffered write workloads on e.g., xfs on systems with lots of cores (Christoph)

 - Record processed bytes in iomap_iter() trace event (Kassey)

 - Extend iomap_writepage_map() trace event after Christoph's ->map_blocks() changes to map multiple blocks at once (Zhang)

* tag 'vfs-6.9.iomap' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (22 commits)
  iomap: Add processed for iomap_iter
  iomap: add pos and dirty_len into trace_iomap_writepage_map
  block, fs: Restore the per-bio/request data lifetime fields
  fs: Propagate write hints to the struct block_device inode
  fs: Move enum rw_hint into a new header file
  fs: Split fcntl_rw_hint()
  fs: Verify write lifetime constants at compile time
  fs: Fix rw_hint validation
  iomap: pass the length of the dirty region to ->map_blocks
  iomap: map multiple blocks at a time
  iomap: submit ioends immediately
  iomap: factor out a iomap_writepage_map_block helper
  iomap: only call mapping_set_error once for each failed bio
  iomap: don't chain bios
  iomap: move the iomap_sector sector calculation out of iomap_add_to_ioend
  iomap: clean up the iomap_alloc_ioend calling convention
  iomap: move all remaining per-folio logic into iomap_writepage_map
  iomap: factor out a iomap_writepage_handle_eof helper
  iomap: move the PF_MEMALLOC check to iomap_writepages
  iomap: move the io_folios field out of struct iomap_ioend
  ...
2 parents 7741794 + 86835c3 commit 54126fa

File tree

23 files changed

+455
-350
lines changed

23 files changed

+455
-350
lines changed

block/bio.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
251251
bio->bi_opf = opf;
252252
bio->bi_flags = 0;
253253
bio->bi_ioprio = 0;
254+
bio->bi_write_hint = 0;
254255
bio->bi_status = 0;
255256
bio->bi_iter.bi_sector = 0;
256257
bio->bi_iter.bi_size = 0;
@@ -813,6 +814,7 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
813814
{
814815
bio_set_flag(bio, BIO_CLONED);
815816
bio->bi_ioprio = bio_src->bi_ioprio;
817+
bio->bi_write_hint = bio_src->bi_write_hint;
816818
bio->bi_iter = bio_src->bi_iter;
817819

818820
if (bio->bi_bdev) {

block/blk-crypto-fallback.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
172172
if (bio_flagged(bio_src, BIO_REMAPPED))
173173
bio_set_flag(bio, BIO_REMAPPED);
174174
bio->bi_ioprio = bio_src->bi_ioprio;
175+
bio->bi_write_hint = bio_src->bi_write_hint;
175176
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
176177
bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
177178

block/blk-merge.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,10 @@ static struct request *attempt_merge(struct request_queue *q,
810810
if (rq_data_dir(req) != rq_data_dir(next))
811811
return NULL;
812812

813+
/* Don't merge requests with different write hints. */
814+
if (req->write_hint != next->write_hint)
815+
return NULL;
816+
813817
if (req->ioprio != next->ioprio)
814818
return NULL;
815819

@@ -937,6 +941,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
937941
if (!bio_crypt_rq_ctx_compatible(rq, bio))
938942
return false;
939943

944+
/* Don't merge requests with different write hints. */
945+
if (rq->write_hint != bio->bi_write_hint)
946+
return false;
947+
940948
if (rq->ioprio != bio_prio(bio))
941949
return false;
942950

block/blk-mq.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2584,6 +2584,7 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
25842584
rq->cmd_flags |= REQ_FAILFAST_MASK;
25852585

25862586
rq->__sector = bio->bi_iter.bi_sector;
2587+
rq->write_hint = bio->bi_write_hint;
25872588
blk_rq_bio_prep(rq, bio, nr_segs);
25882589

25892590
/* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */
@@ -3175,6 +3176,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
31753176
}
31763177
rq->nr_phys_segments = rq_src->nr_phys_segments;
31773178
rq->ioprio = rq_src->ioprio;
3179+
rq->write_hint = rq_src->write_hint;
31783180

31793181
if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
31803182
goto free_and_out;

block/bounce.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src)
169169
if (bio_flagged(bio_src, BIO_REMAPPED))
170170
bio_set_flag(bio, BIO_REMAPPED);
171171
bio->bi_ioprio = bio_src->bi_ioprio;
172+
bio->bi_write_hint = bio_src->bi_write_hint;
172173
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
173174
bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
174175

block/fops.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
7373
bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
7474
}
7575
bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
76+
bio.bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
7677
bio.bi_ioprio = iocb->ki_ioprio;
7778

7879
ret = bio_iov_iter_get_pages(&bio, iter);
@@ -203,6 +204,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
203204

204205
for (;;) {
205206
bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
207+
bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
206208
bio->bi_private = dio;
207209
bio->bi_end_io = blkdev_bio_end_io;
208210
bio->bi_ioprio = iocb->ki_ioprio;
@@ -321,6 +323,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
321323
dio->flags = 0;
322324
dio->iocb = iocb;
323325
bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
326+
bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
324327
bio->bi_end_io = blkdev_bio_end_io_async;
325328
bio->bi_ioprio = iocb->ki_ioprio;
326329

@@ -482,7 +485,7 @@ static void blkdev_readahead(struct readahead_control *rac)
482485
}
483486

484487
static int blkdev_map_blocks(struct iomap_writepage_ctx *wpc,
485-
struct inode *inode, loff_t offset)
488+
struct inode *inode, loff_t offset, unsigned int len)
486489
{
487490
loff_t isize = i_size_read(inode);
488491

fs/buffer.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555

5656
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
5757
static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
58-
struct writeback_control *wbc);
58+
enum rw_hint hint, struct writeback_control *wbc);
5959

6060
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
6161

@@ -1889,7 +1889,8 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio,
18891889
do {
18901890
struct buffer_head *next = bh->b_this_page;
18911891
if (buffer_async_write(bh)) {
1892-
submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
1892+
submit_bh_wbc(REQ_OP_WRITE | write_flags, bh,
1893+
inode->i_write_hint, wbc);
18931894
nr_underway++;
18941895
}
18951896
bh = next;
@@ -1944,7 +1945,8 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio,
19441945
struct buffer_head *next = bh->b_this_page;
19451946
if (buffer_async_write(bh)) {
19461947
clear_buffer_dirty(bh);
1947-
submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
1948+
submit_bh_wbc(REQ_OP_WRITE | write_flags, bh,
1949+
inode->i_write_hint, wbc);
19481950
nr_underway++;
19491951
}
19501952
bh = next;
@@ -2756,6 +2758,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
27562758
}
27572759

27582760
static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
2761+
enum rw_hint write_hint,
27592762
struct writeback_control *wbc)
27602763
{
27612764
const enum req_op op = opf & REQ_OP_MASK;
@@ -2783,6 +2786,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
27832786
fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
27842787

27852788
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
2789+
bio->bi_write_hint = write_hint;
27862790

27872791
__bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
27882792

@@ -2802,7 +2806,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
28022806

28032807
void submit_bh(blk_opf_t opf, struct buffer_head *bh)
28042808
{
2805-
submit_bh_wbc(opf, bh, NULL);
2809+
submit_bh_wbc(opf, bh, WRITE_LIFE_NOT_SET, NULL);
28062810
}
28072811
EXPORT_SYMBOL(submit_bh);
28082812

fs/direct-io.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
410410
bio->bi_end_io = dio_bio_end_io;
411411
if (dio->is_pinned)
412412
bio_set_flag(bio, BIO_PAGE_PINNED);
413+
bio->bi_write_hint = file_inode(dio->iocb->ki_filp)->i_write_hint;
414+
413415
sdio->bio = bio;
414416
sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
415417
}

fs/f2fs/f2fs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <linux/blkdev.h>
2525
#include <linux/quotaops.h>
2626
#include <linux/part_stat.h>
27+
#include <linux/rw_hint.h>
2728
#include <crypto/hash.h>
2829

2930
#include <linux/fscrypt.h>

fs/fcntl.c

Lines changed: 40 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include <linux/memfd.h>
2828
#include <linux/compat.h>
2929
#include <linux/mount.h>
30+
#include <linux/rw_hint.h>
3031

3132
#include <linux/poll.h>
3233
#include <asm/siginfo.h>
@@ -268,8 +269,15 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
268269
}
269270
#endif
270271

271-
static bool rw_hint_valid(enum rw_hint hint)
272+
static bool rw_hint_valid(u64 hint)
272273
{
274+
BUILD_BUG_ON(WRITE_LIFE_NOT_SET != RWH_WRITE_LIFE_NOT_SET);
275+
BUILD_BUG_ON(WRITE_LIFE_NONE != RWH_WRITE_LIFE_NONE);
276+
BUILD_BUG_ON(WRITE_LIFE_SHORT != RWH_WRITE_LIFE_SHORT);
277+
BUILD_BUG_ON(WRITE_LIFE_MEDIUM != RWH_WRITE_LIFE_MEDIUM);
278+
BUILD_BUG_ON(WRITE_LIFE_LONG != RWH_WRITE_LIFE_LONG);
279+
BUILD_BUG_ON(WRITE_LIFE_EXTREME != RWH_WRITE_LIFE_EXTREME);
280+
273281
switch (hint) {
274282
case RWH_WRITE_LIFE_NOT_SET:
275283
case RWH_WRITE_LIFE_NONE:
@@ -283,34 +291,40 @@ static bool rw_hint_valid(enum rw_hint hint)
283291
}
284292
}
285293

286-
static long fcntl_rw_hint(struct file *file, unsigned int cmd,
287-
unsigned long arg)
294+
static long fcntl_get_rw_hint(struct file *file, unsigned int cmd,
295+
unsigned long arg)
288296
{
289297
struct inode *inode = file_inode(file);
290298
u64 __user *argp = (u64 __user *)arg;
291-
enum rw_hint hint;
292-
u64 h;
299+
u64 hint = READ_ONCE(inode->i_write_hint);
293300

294-
switch (cmd) {
295-
case F_GET_RW_HINT:
296-
h = inode->i_write_hint;
297-
if (copy_to_user(argp, &h, sizeof(*argp)))
298-
return -EFAULT;
299-
return 0;
300-
case F_SET_RW_HINT:
301-
if (copy_from_user(&h, argp, sizeof(h)))
302-
return -EFAULT;
303-
hint = (enum rw_hint) h;
304-
if (!rw_hint_valid(hint))
305-
return -EINVAL;
301+
if (copy_to_user(argp, &hint, sizeof(*argp)))
302+
return -EFAULT;
303+
return 0;
304+
}
306305

307-
inode_lock(inode);
308-
inode->i_write_hint = hint;
309-
inode_unlock(inode);
310-
return 0;
311-
default:
306+
static long fcntl_set_rw_hint(struct file *file, unsigned int cmd,
307+
unsigned long arg)
308+
{
309+
struct inode *inode = file_inode(file);
310+
u64 __user *argp = (u64 __user *)arg;
311+
u64 hint;
312+
313+
if (copy_from_user(&hint, argp, sizeof(hint)))
314+
return -EFAULT;
315+
if (!rw_hint_valid(hint))
312316
return -EINVAL;
313-
}
317+
318+
WRITE_ONCE(inode->i_write_hint, hint);
319+
320+
/*
321+
* file->f_mapping->host may differ from inode. As an example,
322+
* blkdev_open() modifies file->f_mapping.
323+
*/
324+
if (file->f_mapping->host != inode)
325+
WRITE_ONCE(file->f_mapping->host->i_write_hint, hint);
326+
327+
return 0;
314328
}
315329

316330
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
@@ -416,8 +430,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
416430
err = memfd_fcntl(filp, cmd, argi);
417431
break;
418432
case F_GET_RW_HINT:
433+
err = fcntl_get_rw_hint(filp, cmd, arg);
434+
break;
419435
case F_SET_RW_HINT:
420-
err = fcntl_rw_hint(filp, cmd, arg);
436+
err = fcntl_set_rw_hint(filp, cmd, arg);
421437
break;
422438
default:
423439
break;

0 commit comments

Comments
 (0)