Skip to content

Commit 86835c3

Browse files
committed
Merge tag 'vfs-6.9.rw_hint' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs
Pull write hint fix from Christian Brauner:

UFS devices are widely used in mobile applications, e.g. in smartphones. UFS vendors need data lifetime information to achieve good performance. Providing data lifetime information to UFS devices can result in up to 40% lower write amplification. Hence this patch series that restores the bi_write_hint member in struct bio. After this patch series has been merged, patches that implement data lifetime support in the SCSI disk (sd) driver will be sent to the Linux kernel SCSI maintainer.

The following changes are included in this patch series:
- Improvements for the F_GET_RW_HINT and F_SET_RW_HINT fcntls.
- Move enum rw_hint into a new header file.
- Support F_SET_RW_HINT for block devices to make it easy to test data lifetime support.
- Restore the bio.bi_write_hint member and restore support in the VFS layer and also in the block layer for data lifetime information.

The shell script that has been used to test the patch series combined with the SCSI patches is available at the end of this cover letter.

* tag 'vfs-6.9.rw_hint' of gitolite.kernel.org:pub/scm/linux/kernel/git/vfs/vfs:
  block, fs: Restore the per-bio/request data lifetime fields
  fs: Propagate write hints to the struct block_device inode
  fs: Move enum rw_hint into a new header file
  fs: Split fcntl_rw_hint()
  fs: Verify write lifetime constants at compile time
  fs: Fix rw_hint validation

Signed-off-by: Christian Brauner <brauner@kernel.org>
2 parents dcd04ea + 4498135 commit 86835c3

File tree

18 files changed

+102
-42
lines changed

18 files changed

+102
-42
lines changed

block/bio.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table,
251251
bio->bi_opf = opf;
252252
bio->bi_flags = 0;
253253
bio->bi_ioprio = 0;
254+
bio->bi_write_hint = 0;
254255
bio->bi_status = 0;
255256
bio->bi_iter.bi_sector = 0;
256257
bio->bi_iter.bi_size = 0;
@@ -813,6 +814,7 @@ static int __bio_clone(struct bio *bio, struct bio *bio_src, gfp_t gfp)
813814
{
814815
bio_set_flag(bio, BIO_CLONED);
815816
bio->bi_ioprio = bio_src->bi_ioprio;
817+
bio->bi_write_hint = bio_src->bi_write_hint;
816818
bio->bi_iter = bio_src->bi_iter;
817819

818820
if (bio->bi_bdev) {

block/blk-crypto-fallback.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,7 @@ static struct bio *blk_crypto_fallback_clone_bio(struct bio *bio_src)
172172
if (bio_flagged(bio_src, BIO_REMAPPED))
173173
bio_set_flag(bio, BIO_REMAPPED);
174174
bio->bi_ioprio = bio_src->bi_ioprio;
175+
bio->bi_write_hint = bio_src->bi_write_hint;
175176
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
176177
bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
177178

block/blk-merge.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -810,6 +810,10 @@ static struct request *attempt_merge(struct request_queue *q,
810810
if (rq_data_dir(req) != rq_data_dir(next))
811811
return NULL;
812812

813+
/* Don't merge requests with different write hints. */
814+
if (req->write_hint != next->write_hint)
815+
return NULL;
816+
813817
if (req->ioprio != next->ioprio)
814818
return NULL;
815819

@@ -937,6 +941,10 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
937941
if (!bio_crypt_rq_ctx_compatible(rq, bio))
938942
return false;
939943

944+
/* Don't merge requests with different write hints. */
945+
if (rq->write_hint != bio->bi_write_hint)
946+
return false;
947+
940948
if (rq->ioprio != bio_prio(bio))
941949
return false;
942950

block/blk-mq.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2585,6 +2585,7 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio,
25852585
rq->cmd_flags |= REQ_FAILFAST_MASK;
25862586

25872587
rq->__sector = bio->bi_iter.bi_sector;
2588+
rq->write_hint = bio->bi_write_hint;
25882589
blk_rq_bio_prep(rq, bio, nr_segs);
25892590

25902591
/* This can't fail, since GFP_NOIO includes __GFP_DIRECT_RECLAIM. */
@@ -3185,6 +3186,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src,
31853186
}
31863187
rq->nr_phys_segments = rq_src->nr_phys_segments;
31873188
rq->ioprio = rq_src->ioprio;
3189+
rq->write_hint = rq_src->write_hint;
31883190

31893191
if (rq->bio && blk_crypto_rq_bio_prep(rq, rq->bio, gfp_mask) < 0)
31903192
goto free_and_out;

block/bounce.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ static struct bio *bounce_clone_bio(struct bio *bio_src)
169169
if (bio_flagged(bio_src, BIO_REMAPPED))
170170
bio_set_flag(bio, BIO_REMAPPED);
171171
bio->bi_ioprio = bio_src->bi_ioprio;
172+
bio->bi_write_hint = bio_src->bi_write_hint;
172173
bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector;
173174
bio->bi_iter.bi_size = bio_src->bi_iter.bi_size;
174175

block/fops.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
7373
bio_init(&bio, bdev, vecs, nr_pages, dio_bio_write_op(iocb));
7474
}
7575
bio.bi_iter.bi_sector = pos >> SECTOR_SHIFT;
76+
bio.bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
7677
bio.bi_ioprio = iocb->ki_ioprio;
7778

7879
ret = bio_iov_iter_get_pages(&bio, iter);
@@ -203,6 +204,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
203204

204205
for (;;) {
205206
bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
207+
bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
206208
bio->bi_private = dio;
207209
bio->bi_end_io = blkdev_bio_end_io;
208210
bio->bi_ioprio = iocb->ki_ioprio;
@@ -321,6 +323,7 @@ static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb,
321323
dio->flags = 0;
322324
dio->iocb = iocb;
323325
bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT;
326+
bio->bi_write_hint = file_inode(iocb->ki_filp)->i_write_hint;
324327
bio->bi_end_io = blkdev_bio_end_io_async;
325328
bio->bi_ioprio = iocb->ki_ioprio;
326329

fs/buffer.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@
5555

5656
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
5757
static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
58-
struct writeback_control *wbc);
58+
enum rw_hint hint, struct writeback_control *wbc);
5959

6060
#define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers)
6161

@@ -1889,7 +1889,8 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio,
18891889
do {
18901890
struct buffer_head *next = bh->b_this_page;
18911891
if (buffer_async_write(bh)) {
1892-
submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
1892+
submit_bh_wbc(REQ_OP_WRITE | write_flags, bh,
1893+
inode->i_write_hint, wbc);
18931894
nr_underway++;
18941895
}
18951896
bh = next;
@@ -1944,7 +1945,8 @@ int __block_write_full_folio(struct inode *inode, struct folio *folio,
19441945
struct buffer_head *next = bh->b_this_page;
19451946
if (buffer_async_write(bh)) {
19461947
clear_buffer_dirty(bh);
1947-
submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
1948+
submit_bh_wbc(REQ_OP_WRITE | write_flags, bh,
1949+
inode->i_write_hint, wbc);
19481950
nr_underway++;
19491951
}
19501952
bh = next;
@@ -2756,6 +2758,7 @@ static void end_bio_bh_io_sync(struct bio *bio)
27562758
}
27572759

27582760
static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
2761+
enum rw_hint write_hint,
27592762
struct writeback_control *wbc)
27602763
{
27612764
const enum req_op op = opf & REQ_OP_MASK;
@@ -2783,6 +2786,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
27832786
fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
27842787

27852788
bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
2789+
bio->bi_write_hint = write_hint;
27862790

27872791
__bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
27882792

@@ -2802,7 +2806,7 @@ static void submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh,
28022806

28032807
void submit_bh(blk_opf_t opf, struct buffer_head *bh)
28042808
{
2805-
submit_bh_wbc(opf, bh, NULL);
2809+
submit_bh_wbc(opf, bh, WRITE_LIFE_NOT_SET, NULL);
28062810
}
28072811
EXPORT_SYMBOL(submit_bh);
28082812

fs/direct-io.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,8 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
410410
bio->bi_end_io = dio_bio_end_io;
411411
if (dio->is_pinned)
412412
bio_set_flag(bio, BIO_PAGE_PINNED);
413+
bio->bi_write_hint = file_inode(dio->iocb->ki_filp)->i_write_hint;
414+
413415
sdio->bio = bio;
414416
sdio->logical_offset_in_bio = sdio->cur_page_fs_offset;
415417
}

fs/f2fs/f2fs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <linux/blkdev.h>
2525
#include <linux/quotaops.h>
2626
#include <linux/part_stat.h>
27+
#include <linux/rw_hint.h>
2728
#include <crypto/hash.h>
2829

2930
#include <linux/fscrypt.h>

fs/fcntl.c

Lines changed: 40 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include <linux/memfd.h>
2828
#include <linux/compat.h>
2929
#include <linux/mount.h>
30+
#include <linux/rw_hint.h>
3031

3132
#include <linux/poll.h>
3233
#include <asm/siginfo.h>
@@ -268,8 +269,15 @@ static int f_getowner_uids(struct file *filp, unsigned long arg)
268269
}
269270
#endif
270271

271-
static bool rw_hint_valid(enum rw_hint hint)
272+
static bool rw_hint_valid(u64 hint)
272273
{
274+
BUILD_BUG_ON(WRITE_LIFE_NOT_SET != RWH_WRITE_LIFE_NOT_SET);
275+
BUILD_BUG_ON(WRITE_LIFE_NONE != RWH_WRITE_LIFE_NONE);
276+
BUILD_BUG_ON(WRITE_LIFE_SHORT != RWH_WRITE_LIFE_SHORT);
277+
BUILD_BUG_ON(WRITE_LIFE_MEDIUM != RWH_WRITE_LIFE_MEDIUM);
278+
BUILD_BUG_ON(WRITE_LIFE_LONG != RWH_WRITE_LIFE_LONG);
279+
BUILD_BUG_ON(WRITE_LIFE_EXTREME != RWH_WRITE_LIFE_EXTREME);
280+
273281
switch (hint) {
274282
case RWH_WRITE_LIFE_NOT_SET:
275283
case RWH_WRITE_LIFE_NONE:
@@ -283,34 +291,40 @@ static bool rw_hint_valid(enum rw_hint hint)
283291
}
284292
}
285293

286-
static long fcntl_rw_hint(struct file *file, unsigned int cmd,
287-
unsigned long arg)
294+
static long fcntl_get_rw_hint(struct file *file, unsigned int cmd,
295+
unsigned long arg)
288296
{
289297
struct inode *inode = file_inode(file);
290298
u64 __user *argp = (u64 __user *)arg;
291-
enum rw_hint hint;
292-
u64 h;
299+
u64 hint = READ_ONCE(inode->i_write_hint);
293300

294-
switch (cmd) {
295-
case F_GET_RW_HINT:
296-
h = inode->i_write_hint;
297-
if (copy_to_user(argp, &h, sizeof(*argp)))
298-
return -EFAULT;
299-
return 0;
300-
case F_SET_RW_HINT:
301-
if (copy_from_user(&h, argp, sizeof(h)))
302-
return -EFAULT;
303-
hint = (enum rw_hint) h;
304-
if (!rw_hint_valid(hint))
305-
return -EINVAL;
301+
if (copy_to_user(argp, &hint, sizeof(*argp)))
302+
return -EFAULT;
303+
return 0;
304+
}
306305

307-
inode_lock(inode);
308-
inode->i_write_hint = hint;
309-
inode_unlock(inode);
310-
return 0;
311-
default:
306+
static long fcntl_set_rw_hint(struct file *file, unsigned int cmd,
307+
unsigned long arg)
308+
{
309+
struct inode *inode = file_inode(file);
310+
u64 __user *argp = (u64 __user *)arg;
311+
u64 hint;
312+
313+
if (copy_from_user(&hint, argp, sizeof(hint)))
314+
return -EFAULT;
315+
if (!rw_hint_valid(hint))
312316
return -EINVAL;
313-
}
317+
318+
WRITE_ONCE(inode->i_write_hint, hint);
319+
320+
/*
321+
* file->f_mapping->host may differ from inode. As an example,
322+
* blkdev_open() modifies file->f_mapping.
323+
*/
324+
if (file->f_mapping->host != inode)
325+
WRITE_ONCE(file->f_mapping->host->i_write_hint, hint);
326+
327+
return 0;
314328
}
315329

316330
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
@@ -416,8 +430,10 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
416430
err = memfd_fcntl(filp, cmd, argi);
417431
break;
418432
case F_GET_RW_HINT:
433+
err = fcntl_get_rw_hint(filp, cmd, arg);
434+
break;
419435
case F_SET_RW_HINT:
420-
err = fcntl_rw_hint(filp, cmd, arg);
436+
err = fcntl_set_rw_hint(filp, cmd, arg);
421437
break;
422438
default:
423439
break;

0 commit comments

Comments
 (0)