Commit 7deea56

Merge tag 'block-6.15-20250424' of git://git.kernel.dk/linux
Pull block fixes from Jens Axboe:

 - Fix autoloading of drivers from stat*(2)

 - Fix losing read-ahead setting on suspend/resume, when a device is
   re-probed.

 - Fix race between setting the block size and page cache updates.
   Includes a helper that a coming XFS fix will use as well.

 - ublk cancelation fixes.

 - ublk selftest additions and fixes.

 - NVMe pull via Christoph:
     - fix an out-of-bounds access in nvmet_enable_port
       (Richard Weinberger)

* tag 'block-6.15-20250424' of git://git.kernel.dk/linux:
  ublk: fix race between io_uring_cmd_complete_in_task and ublk_cancel_cmd
  ublk: call ublk_dispatch_req() for handling UBLK_U_IO_NEED_GET_DATA
  block: don't autoload drivers on blk-cgroup configuration
  block: don't autoload drivers on stat
  block: remove the backing_inode variable in bdev_statx
  block: move blkdev_{get,put}_no_open prototypes out of blkdev.h
  block: never reduce ra_pages in blk_apply_bdi_limits
  selftests: ublk: common: fix _get_disk_dev_t for pre-9.0 coreutils
  selftests: ublk: remove useless 'delay_us' from 'struct dev_ctx'
  selftests: ublk: fix recover test
  block: hoist block size validation code to a separate function
  block: fix race between set_blocksize and read paths
  nvmet: fix out-of-bounds access in nvmet_enable_port
2 parents: 0537fbb + f40139f

File tree: 14 files changed, +121 / −47 lines


block/bdev.c

51 additions, 16 deletions

@@ -152,27 +152,65 @@ static void set_init_blocksize(struct block_device *bdev)
 			get_order(bsize));
 }
 
-int set_blocksize(struct file *file, int size)
+/**
+ * bdev_validate_blocksize - check that this block size is acceptable
+ * @bdev: blockdevice to check
+ * @block_size: block size to check
+ *
+ * For block device users that do not use buffer heads or the block device
+ * page cache, make sure that this block size can be used with the device.
+ *
+ * Return: On success zero is returned, negative error code on failure.
+ */
+int bdev_validate_blocksize(struct block_device *bdev, int block_size)
 {
-	struct inode *inode = file->f_mapping->host;
-	struct block_device *bdev = I_BDEV(inode);
-
-	if (blk_validate_block_size(size))
+	if (blk_validate_block_size(block_size))
 		return -EINVAL;
 
 	/* Size cannot be smaller than the size supported by the device */
-	if (size < bdev_logical_block_size(bdev))
+	if (block_size < bdev_logical_block_size(bdev))
 		return -EINVAL;
 
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bdev_validate_blocksize);
+
+int set_blocksize(struct file *file, int size)
+{
+	struct inode *inode = file->f_mapping->host;
+	struct block_device *bdev = I_BDEV(inode);
+	int ret;
+
+	ret = bdev_validate_blocksize(bdev, size);
+	if (ret)
+		return ret;
+
 	if (!file->private_data)
 		return -EINVAL;
 
 	/* Don't change the size if it is same as current */
 	if (inode->i_blkbits != blksize_bits(size)) {
+		/*
+		 * Flush and truncate the pagecache before we reconfigure the
+		 * mapping geometry because folio sizes are variable now. If a
+		 * reader has already allocated a folio whose size is smaller
+		 * than the new min_order but invokes readahead after the new
+		 * min_order becomes visible, readahead will think there are
+		 * "zero" blocks per folio and crash. Take the inode and
+		 * invalidation locks to avoid racing with
+		 * read/write/fallocate.
+		 */
+		inode_lock(inode);
+		filemap_invalidate_lock(inode->i_mapping);
+
 		sync_blockdev(bdev);
+		kill_bdev(bdev);
+
 		inode->i_blkbits = blksize_bits(size);
 		mapping_set_folio_min_order(inode->i_mapping, get_order(size));
 		kill_bdev(bdev);
+		filemap_invalidate_unlock(inode->i_mapping);
+		inode_unlock(inode);
 	}
 	return 0;
 }

@@ -777,13 +815,13 @@ static void blkdev_put_part(struct block_device *part)
 	blkdev_put_whole(whole);
 }
 
-struct block_device *blkdev_get_no_open(dev_t dev)
+struct block_device *blkdev_get_no_open(dev_t dev, bool autoload)
 {
 	struct block_device *bdev;
 	struct inode *inode;
 
 	inode = ilookup(blockdev_superblock, dev);
-	if (!inode && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) {
+	if (!inode && autoload && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) {
 		blk_request_module(dev);
 		inode = ilookup(blockdev_superblock, dev);
 		if (inode)

@@ -1005,7 +1043,7 @@ struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
 	if (ret)
 		return ERR_PTR(ret);
 
-	bdev = blkdev_get_no_open(dev);
+	bdev = blkdev_get_no_open(dev, true);
 	if (!bdev)
 		return ERR_PTR(-ENXIO);
 

@@ -1274,18 +1312,15 @@ void sync_bdevs(bool wait)
  */
 void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask)
 {
-	struct inode *backing_inode;
 	struct block_device *bdev;
 
-	backing_inode = d_backing_inode(path->dentry);
-
 	/*
-	 * Note that backing_inode is the inode of a block device node file,
-	 * not the block device's internal inode. Therefore it is *not* valid
-	 * to use I_BDEV() here; the block device has to be looked up by i_rdev
+	 * Note that d_backing_inode() returns the block device node inode, not
+	 * the block device's internal inode. Therefore it is *not* valid to
+	 * use I_BDEV() here; the block device has to be looked up by i_rdev
 	 * instead.
 	 */
-	bdev = blkdev_get_no_open(backing_inode->i_rdev);
+	bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev, false);
 	if (!bdev)
 		return;
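
The exported bdev_validate_blocksize() can now be called by code outside the set_blocksize() path; per the pull summary, a coming XFS fix will use it. A kernel-style caller sketch under that assumption — the function name is invented for illustration, and this is not compilable outside a kernel tree:

#include <linux/blkdev.h>

/* hypothetical_check_dev_blocksize - illustration only, not a real caller */
static int hypothetical_check_dev_blocksize(struct block_device *bdev,
					    unsigned int sectorsize)
{
	int error;

	/*
	 * Rejects sizes the device cannot serve (too small for the
	 * logical block size, or invalid per blk_validate_block_size())
	 * without touching the bdev pagecache, unlike set_blocksize().
	 */
	error = bdev_validate_blocksize(bdev, sectorsize);
	if (error)
		return error;

	/* Safe to issue I/O in sectorsize units from here on. */
	return 0;
}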

block/blk-cgroup.c

1 addition, 1 deletion

@@ -797,7 +797,7 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
 		return -EINVAL;
 	input = skip_spaces(input);
 
-	bdev = blkdev_get_no_open(MKDEV(major, minor));
+	bdev = blkdev_get_no_open(MKDEV(major, minor), false);
 	if (!bdev)
 		return -ENODEV;
 	if (bdev_is_partition(bdev)) {

block/blk-settings.c

7 additions, 1 deletion

@@ -61,8 +61,14 @@ void blk_apply_bdi_limits(struct backing_dev_info *bdi,
 	/*
 	 * For read-ahead of large files to be effective, we need to read ahead
 	 * at least twice the optimal I/O size.
+	 *
+	 * There is no hardware limitation for the read-ahead size and the user
+	 * might have increased the read-ahead size through sysfs, so don't ever
+	 * decrease it.
 	 */
-	bdi->ra_pages = max(lim->io_opt * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
+	bdi->ra_pages = max3(bdi->ra_pages,
+			lim->io_opt * 2 / PAGE_SIZE,
+			VM_READAHEAD_PAGES);
 	bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT;
 }
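
The effect of switching from max() to max3() is that a user-tuned read-ahead value survives re-applying the queue limits, e.g. after a resume-time re-probe. A self-contained userspace model of the change, with invented example values:

#include <stdio.h>

#define VM_READAHEAD_PAGES 32UL	/* assumed default, for illustration */

static unsigned long max2(unsigned long a, unsigned long b)
{
	return a > b ? a : b;
}

static unsigned long max3(unsigned long a, unsigned long b, unsigned long c)
{
	return max2(a, max2(b, c));
}

int main(void)
{
	unsigned long ra_pages = 2048;	/* user raised read-ahead via sysfs */
	unsigned long opt = 64;		/* stands in for lim->io_opt * 2 / PAGE_SIZE */

	/* Old behavior: the tuned value is lost on every limits update. */
	unsigned long old_ra = max2(opt, VM_READAHEAD_PAGES);
	/* New behavior: the current value participates, so it never shrinks. */
	unsigned long new_ra = max3(ra_pages, opt, VM_READAHEAD_PAGES);

	printf("old=%lu new=%lu\n", old_ra, new_ra);	/* old=64 new=2048 */
	return 0;
}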

block/blk-zoned.c

4 additions, 1 deletion

@@ -343,6 +343,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
 		op = REQ_OP_ZONE_RESET;
 
 		/* Invalidate the page cache, including dirty pages. */
+		inode_lock(bdev->bd_mapping->host);
 		filemap_invalidate_lock(bdev->bd_mapping);
 		ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
 		if (ret)
@@ -364,8 +365,10 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
 	ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors);
 
 fail:
-	if (cmd == BLKRESETZONE)
+	if (cmd == BLKRESETZONE) {
 		filemap_invalidate_unlock(bdev->bd_mapping);
+		inode_unlock(bdev->bd_mapping->host);
+	}
 
 	return ret;
 }

block/blk.h

3 additions, 0 deletions

@@ -94,6 +94,9 @@ static inline void blk_wait_io(struct completion *done)
 	wait_for_completion_io(done);
 }
 
+struct block_device *blkdev_get_no_open(dev_t dev, bool autoload);
+void blkdev_put_no_open(struct block_device *bdev);
+
 #define BIO_INLINE_VECS 4
 struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
 		gfp_t gfp_mask);

block/fops.c

17 additions, 1 deletion

@@ -642,7 +642,7 @@ static int blkdev_open(struct inode *inode, struct file *filp)
 	if (ret)
 		return ret;
 
-	bdev = blkdev_get_no_open(inode->i_rdev);
+	bdev = blkdev_get_no_open(inode->i_rdev, true);
 	if (!bdev)
 		return -ENXIO;
 
@@ -746,7 +746,14 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		ret = direct_write_fallback(iocb, from, ret,
 				blkdev_buffered_write(iocb, from));
 	} else {
+		/*
+		 * Take i_rwsem and invalidate_lock to avoid racing with
+		 * set_blocksize changing i_blkbits/folio order and punching
+		 * out the pagecache.
+		 */
+		inode_lock_shared(bd_inode);
 		ret = blkdev_buffered_write(iocb, from);
+		inode_unlock_shared(bd_inode);
 	}
 
 	if (ret > 0)
@@ -757,6 +764,7 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
 
 static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 {
+	struct inode *bd_inode = bdev_file_inode(iocb->ki_filp);
 	struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
 	loff_t size = bdev_nr_bytes(bdev);
 	loff_t pos = iocb->ki_pos;
@@ -793,7 +801,13 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
 		goto reexpand;
 	}
 
+	/*
+	 * Take i_rwsem and invalidate_lock to avoid racing with set_blocksize
+	 * changing i_blkbits/folio order and punching out the pagecache.
+	 */
+	inode_lock_shared(bd_inode);
 	ret = filemap_read(iocb, to, ret);
+	inode_unlock_shared(bd_inode);
 
 reexpand:
 	if (unlikely(shorted))
@@ -836,6 +850,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
 	if ((start | len) & (bdev_logical_block_size(bdev) - 1))
 		return -EINVAL;
 
+	inode_lock(inode);
 	filemap_invalidate_lock(inode->i_mapping);
 
 	/*
@@ -868,6 +883,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
 
 fail:
 	filemap_invalidate_unlock(inode->i_mapping);
+	inode_unlock(inode);
 	return error;
 }
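
The same i_rwsem convention also guards fallocate above and the truncating ioctls below: geometry changers take the lock exclusively, buffered readers and writers take it shared. A toy userspace model of that convention — a pthread rwlock standing in for i_rwsem, all names invented, kernel behavior only approximated (build with cc -pthread):

#include <pthread.h>
#include <stdio.h>

/* Toy model: the set_blocksize path takes the lock exclusively, buffered
 * readers take it shared, so no reader can observe a half-updated
 * (blkbits, folio-order) pair. */
static pthread_rwlock_t i_rwsem = PTHREAD_RWLOCK_INITIALIZER;
static unsigned int blkbits = 9;	/* 512-byte blocks */
static unsigned int folio_min_order;	/* 0 = single page */

static void *set_blocksize_path(void *arg)
{
	(void)arg;
	pthread_rwlock_wrlock(&i_rwsem);	/* inode_lock() */
	/* pagecache flush + truncate would happen here */
	blkbits = 12;				/* 4096-byte blocks */
	folio_min_order = 0;
	pthread_rwlock_unlock(&i_rwsem);	/* inode_unlock() */
	return NULL;
}

static void *buffered_read_path(void *arg)
{
	(void)arg;
	pthread_rwlock_rdlock(&i_rwsem);	/* inode_lock_shared() */
	printf("reader sees blkbits=%u order=%u\n", blkbits, folio_min_order);
	pthread_rwlock_unlock(&i_rwsem);	/* inode_unlock_shared() */
	return NULL;
}

int main(void)
{
	pthread_t w, r;

	pthread_create(&w, NULL, set_blocksize_path, NULL);
	pthread_create(&r, NULL, buffered_read_path, NULL);
	pthread_join(w, NULL);
	pthread_join(r, NULL);
	return 0;
}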

block/ioctl.c

6 additions, 0 deletions

@@ -142,6 +142,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
 	if (err)
 		return err;
 
+	inode_lock(bdev->bd_mapping->host);
 	filemap_invalidate_lock(bdev->bd_mapping);
 	err = truncate_bdev_range(bdev, mode, start, start + len - 1);
 	if (err)
@@ -174,6 +175,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
 	blk_finish_plug(&plug);
 fail:
 	filemap_invalidate_unlock(bdev->bd_mapping);
+	inode_unlock(bdev->bd_mapping->host);
 	return err;
 }
 
@@ -199,12 +201,14 @@ static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode,
 	    end > bdev_nr_bytes(bdev))
 		return -EINVAL;
 
+	inode_lock(bdev->bd_mapping->host);
 	filemap_invalidate_lock(bdev->bd_mapping);
 	err = truncate_bdev_range(bdev, mode, start, end - 1);
 	if (!err)
 		err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9,
 						GFP_KERNEL);
 	filemap_invalidate_unlock(bdev->bd_mapping);
+	inode_unlock(bdev->bd_mapping->host);
 	return err;
 }
 
@@ -236,6 +240,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
 		return -EINVAL;
 
 	/* Invalidate the page cache, including dirty pages */
+	inode_lock(bdev->bd_mapping->host);
 	filemap_invalidate_lock(bdev->bd_mapping);
 	err = truncate_bdev_range(bdev, mode, start, end);
 	if (err)
@@ -246,6 +251,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
 
 fail:
 	filemap_invalidate_unlock(bdev->bd_mapping);
+	inode_unlock(bdev->bd_mapping->host);
 	return err;
 }

drivers/block/ublk_drv.c

24 additions, 17 deletions

@@ -1683,14 +1683,31 @@ static void ublk_start_cancel(struct ublk_queue *ubq)
 	ublk_put_disk(disk);
 }
 
-static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io,
+static void ublk_cancel_cmd(struct ublk_queue *ubq, unsigned tag,
 		unsigned int issue_flags)
 {
+	struct ublk_io *io = &ubq->ios[tag];
+	struct ublk_device *ub = ubq->dev;
+	struct request *req;
 	bool done;
 
 	if (!(io->flags & UBLK_IO_FLAG_ACTIVE))
 		return;
 
+	/*
+	 * Don't try to cancel this command if the request is started for
+	 * avoiding race between io_uring_cmd_done() and
+	 * io_uring_cmd_complete_in_task().
+	 *
+	 * Either the started request will be aborted via __ublk_abort_rq(),
+	 * then this uring_cmd is canceled next time, or it will be done in
+	 * task work function ublk_dispatch_req() because io_uring guarantees
+	 * that ublk_dispatch_req() is always called
+	 */
+	req = blk_mq_tag_to_rq(ub->tag_set.tags[ubq->q_id], tag);
+	if (req && blk_mq_request_started(req))
+		return;
+
 	spin_lock(&ubq->cancel_lock);
 	done = !!(io->flags & UBLK_IO_FLAG_CANCELED);
 	if (!done)
@@ -1722,7 +1739,6 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
 	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
 	struct ublk_queue *ubq = pdu->ubq;
 	struct task_struct *task;
-	struct ublk_io *io;
 
 	if (WARN_ON_ONCE(!ubq))
 		return;
@@ -1737,9 +1753,8 @@ static void ublk_uring_cmd_cancel_fn(struct io_uring_cmd *cmd,
 	if (!ubq->canceling)
 		ublk_start_cancel(ubq);
 
-	io = &ubq->ios[pdu->tag];
-	WARN_ON_ONCE(io->cmd != cmd);
-	ublk_cancel_cmd(ubq, io, issue_flags);
+	WARN_ON_ONCE(ubq->ios[pdu->tag].cmd != cmd);
+	ublk_cancel_cmd(ubq, pdu->tag, issue_flags);
 }
 
 static inline bool ublk_queue_ready(struct ublk_queue *ubq)
@@ -1752,7 +1767,7 @@ static void ublk_cancel_queue(struct ublk_queue *ubq)
 	int i;
 
 	for (i = 0; i < ubq->q_depth; i++)
-		ublk_cancel_cmd(ubq, &ubq->ios[i], IO_URING_F_UNLOCKED);
+		ublk_cancel_cmd(ubq, i, IO_URING_F_UNLOCKED);
 }
 
 /* Cancel all pending commands, must be called after del_gendisk() returns */
@@ -1886,15 +1901,6 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
 	}
 }
 
-static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
-		int tag)
-{
-	struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
-	struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
-
-	ublk_queue_cmd(ubq, req);
-}
-
 static inline int ublk_check_cmd_op(u32 cmd_op)
 {
 	u32 ioc_type = _IOC_TYPE(cmd_op);
@@ -2103,8 +2109,9 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
 		if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
 			goto out;
 		ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
-		ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag);
-		break;
+		req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
+		ublk_dispatch_req(ubq, req, issue_flags);
+		return -EIOCBQUEUED;
 	default:
 		goto out;
 	}
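
The rule described in the new comment can be read as single-winner completion: once the request has been started, the dispatch task-work path owns the uring_cmd completion and the cancel path must back off. A toy userspace sketch of that invariant — names invented, modeling the ownership rule rather than the driver:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_cmd {
	atomic_bool started;	/* stands in for blk_mq_request_started() */
	atomic_bool completed;	/* a uring_cmd may complete exactly once */
};

/* cancel path: back off once dispatch has started the request */
static bool toy_cancel(struct toy_cmd *c)
{
	if (atomic_load(&c->started))
		return false;
	/* atomic_exchange() guarantees only one side wins the completion */
	return !atomic_exchange(&c->completed, true);
}

/* task-work dispatch path: starts the request, then completes it */
static bool toy_dispatch(struct toy_cmd *c)
{
	atomic_store(&c->started, true);
	return !atomic_exchange(&c->completed, true);
}

int main(void)
{
	struct toy_cmd c = { false, false };

	printf("dispatch completed: %d\n", toy_dispatch(&c));	/* 1 */
	printf("cancel completed:   %d\n", toy_cancel(&c));	/* 0: backs off */
	return 0;
}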

drivers/nvme/target/core.c

3 additions, 0 deletions

@@ -324,6 +324,9 @@ int nvmet_enable_port(struct nvmet_port *port)
 
 	lockdep_assert_held(&nvmet_config_sem);
 
+	if (port->disc_addr.trtype == NVMF_TRTYPE_MAX)
+		return -EINVAL;
+
 	ops = nvmet_transports[port->disc_addr.trtype];
 	if (!ops) {
 		up_write(&nvmet_config_sem);
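
nvmet_transports is an array indexed by transport type, so a trtype equal to the array-size sentinel would read one slot past the end; that is what the added check rejects. A minimal userspace illustration of the bug class — enum names invented, and the sketch uses >= for the general pattern where the kernel fix tests the sentinel value specifically:

#include <stdio.h>

enum toy_trtype { TOY_RDMA, TOY_FC, TOY_TCP, TOY_TRTYPE_MAX };

/* sized by the sentinel: valid indexes are 0 .. TOY_TRTYPE_MAX - 1 */
static const char *toy_transports[TOY_TRTYPE_MAX] = { "rdma", "fc", "tcp" };

static const char *toy_lookup(unsigned int trtype)
{
	if (trtype >= TOY_TRTYPE_MAX)	/* the added bounds check, in spirit */
		return NULL;
	return toy_transports[trtype];	/* without the check, trtype ==
					 * TOY_TRTYPE_MAX reads past the end */
}

int main(void)
{
	const char *ops = toy_lookup(TOY_TRTYPE_MAX);

	printf("%s\n", ops ? ops : "rejected: -EINVAL");
	return 0;
}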
