Skip to content

Commit a97ccaa

Browse files
Merge patch series "Improve checks in blk_revalidate_disk_zones()"
Damien Le Moal <dlemoal@kernel.org> says: blk_revalidate_disk_zones() implements checks of the zones of a zoned block device, verifying that the zone size is a power of 2 number of sectors, that all zones (except possibly the last one) have the same size and that zones cover the entire addressing space of the device. While these checks are appropriate to verify that well tested hardware devices have an adequate zone configurations, they lack in certain areas which may result in issues with potentially buggy emulated devices implemented with user drivers such as ublk or tcmu. Specifically, this function does not check if the device driver indicated support for the mandatory zone append writes, that is, if the device max_zone_append_sectors queue limit is set to a non-zero value. Additionally, invalid zones such as a zero length zone with a start sector equal to the device capacity will not be detected and result in out of bounds use of the zone bitmaps prepared with the callback function blk_revalidate_zone_cb(). This series address these issues by modifying the 4 block device drivers that currently support zoned block devices to ensure that they all set a zoned device zone size and max zone append sectors limit before executing blk_revalidate_disk_zones(). With these changes in place, patch 5 improves blk_revalidate_disk_zones() to address the missing checks, relying on the fact that the zone size and zone append limit are normally set when this function is called. Link: https://lore.kernel.org/r/20230703024812.76778-1-dlemoal@kernel.org Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
2 parents 24befa9 + 03e51c4 commit a97ccaa

File tree

5 files changed

+79
-78
lines changed

5 files changed

+79
-78
lines changed

block/blk-zoned.c

Lines changed: 50 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -448,7 +448,6 @@ struct blk_revalidate_zone_args {
448448
unsigned long *conv_zones_bitmap;
449449
unsigned long *seq_zones_wlock;
450450
unsigned int nr_zones;
451-
sector_t zone_sectors;
452451
sector_t sector;
453452
};
454453

@@ -462,38 +461,34 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
462461
struct gendisk *disk = args->disk;
463462
struct request_queue *q = disk->queue;
464463
sector_t capacity = get_capacity(disk);
464+
sector_t zone_sectors = q->limits.chunk_sectors;
465+
466+
/* Check for bad zones and holes in the zone report */
467+
if (zone->start != args->sector) {
468+
pr_warn("%s: Zone gap at sectors %llu..%llu\n",
469+
disk->disk_name, args->sector, zone->start);
470+
return -ENODEV;
471+
}
472+
473+
if (zone->start >= capacity || !zone->len) {
474+
pr_warn("%s: Invalid zone start %llu, length %llu\n",
475+
disk->disk_name, zone->start, zone->len);
476+
return -ENODEV;
477+
}
465478

466479
/*
467480
* All zones must have the same size, with the exception on an eventual
468481
* smaller last zone.
469482
*/
470-
if (zone->start == 0) {
471-
if (zone->len == 0 || !is_power_of_2(zone->len)) {
472-
pr_warn("%s: Invalid zoned device with non power of two zone size (%llu)\n",
473-
disk->disk_name, zone->len);
474-
return -ENODEV;
475-
}
476-
477-
args->zone_sectors = zone->len;
478-
args->nr_zones = (capacity + zone->len - 1) >> ilog2(zone->len);
479-
} else if (zone->start + args->zone_sectors < capacity) {
480-
if (zone->len != args->zone_sectors) {
483+
if (zone->start + zone->len < capacity) {
484+
if (zone->len != zone_sectors) {
481485
pr_warn("%s: Invalid zoned device with non constant zone size\n",
482486
disk->disk_name);
483487
return -ENODEV;
484488
}
485-
} else {
486-
if (zone->len > args->zone_sectors) {
487-
pr_warn("%s: Invalid zoned device with larger last zone size\n",
488-
disk->disk_name);
489-
return -ENODEV;
490-
}
491-
}
492-
493-
/* Check for holes in the zone report */
494-
if (zone->start != args->sector) {
495-
pr_warn("%s: Zone gap at sectors %llu..%llu\n",
496-
disk->disk_name, args->sector, zone->start);
489+
} else if (zone->len > zone_sectors) {
490+
pr_warn("%s: Invalid zoned device with larger last zone size\n",
491+
disk->disk_name);
497492
return -ENODEV;
498493
}
499494

@@ -532,11 +527,13 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
532527
* @disk: Target disk
533528
* @update_driver_data: Callback to update driver data on the frozen disk
534529
*
535-
* Helper function for low-level device drivers to (re) allocate and initialize
536-
* a disk request queue zone bitmaps. This functions should normally be called
537-
* within the disk ->revalidate method for blk-mq based drivers. For BIO based
538-
* drivers only q->nr_zones needs to be updated so that the sysfs exposed value
539-
* is correct.
530+
* Helper function for low-level device drivers to check and (re) allocate and
531+
* initialize a disk request queue zone bitmaps. This functions should normally
532+
* be called within the disk ->revalidate method for blk-mq based drivers.
533+
* Before calling this function, the device driver must already have set the
534+
* device zone size (chunk_sector limit) and the max zone append limit.
535+
* For BIO based drivers, this function cannot be used. BIO based device drivers
536+
* only need to set disk->nr_zones so that the sysfs exposed value is correct.
540537
* If the @update_driver_data callback function is not NULL, the callback is
541538
* executed with the device request queue frozen after all zones have been
542539
* checked.
@@ -545,9 +542,9 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
545542
void (*update_driver_data)(struct gendisk *disk))
546543
{
547544
struct request_queue *q = disk->queue;
548-
struct blk_revalidate_zone_args args = {
549-
.disk = disk,
550-
};
545+
sector_t zone_sectors = q->limits.chunk_sectors;
546+
sector_t capacity = get_capacity(disk);
547+
struct blk_revalidate_zone_args args = { };
551548
unsigned int noio_flag;
552549
int ret;
553550

@@ -556,13 +553,31 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
556553
if (WARN_ON_ONCE(!queue_is_mq(q)))
557554
return -EIO;
558555

559-
if (!get_capacity(disk))
560-
return -EIO;
556+
if (!capacity)
557+
return -ENODEV;
558+
559+
/*
560+
* Checks that the device driver indicated a valid zone size and that
561+
* the max zone append limit is set.
562+
*/
563+
if (!zone_sectors || !is_power_of_2(zone_sectors)) {
564+
pr_warn("%s: Invalid non power of two zone size (%llu)\n",
565+
disk->disk_name, zone_sectors);
566+
return -ENODEV;
567+
}
568+
569+
if (!q->limits.max_zone_append_sectors) {
570+
pr_warn("%s: Invalid 0 maximum zone append limit\n",
571+
disk->disk_name);
572+
return -ENODEV;
573+
}
561574

562575
/*
563576
* Ensure that all memory allocations in this context are done as if
564577
* GFP_NOIO was specified.
565578
*/
579+
args.disk = disk;
580+
args.nr_zones = (capacity + zone_sectors - 1) >> ilog2(zone_sectors);
566581
noio_flag = memalloc_noio_save();
567582
ret = disk->fops->report_zones(disk, 0, UINT_MAX,
568583
blk_revalidate_zone_cb, &args);
@@ -576,7 +591,7 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
576591
* If zones where reported, make sure that the entire disk capacity
577592
* has been checked.
578593
*/
579-
if (ret > 0 && args.sector != get_capacity(disk)) {
594+
if (ret > 0 && args.sector != capacity) {
580595
pr_warn("%s: Missing zones from sector %llu\n",
581596
disk->disk_name, args.sector);
582597
ret = -ENODEV;
@@ -589,7 +604,6 @@ int blk_revalidate_disk_zones(struct gendisk *disk,
589604
*/
590605
blk_mq_freeze_queue(q);
591606
if (ret > 0) {
592-
blk_queue_chunk_sectors(q, args.zone_sectors);
593607
disk->nr_zones = args.nr_zones;
594608
swap(disk->seq_zones_wlock, args.seq_zones_wlock);
595609
swap(disk->conv_zones_bitmap, args.conv_zones_bitmap);

drivers/block/null_blk/zoned.c

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -162,21 +162,15 @@ int null_register_zoned_dev(struct nullb *nullb)
162162
disk_set_zoned(nullb->disk, BLK_ZONED_HM);
163163
blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
164164
blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
165-
166-
if (queue_is_mq(q)) {
167-
int ret = blk_revalidate_disk_zones(nullb->disk, NULL);
168-
169-
if (ret)
170-
return ret;
171-
} else {
172-
blk_queue_chunk_sectors(q, dev->zone_size_sects);
173-
nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0);
174-
}
175-
165+
blk_queue_chunk_sectors(q, dev->zone_size_sects);
166+
nullb->disk->nr_zones = bdev_nr_zones(nullb->disk->part0);
176167
blk_queue_max_zone_append_sectors(q, dev->zone_size_sects);
177168
disk_set_max_open_zones(nullb->disk, dev->zone_max_open);
178169
disk_set_max_active_zones(nullb->disk, dev->zone_max_active);
179170

171+
if (queue_is_mq(q))
172+
return blk_revalidate_disk_zones(nullb->disk, NULL);
173+
180174
return 0;
181175
}
182176

drivers/block/virtio_blk.c

Lines changed: 15 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -781,7 +781,6 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev,
781781
{
782782
u32 v, wg;
783783
u8 model;
784-
int ret;
785784

786785
virtio_cread(vdev, struct virtio_blk_config,
787786
zoned.model, &model);
@@ -836,6 +835,7 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev,
836835
vblk->zone_sectors);
837836
return -ENODEV;
838837
}
838+
blk_queue_chunk_sectors(q, vblk->zone_sectors);
839839
dev_dbg(&vdev->dev, "zone sectors = %u\n", vblk->zone_sectors);
840840

841841
if (virtio_has_feature(vdev, VIRTIO_BLK_F_DISCARD)) {
@@ -844,26 +844,22 @@ static int virtblk_probe_zoned_device(struct virtio_device *vdev,
844844
blk_queue_max_discard_sectors(q, 0);
845845
}
846846

847-
ret = blk_revalidate_disk_zones(vblk->disk, NULL);
848-
if (!ret) {
849-
virtio_cread(vdev, struct virtio_blk_config,
850-
zoned.max_append_sectors, &v);
851-
if (!v) {
852-
dev_warn(&vdev->dev, "zero max_append_sectors reported\n");
853-
return -ENODEV;
854-
}
855-
if ((v << SECTOR_SHIFT) < wg) {
856-
dev_err(&vdev->dev,
857-
"write granularity %u exceeds max_append_sectors %u limit\n",
858-
wg, v);
859-
return -ENODEV;
860-
}
861-
862-
blk_queue_max_zone_append_sectors(q, v);
863-
dev_dbg(&vdev->dev, "max append sectors = %u\n", v);
847+
virtio_cread(vdev, struct virtio_blk_config,
848+
zoned.max_append_sectors, &v);
849+
if (!v) {
850+
dev_warn(&vdev->dev, "zero max_append_sectors reported\n");
851+
return -ENODEV;
852+
}
853+
if ((v << SECTOR_SHIFT) < wg) {
854+
dev_err(&vdev->dev,
855+
"write granularity %u exceeds max_append_sectors %u limit\n",
856+
wg, v);
857+
return -ENODEV;
864858
}
859+
blk_queue_max_zone_append_sectors(q, v);
860+
dev_dbg(&vdev->dev, "max append sectors = %u\n", v);
865861

866-
return ret;
862+
return blk_revalidate_disk_zones(vblk->disk, NULL);
867863
}
868864

869865
#else

drivers/nvme/host/zns.c

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,11 @@
1010
int nvme_revalidate_zones(struct nvme_ns *ns)
1111
{
1212
struct request_queue *q = ns->queue;
13-
int ret;
1413

15-
ret = blk_revalidate_disk_zones(ns->disk, NULL);
16-
if (!ret)
17-
blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
18-
return ret;
14+
blk_queue_chunk_sectors(q, ns->zsze);
15+
blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
16+
17+
return blk_revalidate_disk_zones(ns->disk, NULL);
1918
}
2019

2120
static int nvme_set_max_append(struct nvme_ctrl *ctrl)

drivers/scsi/sd_zbc.c

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -831,7 +831,6 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
831831
struct request_queue *q = disk->queue;
832832
u32 zone_blocks = sdkp->early_zone_info.zone_blocks;
833833
unsigned int nr_zones = sdkp->early_zone_info.nr_zones;
834-
u32 max_append;
835834
int ret = 0;
836835
unsigned int flags;
837836

@@ -876,6 +875,11 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
876875
goto unlock;
877876
}
878877

878+
blk_queue_chunk_sectors(q,
879+
logical_to_sectors(sdkp->device, zone_blocks));
880+
blk_queue_max_zone_append_sectors(q,
881+
q->limits.max_segments << PAGE_SECTORS_SHIFT);
882+
879883
ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);
880884

881885
memalloc_noio_restore(flags);
@@ -888,12 +892,6 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
888892
goto unlock;
889893
}
890894

891-
max_append = min_t(u32, logical_to_sectors(sdkp->device, zone_blocks),
892-
q->limits.max_segments << PAGE_SECTORS_SHIFT);
893-
max_append = min_t(u32, max_append, queue_max_hw_sectors(q));
894-
895-
blk_queue_max_zone_append_sectors(q, max_append);
896-
897895
sd_zbc_print_zones(sdkp);
898896

899897
unlock:

0 commit comments

Comments
 (0)