Skip to content

Commit 8098514

Browse files
keithbusch authored and axboe committed
block: always allocate integrity buffer when required
Many nvme metadata formats cannot strip or generate the metadata on the controller side. For these formats, a host-provided integrity buffer is mandatory even if it isn't checked.

The block integrity read_verify and write_generate attributes prevent allocating the metadata buffer, but we need it when the format requires it, otherwise reads and writes will be rejected by the driver with IO errors.

Assume the integrity buffer can be offloaded to the controller if the metadata size is the same as the protection information size. Otherwise provide an unchecked host buffer when the read verify or write generation attributes are disabled. This fixes the following nvme warning:

 ------------[ cut here ]------------
 WARNING: CPU: 1 PID: 371 at drivers/nvme/host/core.c:1036 nvme_setup_rw+0x122/0x210
 ...
 RIP: 0010:nvme_setup_rw+0x122/0x210
 ...
 Call Trace:
  <TASK>
  nvme_setup_cmd+0x1b4/0x280
  nvme_queue_rqs+0xc4/0x1f0 [nvme]
  blk_mq_dispatch_queue_requests+0x24a/0x430
  blk_mq_flush_plug_list+0x50/0x140
  __blk_flush_plug+0xc1/0x100
  __submit_bio+0x1c1/0x360
  ? submit_bio_noacct_nocheck+0x2d6/0x3c0
  submit_bio_noacct_nocheck+0x2d6/0x3c0
  ? submit_bio_noacct+0x47/0x4c0
  submit_bio_wait+0x48/0xa0
  __blkdev_direct_IO_simple+0xee/0x210
  ? current_time+0x1d/0x100
  ? current_time+0x1d/0x100
  ? __bio_clone+0xb0/0xb0
  blkdev_read_iter+0xbb/0x140
  vfs_read+0x239/0x310
  ksys_read+0x58/0xc0
  do_syscall_64+0x6c/0x180
  entry_SYSCALL_64_after_hwframe+0x4b/0x53

Signed-off-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20250509153802.3482493-1-kbusch@meta.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
1 parent dd90905 commit 8098514

File tree

1 file changed

+47
-15
lines changed

1 file changed

+47
-15
lines changed

block/bio-integrity-auto.c

Lines changed: 47 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
* not aware of PI.
1010
*/
1111
#include <linux/blk-integrity.h>
12+
#include <linux/t10-pi.h>
1213
#include <linux/workqueue.h>
1314
#include "blk.h"
1415

@@ -43,6 +44,29 @@ static void bio_integrity_verify_fn(struct work_struct *work)
4344
bio_endio(bio);
4445
}
4546

47+
#define BIP_CHECK_FLAGS (BIP_CHECK_GUARD | BIP_CHECK_REFTAG | BIP_CHECK_APPTAG)
48+
static bool bip_should_check(struct bio_integrity_payload *bip)
49+
{
50+
return bip->bip_flags & BIP_CHECK_FLAGS;
51+
}
52+
53+
static bool bi_offload_capable(struct blk_integrity *bi)
54+
{
55+
switch (bi->csum_type) {
56+
case BLK_INTEGRITY_CSUM_CRC64:
57+
return bi->tuple_size == sizeof(struct crc64_pi_tuple);
58+
case BLK_INTEGRITY_CSUM_CRC:
59+
case BLK_INTEGRITY_CSUM_IP:
60+
return bi->tuple_size == sizeof(struct t10_pi_tuple);
61+
default:
62+
pr_warn_once("%s: unknown integrity checksum type:%d\n",
63+
__func__, bi->csum_type);
64+
fallthrough;
65+
case BLK_INTEGRITY_CSUM_NONE:
66+
return false;
67+
}
68+
}
69+
4670
/**
4771
* __bio_integrity_endio - Integrity I/O completion function
4872
* @bio: Protected bio
@@ -54,12 +78,12 @@ static void bio_integrity_verify_fn(struct work_struct *work)
5478
*/
5579
bool __bio_integrity_endio(struct bio *bio)
5680
{
57-
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
5881
struct bio_integrity_payload *bip = bio_integrity(bio);
5982
struct bio_integrity_data *bid =
6083
container_of(bip, struct bio_integrity_data, bip);
6184

62-
if (bio_op(bio) == REQ_OP_READ && !bio->bi_status && bi->csum_type) {
85+
if (bio_op(bio) == REQ_OP_READ && !bio->bi_status &&
86+
bip_should_check(bip)) {
6387
INIT_WORK(&bid->work, bio_integrity_verify_fn);
6488
queue_work(kintegrityd_wq, &bid->work);
6589
return false;
@@ -84,6 +108,7 @@ bool bio_integrity_prep(struct bio *bio)
84108
{
85109
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
86110
struct bio_integrity_data *bid;
111+
bool set_flags = true;
87112
gfp_t gfp = GFP_NOIO;
88113
unsigned int len;
89114
void *buf;
@@ -100,19 +125,24 @@ bool bio_integrity_prep(struct bio *bio)
100125

101126
switch (bio_op(bio)) {
102127
case REQ_OP_READ:
103-
if (bi->flags & BLK_INTEGRITY_NOVERIFY)
104-
return true;
128+
if (bi->flags & BLK_INTEGRITY_NOVERIFY) {
129+
if (bi_offload_capable(bi))
130+
return true;
131+
set_flags = false;
132+
}
105133
break;
106134
case REQ_OP_WRITE:
107-
if (bi->flags & BLK_INTEGRITY_NOGENERATE)
108-
return true;
109-
110135
/*
111136
* Zero the memory allocated to not leak uninitialized kernel
112137
* memory to disk for non-integrity metadata where nothing else
113138
* initializes the memory.
114139
*/
115-
if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
140+
if (bi->flags & BLK_INTEGRITY_NOGENERATE) {
141+
if (bi_offload_capable(bi))
142+
return true;
143+
set_flags = false;
144+
gfp |= __GFP_ZERO;
145+
} else if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
116146
gfp |= __GFP_ZERO;
117147
break;
118148
default:
@@ -137,19 +167,21 @@ bool bio_integrity_prep(struct bio *bio)
137167
bid->bip.bip_flags |= BIP_BLOCK_INTEGRITY;
138168
bip_set_seed(&bid->bip, bio->bi_iter.bi_sector);
139169

140-
if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
141-
bid->bip.bip_flags |= BIP_IP_CHECKSUM;
142-
if (bi->csum_type)
143-
bid->bip.bip_flags |= BIP_CHECK_GUARD;
144-
if (bi->flags & BLK_INTEGRITY_REF_TAG)
145-
bid->bip.bip_flags |= BIP_CHECK_REFTAG;
170+
if (set_flags) {
171+
if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
172+
bid->bip.bip_flags |= BIP_IP_CHECKSUM;
173+
if (bi->csum_type)
174+
bid->bip.bip_flags |= BIP_CHECK_GUARD;
175+
if (bi->flags & BLK_INTEGRITY_REF_TAG)
176+
bid->bip.bip_flags |= BIP_CHECK_REFTAG;
177+
}
146178

147179
if (bio_integrity_add_page(bio, virt_to_page(buf), len,
148180
offset_in_page(buf)) < len)
149181
goto err_end_io;
150182

151183
/* Auto-generate integrity metadata if this is a write */
152-
if (bio_data_dir(bio) == WRITE)
184+
if (bio_data_dir(bio) == WRITE && bip_should_check(&bid->bip))
153185
blk_integrity_generate(bio);
154186
else
155187
bid->saved_bio_iter = bio->bi_iter;

0 commit comments

Comments
 (0)