Skip to content

Commit 5ced58b

Browse files
mikechristiemstsirkin
authored andcommitted
vhost-scsi: Fix alignment handling with windows
The linux block layer requires bios/requests to have lengths with a 512 byte alignment. Some drivers/layers like dm-crypt and the directi IO code will test for it and just fail. Other drivers like SCSI just assume the requirement is met and will end up in infinte retry loops. The problem for drivers like SCSI is that it uses functions like blk_rq_cur_sectors and blk_rq_sectors which divide the request's length by 512. If there's lefovers then it just gets dropped. But other code in the block/scsi layer may use blk_rq_bytes/blk_rq_cur_bytes and end up thinking there is still data left and try to retry the cmd. We can then end up getting stuck in retry loops where part of the block/scsi thinks there is data left, but other parts think we want to do IOs of zero length. Linux will always check for alignment, but windows will not. When vhost-scsi then translates the iovec it gets from a windows guest to a scatterlist, we can end up with sg items where the sg->length is not divisible by 512 due to the misaligned offset: sg[0].offset = 255; sg[0].length = 3841; sg... sg[N].offset = 0; sg[N].length = 255; When the lio backends then convert the SG to bios or other iovecs, we end up sending them with the same misaligned values and can hit the issues above. This just has us drop down to allocating a temp page and copying the data when we detect a misaligned buffer and the IO is large enough that it will get split into multiple bad IOs. Signed-off-by: Mike Christie <michael.christie@oracle.com> Message-Id: <20230709202859.138387-2-michael.christie@oracle.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com> Acked-by: Stefan Hajnoczi <stefanha@redhat.com>
1 parent 9ad1a29 commit 5ced58b

File tree

1 file changed

+161
-25
lines changed

1 file changed

+161
-25
lines changed

drivers/vhost/scsi.c

Lines changed: 161 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
#include <linux/fs.h>
2626
#include <linux/vmalloc.h>
2727
#include <linux/miscdevice.h>
28+
#include <linux/blk_types.h>
29+
#include <linux/bio.h>
2830
#include <asm/unaligned.h>
2931
#include <scsi/scsi_common.h>
3032
#include <scsi/scsi_proto.h>
@@ -75,6 +77,9 @@ struct vhost_scsi_cmd {
7577
u32 tvc_prot_sgl_count;
7678
/* Saved unpacked SCSI LUN for vhost_scsi_target_queue_cmd() */
7779
u32 tvc_lun;
80+
u32 copied_iov:1;
81+
const void *saved_iter_addr;
82+
struct iov_iter saved_iter;
7883
/* Pointer to the SGL formatted memory from virtio-scsi */
7984
struct scatterlist *tvc_sgl;
8085
struct scatterlist *tvc_prot_sgl;
@@ -328,8 +333,13 @@ static void vhost_scsi_release_cmd_res(struct se_cmd *se_cmd)
328333
int i;
329334

330335
if (tv_cmd->tvc_sgl_count) {
331-
for (i = 0; i < tv_cmd->tvc_sgl_count; i++)
332-
put_page(sg_page(&tv_cmd->tvc_sgl[i]));
336+
for (i = 0; i < tv_cmd->tvc_sgl_count; i++) {
337+
if (tv_cmd->copied_iov)
338+
__free_page(sg_page(&tv_cmd->tvc_sgl[i]));
339+
else
340+
put_page(sg_page(&tv_cmd->tvc_sgl[i]));
341+
}
342+
kfree(tv_cmd->saved_iter_addr);
333343
}
334344
if (tv_cmd->tvc_prot_sgl_count) {
335345
for (i = 0; i < tv_cmd->tvc_prot_sgl_count; i++)
@@ -504,6 +514,28 @@ static void vhost_scsi_evt_work(struct vhost_work *work)
504514
mutex_unlock(&vq->mutex);
505515
}
506516

517+
static int vhost_scsi_copy_sgl_to_iov(struct vhost_scsi_cmd *cmd)
518+
{
519+
struct iov_iter *iter = &cmd->saved_iter;
520+
struct scatterlist *sg = cmd->tvc_sgl;
521+
struct page *page;
522+
size_t len;
523+
int i;
524+
525+
for (i = 0; i < cmd->tvc_sgl_count; i++) {
526+
page = sg_page(&sg[i]);
527+
len = sg[i].length;
528+
529+
if (copy_page_to_iter(page, 0, len, iter) != len) {
530+
pr_err("Could not copy data while handling misaligned cmd. Error %zu\n",
531+
len);
532+
return -1;
533+
}
534+
}
535+
536+
return 0;
537+
}
538+
507539
/* Fill in status and signal that we are done processing this command
508540
*
509541
* This is scheduled in the vhost work queue so we are called with the owner
@@ -527,15 +559,20 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
527559

528560
pr_debug("%s tv_cmd %p resid %u status %#02x\n", __func__,
529561
cmd, se_cmd->residual_count, se_cmd->scsi_status);
530-
531562
memset(&v_rsp, 0, sizeof(v_rsp));
532-
v_rsp.resid = cpu_to_vhost32(cmd->tvc_vq, se_cmd->residual_count);
533-
/* TODO is status_qualifier field needed? */
534-
v_rsp.status = se_cmd->scsi_status;
535-
v_rsp.sense_len = cpu_to_vhost32(cmd->tvc_vq,
536-
se_cmd->scsi_sense_length);
537-
memcpy(v_rsp.sense, cmd->tvc_sense_buf,
538-
se_cmd->scsi_sense_length);
563+
564+
if (cmd->saved_iter_addr && vhost_scsi_copy_sgl_to_iov(cmd)) {
565+
v_rsp.response = VIRTIO_SCSI_S_BAD_TARGET;
566+
} else {
567+
v_rsp.resid = cpu_to_vhost32(cmd->tvc_vq,
568+
se_cmd->residual_count);
569+
/* TODO is status_qualifier field needed? */
570+
v_rsp.status = se_cmd->scsi_status;
571+
v_rsp.sense_len = cpu_to_vhost32(cmd->tvc_vq,
572+
se_cmd->scsi_sense_length);
573+
memcpy(v_rsp.sense, cmd->tvc_sense_buf,
574+
se_cmd->scsi_sense_length);
575+
}
539576

540577
iov_iter_init(&iov_iter, ITER_DEST, cmd->tvc_resp_iov,
541578
cmd->tvc_in_iovs, sizeof(v_rsp));
@@ -613,12 +650,12 @@ static int
613650
vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd,
614651
struct iov_iter *iter,
615652
struct scatterlist *sgl,
616-
bool write)
653+
bool is_prot)
617654
{
618655
struct page **pages = cmd->tvc_upages;
619656
struct scatterlist *sg = sgl;
620-
ssize_t bytes;
621-
size_t offset;
657+
ssize_t bytes, mapped_bytes;
658+
size_t offset, mapped_offset;
622659
unsigned int npages = 0;
623660

624661
bytes = iov_iter_get_pages2(iter, pages, LONG_MAX,
@@ -627,13 +664,53 @@ vhost_scsi_map_to_sgl(struct vhost_scsi_cmd *cmd,
627664
if (bytes <= 0)
628665
return bytes < 0 ? bytes : -EFAULT;
629666

667+
mapped_bytes = bytes;
668+
mapped_offset = offset;
669+
630670
while (bytes) {
631671
unsigned n = min_t(unsigned, PAGE_SIZE - offset, bytes);
672+
/*
673+
* The block layer requires bios/requests to be a multiple of
674+
* 512 bytes, but Windows can send us vecs that are misaligned.
675+
* This can result in bios and later requests with misaligned
676+
* sizes if we have to break up a cmd/scatterlist into multiple
677+
* bios.
678+
*
679+
* We currently only break up a command into multiple bios if
680+
* we hit the vec/seg limit, so check if our sgl_count is
681+
* greater than the max and if a vec in the cmd has a
682+
* misaligned offset/size.
683+
*/
684+
if (!is_prot &&
685+
(offset & (SECTOR_SIZE - 1) || n & (SECTOR_SIZE - 1)) &&
686+
cmd->tvc_sgl_count > BIO_MAX_VECS) {
687+
WARN_ONCE(true,
688+
"vhost-scsi detected misaligned IO. Performance may be degraded.");
689+
goto revert_iter_get_pages;
690+
}
691+
632692
sg_set_page(sg++, pages[npages++], n, offset);
633693
bytes -= n;
634694
offset = 0;
635695
}
696+
636697
return npages;
698+
699+
revert_iter_get_pages:
700+
iov_iter_revert(iter, mapped_bytes);
701+
702+
npages = 0;
703+
while (mapped_bytes) {
704+
unsigned int n = min_t(unsigned int, PAGE_SIZE - mapped_offset,
705+
mapped_bytes);
706+
707+
put_page(pages[npages++]);
708+
709+
mapped_bytes -= n;
710+
mapped_offset = 0;
711+
}
712+
713+
return -EINVAL;
637714
}
638715

639716
static int
@@ -657,25 +734,80 @@ vhost_scsi_calc_sgls(struct iov_iter *iter, size_t bytes, int max_sgls)
657734
}
658735

659736
static int
660-
vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, bool write,
661-
struct iov_iter *iter,
662-
struct scatterlist *sg, int sg_count)
737+
vhost_scsi_copy_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter,
738+
struct scatterlist *sg, int sg_count)
739+
{
740+
size_t len = iov_iter_count(iter);
741+
unsigned int nbytes = 0;
742+
struct page *page;
743+
int i;
744+
745+
if (cmd->tvc_data_direction == DMA_FROM_DEVICE) {
746+
cmd->saved_iter_addr = dup_iter(&cmd->saved_iter, iter,
747+
GFP_KERNEL);
748+
if (!cmd->saved_iter_addr)
749+
return -ENOMEM;
750+
}
751+
752+
for (i = 0; i < sg_count; i++) {
753+
page = alloc_page(GFP_KERNEL);
754+
if (!page) {
755+
i--;
756+
goto err;
757+
}
758+
759+
nbytes = min_t(unsigned int, PAGE_SIZE, len);
760+
sg_set_page(&sg[i], page, nbytes, 0);
761+
762+
if (cmd->tvc_data_direction == DMA_TO_DEVICE &&
763+
copy_page_from_iter(page, 0, nbytes, iter) != nbytes)
764+
goto err;
765+
766+
len -= nbytes;
767+
}
768+
769+
cmd->copied_iov = 1;
770+
return 0;
771+
772+
err:
773+
pr_err("Could not read %u bytes while handling misaligned cmd\n",
774+
nbytes);
775+
776+
for (; i >= 0; i--)
777+
__free_page(sg_page(&sg[i]));
778+
kfree(cmd->saved_iter_addr);
779+
return -ENOMEM;
780+
}
781+
782+
static int
783+
vhost_scsi_iov_to_sgl(struct vhost_scsi_cmd *cmd, struct iov_iter *iter,
784+
struct scatterlist *sg, int sg_count, bool is_prot)
663785
{
664786
struct scatterlist *p = sg;
787+
size_t revert_bytes;
665788
int ret;
666789

667790
while (iov_iter_count(iter)) {
668-
ret = vhost_scsi_map_to_sgl(cmd, iter, sg, write);
791+
ret = vhost_scsi_map_to_sgl(cmd, iter, sg, is_prot);
669792
if (ret < 0) {
793+
revert_bytes = 0;
794+
670795
while (p < sg) {
671-
struct page *page = sg_page(p++);
672-
if (page)
796+
struct page *page = sg_page(p);
797+
798+
if (page) {
673799
put_page(page);
800+
revert_bytes += p->length;
801+
}
802+
p++;
674803
}
804+
805+
iov_iter_revert(iter, revert_bytes);
675806
return ret;
676807
}
677808
sg += ret;
678809
}
810+
679811
return 0;
680812
}
681813

@@ -685,7 +817,6 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd,
685817
size_t data_bytes, struct iov_iter *data_iter)
686818
{
687819
int sgl_count, ret;
688-
bool write = (cmd->tvc_data_direction == DMA_FROM_DEVICE);
689820

690821
if (prot_bytes) {
691822
sgl_count = vhost_scsi_calc_sgls(prot_iter, prot_bytes,
@@ -698,9 +829,8 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd,
698829
pr_debug("%s prot_sg %p prot_sgl_count %u\n", __func__,
699830
cmd->tvc_prot_sgl, cmd->tvc_prot_sgl_count);
700831

701-
ret = vhost_scsi_iov_to_sgl(cmd, write, prot_iter,
702-
cmd->tvc_prot_sgl,
703-
cmd->tvc_prot_sgl_count);
832+
ret = vhost_scsi_iov_to_sgl(cmd, prot_iter, cmd->tvc_prot_sgl,
833+
cmd->tvc_prot_sgl_count, true);
704834
if (ret < 0) {
705835
cmd->tvc_prot_sgl_count = 0;
706836
return ret;
@@ -716,8 +846,14 @@ vhost_scsi_mapal(struct vhost_scsi_cmd *cmd,
716846
pr_debug("%s data_sg %p data_sgl_count %u\n", __func__,
717847
cmd->tvc_sgl, cmd->tvc_sgl_count);
718848

719-
ret = vhost_scsi_iov_to_sgl(cmd, write, data_iter,
720-
cmd->tvc_sgl, cmd->tvc_sgl_count);
849+
ret = vhost_scsi_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl,
850+
cmd->tvc_sgl_count, false);
851+
if (ret == -EINVAL) {
852+
sg_init_table(cmd->tvc_sgl, cmd->tvc_sgl_count);
853+
ret = vhost_scsi_copy_iov_to_sgl(cmd, data_iter, cmd->tvc_sgl,
854+
cmd->tvc_sgl_count);
855+
}
856+
721857
if (ret < 0) {
722858
cmd->tvc_sgl_count = 0;
723859
return ret;

0 commit comments

Comments
 (0)