Skip to content

Commit 83e445e

Browse files
dtatuleamstsirkin
authored andcommitted
vdpa/mlx5: Fix error path during device add
In the error recovery path of mlx5_vdpa_dev_add(), the cleanup is executed and at the end put_device() is called which ends up calling mlx5_vdpa_free(). This function will execute the same cleanup all over again. Most resources support being cleaned up twice, but the recent mlx5_vdpa_destroy_mr_resources() doesn't. This change drops the explicit cleanup from within the mlx5_vdpa_dev_add() and lets mlx5_vdpa_free() do its work. This issue was discovered while trying to add 2 vdpa devices with the same name: $> vdpa dev add name vdpa-0 mgmtdev auxiliary/mlx5_core.sf.2 $> vdpa dev add name vdpa-0 mgmtdev auxiliary/mlx5_core.sf.3 ... yields the following dump: BUG: kernel NULL pointer dereference, address: 00000000000000b8 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 0 P4D 0 Oops: Oops: 0000 [#1] SMP CPU: 4 UID: 0 PID: 2811 Comm: vdpa Not tainted 6.12.0-rc6 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:destroy_workqueue+0xe/0x2a0 Code: ... RSP: 0018:ffff88814920b9a8 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffff888105c10000 RCX: 0000000000000000 RDX: 0000000000000001 RSI: ffff888100400168 RDI: 0000000000000000 RBP: 0000000000000000 R08: ffff888100120c00 R09: ffffffff828578c0 R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 R13: ffff888131fd99a0 R14: 0000000000000000 R15: ffff888105c10580 FS: 00007fdfa6b4f740(0000) GS:ffff88852ca00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000000000b8 CR3: 000000018db09006 CR4: 0000000000372eb0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: <TASK> ? __die+0x20/0x60 ? page_fault_oops+0x150/0x3e0 ? exc_page_fault+0x74/0x130 ? asm_exc_page_fault+0x22/0x30 ? destroy_workqueue+0xe/0x2a0 mlx5_vdpa_destroy_mr_resources+0x2b/0x40 [mlx5_vdpa] mlx5_vdpa_free+0x45/0x150 [mlx5_vdpa] vdpa_release_dev+0x1e/0x50 [vdpa] device_release+0x31/0x90 kobject_put+0x8d/0x230 mlx5_vdpa_dev_add+0x328/0x8b0 [mlx5_vdpa] vdpa_nl_cmd_dev_add_set_doit+0x2b8/0x4c0 [vdpa] genl_family_rcv_msg_doit+0xd0/0x120 genl_rcv_msg+0x180/0x2b0 ? __vdpa_alloc_device+0x1b0/0x1b0 [vdpa] ? genl_family_rcv_msg_dumpit+0xf0/0xf0 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1fc/0x2d0 netlink_sendmsg+0x1e4/0x410 __sock_sendmsg+0x38/0x60 ? sockfd_lookup_light+0x12/0x60 __sys_sendto+0x105/0x160 ? __count_memcg_events+0x53/0xe0 ? handle_mm_fault+0x100/0x220 ? do_user_addr_fault+0x40d/0x620 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x4c/0x100 entry_SYSCALL_64_after_hwframe+0x4b/0x53 RIP: 0033:0x7fdfa6c66b57 Code: ... RSP: 002b:00007ffeace22998 EFLAGS: 00000202 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 000055a498608350 RCX: 00007fdfa6c66b57 RDX: 000000000000006c RSI: 000055a498608350 RDI: 0000000000000003 RBP: 00007ffeace229c0 R08: 00007fdfa6d35200 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000202 R12: 000055a4986082a0 R13: 0000000000000000 R14: 0000000000000000 R15: 00007ffeace233f3 </TASK> Modules linked in: ... CR2: 00000000000000b8 Fixes: 6211165 ("vdpa/mlx5: Postpone MR deletion") Signed-off-by: Dragos Tatulea <dtatulea@nvidia.com> Message-Id: <20241105185101.1323272-2-dtatulea@nvidia.com> Signed-off-by: Michael S. Tsirkin <mst@redhat.com> Acked-by: Jason Wang <jasowang@redhat.com> Acked-by: Eugenio Pérez <eperezma@redhat.com>
1 parent 4e39eca commit 83e445e

File tree

1 file changed

+5
-16
lines changed

1 file changed

+5
-16
lines changed

drivers/vdpa/mlx5/net/mlx5_vnet.c

Lines changed: 5 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -3963,28 +3963,28 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
39633963
mvdev->vdev.dma_dev = &mdev->pdev->dev;
39643964
err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
39653965
if (err)
3966-
goto err_mpfs;
3966+
goto err_alloc;
39673967

39683968
err = mlx5_vdpa_init_mr_resources(mvdev);
39693969
if (err)
3970-
goto err_res;
3970+
goto err_alloc;
39713971

39723972
if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
39733973
err = mlx5_vdpa_create_dma_mr(mvdev);
39743974
if (err)
3975-
goto err_mr_res;
3975+
goto err_alloc;
39763976
}
39773977

39783978
err = alloc_fixed_resources(ndev);
39793979
if (err)
3980-
goto err_mr;
3980+
goto err_alloc;
39813981

39823982
ndev->cvq_ent.mvdev = mvdev;
39833983
INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
39843984
mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
39853985
if (!mvdev->wq) {
39863986
err = -ENOMEM;
3987-
goto err_res2;
3987+
goto err_alloc;
39883988
}
39893989

39903990
mvdev->vdev.mdev = &mgtdev->mgtdev;
@@ -4010,17 +4010,6 @@ static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
40104010
_vdpa_unregister_device(&mvdev->vdev);
40114011
err_reg:
40124012
destroy_workqueue(mvdev->wq);
4013-
err_res2:
4014-
free_fixed_resources(ndev);
4015-
err_mr:
4016-
mlx5_vdpa_clean_mrs(mvdev);
4017-
err_mr_res:
4018-
mlx5_vdpa_destroy_mr_resources(mvdev);
4019-
err_res:
4020-
mlx5_vdpa_free_resources(&ndev->mvdev);
4021-
err_mpfs:
4022-
if (!is_zero_ether_addr(config->mac))
4023-
mlx5_mpfs_del_mac(pfmdev, config->mac);
40244013
err_alloc:
40254014
put_device(&mvdev->vdev.dev);
40264015
return err;

0 commit comments

Comments
 (0)