Skip to content

Commit 1768821

Browse files
committed
Merge tag 'vfio-v5.19-rc1' of https://github.com/awilliam/linux-vfio
Pull vfio updates from Alex Williamson:

- Improvements to mlx5 vfio-pci variant driver, including support for parallel migration per PF (Yishai Hadas)
- Remove redundant iommu_present() check (Robin Murphy)
- Ongoing refactoring to consolidate the VFIO driver facing API to use vfio_device (Jason Gunthorpe)
- Use drvdata to store vfio_device among all vfio-pci and variant drivers (Jason Gunthorpe)
- Remove redundant code now that IOMMU core manages group DMA ownership (Jason Gunthorpe)
- Remove vfio_group from external API handling struct file ownership (Jason Gunthorpe)
- Correct typo in uapi comments (Thomas Huth)
- Fix coccicheck detected deadlock (Wan Jiabing)
- Use rwsem to remove races and simplify code around container and kvm association to groups (Jason Gunthorpe)
- Harden access to devices in low power states and use runtime PM to enable d3cold support for unused devices (Abhishek Sahu)
- Fix dma_owner handling of fake IOMMU groups (Jason Gunthorpe)
- Set driver_managed_dma on vfio-pci variant drivers (Jason Gunthorpe)
- Pass KVM pointer directly rather than via notifier (Matthew Rosato)

* tag 'vfio-v5.19-rc1' of https://github.com/awilliam/linux-vfio: (38 commits)
  vfio: remove VFIO_GROUP_NOTIFY_SET_KVM
  vfio/pci: Add driver_managed_dma to the new vfio_pci drivers
  vfio: Do not manipulate iommu dma_owner for fake iommu groups
  vfio/pci: Move the unused device into low power state with runtime PM
  vfio/pci: Virtualize PME related registers bits and initialize to zero
  vfio/pci: Change the PF power state to D0 before enabling VFs
  vfio/pci: Invalidate mmaps and block the access in D3hot power state
  vfio: Change struct vfio_group::container_users to a non-atomic int
  vfio: Simplify the life cycle of the group FD
  vfio: Fully lock struct vfio_group::container
  vfio: Split up vfio_group_get_device_fd()
  vfio: Change struct vfio_group::opened from an atomic to bool
  vfio: Add missing locking for struct vfio_group::kvm
  kvm/vfio: Fix potential deadlock problem in vfio
  include/uapi/linux/vfio.h: Fix trivial typo - _IORW should be _IOWR instead
  vfio/pci: Use the struct file as the handle not the vfio_group
  kvm/vfio: Remove vfio_group from kvm
  vfio: Change vfio_group_set_kvm() to vfio_file_set_kvm()
  vfio: Change vfio_external_check_extension() to vfio_file_enforced_coherent()
  vfio: Remove vfio_external_group_match_file()
  ...
2 parents 8171acb + 421cfe6 commit 1768821

File tree

24 files changed

+1095
-1144
lines changed

24 files changed

+1095
-1144
lines changed

Documentation/driver-api/vfio-mediated-device.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,10 +262,10 @@ Translation APIs for Mediated Devices
262262
The following APIs are provided for translating user pfn to host pfn in a VFIO
263263
driver::
264264

265-
extern int vfio_pin_pages(struct device *dev, unsigned long *user_pfn,
265+
int vfio_pin_pages(struct vfio_device *device, unsigned long *user_pfn,
266266
int npage, int prot, unsigned long *phys_pfn);
267267

268-
extern int vfio_unpin_pages(struct device *dev, unsigned long *user_pfn,
268+
int vfio_unpin_pages(struct vfio_device *device, unsigned long *user_pfn,
269269
int npage);
270270

271271
These functions call back into the back-end IOMMU module by using the pin_pages

drivers/gpu/drm/i915/gvt/gtt.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ static int preallocated_oos_pages = 8192;
5151

5252
static bool intel_gvt_is_valid_gfn(struct intel_vgpu *vgpu, unsigned long gfn)
5353
{
54-
struct kvm *kvm = vgpu->kvm;
54+
struct kvm *kvm = vgpu->vfio_device.kvm;
5555
int idx;
5656
bool ret;
5757

@@ -1185,7 +1185,7 @@ static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
11851185

11861186
if (!vgpu->attached)
11871187
return -EINVAL;
1188-
pfn = gfn_to_pfn(vgpu->kvm, ops->get_pfn(entry));
1188+
pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry));
11891189
if (is_error_noslot_pfn(pfn))
11901190
return -EINVAL;
11911191
return PageTransHuge(pfn_to_page(pfn));

drivers/gpu/drm/i915/gvt/gvt.h

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -227,11 +227,7 @@ struct intel_vgpu {
227227
struct mutex cache_lock;
228228

229229
struct notifier_block iommu_notifier;
230-
struct notifier_block group_notifier;
231-
struct kvm *kvm;
232-
struct work_struct release_work;
233230
atomic_t released;
234-
struct vfio_group *vfio_group;
235231

236232
struct kvm_page_track_notifier_node track_node;
237233
#define NR_BKT (1 << 18)
@@ -732,7 +728,7 @@ static inline int intel_gvt_read_gpa(struct intel_vgpu *vgpu, unsigned long gpa,
732728
{
733729
if (!vgpu->attached)
734730
return -ESRCH;
735-
return vfio_dma_rw(vgpu->vfio_group, gpa, buf, len, false);
731+
return vfio_dma_rw(&vgpu->vfio_device, gpa, buf, len, false);
736732
}
737733

738734
/**
@@ -750,7 +746,7 @@ static inline int intel_gvt_write_gpa(struct intel_vgpu *vgpu,
750746
{
751747
if (!vgpu->attached)
752748
return -ESRCH;
753-
return vfio_dma_rw(vgpu->vfio_group, gpa, buf, len, true);
749+
return vfio_dma_rw(&vgpu->vfio_device, gpa, buf, len, true);
754750
}
755751

756752
void intel_gvt_debugfs_remove_vgpu(struct intel_vgpu *vgpu);

drivers/gpu/drm/i915/gvt/kvmgt.c

Lines changed: 27 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -228,8 +228,6 @@ static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
228228
}
229229
}
230230

231-
static void intel_vgpu_release_work(struct work_struct *work);
232-
233231
static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
234232
unsigned long size)
235233
{
@@ -243,7 +241,7 @@ static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
243241
for (npage = 0; npage < total_pages; npage++) {
244242
unsigned long cur_gfn = gfn + npage;
245243

246-
ret = vfio_group_unpin_pages(vgpu->vfio_group, &cur_gfn, 1);
244+
ret = vfio_unpin_pages(&vgpu->vfio_device, &cur_gfn, 1);
247245
drm_WARN_ON(&i915->drm, ret != 1);
248246
}
249247
}
@@ -266,8 +264,8 @@ static int gvt_pin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
266264
unsigned long cur_gfn = gfn + npage;
267265
unsigned long pfn;
268266

269-
ret = vfio_group_pin_pages(vgpu->vfio_group, &cur_gfn, 1,
270-
IOMMU_READ | IOMMU_WRITE, &pfn);
267+
ret = vfio_pin_pages(&vgpu->vfio_device, &cur_gfn, 1,
268+
IOMMU_READ | IOMMU_WRITE, &pfn);
271269
if (ret != 1) {
272270
gvt_vgpu_err("vfio_pin_pages failed for gfn 0x%lx, ret %d\n",
273271
cur_gfn, ret);
@@ -761,23 +759,6 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
761759
return NOTIFY_OK;
762760
}
763761

764-
static int intel_vgpu_group_notifier(struct notifier_block *nb,
765-
unsigned long action, void *data)
766-
{
767-
struct intel_vgpu *vgpu =
768-
container_of(nb, struct intel_vgpu, group_notifier);
769-
770-
/* the only action we care about */
771-
if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
772-
vgpu->kvm = data;
773-
774-
if (!data)
775-
schedule_work(&vgpu->release_work);
776-
}
777-
778-
return NOTIFY_OK;
779-
}
780-
781762
static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
782763
{
783764
struct intel_vgpu *itr;
@@ -789,7 +770,7 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
789770
if (!itr->attached)
790771
continue;
791772

792-
if (vgpu->kvm == itr->kvm) {
773+
if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) {
793774
ret = true;
794775
goto out;
795776
}
@@ -804,61 +785,44 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
804785
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
805786
unsigned long events;
806787
int ret;
807-
struct vfio_group *vfio_group;
808788

809789
vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
810-
vgpu->group_notifier.notifier_call = intel_vgpu_group_notifier;
811790

812791
events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
813-
ret = vfio_register_notifier(vfio_dev->dev, VFIO_IOMMU_NOTIFY, &events,
814-
&vgpu->iommu_notifier);
792+
ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events,
793+
&vgpu->iommu_notifier);
815794
if (ret != 0) {
816795
gvt_vgpu_err("vfio_register_notifier for iommu failed: %d\n",
817796
ret);
818797
goto out;
819798
}
820799

821-
events = VFIO_GROUP_NOTIFY_SET_KVM;
822-
ret = vfio_register_notifier(vfio_dev->dev, VFIO_GROUP_NOTIFY, &events,
823-
&vgpu->group_notifier);
824-
if (ret != 0) {
825-
gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
826-
ret);
827-
goto undo_iommu;
828-
}
829-
830-
vfio_group =
831-
vfio_group_get_external_user_from_dev(vgpu->vfio_device.dev);
832-
if (IS_ERR_OR_NULL(vfio_group)) {
833-
ret = !vfio_group ? -EFAULT : PTR_ERR(vfio_group);
834-
gvt_vgpu_err("vfio_group_get_external_user_from_dev failed\n");
835-
goto undo_register;
836-
}
837-
vgpu->vfio_group = vfio_group;
838-
839800
ret = -EEXIST;
840801
if (vgpu->attached)
841-
goto undo_group;
802+
goto undo_iommu;
842803

843804
ret = -ESRCH;
844-
if (!vgpu->kvm || vgpu->kvm->mm != current->mm) {
805+
if (!vgpu->vfio_device.kvm ||
806+
vgpu->vfio_device.kvm->mm != current->mm) {
845807
gvt_vgpu_err("KVM is required to use Intel vGPU\n");
846-
goto undo_group;
808+
goto undo_iommu;
847809
}
848810

811+
kvm_get_kvm(vgpu->vfio_device.kvm);
812+
849813
ret = -EEXIST;
850814
if (__kvmgt_vgpu_exist(vgpu))
851-
goto undo_group;
815+
goto undo_iommu;
852816

853817
vgpu->attached = true;
854-
kvm_get_kvm(vgpu->kvm);
855818

856819
kvmgt_protect_table_init(vgpu);
857820
gvt_cache_init(vgpu);
858821

859822
vgpu->track_node.track_write = kvmgt_page_track_write;
860823
vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
861-
kvm_page_track_register_notifier(vgpu->kvm, &vgpu->track_node);
824+
kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
825+
&vgpu->track_node);
862826

863827
debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs,
864828
&vgpu->nr_cache_entries);
@@ -868,17 +832,9 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
868832
atomic_set(&vgpu->released, 0);
869833
return 0;
870834

871-
undo_group:
872-
vfio_group_put_external_user(vgpu->vfio_group);
873-
vgpu->vfio_group = NULL;
874-
875-
undo_register:
876-
vfio_unregister_notifier(vfio_dev->dev, VFIO_GROUP_NOTIFY,
877-
&vgpu->group_notifier);
878-
879835
undo_iommu:
880-
vfio_unregister_notifier(vfio_dev->dev, VFIO_IOMMU_NOTIFY,
881-
&vgpu->iommu_notifier);
836+
vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY,
837+
&vgpu->iommu_notifier);
882838
out:
883839
return ret;
884840
}
@@ -894,8 +850,9 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
894850
}
895851
}
896852

897-
static void __intel_vgpu_release(struct intel_vgpu *vgpu)
853+
static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
898854
{
855+
struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
899856
struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
900857
int ret;
901858

@@ -907,41 +864,24 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
907864

908865
intel_gvt_release_vgpu(vgpu);
909866

910-
ret = vfio_unregister_notifier(vgpu->vfio_device.dev, VFIO_IOMMU_NOTIFY,
911-
&vgpu->iommu_notifier);
867+
ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_IOMMU_NOTIFY,
868+
&vgpu->iommu_notifier);
912869
drm_WARN(&i915->drm, ret,
913870
"vfio_unregister_notifier for iommu failed: %d\n", ret);
914871

915-
ret = vfio_unregister_notifier(vgpu->vfio_device.dev, VFIO_GROUP_NOTIFY,
916-
&vgpu->group_notifier);
917-
drm_WARN(&i915->drm, ret,
918-
"vfio_unregister_notifier for group failed: %d\n", ret);
919-
920872
debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
921873

922-
kvm_page_track_unregister_notifier(vgpu->kvm, &vgpu->track_node);
923-
kvm_put_kvm(vgpu->kvm);
874+
kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
875+
&vgpu->track_node);
924876
kvmgt_protect_table_destroy(vgpu);
925877
gvt_cache_destroy(vgpu);
926878

927879
intel_vgpu_release_msi_eventfd_ctx(vgpu);
928-
vfio_group_put_external_user(vgpu->vfio_group);
929880

930-
vgpu->kvm = NULL;
931881
vgpu->attached = false;
932-
}
933-
934-
static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
935-
{
936-
__intel_vgpu_release(vfio_dev_to_vgpu(vfio_dev));
937-
}
938-
939-
static void intel_vgpu_release_work(struct work_struct *work)
940-
{
941-
struct intel_vgpu *vgpu =
942-
container_of(work, struct intel_vgpu, release_work);
943882

944-
__intel_vgpu_release(vgpu);
883+
if (vgpu->vfio_device.kvm)
884+
kvm_put_kvm(vgpu->vfio_device.kvm);
945885
}
946886

947887
static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
@@ -1690,7 +1630,6 @@ static int intel_vgpu_probe(struct mdev_device *mdev)
16901630
return PTR_ERR(vgpu);
16911631
}
16921632

1693-
INIT_WORK(&vgpu->release_work, intel_vgpu_release_work);
16941633
vfio_init_group_dev(&vgpu->vfio_device, &mdev->dev,
16951634
&intel_vgpu_dev_ops);
16961635

@@ -1728,7 +1667,7 @@ static struct mdev_driver intel_vgpu_mdev_driver = {
17281667

17291668
int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
17301669
{
1731-
struct kvm *kvm = info->kvm;
1670+
struct kvm *kvm = info->vfio_device.kvm;
17321671
struct kvm_memory_slot *slot;
17331672
int idx;
17341673

@@ -1758,7 +1697,7 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
17581697

17591698
int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
17601699
{
1761-
struct kvm *kvm = info->kvm;
1700+
struct kvm *kvm = info->vfio_device.kvm;
17621701
struct kvm_memory_slot *slot;
17631702
int idx;
17641703

drivers/net/ethernet/mellanox/mlx5/core/sriov.c

Lines changed: 64 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,11 @@ static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs)
8787
enable_vfs_hca:
8888
num_msix_count = mlx5_get_default_msix_vec_count(dev, num_vfs);
8989
for (vf = 0; vf < num_vfs; vf++) {
90+
/* Notify the VF before its enablement to let it set
91+
* some stuff.
92+
*/
93+
blocking_notifier_call_chain(&sriov->vfs_ctx[vf].notifier,
94+
MLX5_PF_NOTIFY_ENABLE_VF, dev);
9095
err = mlx5_core_enable_hca(dev, vf + 1);
9196
if (err) {
9297
mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err);
@@ -127,6 +132,11 @@ mlx5_device_disable_sriov(struct mlx5_core_dev *dev, int num_vfs, bool clear_vf)
127132
for (vf = num_vfs - 1; vf >= 0; vf--) {
128133
if (!sriov->vfs_ctx[vf].enabled)
129134
continue;
135+
/* Notify the VF before its disablement to let it clean
136+
* some resources.
137+
*/
138+
blocking_notifier_call_chain(&sriov->vfs_ctx[vf].notifier,
139+
MLX5_PF_NOTIFY_DISABLE_VF, dev);
130140
err = mlx5_core_disable_hca(dev, vf + 1);
131141
if (err) {
132142
mlx5_core_warn(dev, "failed to disable VF %d\n", vf);
@@ -257,7 +267,7 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev)
257267
{
258268
struct mlx5_core_sriov *sriov = &dev->priv.sriov;
259269
struct pci_dev *pdev = dev->pdev;
260-
int total_vfs;
270+
int total_vfs, i;
261271

262272
if (!mlx5_core_is_pf(dev))
263273
return 0;
@@ -269,6 +279,9 @@ int mlx5_sriov_init(struct mlx5_core_dev *dev)
269279
if (!sriov->vfs_ctx)
270280
return -ENOMEM;
271281

282+
for (i = 0; i < total_vfs; i++)
283+
BLOCKING_INIT_NOTIFIER_HEAD(&sriov->vfs_ctx[i].notifier);
284+
272285
return 0;
273286
}
274287

@@ -281,3 +294,53 @@ void mlx5_sriov_cleanup(struct mlx5_core_dev *dev)
281294

282295
kfree(sriov->vfs_ctx);
283296
}
297+
298+
/**
299+
* mlx5_sriov_blocking_notifier_unregister - Unregister a VF from
300+
* a notification block chain.
301+
*
302+
* @mdev: The mlx5 core device.
303+
* @vf_id: The VF id.
304+
* @nb: The notifier block to be unregistered.
305+
*/
306+
void mlx5_sriov_blocking_notifier_unregister(struct mlx5_core_dev *mdev,
307+
int vf_id,
308+
struct notifier_block *nb)
309+
{
310+
struct mlx5_vf_context *vfs_ctx;
311+
struct mlx5_core_sriov *sriov;
312+
313+
sriov = &mdev->priv.sriov;
314+
if (WARN_ON(vf_id < 0 || vf_id >= sriov->num_vfs))
315+
return;
316+
317+
vfs_ctx = &sriov->vfs_ctx[vf_id];
318+
blocking_notifier_chain_unregister(&vfs_ctx->notifier, nb);
319+
}
320+
EXPORT_SYMBOL(mlx5_sriov_blocking_notifier_unregister);
321+
322+
/**
323+
* mlx5_sriov_blocking_notifier_register - Register a VF notification
324+
* block chain.
325+
*
326+
* @mdev: The mlx5 core device.
327+
* @vf_id: The VF id.
328+
* @nb: The notifier block to be called upon the VF events.
329+
*
330+
* Returns 0 on success or an error code.
331+
*/
332+
int mlx5_sriov_blocking_notifier_register(struct mlx5_core_dev *mdev,
333+
int vf_id,
334+
struct notifier_block *nb)
335+
{
336+
struct mlx5_vf_context *vfs_ctx;
337+
struct mlx5_core_sriov *sriov;
338+
339+
sriov = &mdev->priv.sriov;
340+
if (vf_id < 0 || vf_id >= sriov->num_vfs)
341+
return -EINVAL;
342+
343+
vfs_ctx = &sriov->vfs_ctx[vf_id];
344+
return blocking_notifier_chain_register(&vfs_ctx->notifier, nb);
345+
}
346+
EXPORT_SYMBOL(mlx5_sriov_blocking_notifier_register);

0 commit comments

Comments (0)