Skip to content

Commit 18c198c

Browse files
committed
vfio/pci: Create persistent INTx handler
A vulnerability exists where the eventfd for INTx signaling can be deconfigured, which unregisters the IRQ handler but still allows eventfds to be signaled with a NULL context through the SET_IRQS ioctl or through unmask irqfd if the device interrupt is pending. Ideally this could be solved with some additional locking; the igate mutex serializes the ioctl and config space accesses, and the interrupt handler is unregistered relative to the trigger, but the irqfd path runs asynchronous to those. The igate mutex cannot be acquired from the atomic context of the eventfd wake function. Disabling the irqfd relative to the eventfd registration is potentially incompatible with existing userspace. As a result, the solution implemented here moves configuration of the INTx interrupt handler to track the lifetime of the INTx context object and irq_type configuration, rather than registration of a particular trigger eventfd. Synchronization is added between the ioctl path and eventfd_signal() wrapper such that the eventfd trigger can be dynamically updated relative to in-flight interrupts or irqfd callbacks. Cc: <stable@vger.kernel.org> Fixes: 89e1f7d ("vfio: Add PCI device driver") Reported-by: Reinette Chatre <reinette.chatre@intel.com> Reviewed-by: Kevin Tian <kevin.tian@intel.com> Reviewed-by: Reinette Chatre <reinette.chatre@intel.com> Reviewed-by: Eric Auger <eric.auger@redhat.com> Link: https://lore.kernel.org/r/20240308230557.805580-5-alex.williamson@redhat.com Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
1 parent b620ecb commit 18c198c

File tree

1 file changed

+78
-67
lines changed

1 file changed

+78
-67
lines changed

drivers/vfio/pci/vfio_pci_intrs.c

Lines changed: 78 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,15 @@ static void vfio_send_intx_eventfd(void *opaque, void *unused)
9090

9191
if (likely(is_intx(vdev) && !vdev->virq_disabled)) {
9292
struct vfio_pci_irq_ctx *ctx;
93+
struct eventfd_ctx *trigger;
9394

9495
ctx = vfio_irq_ctx_get(vdev, 0);
9596
if (WARN_ON_ONCE(!ctx))
9697
return;
97-
eventfd_signal(ctx->trigger);
98+
99+
trigger = READ_ONCE(ctx->trigger);
100+
if (likely(trigger))
101+
eventfd_signal(trigger);
98102
}
99103
}
100104

@@ -253,111 +257,114 @@ static irqreturn_t vfio_intx_handler(int irq, void *dev_id)
253257
return ret;
254258
}
255259

256-
static int vfio_intx_enable(struct vfio_pci_core_device *vdev)
260+
static int vfio_intx_enable(struct vfio_pci_core_device *vdev,
261+
struct eventfd_ctx *trigger)
257262
{
263+
struct pci_dev *pdev = vdev->pdev;
258264
struct vfio_pci_irq_ctx *ctx;
265+
unsigned long irqflags;
266+
char *name;
267+
int ret;
259268

260269
if (!is_irq_none(vdev))
261270
return -EINVAL;
262271

263-
if (!vdev->pdev->irq)
272+
if (!pdev->irq)
264273
return -ENODEV;
265274

275+
name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)", pci_name(pdev));
276+
if (!name)
277+
return -ENOMEM;
278+
266279
ctx = vfio_irq_ctx_alloc(vdev, 0);
267280
if (!ctx)
268281
return -ENOMEM;
269282

283+
ctx->name = name;
284+
ctx->trigger = trigger;
285+
270286
/*
271-
* If the virtual interrupt is masked, restore it. Devices
272-
* supporting DisINTx can be masked at the hardware level
273-
* here, non-PCI-2.3 devices will have to wait until the
274-
* interrupt is enabled.
287+
* Fill the initial masked state based on virq_disabled. After
288+
* enable, changing the DisINTx bit in vconfig directly changes INTx
289+
* masking. igate prevents races during setup, once running masked
290+
* is protected via irqlock.
291+
*
292+
* Devices supporting DisINTx also reflect the current mask state in
293+
* the physical DisINTx bit, which is not affected during IRQ setup.
294+
*
295+
* Devices without DisINTx support require an exclusive interrupt.
296+
* IRQ masking is performed at the IRQ chip. Again, igate protects
297+
* against races during setup and IRQ handlers and irqfds are not
298+
* yet active, therefore masked is stable and can be used to
299+
* conditionally auto-enable the IRQ.
300+
*
301+
* irq_type must be stable while the IRQ handler is registered,
302+
* therefore it must be set before request_irq().
275303
*/
276304
ctx->masked = vdev->virq_disabled;
277-
if (vdev->pci_2_3)
278-
pci_intx(vdev->pdev, !ctx->masked);
305+
if (vdev->pci_2_3) {
306+
pci_intx(pdev, !ctx->masked);
307+
irqflags = IRQF_SHARED;
308+
} else {
309+
irqflags = ctx->masked ? IRQF_NO_AUTOEN : 0;
310+
}
279311

280312
vdev->irq_type = VFIO_PCI_INTX_IRQ_INDEX;
281313

314+
ret = request_irq(pdev->irq, vfio_intx_handler,
315+
irqflags, ctx->name, vdev);
316+
if (ret) {
317+
vdev->irq_type = VFIO_PCI_NUM_IRQS;
318+
kfree(name);
319+
vfio_irq_ctx_free(vdev, ctx, 0);
320+
return ret;
321+
}
322+
282323
return 0;
283324
}
284325

285-
static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev, int fd)
326+
static int vfio_intx_set_signal(struct vfio_pci_core_device *vdev,
327+
struct eventfd_ctx *trigger)
286328
{
287329
struct pci_dev *pdev = vdev->pdev;
288-
unsigned long irqflags = IRQF_SHARED;
289330
struct vfio_pci_irq_ctx *ctx;
290-
struct eventfd_ctx *trigger;
291-
unsigned long flags;
292-
int ret;
331+
struct eventfd_ctx *old;
293332

294333
ctx = vfio_irq_ctx_get(vdev, 0);
295334
if (WARN_ON_ONCE(!ctx))
296335
return -EINVAL;
297336

298-
if (ctx->trigger) {
299-
free_irq(pdev->irq, vdev);
300-
kfree(ctx->name);
301-
eventfd_ctx_put(ctx->trigger);
302-
ctx->trigger = NULL;
303-
}
304-
305-
if (fd < 0) /* Disable only */
306-
return 0;
307-
308-
ctx->name = kasprintf(GFP_KERNEL_ACCOUNT, "vfio-intx(%s)",
309-
pci_name(pdev));
310-
if (!ctx->name)
311-
return -ENOMEM;
312-
313-
trigger = eventfd_ctx_fdget(fd);
314-
if (IS_ERR(trigger)) {
315-
kfree(ctx->name);
316-
return PTR_ERR(trigger);
317-
}
337+
old = ctx->trigger;
318338

319-
ctx->trigger = trigger;
339+
WRITE_ONCE(ctx->trigger, trigger);
320340

321-
/*
322-
* Devices without DisINTx support require an exclusive interrupt,
323-
* IRQ masking is performed at the IRQ chip. The masked status is
324-
* protected by vdev->irqlock. Setup the IRQ without auto-enable and
325-
* unmask as necessary below under lock. DisINTx is unmodified by
326-
* the IRQ configuration and may therefore use auto-enable.
327-
*/
328-
if (!vdev->pci_2_3)
329-
irqflags = IRQF_NO_AUTOEN;
330-
331-
ret = request_irq(pdev->irq, vfio_intx_handler,
332-
irqflags, ctx->name, vdev);
333-
if (ret) {
334-
ctx->trigger = NULL;
335-
kfree(ctx->name);
336-
eventfd_ctx_put(trigger);
337-
return ret;
341+
/* Releasing an old ctx requires synchronizing in-flight users */
342+
if (old) {
343+
synchronize_irq(pdev->irq);
344+
vfio_virqfd_flush_thread(&ctx->unmask);
345+
eventfd_ctx_put(old);
338346
}
339347

340-
spin_lock_irqsave(&vdev->irqlock, flags);
341-
if (!vdev->pci_2_3 && !ctx->masked)
342-
enable_irq(pdev->irq);
343-
spin_unlock_irqrestore(&vdev->irqlock, flags);
344-
345348
return 0;
346349
}
347350

348351
static void vfio_intx_disable(struct vfio_pci_core_device *vdev)
349352
{
353+
struct pci_dev *pdev = vdev->pdev;
350354
struct vfio_pci_irq_ctx *ctx;
351355

352356
ctx = vfio_irq_ctx_get(vdev, 0);
353357
WARN_ON_ONCE(!ctx);
354358
if (ctx) {
355359
vfio_virqfd_disable(&ctx->unmask);
356360
vfio_virqfd_disable(&ctx->mask);
361+
free_irq(pdev->irq, vdev);
362+
if (ctx->trigger)
363+
eventfd_ctx_put(ctx->trigger);
364+
kfree(ctx->name);
365+
vfio_irq_ctx_free(vdev, ctx, 0);
357366
}
358-
vfio_intx_set_signal(vdev, -1);
359367
vdev->irq_type = VFIO_PCI_NUM_IRQS;
360-
vfio_irq_ctx_free(vdev, ctx, 0);
361368
}
362369

363370
/*
@@ -641,19 +648,23 @@ static int vfio_pci_set_intx_trigger(struct vfio_pci_core_device *vdev,
641648
return -EINVAL;
642649

643650
if (flags & VFIO_IRQ_SET_DATA_EVENTFD) {
651+
struct eventfd_ctx *trigger = NULL;
644652
int32_t fd = *(int32_t *)data;
645653
int ret;
646654

647-
if (is_intx(vdev))
648-
return vfio_intx_set_signal(vdev, fd);
655+
if (fd >= 0) {
656+
trigger = eventfd_ctx_fdget(fd);
657+
if (IS_ERR(trigger))
658+
return PTR_ERR(trigger);
659+
}
649660

650-
ret = vfio_intx_enable(vdev);
651-
if (ret)
652-
return ret;
661+
if (is_intx(vdev))
662+
ret = vfio_intx_set_signal(vdev, trigger);
663+
else
664+
ret = vfio_intx_enable(vdev, trigger);
653665

654-
ret = vfio_intx_set_signal(vdev, fd);
655-
if (ret)
656-
vfio_intx_disable(vdev);
666+
if (ret && trigger)
667+
eventfd_ctx_put(trigger);
657668

658669
return ret;
659670
}

0 commit comments

Comments
 (0)