Skip to content

Commit f8941e6

Browse files
vireshk authored and jgross1 committed
xen: privcmd: Add support for irqfd
Xen provides support for injecting interrupts to the guests via the HYPERVISOR_dm_op() hypercall. The same is used by the Virtio based device backend implementations, in an inefficient manner currently.

Generally, the Virtio backends are implemented to work with the Eventfd based mechanism. In order to make such backends work with Xen, another software layer needs to poll the Eventfds and raise an interrupt to the guest using the Xen based mechanism. This results in an extra context switch.

This is not a new problem in Linux though. It is present with other hypervisors like KVM, etc. as well. The generic solution implemented in the kernel for them is to provide an IOCTL call to pass the interrupt details and eventfd, which lets the kernel take care of polling the eventfd and raising of the interrupt, instead of handling this in user space (which involves an extra context switch).

This patch adds support to inject a specific interrupt to a guest using the eventfd mechanism, thereby avoiding the extra context switch.

Inspired by existing implementations for KVM, etc.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Juergen Gross <jgross@suse.com>
Link: https://lore.kernel.org/r/8e724ac1f50c2bc1eb8da9b3ff6166f1372570aa.1692697321.git.viresh.kumar@linaro.org
Signed-off-by: Juergen Gross <jgross@suse.com>
1 parent 442466e commit f8941e6

File tree

3 files changed

+301
-2
lines changed

3 files changed

+301
-2
lines changed

drivers/xen/Kconfig

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,13 @@ config XEN_PRIVCMD
269269
disaggregated Xen setups this driver might be needed for other
270270
domains, too.
271271

272+
config XEN_PRIVCMD_IRQFD
273+
bool "Xen irqfd support"
274+
depends on XEN_PRIVCMD && XEN_VIRTIO && EVENTFD
275+
help
276+
Using the irqfd mechanism a virtio backend running in a daemon can
277+
speed up interrupt injection into a guest.
278+
272279
config XEN_ACPI_PROCESSOR
273280
tristate "Xen ACPI processor"
274281
depends on XEN && XEN_PV_DOM0 && X86 && ACPI_PROCESSOR && CPU_FREQ

drivers/xen/privcmd.c

Lines changed: 280 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,16 @@
99

1010
#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
1111

12+
#include <linux/eventfd.h>
13+
#include <linux/file.h>
1214
#include <linux/kernel.h>
1315
#include <linux/module.h>
16+
#include <linux/mutex.h>
17+
#include <linux/poll.h>
1418
#include <linux/sched.h>
1519
#include <linux/slab.h>
1620
#include <linux/string.h>
21+
#include <linux/workqueue.h>
1722
#include <linux/errno.h>
1823
#include <linux/mm.h>
1924
#include <linux/mman.h>
@@ -833,6 +838,263 @@ static long privcmd_ioctl_mmap_resource(struct file *file,
833838
return rc;
834839
}
835840

841+
#ifdef CONFIG_XEN_PRIVCMD_IRQFD
842+
/* Irqfd support */

/* Workqueue on which the per-irqfd shutdown work items are queued. */
static struct workqueue_struct *irqfd_cleanup_wq;

/* Taken around every walk and modification of irqfds_list. */
static DEFINE_MUTEX(irqfds_lock);

/* Registered irqfds, linked through privcmd_kernel_irqfd::list. */
static LIST_HEAD(irqfds_list);
846+
847+
struct privcmd_kernel_irqfd {
848+
struct xen_dm_op_buf xbufs;
849+
domid_t dom;
850+
bool error;
851+
struct eventfd_ctx *eventfd;
852+
struct work_struct shutdown;
853+
wait_queue_entry_t wait;
854+
struct list_head list;
855+
poll_table pt;
856+
};
857+
858+
static void irqfd_deactivate(struct privcmd_kernel_irqfd *kirqfd)
859+
{
860+
lockdep_assert_held(&irqfds_lock);
861+
862+
list_del_init(&kirqfd->list);
863+
queue_work(irqfd_cleanup_wq, &kirqfd->shutdown);
864+
}
865+
866+
static void irqfd_shutdown(struct work_struct *work)
867+
{
868+
struct privcmd_kernel_irqfd *kirqfd =
869+
container_of(work, struct privcmd_kernel_irqfd, shutdown);
870+
u64 cnt;
871+
872+
eventfd_ctx_remove_wait_queue(kirqfd->eventfd, &kirqfd->wait, &cnt);
873+
eventfd_ctx_put(kirqfd->eventfd);
874+
kfree(kirqfd);
875+
}
876+
877+
static void irqfd_inject(struct privcmd_kernel_irqfd *kirqfd)
878+
{
879+
u64 cnt;
880+
long rc;
881+
882+
eventfd_ctx_do_read(kirqfd->eventfd, &cnt);
883+
884+
xen_preemptible_hcall_begin();
885+
rc = HYPERVISOR_dm_op(kirqfd->dom, 1, &kirqfd->xbufs);
886+
xen_preemptible_hcall_end();
887+
888+
/* Don't repeat the error message for consecutive failures */
889+
if (rc && !kirqfd->error) {
890+
pr_err("Failed to configure irq for guest domain: %d\n",
891+
kirqfd->dom);
892+
}
893+
894+
kirqfd->error = rc;
895+
}
896+
897+
static int
898+
irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key)
899+
{
900+
struct privcmd_kernel_irqfd *kirqfd =
901+
container_of(wait, struct privcmd_kernel_irqfd, wait);
902+
__poll_t flags = key_to_poll(key);
903+
904+
if (flags & EPOLLIN)
905+
irqfd_inject(kirqfd);
906+
907+
if (flags & EPOLLHUP) {
908+
mutex_lock(&irqfds_lock);
909+
irqfd_deactivate(kirqfd);
910+
mutex_unlock(&irqfds_lock);
911+
}
912+
913+
return 0;
914+
}
915+
916+
static void
917+
irqfd_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt)
918+
{
919+
struct privcmd_kernel_irqfd *kirqfd =
920+
container_of(pt, struct privcmd_kernel_irqfd, pt);
921+
922+
add_wait_queue_priority(wqh, &kirqfd->wait);
923+
}
924+
925+
static int privcmd_irqfd_assign(struct privcmd_irqfd *irqfd)
926+
{
927+
struct privcmd_kernel_irqfd *kirqfd, *tmp;
928+
__poll_t events;
929+
struct fd f;
930+
void *dm_op;
931+
int ret;
932+
933+
kirqfd = kzalloc(sizeof(*kirqfd) + irqfd->size, GFP_KERNEL);
934+
if (!kirqfd)
935+
return -ENOMEM;
936+
dm_op = kirqfd + 1;
937+
938+
if (copy_from_user(dm_op, irqfd->dm_op, irqfd->size)) {
939+
ret = -EFAULT;
940+
goto error_kfree;
941+
}
942+
943+
kirqfd->xbufs.size = irqfd->size;
944+
set_xen_guest_handle(kirqfd->xbufs.h, dm_op);
945+
kirqfd->dom = irqfd->dom;
946+
INIT_WORK(&kirqfd->shutdown, irqfd_shutdown);
947+
948+
f = fdget(irqfd->fd);
949+
if (!f.file) {
950+
ret = -EBADF;
951+
goto error_kfree;
952+
}
953+
954+
kirqfd->eventfd = eventfd_ctx_fileget(f.file);
955+
if (IS_ERR(kirqfd->eventfd)) {
956+
ret = PTR_ERR(kirqfd->eventfd);
957+
goto error_fd_put;
958+
}
959+
960+
/*
961+
* Install our own custom wake-up handling so we are notified via a
962+
* callback whenever someone signals the underlying eventfd.
963+
*/
964+
init_waitqueue_func_entry(&kirqfd->wait, irqfd_wakeup);
965+
init_poll_funcptr(&kirqfd->pt, irqfd_poll_func);
966+
967+
mutex_lock(&irqfds_lock);
968+
969+
list_for_each_entry(tmp, &irqfds_list, list) {
970+
if (kirqfd->eventfd == tmp->eventfd) {
971+
ret = -EBUSY;
972+
mutex_unlock(&irqfds_lock);
973+
goto error_eventfd;
974+
}
975+
}
976+
977+
list_add_tail(&kirqfd->list, &irqfds_list);
978+
mutex_unlock(&irqfds_lock);
979+
980+
/*
981+
* Check if there was an event already pending on the eventfd before we
982+
* registered, and trigger it as if we didn't miss it.
983+
*/
984+
events = vfs_poll(f.file, &kirqfd->pt);
985+
if (events & EPOLLIN)
986+
irqfd_inject(kirqfd);
987+
988+
/*
989+
* Do not drop the file until the kirqfd is fully initialized, otherwise
990+
* we might race against the EPOLLHUP.
991+
*/
992+
fdput(f);
993+
return 0;
994+
995+
error_eventfd:
996+
eventfd_ctx_put(kirqfd->eventfd);
997+
998+
error_fd_put:
999+
fdput(f);
1000+
1001+
error_kfree:
1002+
kfree(kirqfd);
1003+
return ret;
1004+
}
1005+
1006+
static int privcmd_irqfd_deassign(struct privcmd_irqfd *irqfd)
1007+
{
1008+
struct privcmd_kernel_irqfd *kirqfd;
1009+
struct eventfd_ctx *eventfd;
1010+
1011+
eventfd = eventfd_ctx_fdget(irqfd->fd);
1012+
if (IS_ERR(eventfd))
1013+
return PTR_ERR(eventfd);
1014+
1015+
mutex_lock(&irqfds_lock);
1016+
1017+
list_for_each_entry(kirqfd, &irqfds_list, list) {
1018+
if (kirqfd->eventfd == eventfd) {
1019+
irqfd_deactivate(kirqfd);
1020+
break;
1021+
}
1022+
}
1023+
1024+
mutex_unlock(&irqfds_lock);
1025+
1026+
eventfd_ctx_put(eventfd);
1027+
1028+
/*
1029+
* Block until we know all outstanding shutdown jobs have completed so
1030+
* that we guarantee there will not be any more interrupts once this
1031+
* deassign function returns.
1032+
*/
1033+
flush_workqueue(irqfd_cleanup_wq);
1034+
1035+
return 0;
1036+
}
1037+
1038+
/*
 * IOCTL_PRIVCMD_IRQFD handler.
 * @file:  privcmd file; its private data carries the domid restriction.
 * @udata: user pointer to a struct privcmd_irqfd.
 *
 * Return: -EFAULT if the request cannot be copied in, -EINVAL if any flag
 * other than PRIVCMD_IRQFD_FLAG_DEASSIGN is set, -EPERM if the file is
 * restricted to a different domain, otherwise the result of the assign or
 * deassign operation.
 */
static long privcmd_ioctl_irqfd(struct file *file, void __user *udata)
{
	struct privcmd_data *data = file->private_data;
	struct privcmd_irqfd irqfd;
	bool restricted;

	if (copy_from_user(&irqfd, udata, sizeof(irqfd)))
		return -EFAULT;

	/* No other flags should be set */
	if (irqfd.flags & ~PRIVCMD_IRQFD_FLAG_DEASSIGN)
		return -EINVAL;

	/* If restriction is in place, check the domid matches */
	restricted = data->domid != DOMID_INVALID;
	if (restricted && data->domid != irqfd.dom)
		return -EPERM;

	return (irqfd.flags & PRIVCMD_IRQFD_FLAG_DEASSIGN) ?
		privcmd_irqfd_deassign(&irqfd) : privcmd_irqfd_assign(&irqfd);
}
1059+
1060+
static int privcmd_irqfd_init(void)
1061+
{
1062+
irqfd_cleanup_wq = alloc_workqueue("privcmd-irqfd-cleanup", 0, 0);
1063+
if (!irqfd_cleanup_wq)
1064+
return -ENOMEM;
1065+
1066+
return 0;
1067+
}
1068+
1069+
static void privcmd_irqfd_exit(void)
1070+
{
1071+
struct privcmd_kernel_irqfd *kirqfd, *tmp;
1072+
1073+
mutex_lock(&irqfds_lock);
1074+
1075+
list_for_each_entry_safe(kirqfd, tmp, &irqfds_list, list)
1076+
irqfd_deactivate(kirqfd);
1077+
1078+
mutex_unlock(&irqfds_lock);
1079+
1080+
destroy_workqueue(irqfd_cleanup_wq);
1081+
}
1082+
#else
1083+
/* Built without CONFIG_XEN_PRIVCMD_IRQFD: the irqfd ioctl is not supported. */
static inline long privcmd_ioctl_irqfd(struct file *file, void __user *udata)
{
	return -EOPNOTSUPP;
}
1087+
1088+
/* Built without CONFIG_XEN_PRIVCMD_IRQFD: nothing to set up, always succeeds. */
static inline int privcmd_irqfd_init(void)
{
	return 0;
}
1092+
1093+
/* Built without CONFIG_XEN_PRIVCMD_IRQFD: nothing to tear down. */
static inline void privcmd_irqfd_exit(void)
{
}
1096+
#endif /* CONFIG_XEN_PRIVCMD_IRQFD */
1097+
8361098
static long privcmd_ioctl(struct file *file,
8371099
unsigned int cmd, unsigned long data)
8381100
{
@@ -868,6 +1130,10 @@ static long privcmd_ioctl(struct file *file,
8681130
ret = privcmd_ioctl_mmap_resource(file, udata);
8691131
break;
8701132

1133+
case IOCTL_PRIVCMD_IRQFD:
1134+
ret = privcmd_ioctl_irqfd(file, udata);
1135+
break;
1136+
8711137
default:
8721138
break;
8731139
}
@@ -992,15 +1258,27 @@ static int __init privcmd_init(void)
9921258
err = misc_register(&xen_privcmdbuf_dev);
9931259
if (err != 0) {
9941260
pr_err("Could not register Xen hypercall-buf device\n");
995-
misc_deregister(&privcmd_dev);
996-
return err;
1261+
goto err_privcmdbuf;
1262+
}
1263+
1264+
err = privcmd_irqfd_init();
1265+
if (err != 0) {
1266+
pr_err("irqfd init failed\n");
1267+
goto err_irqfd;
9971268
}
9981269

9991270
return 0;
1271+
1272+
err_irqfd:
1273+
misc_deregister(&xen_privcmdbuf_dev);
1274+
err_privcmdbuf:
1275+
misc_deregister(&privcmd_dev);
1276+
return err;
10001277
}
10011278

10021279
/*
 * Module exit: tear down irqfd support first, then unregister both misc
 * devices.
 */
static void __exit privcmd_exit(void)
{
	/* Deactivates remaining irqfds and destroys the cleanup workqueue. */
	privcmd_irqfd_exit();

	misc_deregister(&privcmd_dev);
	misc_deregister(&xen_privcmdbuf_dev);
}

include/uapi/xen/privcmd.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,18 @@ struct privcmd_mmap_resource {
9898
__u64 addr;
9999
};
100100

101+
/* For privcmd_irqfd::flags */
102+
#define PRIVCMD_IRQFD_FLAG_DEASSIGN (1 << 0)
103+
104+
struct privcmd_irqfd {
105+
void __user *dm_op;
106+
__u32 size; /* Size of structure pointed by dm_op */
107+
__u32 fd;
108+
__u32 flags;
109+
domid_t dom;
110+
__u8 pad[2];
111+
};
112+
101113
/*
102114
* @cmd: IOCTL_PRIVCMD_HYPERCALL
103115
* @arg: &privcmd_hypercall_t
@@ -125,5 +137,7 @@ struct privcmd_mmap_resource {
125137
_IOC(_IOC_NONE, 'P', 6, sizeof(domid_t))
126138
#define IOCTL_PRIVCMD_MMAP_RESOURCE \
127139
_IOC(_IOC_NONE, 'P', 7, sizeof(struct privcmd_mmap_resource))
140+
#define IOCTL_PRIVCMD_IRQFD \
141+
_IOC(_IOC_NONE, 'P', 8, sizeof(struct privcmd_irqfd))
128142

129143
#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */

0 commit comments

Comments
 (0)