Skip to content

Commit 621191d

Browse files
NunoDasNeves authored and liuw committed
Drivers: hv: Introduce mshv_root module to expose /dev/mshv to VMMs
Provide a set of IOCTLs for creating and managing child partitions when running as root partition on Hyper-V. The new driver is enabled via CONFIG_MSHV_ROOT. A brief overview of the interface: MSHV_CREATE_PARTITION is the entry point, returning a file descriptor representing a child partition. IOCTLs on this fd can be used to map memory, create VPs, etc. Creating a VP returns another file descriptor representing that VP which in turn has another set of corresponding IOCTLs for running the VP, getting/setting state, etc. MSHV_ROOT_HVCALL is a generic "passthrough" hypercall IOCTL which can be used for a number of partition or VP hypercalls. This is for hypercalls that do not affect any state in the kernel driver, such as getting and setting VP registers and partition properties, translating addresses, etc. It is "passthrough" because the binary input and output for the hypercall is only interpreted by the VMM - the kernel driver does nothing but insert the VP and partition id where necessary (which are always in the same place), and execute the hypercall. 
Co-developed-by: Anirudh Rayabharam <anrayabh@linux.microsoft.com> Signed-off-by: Anirudh Rayabharam <anrayabh@linux.microsoft.com> Co-developed-by: Jinank Jain <jinankjain@microsoft.com> Signed-off-by: Jinank Jain <jinankjain@microsoft.com> Co-developed-by: Mukesh Rathor <mrathor@linux.microsoft.com> Signed-off-by: Mukesh Rathor <mrathor@linux.microsoft.com> Co-developed-by: Muminul Islam <muislam@microsoft.com> Signed-off-by: Muminul Islam <muislam@microsoft.com> Co-developed-by: Praveen K Paladugu <prapal@linux.microsoft.com> Signed-off-by: Praveen K Paladugu <prapal@linux.microsoft.com> Co-developed-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com> Signed-off-by: Stanislav Kinsburskii <skinsburskii@linux.microsoft.com> Co-developed-by: Wei Liu <wei.liu@kernel.org> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com> Reviewed-by: Roman Kisel <romank@linux.microsoft.com> Link: https://lore.kernel.org/r/1741980536-3865-11-git-send-email-nunodasneves@linux.microsoft.com Signed-off-by: Wei Liu <wei.liu@kernel.org> Message-ID: <1741980536-3865-11-git-send-email-nunodasneves@linux.microsoft.com>
1 parent 0bd921a commit 621191d

File tree

14 files changed

+5732
-1
lines changed

14 files changed

+5732
-1
lines changed

Documentation/userspace-api/ioctl/ioctl-number.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -370,6 +370,8 @@ Code Seq# Include File Comments
370370
0xB7 all uapi/linux/remoteproc_cdev.h <mailto:linux-remoteproc@vger.kernel.org>
371371
0xB7 all uapi/linux/nsfs.h <mailto:Andrei Vagin <avagin@openvz.org>>
372372
0xB8 01-02 uapi/misc/mrvl_cn10k_dpi.h Marvell CN10K DPI driver
373+
0xB8 all uapi/linux/mshv.h Microsoft Hyper-V /dev/mshv driver
374+
<mailto:linux-hyperv@vger.kernel.org>
373375
0xC0 00-0F linux/usb/iowarrior.h
374376
0xCA 00-0F uapi/misc/cxl.h
375377
0xCA 10-2F uapi/misc/ocxl.h

drivers/hv/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ config MSHV_ROOT
6464
# e.g. When withdrawing memory, the hypervisor gives back 4k pages in
6565
# no particular order, making it impossible to reassemble larger pages
6666
depends on PAGE_SIZE_4KB
67+
select EVENTFD
6768
default n
6869
help
6970
Select this option to enable support for booting and running as root

drivers/hv/Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
obj-$(CONFIG_HYPERV) += hv_vmbus.o
33
obj-$(CONFIG_HYPERV_UTILS) += hv_utils.o
44
obj-$(CONFIG_HYPERV_BALLOON) += hv_balloon.o
5+
obj-$(CONFIG_MSHV_ROOT) += mshv_root.o
56

67
CFLAGS_hv_trace.o = -I$(src)
78
CFLAGS_hv_balloon.o = -I$(src)
@@ -11,7 +12,9 @@ hv_vmbus-y := vmbus_drv.o \
1112
channel_mgmt.o ring_buffer.o hv_trace.o
1213
hv_vmbus-$(CONFIG_HYPERV_TESTING) += hv_debugfs.o
1314
hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_utils_transport.o
15+
mshv_root-y := mshv_root_main.o mshv_synic.o mshv_eventfd.o mshv_irq.o \
16+
mshv_root_hv_call.o mshv_portid_table.o
1417

1518
# Code that must be built-in
1619
obj-$(subst m,y,$(CONFIG_HYPERV)) += hv_common.o
17-
obj-$(subst m,y,$(CONFIG_MSHV_ROOT)) += hv_proc.o
20+
obj-$(subst m,y,$(CONFIG_MSHV_ROOT)) += hv_proc.o mshv_common.o

drivers/hv/mshv.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/* SPDX-License-Identifier: GPL-2.0-only */
2+
/*
3+
* Copyright (c) 2023, Microsoft Corporation.
4+
*/
5+
6+
#ifndef _MSHV_H_
7+
#define _MSHV_H_
8+
9+
#include <linux/stddef.h>
10+
#include <linux/string.h>
11+
#include <hyperv/hvhdk.h>
12+
13+
/*
 * mshv_field_nonzero() - scan one member of a struct for non-zero bytes.
 * @STRUCT: struct object (an lvalue, not a pointer) that contains the member
 * @MEMBER: name of the member to inspect
 *
 * Hands the address and size of STRUCT.MEMBER to memchr_inv() with a match
 * byte of 0 and evaluates to whatever memchr_inv() returns: per that helper's
 * contract, NULL when every byte of the member is zero, otherwise a pointer
 * to the first non-zero byte.
 */
#define mshv_field_nonzero(STRUCT, MEMBER)				\
	memchr_inv(&((STRUCT).MEMBER), 0,				\
		   sizeof_field(typeof(STRUCT), MEMBER))
16+
17+
int hv_call_get_vp_registers(u32 vp_index, u64 partition_id, u16 count,
18+
union hv_input_vtl input_vtl,
19+
struct hv_register_assoc *registers);
20+
21+
int hv_call_set_vp_registers(u32 vp_index, u64 partition_id, u16 count,
22+
union hv_input_vtl input_vtl,
23+
struct hv_register_assoc *registers);
24+
25+
int hv_call_get_partition_property(u64 partition_id, u64 property_code,
26+
u64 *property_value);
27+
28+
int mshv_do_pre_guest_mode_work(ulong th_flags);
29+
30+
#endif /* _MSHV_H_ */

drivers/hv/mshv_common.c

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/*
3+
* Copyright (c) 2024, Microsoft Corporation.
4+
*
5+
* This file contains functions that will be called from one or more modules.
6+
* If any of these modules are configured to build, this file is built and just
7+
* statically linked in.
8+
*
9+
* Authors: Microsoft Linux virtualization team
10+
*/
11+
12+
#include <linux/kernel.h>
13+
#include <linux/mm.h>
14+
#include <asm/mshyperv.h>
15+
#include <linux/resume_user_mode.h>
16+
17+
#include "mshv.h"
18+
19+
/*
 * Upper bounds on the rep count of a single get/set VP registers hypercall,
 * both derived from HV_HYP_PAGE_SIZE:
 *
 *  - GET: number of union hv_register_value results that fit in
 *    HV_HYP_PAGE_SIZE bytes.
 *  - SET: number of struct hv_register_assoc elements that fit in
 *    HV_HYP_PAGE_SIZE bytes after the struct hv_input_set_vp_registers
 *    header.
 */
#define HV_GET_REGISTER_BATCH_SIZE					\
	(HV_HYP_PAGE_SIZE / sizeof(union hv_register_value))
#define HV_SET_REGISTER_BATCH_SIZE					\
	((HV_HYP_PAGE_SIZE - sizeof(struct hv_input_set_vp_registers))	\
		/ sizeof(struct hv_register_assoc))
24+
25+
/*
 * hv_call_get_vp_registers() - read VP registers with HVCALL_GET_VP_REGISTERS.
 * @vp_index:     index of the virtual processor, copied into the input header
 * @partition_id: partition id, copied into the input header
 * @count:        number of entries in @registers
 * @input_vtl:    VTL selector, copied into the input header
 * @registers:    array of @count entries; .name is read for each entry and
 *                .value is written for every entry the hypercall completed
 *
 * The request is issued in batches of at most HV_GET_REGISTER_BATCH_SIZE
 * registers. Local interrupts stay disabled from before the per-CPU hypercall
 * input/output buffers are looked up until the last batch has been copied
 * out. The loop stops at the first failing hypercall; entries handled by
 * earlier batches keep the values already stored in them.
 *
 * Return: hv_result_to_errno() of the status of the last hypercall issued
 * (of HV_STATUS_SUCCESS when @count is 0).
 */
int hv_call_get_vp_registers(u32 vp_index, u64 partition_id, u16 count,
			     union hv_input_vtl input_vtl,
			     struct hv_register_assoc *registers)
{
	struct hv_input_get_vp_registers *in;
	union hv_register_value *out;
	unsigned long irq_flags;
	unsigned long left = count;
	u64 status = HV_STATUS_SUCCESS;
	u16 done = 0;
	int batch, idx;

	local_irq_save(irq_flags);

	in = *this_cpu_ptr(hyperv_pcpu_input_arg);
	out = *this_cpu_ptr(hyperv_pcpu_output_arg);

	/* The header is identical for every batch; fill it in once. */
	in->partition_id = partition_id;
	in->vp_index = vp_index;
	in->input_vtl.as_uint8 = input_vtl.as_uint8;
	in->rsvd_z8 = 0;
	in->rsvd_z16 = 0;

	/* Advance past completed entries only after a successful batch. */
	for (; left; registers += done, left -= done) {
		batch = min(left, HV_GET_REGISTER_BATCH_SIZE);
		for (idx = 0; idx < batch; idx++)
			in->names[idx] = registers[idx].name;

		status = hv_do_rep_hypercall(HVCALL_GET_VP_REGISTERS, batch,
					     0, in, out);
		if (!hv_result_success(status))
			break;

		done = hv_repcomp(status);
		for (idx = 0; idx < done; idx++)
			registers[idx].value = out[idx];
	}

	local_irq_restore(irq_flags);

	return hv_result_to_errno(status);
}
EXPORT_SYMBOL_GPL(hv_call_get_vp_registers);
70+
71+
/*
 * hv_call_set_vp_registers() - write VP registers with HVCALL_SET_VP_REGISTERS.
 * @vp_index:     index of the virtual processor, copied into the input header
 * @partition_id: partition id, copied into the input header
 * @count:        number of entries in @registers
 * @input_vtl:    VTL selector, copied into the input header
 * @registers:    array of @count name/value pairs to write
 *
 * The pairs are copied into the per-CPU hypercall input buffer and submitted
 * in batches of at most HV_SET_REGISTER_BATCH_SIZE elements, with local
 * interrupts disabled for the whole sequence. The loop stops at the first
 * failing hypercall.
 *
 * Return: hv_result_to_errno() of the status of the last hypercall issued
 * (of HV_STATUS_SUCCESS when @count is 0).
 */
int hv_call_set_vp_registers(u32 vp_index, u64 partition_id, u16 count,
			     union hv_input_vtl input_vtl,
			     struct hv_register_assoc *registers)
{
	struct hv_input_set_vp_registers *in;
	unsigned long irq_flags;
	unsigned long left = count;
	u64 status = HV_STATUS_SUCCESS;
	u16 done = 0;
	int batch;

	local_irq_save(irq_flags);
	in = *this_cpu_ptr(hyperv_pcpu_input_arg);

	/* The header is identical for every batch; fill it in once. */
	in->partition_id = partition_id;
	in->vp_index = vp_index;
	in->input_vtl.as_uint8 = input_vtl.as_uint8;
	in->rsvd_z8 = 0;
	in->rsvd_z16 = 0;

	/* Advance past completed entries only after a successful batch. */
	for (; left; registers += done, left -= done) {
		batch = min(left, HV_SET_REGISTER_BATCH_SIZE);
		memcpy(in->elements, registers,
		       sizeof(struct hv_register_assoc) * batch);

		status = hv_do_rep_hypercall(HVCALL_SET_VP_REGISTERS, batch,
					     0, in, NULL);
		if (!hv_result_success(status))
			break;

		done = hv_repcomp(status);
	}

	local_irq_restore(irq_flags);

	return hv_result_to_errno(status);
}
EXPORT_SYMBOL_GPL(hv_call_set_vp_registers);
111+
112+
/*
 * hv_call_get_partition_property() - query a single partition property.
 * @partition_id:   partition id placed in the hypercall input
 * @property_code:  property code placed in the hypercall input
 * @property_value: where the returned value is stored on success
 *
 * Issues HVCALL_GET_PARTITION_PROPERTY using the per-CPU hypercall input and
 * output buffers, with local interrupts disabled while they are in use. The
 * input structure is zeroed before the two fields are set.
 *
 * Return: 0 on success, otherwise hv_result_to_errno() of the hypercall
 * status; *@property_value is left untouched on failure.
 */
int hv_call_get_partition_property(u64 partition_id,
				   u64 property_code,
				   u64 *property_value)
{
	struct hv_input_get_partition_property *in;
	struct hv_output_get_partition_property *out;
	unsigned long irq_flags;
	u64 status;
	bool ok;

	local_irq_save(irq_flags);

	in = *this_cpu_ptr(hyperv_pcpu_input_arg);
	out = *this_cpu_ptr(hyperv_pcpu_output_arg);

	memset(in, 0, sizeof(*in));
	in->partition_id = partition_id;
	in->property_code = property_code;

	status = hv_do_hypercall(HVCALL_GET_PARTITION_PROPERTY, in, out);
	ok = hv_result_success(status);

	/* Read the result while the per-CPU output buffer is still ours. */
	if (ok)
		*property_value = out->property_value;

	local_irq_restore(irq_flags);

	return ok ? 0 : hv_result_to_errno(status);
}
EXPORT_SYMBOL_GPL(hv_call_get_partition_property);
140+
141+
/*
142+
* Handle any pre-processing before going into the guest mode on this cpu, most
143+
* notably call schedule(). Must be invoked with both preemption and
144+
* interrupts enabled.
145+
*
146+
* Returns: 0 on success, -errno on error.
147+
*/
148+
int mshv_do_pre_guest_mode_work(ulong th_flags)
149+
{
150+
if (th_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
151+
return -EINTR;
152+
153+
if (th_flags & _TIF_NEED_RESCHED)
154+
schedule();
155+
156+
if (th_flags & _TIF_NOTIFY_RESUME)
157+
resume_user_mode_work(NULL);
158+
159+
return 0;
160+
}
161+
EXPORT_SYMBOL_GPL(mshv_do_pre_guest_mode_work);

0 commit comments

Comments
 (0)