
Commit d466c19

RISC-V: KVM: Add common nested acceleration support
Add common nested acceleration support which will be shared by all
parts of KVM RISC-V. This nested acceleration support detects and
enables SBI NACL extension usage based on static keys, which ensures
minimal impact on the non-nested scenario.

Signed-off-by: Anup Patel <apatel@ventanamicro.com>
Reviewed-by: Atish Patra <atishp@rivosinc.com>
Link: https://lore.kernel.org/r/20241020194734.58686-9-apatel@ventanamicro.com
Signed-off-by: Anup Patel <anup@brainfault.org>
1 parent 5daf89e commit d466c19
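
The static-key dispatch described in the commit message is easiest to see from the caller's side: the ncsr_*() and nsync_csr() wrappers added below in arch/riscv/include/asm/kvm_nacl.h test a static key and then either touch the per-CPU NACL shared memory or fall back to a real CSR access. A minimal usage sketch (illustrative only, not code from this commit; the helper name and the choice of CSR_HVIP are made up for the example):

/* Illustrative helper, not part of this commit. */
static unsigned long example_update_hvip(unsigned long new_val)
{
        unsigned long old;

        /* Read the shared-memory copy when NACL is available,
         * otherwise fall back to a plain csr_read(). */
        old = ncsr_read(CSR_HVIP);

        /* Write the shared-memory copy and mark it dirty when the
         * sync_csr feature is available, otherwise csr_write(). */
        ncsr_write(CSR_HVIP, new_val);

        /* Request an explicit CSR synchronization via the SBI NACL
         * SYNC_CSR call; does nothing when sync_csr is unavailable. */
        nsync_csr(CSR_HVIP);

        return old;
}

Because the feature checks are static keys, the non-NACL path costs only a patched-out branch at each call site.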

4 files changed: +441 -2 lines

arch/riscv/include/asm/kvm_nacl.h

Lines changed: 239 additions & 0 deletions
@@ -0,0 +1,239 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (c) 2024 Ventana Micro Systems Inc.
 */

#ifndef __KVM_NACL_H
#define __KVM_NACL_H

#include <linux/jump_label.h>
#include <linux/percpu.h>
#include <asm/byteorder.h>
#include <asm/csr.h>
#include <asm/sbi.h>

DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_available);
#define kvm_riscv_nacl_available() \
        static_branch_unlikely(&kvm_riscv_nacl_available)

DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_csr_available);
#define kvm_riscv_nacl_sync_csr_available() \
        static_branch_unlikely(&kvm_riscv_nacl_sync_csr_available)

DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_hfence_available);
#define kvm_riscv_nacl_sync_hfence_available() \
        static_branch_unlikely(&kvm_riscv_nacl_sync_hfence_available)

DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_sync_sret_available);
#define kvm_riscv_nacl_sync_sret_available() \
        static_branch_unlikely(&kvm_riscv_nacl_sync_sret_available)

DECLARE_STATIC_KEY_FALSE(kvm_riscv_nacl_autoswap_csr_available);
#define kvm_riscv_nacl_autoswap_csr_available() \
        static_branch_unlikely(&kvm_riscv_nacl_autoswap_csr_available)

struct kvm_riscv_nacl {
        void *shmem;
        phys_addr_t shmem_phys;
};
DECLARE_PER_CPU(struct kvm_riscv_nacl, kvm_riscv_nacl);

void __kvm_riscv_nacl_hfence(void *shmem,
                             unsigned long control,
                             unsigned long page_num,
                             unsigned long page_count);

int kvm_riscv_nacl_enable(void);

void kvm_riscv_nacl_disable(void);

void kvm_riscv_nacl_exit(void);

int kvm_riscv_nacl_init(void);

#ifdef CONFIG_32BIT
#define lelong_to_cpu(__x) le32_to_cpu(__x)
#define cpu_to_lelong(__x) cpu_to_le32(__x)
#else
#define lelong_to_cpu(__x) le64_to_cpu(__x)
#define cpu_to_lelong(__x) cpu_to_le64(__x)
#endif

#define nacl_shmem() \
        this_cpu_ptr(&kvm_riscv_nacl)->shmem

#define nacl_scratch_read_long(__shmem, __offset) \
({ \
        unsigned long *__p = (__shmem) + \
                             SBI_NACL_SHMEM_SCRATCH_OFFSET + \
                             (__offset); \
        lelong_to_cpu(*__p); \
})

#define nacl_scratch_write_long(__shmem, __offset, __val) \
do { \
        unsigned long *__p = (__shmem) + \
                             SBI_NACL_SHMEM_SCRATCH_OFFSET + \
                             (__offset); \
        *__p = cpu_to_lelong(__val); \
} while (0)

#define nacl_scratch_write_longs(__shmem, __offset, __array, __count) \
do { \
        unsigned int __i; \
        unsigned long *__p = (__shmem) + \
                             SBI_NACL_SHMEM_SCRATCH_OFFSET + \
                             (__offset); \
        for (__i = 0; __i < (__count); __i++) \
                __p[__i] = cpu_to_lelong((__array)[__i]); \
} while (0)

#define nacl_sync_hfence(__e) \
        sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SYNC_HFENCE, \
                  (__e), 0, 0, 0, 0, 0)

#define nacl_hfence_mkconfig(__type, __order, __vmid, __asid) \
({ \
        unsigned long __c = SBI_NACL_SHMEM_HFENCE_CONFIG_PEND; \
        __c |= ((__type) & SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_MASK) \
                << SBI_NACL_SHMEM_HFENCE_CONFIG_TYPE_SHIFT; \
        __c |= (((__order) - SBI_NACL_SHMEM_HFENCE_ORDER_BASE) & \
                SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_MASK) \
                << SBI_NACL_SHMEM_HFENCE_CONFIG_ORDER_SHIFT; \
        __c |= ((__vmid) & SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_MASK) \
                << SBI_NACL_SHMEM_HFENCE_CONFIG_VMID_SHIFT; \
        __c |= ((__asid) & SBI_NACL_SHMEM_HFENCE_CONFIG_ASID_MASK); \
        __c; \
})

#define nacl_hfence_mkpnum(__order, __addr) \
        ((__addr) >> (__order))

#define nacl_hfence_mkpcount(__order, __size) \
        ((__size) >> (__order))

#define nacl_hfence_gvma(__shmem, __gpa, __gpsz, __order) \
__kvm_riscv_nacl_hfence(__shmem, \
        nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA, \
                             __order, 0, 0), \
        nacl_hfence_mkpnum(__order, __gpa), \
        nacl_hfence_mkpcount(__order, __gpsz))

#define nacl_hfence_gvma_all(__shmem) \
__kvm_riscv_nacl_hfence(__shmem, \
        nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_ALL, \
                             0, 0, 0), 0, 0)

#define nacl_hfence_gvma_vmid(__shmem, __vmid, __gpa, __gpsz, __order) \
__kvm_riscv_nacl_hfence(__shmem, \
        nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_VMID, \
                             __order, __vmid, 0), \
        nacl_hfence_mkpnum(__order, __gpa), \
        nacl_hfence_mkpcount(__order, __gpsz))

#define nacl_hfence_gvma_vmid_all(__shmem, __vmid) \
__kvm_riscv_nacl_hfence(__shmem, \
        nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_GVMA_VMID_ALL, \
                             0, __vmid, 0), 0, 0)

#define nacl_hfence_vvma(__shmem, __vmid, __gva, __gvsz, __order) \
__kvm_riscv_nacl_hfence(__shmem, \
        nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA, \
                             __order, __vmid, 0), \
        nacl_hfence_mkpnum(__order, __gva), \
        nacl_hfence_mkpcount(__order, __gvsz))

#define nacl_hfence_vvma_all(__shmem, __vmid) \
__kvm_riscv_nacl_hfence(__shmem, \
        nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ALL, \
                             0, __vmid, 0), 0, 0)

#define nacl_hfence_vvma_asid(__shmem, __vmid, __asid, __gva, __gvsz, __order)\
__kvm_riscv_nacl_hfence(__shmem, \
        nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ASID, \
                             __order, __vmid, __asid), \
        nacl_hfence_mkpnum(__order, __gva), \
        nacl_hfence_mkpcount(__order, __gvsz))

#define nacl_hfence_vvma_asid_all(__shmem, __vmid, __asid) \
__kvm_riscv_nacl_hfence(__shmem, \
        nacl_hfence_mkconfig(SBI_NACL_SHMEM_HFENCE_TYPE_VVMA_ASID_ALL, \
                             0, __vmid, __asid), 0, 0)

#define nacl_csr_read(__shmem, __csr) \
({ \
        unsigned long *__a = (__shmem) + SBI_NACL_SHMEM_CSR_OFFSET; \
        lelong_to_cpu(__a[SBI_NACL_SHMEM_CSR_INDEX(__csr)]); \
})

#define nacl_csr_write(__shmem, __csr, __val) \
do { \
        void *__s = (__shmem); \
        unsigned int __i = SBI_NACL_SHMEM_CSR_INDEX(__csr); \
        unsigned long *__a = (__s) + SBI_NACL_SHMEM_CSR_OFFSET; \
        u8 *__b = (__s) + SBI_NACL_SHMEM_DBITMAP_OFFSET; \
        __a[__i] = cpu_to_lelong(__val); \
        __b[__i >> 3] |= 1U << (__i & 0x7); \
} while (0)

#define nacl_csr_swap(__shmem, __csr, __val) \
({ \
        void *__s = (__shmem); \
        unsigned int __i = SBI_NACL_SHMEM_CSR_INDEX(__csr); \
        unsigned long *__a = (__s) + SBI_NACL_SHMEM_CSR_OFFSET; \
        u8 *__b = (__s) + SBI_NACL_SHMEM_DBITMAP_OFFSET; \
        unsigned long __r = lelong_to_cpu(__a[__i]); \
        __a[__i] = cpu_to_lelong(__val); \
        __b[__i >> 3] |= 1U << (__i & 0x7); \
        __r; \
})

#define nacl_sync_csr(__csr) \
        sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_SYNC_CSR, \
                  (__csr), 0, 0, 0, 0, 0)

/*
 * Each ncsr_xyz() macro defined below has its own static branch, so every
 * use of an ncsr_xyz() macro emits a patchable direct jump. This means
 * multiple back-to-back ncsr_xyz() uses will emit multiple patchable
 * direct jumps, which is sub-optimal.
 *
 * Based on the above, it is recommended to avoid multiple back-to-back
 * ncsr_xyz() uses.
 */

#define ncsr_read(__csr) \
({ \
        unsigned long __r; \
        if (kvm_riscv_nacl_available()) \
                __r = nacl_csr_read(nacl_shmem(), __csr); \
        else \
                __r = csr_read(__csr); \
        __r; \
})

#define ncsr_write(__csr, __val) \
do { \
        if (kvm_riscv_nacl_sync_csr_available()) \
                nacl_csr_write(nacl_shmem(), __csr, __val); \
        else \
                csr_write(__csr, __val); \
} while (0)

#define ncsr_swap(__csr, __val) \
({ \
        unsigned long __r; \
        if (kvm_riscv_nacl_sync_csr_available()) \
                __r = nacl_csr_swap(nacl_shmem(), __csr, __val); \
        else \
                __r = csr_swap(__csr, __val); \
        __r; \
})

#define nsync_csr(__csr) \
do { \
        if (kvm_riscv_nacl_sync_csr_available()) \
                nacl_sync_csr(__csr); \
} while (0)

#endif
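
The comment above the ncsr_*() macros recommends avoiding back-to-back uses because each use emits its own patchable branch. The alternative pattern it implies is to test the feature key once, cache nacl_shmem(), and call the nacl_csr_*() helpers directly. A sketch of that shape (illustrative only; the function name and CSR choices are not from this commit):

/* Illustrative only: one feature check for a batch of CSR updates. */
static void example_write_batch(unsigned long vstvec, unsigned long vsscratch)
{
        void *shmem;

        if (kvm_riscv_nacl_sync_csr_available()) {
                shmem = nacl_shmem();
                /* Each write lands in the shared memory and sets the
                 * matching bit in the dirty bitmap. */
                nacl_csr_write(shmem, CSR_VSTVEC, vstvec);
                nacl_csr_write(shmem, CSR_VSSCRATCH, vsscratch);
        } else {
                csr_write(CSR_VSTVEC, vstvec);
                csr_write(CSR_VSSCRATCH, vsscratch);
        }
}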

arch/riscv/kvm/Makefile

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ kvm-y += aia_device.o
 kvm-y += aia_imsic.o
 kvm-y += main.o
 kvm-y += mmu.o
+kvm-y += nacl.o
 kvm-y += tlb.o
 kvm-y += vcpu.o
 kvm-y += vcpu_exit.o

arch/riscv/kvm/main.c

Lines changed: 49 additions & 2 deletions
@@ -10,8 +10,8 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/kvm_host.h>
-#include <asm/csr.h>
 #include <asm/cpufeature.h>
+#include <asm/kvm_nacl.h>
 #include <asm/sbi.h>

 long kvm_arch_dev_ioctl(struct file *filp,
@@ -22,6 +22,12 @@ long kvm_arch_dev_ioctl(struct file *filp,

 int kvm_arch_enable_virtualization_cpu(void)
 {
+        int rc;
+
+        rc = kvm_riscv_nacl_enable();
+        if (rc)
+                return rc;
+
         csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT);
         csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT);

@@ -49,17 +55,21 @@ void kvm_arch_disable_virtualization_cpu(void)
         csr_write(CSR_HVIP, 0);
         csr_write(CSR_HEDELEG, 0);
         csr_write(CSR_HIDELEG, 0);
+
+        kvm_riscv_nacl_disable();
 }

 static void kvm_riscv_teardown(void)
 {
         kvm_riscv_aia_exit();
+        kvm_riscv_nacl_exit();
         kvm_unregister_perf_callbacks();
 }

 static int __init riscv_kvm_init(void)
 {
         int rc;
+        char slist[64];
         const char *str;

         if (!riscv_isa_extension_available(NULL, h)) {
@@ -77,16 +87,53 @@ static int __init riscv_kvm_init(void)
                 return -ENODEV;
         }

+        rc = kvm_riscv_nacl_init();
+        if (rc && rc != -ENODEV)
+                return rc;
+
         kvm_riscv_gstage_mode_detect();

         kvm_riscv_gstage_vmid_detect();

         rc = kvm_riscv_aia_init();
-        if (rc && rc != -ENODEV)
+        if (rc && rc != -ENODEV) {
+                kvm_riscv_nacl_exit();
                 return rc;
+        }

         kvm_info("hypervisor extension available\n");

+        if (kvm_riscv_nacl_available()) {
+                rc = 0;
+                slist[0] = '\0';
+                if (kvm_riscv_nacl_sync_csr_available()) {
+                        if (rc)
+                                strcat(slist, ", ");
+                        strcat(slist, "sync_csr");
+                        rc++;
+                }
+                if (kvm_riscv_nacl_sync_hfence_available()) {
+                        if (rc)
+                                strcat(slist, ", ");
+                        strcat(slist, "sync_hfence");
+                        rc++;
+                }
+                if (kvm_riscv_nacl_sync_sret_available()) {
+                        if (rc)
+                                strcat(slist, ", ");
+                        strcat(slist, "sync_sret");
+                        rc++;
+                }
+                if (kvm_riscv_nacl_autoswap_csr_available()) {
+                        if (rc)
+                                strcat(slist, ", ");
+                        strcat(slist, "autoswap_csr");
+                        rc++;
+                }
+                kvm_info("using SBI nested acceleration with %s\n",
+                         (rc) ? slist : "no features");
+        }
+
         switch (kvm_riscv_gstage_mode()) {
         case HGATP_MODE_SV32X4:
                 str = "Sv32x4";
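
The fourth changed file (presumably arch/riscv/kvm/nacl.c, given the kvm-y += nacl.o addition above) is not expanded on this page, so the probing behind kvm_riscv_nacl_init() is not visible here. Below is a plausible sketch of that detection step, assuming the SBI_EXT_NACL_PROBE_FEATURE function ID and the SBI_NACL_FEAT_* defines introduced earlier in this series; the function names are hypothetical and this is not the commit's actual code.

/* Hypothetical sketch, not the commit's actual nacl.c. */
static bool __init nacl_probe_feature(unsigned long feat)
{
        struct sbiret ret;

        ret = sbi_ecall(SBI_EXT_NACL, SBI_EXT_NACL_PROBE_FEATURE,
                        feat, 0, 0, 0, 0, 0);
        return !ret.error && ret.value;
}

static void __init nacl_detect_features(void)
{
        if (sbi_probe_extension(SBI_EXT_NACL) <= 0)
                return;
        static_branch_enable(&kvm_riscv_nacl_available);

        if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_CSR))
                static_branch_enable(&kvm_riscv_nacl_sync_csr_available);
        if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_HFENCE))
                static_branch_enable(&kvm_riscv_nacl_sync_hfence_available);
        if (nacl_probe_feature(SBI_NACL_FEAT_SYNC_SRET))
                static_branch_enable(&kvm_riscv_nacl_sync_sret_available);
        if (nacl_probe_feature(SBI_NACL_FEAT_AUTOSWAP_CSR))
                static_branch_enable(&kvm_riscv_nacl_autoswap_csr_available);
}

Whatever the exact shape, an -ENODEV return from kvm_riscv_nacl_init() is treated as "NACL not present" by riscv_kvm_init() above, so KVM still loads and simply leaves all of these static keys disabled.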
