Skip to content

Commit d2062cc

Browse files
ashkalrabp3tk0v
authored and committed
x86/sev: Do not touch VMSA pages during SNP guest memory kdump
When kdump is running makedumpfile to generate vmcore and dump SNP guest memory it touches the VMSA page of the vCPU executing kdump. It then results in unrecoverable #NPF/RMP faults as the VMSA page is marked busy/in-use when the vCPU is running and subsequently a causes guest softlockup/hang. Additionally, other APs may be halted in guest mode and their VMSA pages are marked busy and touching these VMSA pages during guest memory dump will also cause #NPF. Issue AP_DESTROY GHCB calls on other APs to ensure they are kicked out of guest mode and then clear the VMSA bit on their VMSA pages. If the vCPU running kdump is an AP, mark it's VMSA page as offline to ensure that makedumpfile excludes that page while dumping guest memory. Fixes: 3074152 ("x86/sev: Convert shared memory back to private on kexec") Signed-off-by: Ashish Kalra <ashish.kalra@amd.com> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de> Reviewed-by: Pankaj Gupta <pankaj.gupta@amd.com> Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com> Tested-by: Srikanth Aithal <sraithal@amd.com> Cc: stable@vger.kernel.org Link: https://lore.kernel.org/20250428214151.155464-1-Ashish.Kalra@amd.com
1 parent 386cd3d commit d2062cc

File tree

1 file changed

+158
-86
lines changed

1 file changed

+158
-86
lines changed

arch/x86/coco/sev/core.c

Lines changed: 158 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -959,6 +959,102 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end)
959959
set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
960960
}
961961

962+
/*
 * Issue an AP Creation NAE event through the GHCB to create or destroy
 * the vCPU identified by @apic_id.
 *
 * @event:   SVM_VMGEXIT_AP_CREATE or SVM_VMGEXIT_AP_DESTROY
 * @vmsa:    VMSA page to hand to (or reclaim from) the hypervisor
 * @apic_id: APIC ID of the target vCPU
 *
 * Runs with IRQs disabled around the GHCB access since the per-CPU GHCB
 * must not be re-entered. Returns 0 on success, -EINVAL if the
 * hypervisor reported an error in SW_EXIT_INFO_1.
 */
static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id)
{
	bool is_create = (event != SVM_VMGEXIT_AP_DESTROY);
	struct ghcb_state state;
	struct ghcb *ghcb;
	unsigned long irqflags;
	int rc = 0;

	local_irq_save(irqflags);

	ghcb = __sev_get_ghcb(&state);
	vc_ghcb_invalidate(ghcb);

	/* RAX carries the SEV features only for the CREATE variant. */
	if (is_create)
		ghcb_set_rax(ghcb, vmsa->sev_features);

	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
	/* INFO_1 packs the target APIC ID, the caller's VMPL and the event. */
	ghcb_set_sw_exit_info_1(ghcb,
				((u64)apic_id << 32) |
				((u64)snp_vmpl << 16) |
				event);
	ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));

	sev_es_wr_ghcb_msr(__pa(ghcb));
	VMGEXIT();

	/* A zero low word in SW_EXIT_INFO_1 signals success. */
	if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
	    lower_32_bits(ghcb->save.sw_exit_info_1)) {
		pr_err("SNP AP %s error\n", (is_create ? "CREATE" : "DESTROY"));
		rc = -EINVAL;
	}

	__sev_put_ghcb(&state);

	local_irq_restore(irqflags);

	return rc;
}
1001+
1002+
/*
 * Set or clear the VMSA attribute on the page at @va.
 *
 * @va:        page to (un)mark as a VMSA page
 * @caa:       Calling Area address, used only when creating via an SVSM
 * @apic_id:   APIC ID of the vCPU the VMSA belongs to
 * @make_vmsa: true to mark the page as a VMSA, false to clear it
 *
 * When an SVSM is present (snp_vmpl != 0) the guest cannot touch RMP
 * permissions itself and must ask the SVSM via the Core protocol.
 * Returns 0 on success or a nonzero error from the SVSM call/RMPADJUST.
 */
static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
{
	int rc;

	if (!snp_vmpl) {
		/*
		 * If the kernel runs at VMPL0, it can change the VMSA
		 * bit for a page using the RMPADJUST instruction.
		 * However, for the instruction to succeed it must
		 * target the permissions of a lesser privileged (higher
		 * numbered) VMPL level, so use VMPL1.
		 */
		u64 rmp_attrs = 1;

		if (make_vmsa)
			rmp_attrs |= RMPADJUST_VMSA_PAGE_BIT;

		return rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, rmp_attrs);
	}

	{
		struct svsm_call call = {};
		unsigned long irqflags;

		local_irq_save(irqflags);

		call.caa = this_cpu_read(svsm_caa);
		call.rcx = __pa(va);

		if (make_vmsa) {
			/* Protocol 0, Call ID 2 */
			call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
			call.rdx = __pa(caa);
			call.r8  = apic_id;
		} else {
			/* Protocol 0, Call ID 3 */
			call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
		}

		rc = svsm_perform_call_protocol(&call);

		local_irq_restore(irqflags);
	}

	return rc;
}
1046+
1047+
static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
1048+
{
1049+
int err;
1050+
1051+
err = snp_set_vmsa(vmsa, NULL, apic_id, false);
1052+
if (err)
1053+
pr_err("clear VMSA page failed (%u), leaking page\n", err);
1054+
else
1055+
free_page((unsigned long)vmsa);
1056+
}
1057+
9621058
static void set_pte_enc(pte_t *kpte, int level, void *va)
9631059
{
9641060
struct pte_enc_desc d = {
@@ -1055,6 +1151,65 @@ void snp_kexec_begin(void)
10551151
pr_warn("Failed to stop shared<->private conversions\n");
10561152
}
10571153

1154+
/*
1155+
* Shutdown all APs except the one handling kexec/kdump and clearing
1156+
* the VMSA tag on AP's VMSA pages as they are not being used as
1157+
* VMSA page anymore.
1158+
*/
1159+
static void shutdown_all_aps(void)
1160+
{
1161+
struct sev_es_save_area *vmsa;
1162+
int apic_id, this_cpu, cpu;
1163+
1164+
this_cpu = get_cpu();
1165+
1166+
/*
1167+
* APs are already in HLT loop when enc_kexec_finish() callback
1168+
* is invoked.
1169+
*/
1170+
for_each_present_cpu(cpu) {
1171+
vmsa = per_cpu(sev_vmsa, cpu);
1172+
1173+
/*
1174+
* The BSP or offlined APs do not have guest allocated VMSA
1175+
* and there is no need to clear the VMSA tag for this page.
1176+
*/
1177+
if (!vmsa)
1178+
continue;
1179+
1180+
/*
1181+
* Cannot clear the VMSA tag for the currently running vCPU.
1182+
*/
1183+
if (this_cpu == cpu) {
1184+
unsigned long pa;
1185+
struct page *p;
1186+
1187+
pa = __pa(vmsa);
1188+
/*
1189+
* Mark the VMSA page of the running vCPU as offline
1190+
* so that is excluded and not touched by makedumpfile
1191+
* while generating vmcore during kdump.
1192+
*/
1193+
p = pfn_to_online_page(pa >> PAGE_SHIFT);
1194+
if (p)
1195+
__SetPageOffline(p);
1196+
continue;
1197+
}
1198+
1199+
apic_id = cpuid_to_apicid[cpu];
1200+
1201+
/*
1202+
* Issue AP destroy to ensure AP gets kicked out of guest mode
1203+
* to allow using RMPADJUST to remove the VMSA tag on it's
1204+
* VMSA page.
1205+
*/
1206+
vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id);
1207+
snp_cleanup_vmsa(vmsa, apic_id);
1208+
}
1209+
1210+
put_cpu();
1211+
}
1212+
10581213
void snp_kexec_finish(void)
10591214
{
10601215
struct sev_es_runtime_data *data;
@@ -1069,6 +1224,8 @@ void snp_kexec_finish(void)
10691224
if (!IS_ENABLED(CONFIG_KEXEC_CORE))
10701225
return;
10711226

1227+
shutdown_all_aps();
1228+
10721229
unshare_all_memory();
10731230

10741231
/*
@@ -1090,51 +1247,6 @@ void snp_kexec_finish(void)
10901247
}
10911248
}
10921249

1093-
static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
1094-
{
1095-
int ret;
1096-
1097-
if (snp_vmpl) {
1098-
struct svsm_call call = {};
1099-
unsigned long flags;
1100-
1101-
local_irq_save(flags);
1102-
1103-
call.caa = this_cpu_read(svsm_caa);
1104-
call.rcx = __pa(va);
1105-
1106-
if (make_vmsa) {
1107-
/* Protocol 0, Call ID 2 */
1108-
call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
1109-
call.rdx = __pa(caa);
1110-
call.r8 = apic_id;
1111-
} else {
1112-
/* Protocol 0, Call ID 3 */
1113-
call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
1114-
}
1115-
1116-
ret = svsm_perform_call_protocol(&call);
1117-
1118-
local_irq_restore(flags);
1119-
} else {
1120-
/*
1121-
* If the kernel runs at VMPL0, it can change the VMSA
1122-
* bit for a page using the RMPADJUST instruction.
1123-
* However, for the instruction to succeed it must
1124-
* target the permissions of a lesser privileged (higher
1125-
* numbered) VMPL level, so use VMPL1.
1126-
*/
1127-
u64 attrs = 1;
1128-
1129-
if (make_vmsa)
1130-
attrs |= RMPADJUST_VMSA_PAGE_BIT;
1131-
1132-
ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
1133-
}
1134-
1135-
return ret;
1136-
}
1137-
11381250
#define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
11391251
#define INIT_CS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_READ_MASK | SVM_SELECTOR_CODE_MASK)
11401252
#define INIT_DS_ATTRIBS (__ATTR_BASE | SVM_SELECTOR_WRITE_MASK)
@@ -1166,24 +1278,10 @@ static void *snp_alloc_vmsa_page(int cpu)
11661278
return page_address(p + 1);
11671279
}
11681280

1169-
static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
1170-
{
1171-
int err;
1172-
1173-
err = snp_set_vmsa(vmsa, NULL, apic_id, false);
1174-
if (err)
1175-
pr_err("clear VMSA page failed (%u), leaking page\n", err);
1176-
else
1177-
free_page((unsigned long)vmsa);
1178-
}
1179-
11801281
static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
11811282
{
11821283
struct sev_es_save_area *cur_vmsa, *vmsa;
1183-
struct ghcb_state state;
11841284
struct svsm_ca *caa;
1185-
unsigned long flags;
1186-
struct ghcb *ghcb;
11871285
u8 sipi_vector;
11881286
int cpu, ret;
11891287
u64 cr4;
@@ -1297,33 +1395,7 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
12971395
}
12981396

12991397
/* Issue VMGEXIT AP Creation NAE event */
1300-
local_irq_save(flags);
1301-
1302-
ghcb = __sev_get_ghcb(&state);
1303-
1304-
vc_ghcb_invalidate(ghcb);
1305-
ghcb_set_rax(ghcb, vmsa->sev_features);
1306-
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
1307-
ghcb_set_sw_exit_info_1(ghcb,
1308-
((u64)apic_id << 32) |
1309-
((u64)snp_vmpl << 16) |
1310-
SVM_VMGEXIT_AP_CREATE);
1311-
ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
1312-
1313-
sev_es_wr_ghcb_msr(__pa(ghcb));
1314-
VMGEXIT();
1315-
1316-
if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
1317-
lower_32_bits(ghcb->save.sw_exit_info_1)) {
1318-
pr_err("SNP AP Creation error\n");
1319-
ret = -EINVAL;
1320-
}
1321-
1322-
__sev_put_ghcb(&state);
1323-
1324-
local_irq_restore(flags);
1325-
1326-
/* Perform cleanup if there was an error */
1398+
ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id);
13271399
if (ret) {
13281400
snp_cleanup_vmsa(vmsa, apic_id);
13291401
vmsa = NULL;

0 commit comments

Comments
 (0)