Skip to content

Commit 56b2b1f

Browse files
committed
Merge tag 'x86-urgent-2025-05-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull misc x86 fixes from Ingo Molnar:

 - Fix SEV-SNP kdump bugs
 - Update the email address of Alexey Makhalov in MAINTAINERS
 - Add the CPU feature flag for the Zen6 microarchitecture
 - Fix typo in system message

* tag 'x86-urgent-2025-05-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Remove duplicated word in warning message
  x86/CPU/AMD: Add X86_FEATURE_ZEN6
  x86/sev: Make sure pages are not skipped during kdump
  x86/sev: Do not touch VMSA pages during SNP guest memory kdump
  MAINTAINERS: Update Alexey Makhalov's email address
  x86/sev: Fix operator precedence in GHCB_MSR_VMPL_REQ_LEVEL macro
2 parents 4bcaa59 + 0368091 commit 56b2b1f

File tree

6 files changed

+176
-96
lines changed

6 files changed

+176
-96
lines changed

MAINTAINERS

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18439,7 +18439,7 @@ F: include/uapi/linux/ppdev.h
1843918439
PARAVIRT_OPS INTERFACE
1844018440
M: Juergen Gross <jgross@suse.com>
1844118441
R: Ajay Kaher <ajay.kaher@broadcom.com>
18442-
R: Alexey Makhalov <alexey.amakhalov@broadcom.com>
18442+
R: Alexey Makhalov <alexey.makhalov@broadcom.com>
1844318443
R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
1844418444
L: virtualization@lists.linux.dev
1844518445
L: x86@kernel.org
@@ -25924,7 +25924,7 @@ F: drivers/misc/vmw_balloon.c
2592425924

2592525925
VMWARE HYPERVISOR INTERFACE
2592625926
M: Ajay Kaher <ajay.kaher@broadcom.com>
25927-
M: Alexey Makhalov <alexey.amakhalov@broadcom.com>
25927+
M: Alexey Makhalov <alexey.makhalov@broadcom.com>
2592825928
R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
2592925929
L: virtualization@lists.linux.dev
2593025930
L: x86@kernel.org
@@ -25952,7 +25952,7 @@ F: drivers/scsi/vmw_pvscsi.h
2595225952
VMWARE VIRTUAL PTP CLOCK DRIVER
2595325953
M: Nick Shi <nick.shi@broadcom.com>
2595425954
R: Ajay Kaher <ajay.kaher@broadcom.com>
25955-
R: Alexey Makhalov <alexey.amakhalov@broadcom.com>
25955+
R: Alexey Makhalov <alexey.makhalov@broadcom.com>
2595625956
R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
2595725957
L: netdev@vger.kernel.org
2595825958
S: Supported

arch/x86/coco/sev/core.c

Lines changed: 165 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -959,6 +959,102 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end)
959959
set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
960960
}
961961

962+
static int vmgexit_ap_control(u64 event, struct sev_es_save_area *vmsa, u32 apic_id)
963+
{
964+
bool create = event != SVM_VMGEXIT_AP_DESTROY;
965+
struct ghcb_state state;
966+
unsigned long flags;
967+
struct ghcb *ghcb;
968+
int ret = 0;
969+
970+
local_irq_save(flags);
971+
972+
ghcb = __sev_get_ghcb(&state);
973+
974+
vc_ghcb_invalidate(ghcb);
975+
976+
if (create)
977+
ghcb_set_rax(ghcb, vmsa->sev_features);
978+
979+
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
980+
ghcb_set_sw_exit_info_1(ghcb,
981+
((u64)apic_id << 32) |
982+
((u64)snp_vmpl << 16) |
983+
event);
984+
ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
985+
986+
sev_es_wr_ghcb_msr(__pa(ghcb));
987+
VMGEXIT();
988+
989+
if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
990+
lower_32_bits(ghcb->save.sw_exit_info_1)) {
991+
pr_err("SNP AP %s error\n", (create ? "CREATE" : "DESTROY"));
992+
ret = -EINVAL;
993+
}
994+
995+
__sev_put_ghcb(&state);
996+
997+
local_irq_restore(flags);
998+
999+
return ret;
1000+
}
1001+
1002+
static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
1003+
{
1004+
int ret;
1005+
1006+
if (snp_vmpl) {
1007+
struct svsm_call call = {};
1008+
unsigned long flags;
1009+
1010+
local_irq_save(flags);
1011+
1012+
call.caa = this_cpu_read(svsm_caa);
1013+
call.rcx = __pa(va);
1014+
1015+
if (make_vmsa) {
1016+
/* Protocol 0, Call ID 2 */
1017+
call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
1018+
call.rdx = __pa(caa);
1019+
call.r8 = apic_id;
1020+
} else {
1021+
/* Protocol 0, Call ID 3 */
1022+
call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
1023+
}
1024+
1025+
ret = svsm_perform_call_protocol(&call);
1026+
1027+
local_irq_restore(flags);
1028+
} else {
1029+
/*
1030+
* If the kernel runs at VMPL0, it can change the VMSA
1031+
* bit for a page using the RMPADJUST instruction.
1032+
* However, for the instruction to succeed it must
1033+
* target the permissions of a lesser privileged (higher
1034+
* numbered) VMPL level, so use VMPL1.
1035+
*/
1036+
u64 attrs = 1;
1037+
1038+
if (make_vmsa)
1039+
attrs |= RMPADJUST_VMSA_PAGE_BIT;
1040+
1041+
ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
1042+
}
1043+
1044+
return ret;
1045+
}
1046+
1047+
static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
1048+
{
1049+
int err;
1050+
1051+
err = snp_set_vmsa(vmsa, NULL, apic_id, false);
1052+
if (err)
1053+
pr_err("clear VMSA page failed (%u), leaking page\n", err);
1054+
else
1055+
free_page((unsigned long)vmsa);
1056+
}
1057+
9621058
static void set_pte_enc(pte_t *kpte, int level, void *va)
9631059
{
9641060
struct pte_enc_desc d = {
@@ -1005,7 +1101,8 @@ static void unshare_all_memory(void)
10051101
data = per_cpu(runtime_data, cpu);
10061102
ghcb = (unsigned long)&data->ghcb_page;
10071103

1008-
if (addr <= ghcb && ghcb <= addr + size) {
1104+
/* Handle the case of a huge page containing the GHCB page */
1105+
if (addr <= ghcb && ghcb < addr + size) {
10091106
skipped_addr = true;
10101107
break;
10111108
}
@@ -1055,11 +1152,70 @@ void snp_kexec_begin(void)
10551152
pr_warn("Failed to stop shared<->private conversions\n");
10561153
}
10571154

1155+
/*
1156+
* Shut down all APs except the one handling kexec/kdump and clear
1157+
* the VMSA tag on AP's VMSA pages as they are not being used as
1158+
* VMSA page anymore.
1159+
*/
1160+
static void shutdown_all_aps(void)
1161+
{
1162+
struct sev_es_save_area *vmsa;
1163+
int apic_id, this_cpu, cpu;
1164+
1165+
this_cpu = get_cpu();
1166+
1167+
/*
1168+
* APs are already in HLT loop when enc_kexec_finish() callback
1169+
* is invoked.
1170+
*/
1171+
for_each_present_cpu(cpu) {
1172+
vmsa = per_cpu(sev_vmsa, cpu);
1173+
1174+
/*
1175+
* The BSP or offlined APs do not have guest allocated VMSA
1176+
* and there is no need to clear the VMSA tag for this page.
1177+
*/
1178+
if (!vmsa)
1179+
continue;
1180+
1181+
/*
1182+
* Cannot clear the VMSA tag for the currently running vCPU.
1183+
*/
1184+
if (this_cpu == cpu) {
1185+
unsigned long pa;
1186+
struct page *p;
1187+
1188+
pa = __pa(vmsa);
1189+
/*
1190+
* Mark the VMSA page of the running vCPU as offline
1191+
* so that it is excluded and not touched by makedumpfile
1192+
* while generating vmcore during kdump.
1193+
*/
1194+
p = pfn_to_online_page(pa >> PAGE_SHIFT);
1195+
if (p)
1196+
__SetPageOffline(p);
1197+
continue;
1198+
}
1199+
1200+
apic_id = cpuid_to_apicid[cpu];
1201+
1202+
/*
1203+
* Issue AP destroy to ensure AP gets kicked out of guest mode
1204+
* to allow using RMPADJUST to remove the VMSA tag on its
1205+
* VMSA page.
1206+
*/
1207+
vmgexit_ap_control(SVM_VMGEXIT_AP_DESTROY, vmsa, apic_id);
1208+
snp_cleanup_vmsa(vmsa, apic_id);
1209+
}
1210+
1211+
put_cpu();
1212+
}
1213+
10581214
void snp_kexec_finish(void)
10591215
{
10601216
struct sev_es_runtime_data *data;
1217+
unsigned long size, addr;
10611218
unsigned int level, cpu;
1062-
unsigned long size;
10631219
struct ghcb *ghcb;
10641220
pte_t *pte;
10651221

@@ -1069,6 +1225,8 @@ void snp_kexec_finish(void)
10691225
if (!IS_ENABLED(CONFIG_KEXEC_CORE))
10701226
return;
10711227

1228+
shutdown_all_aps();
1229+
10721230
unshare_all_memory();
10731231

10741232
/*
@@ -1085,54 +1243,11 @@ void snp_kexec_finish(void)
10851243
ghcb = &data->ghcb_page;
10861244
pte = lookup_address((unsigned long)ghcb, &level);
10871245
size = page_level_size(level);
1088-
set_pte_enc(pte, level, (void *)ghcb);
1089-
snp_set_memory_private((unsigned long)ghcb, (size / PAGE_SIZE));
1090-
}
1091-
}
1092-
1093-
static int snp_set_vmsa(void *va, void *caa, int apic_id, bool make_vmsa)
1094-
{
1095-
int ret;
1096-
1097-
if (snp_vmpl) {
1098-
struct svsm_call call = {};
1099-
unsigned long flags;
1100-
1101-
local_irq_save(flags);
1102-
1103-
call.caa = this_cpu_read(svsm_caa);
1104-
call.rcx = __pa(va);
1105-
1106-
if (make_vmsa) {
1107-
/* Protocol 0, Call ID 2 */
1108-
call.rax = SVSM_CORE_CALL(SVSM_CORE_CREATE_VCPU);
1109-
call.rdx = __pa(caa);
1110-
call.r8 = apic_id;
1111-
} else {
1112-
/* Protocol 0, Call ID 3 */
1113-
call.rax = SVSM_CORE_CALL(SVSM_CORE_DELETE_VCPU);
1114-
}
1115-
1116-
ret = svsm_perform_call_protocol(&call);
1117-
1118-
local_irq_restore(flags);
1119-
} else {
1120-
/*
1121-
* If the kernel runs at VMPL0, it can change the VMSA
1122-
* bit for a page using the RMPADJUST instruction.
1123-
* However, for the instruction to succeed it must
1124-
* target the permissions of a lesser privileged (higher
1125-
* numbered) VMPL level, so use VMPL1.
1126-
*/
1127-
u64 attrs = 1;
1128-
1129-
if (make_vmsa)
1130-
attrs |= RMPADJUST_VMSA_PAGE_BIT;
1131-
1132-
ret = rmpadjust((unsigned long)va, RMP_PG_SIZE_4K, attrs);
1246+
/* Handle the case of a huge page containing the GHCB page */
1247+
addr = (unsigned long)ghcb & page_level_mask(level);
1248+
set_pte_enc(pte, level, (void *)addr);
1249+
snp_set_memory_private(addr, (size / PAGE_SIZE));
11331250
}
1134-
1135-
return ret;
11361251
}
11371252

11381253
#define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
@@ -1166,24 +1281,10 @@ static void *snp_alloc_vmsa_page(int cpu)
11661281
return page_address(p + 1);
11671282
}
11681283

1169-
static void snp_cleanup_vmsa(struct sev_es_save_area *vmsa, int apic_id)
1170-
{
1171-
int err;
1172-
1173-
err = snp_set_vmsa(vmsa, NULL, apic_id, false);
1174-
if (err)
1175-
pr_err("clear VMSA page failed (%u), leaking page\n", err);
1176-
else
1177-
free_page((unsigned long)vmsa);
1178-
}
1179-
11801284
static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
11811285
{
11821286
struct sev_es_save_area *cur_vmsa, *vmsa;
1183-
struct ghcb_state state;
11841287
struct svsm_ca *caa;
1185-
unsigned long flags;
1186-
struct ghcb *ghcb;
11871288
u8 sipi_vector;
11881289
int cpu, ret;
11891290
u64 cr4;
@@ -1297,33 +1398,7 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
12971398
}
12981399

12991400
/* Issue VMGEXIT AP Creation NAE event */
1300-
local_irq_save(flags);
1301-
1302-
ghcb = __sev_get_ghcb(&state);
1303-
1304-
vc_ghcb_invalidate(ghcb);
1305-
ghcb_set_rax(ghcb, vmsa->sev_features);
1306-
ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_CREATION);
1307-
ghcb_set_sw_exit_info_1(ghcb,
1308-
((u64)apic_id << 32) |
1309-
((u64)snp_vmpl << 16) |
1310-
SVM_VMGEXIT_AP_CREATE);
1311-
ghcb_set_sw_exit_info_2(ghcb, __pa(vmsa));
1312-
1313-
sev_es_wr_ghcb_msr(__pa(ghcb));
1314-
VMGEXIT();
1315-
1316-
if (!ghcb_sw_exit_info_1_is_valid(ghcb) ||
1317-
lower_32_bits(ghcb->save.sw_exit_info_1)) {
1318-
pr_err("SNP AP Creation error\n");
1319-
ret = -EINVAL;
1320-
}
1321-
1322-
__sev_put_ghcb(&state);
1323-
1324-
local_irq_restore(flags);
1325-
1326-
/* Perform cleanup if there was an error */
1401+
ret = vmgexit_ap_control(SVM_VMGEXIT_AP_CREATE, vmsa, apic_id);
13271402
if (ret) {
13281403
snp_cleanup_vmsa(vmsa, apic_id);
13291404
vmsa = NULL;

arch/x86/include/asm/cpufeatures.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@
7575
#define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */
7676
#define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */
7777
#define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */
78-
/* Free ( 3*32+ 6) */
78+
#define X86_FEATURE_ZEN6 ( 3*32+ 6) /* CPU based on Zen6 microarchitecture */
7979
/* Free ( 3*32+ 7) */
8080
#define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */
8181
#define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */

arch/x86/include/asm/sev-common.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ enum psc_op {
116116
#define GHCB_MSR_VMPL_REQ 0x016
117117
#define GHCB_MSR_VMPL_REQ_LEVEL(v) \
118118
/* GHCBData[39:32] */ \
119-
(((u64)(v) & GENMASK_ULL(7, 0) << 32) | \
119+
((((u64)(v) & GENMASK_ULL(7, 0)) << 32) | \
120120
/* GHCBData[11:0] */ \
121121
GHCB_MSR_VMPL_REQ)
122122

arch/x86/kernel/cpu/amd.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -472,6 +472,11 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
472472
case 0x60 ... 0x7f:
473473
setup_force_cpu_cap(X86_FEATURE_ZEN5);
474474
break;
475+
case 0x50 ... 0x5f:
476+
case 0x90 ... 0xaf:
477+
case 0xc0 ... 0xcf:
478+
setup_force_cpu_cap(X86_FEATURE_ZEN6);
479+
break;
475480
default:
476481
goto warn;
477482
}

arch/x86/mm/init_32.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,7 @@ static void __init lowmem_pfn_init(void)
566566
"only %luMB highmem pages available, ignoring highmem size of %luMB!\n"
567567

568568
#define MSG_HIGHMEM_TRIMMED \
569-
"Warning: only 4GB will be used. Support for for CONFIG_HIGHMEM64G was removed!\n"
569+
"Warning: only 4GB will be used. Support for CONFIG_HIGHMEM64G was removed!\n"
570570
/*
571571
* We have more RAM than fits into lowmem - we try to put it into
572572
* highmem, also taking the highmem=x boot parameter into account:

0 commit comments

Comments
 (0)