@@ -959,6 +959,102 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end)
959
959
set_pages_state (vaddr , npages , SNP_PAGE_STATE_PRIVATE );
960
960
}
961
961
962
/*
 * Issue an SNP AP Creation NAE event through the GHCB to create or destroy
 * the vCPU identified by @apic_id.
 *
 * @event:   AP control sub-event, OR-ed into the low bits of SW_EXITINFO1.
 *           Any value other than SVM_VMGEXIT_AP_DESTROY is handled as a
 *           create request.
 * @vmsa:    save area whose physical address is passed in SW_EXITINFO2; for
 *           a create request its sev_features value is also loaded into RAX.
 * @apic_id: APIC ID of the target vCPU, placed in SW_EXITINFO1 bits 63:32.
 *
 * The whole GHCB exchange runs between local_irq_save() and
 * local_irq_restore().
 *
 * Returns 0 on success, or -EINVAL when SW_EXITINFO1 is not marked valid
 * after the VMGEXIT or its lower 32 bits are non-zero.
 */
+ static int vmgexit_ap_control (u64 event , struct sev_es_save_area * vmsa , u32 apic_id )
963
+ {
964
/* Only used to decide whether RAX is set and which word is logged on error. */
+ bool create = event != SVM_VMGEXIT_AP_DESTROY ;
965
+ struct ghcb_state state ;
966
+ unsigned long flags ;
967
+ struct ghcb * ghcb ;
968
+ int ret = 0 ;
969
+
970
+ local_irq_save (flags );
971
+
972
+ ghcb = __sev_get_ghcb (& state );
973
+
974
+ vc_ghcb_invalidate (ghcb );
975
+
976
/* sev_features is only passed for a create request. */
+ if (create )
977
+ ghcb_set_rax (ghcb , vmsa -> sev_features );
978
+
979
/* The same exit code is used for create and destroy; @event selects which. */
+ ghcb_set_sw_exit_code (ghcb , SVM_VMGEXIT_AP_CREATION );
980
/* SW_EXITINFO1 = APIC ID << 32 | snp_vmpl << 16 | event. */
+ ghcb_set_sw_exit_info_1 (ghcb ,
981
+ ((u64 )apic_id << 32 ) |
982
+ ((u64 )snp_vmpl << 16 ) |
983
+ event );
984
+ ghcb_set_sw_exit_info_2 (ghcb , __pa (vmsa ));
985
+
986
+ sev_es_wr_ghcb_msr (__pa (ghcb ));
987
+ VMGEXIT ();
988
+
989
/* Failure: SW_EXITINFO1 was not written back, or its low 32 bits are set. */
+ if (!ghcb_sw_exit_info_1_is_valid (ghcb ) ||
990
+ lower_32_bits (ghcb -> save .sw_exit_info_1 )) {
991
+ pr_err ("SNP AP %s error\n" , (create ? "CREATE" : "DESTROY" ));
992
+ ret = - EINVAL ;
993
+ }
994
+
995
+ __sev_put_ghcb (& state );
996
+
997
+ local_irq_restore (flags );
998
+
999
+ return ret ;
1000
+ }
1001
+
1002
/*
 * Set or clear the VMSA attribute of the page at @va.
 *
 * When snp_vmpl is non-zero the request is made through an SVSM core
 * protocol call (create or delete vCPU), issued between local_irq_save()
 * and local_irq_restore(). Otherwise RMPADJUST is executed directly on the
 * page.
 *
 * @va:        virtual address of the page; its physical address is passed
 *             to the SVSM call, the virtual address itself to rmpadjust().
 * @caa:       only read on the SVSM create path, where its physical address
 *             is placed in call.rdx.
 * @apic_id:   only read on the SVSM create path, where it is placed in
 *             call.r8.
 * @make_vmsa: true to mark the page as a VMSA, false to remove the mark.
 *
 * Returns the value produced by svsm_perform_call_protocol() or rmpadjust().
 */
+ static int snp_set_vmsa (void * va , void * caa , int apic_id , bool make_vmsa )
1003
+ {
1004
+ int ret ;
1005
+
1006
+ if (snp_vmpl ) {
1007
+ struct svsm_call call = {};
1008
+ unsigned long flags ;
1009
+
1010
+ local_irq_save (flags );
1011
+
1012
+ call .caa = this_cpu_read (svsm_caa );
1013
+ call .rcx = __pa (va );
1014
+
1015
+ if (make_vmsa ) {
1016
+ /* Protocol 0, Call ID 2 */
1017
+ call .rax = SVSM_CORE_CALL (SVSM_CORE_CREATE_VCPU );
1018
+ call .rdx = __pa (caa );
1019
+ call .r8 = apic_id ;
1020
+ } else {
1021
+ /* Protocol 0, Call ID 3 */
1022
+ call .rax = SVSM_CORE_CALL (SVSM_CORE_DELETE_VCPU );
1023
+ }
1024
+
1025
+ ret = svsm_perform_call_protocol (& call );
1026
+
1027
+ local_irq_restore (flags );
1028
+ } else {
1029
+ /*
1030
+ * If the kernel runs at VMPL0, it can change the VMSA
1031
+ * bit for a page using the RMPADJUST instruction.
1032
+ * However, for the instruction to succeed it must
1033
+ * target the permissions of a less privileged (higher
1034
+ * numbered) VMPL level, so use VMPL1 (attrs starts at 1).
1035
+ */
1036
+ u64 attrs = 1 ;
1037
+
1038
+ if (make_vmsa )
1039
+ attrs |= RMPADJUST_VMSA_PAGE_BIT ;
1040
+
1041
+ ret = rmpadjust ((unsigned long )va , RMP_PG_SIZE_4K , attrs );
1042
+ }
1043
+
1044
+ return ret ;
1045
+ }
1046
+
1047
/*
 * Remove the VMSA attribute from @vmsa and release the page.
 *
 * @vmsa:    VMSA page to clean up.
 * @apic_id: forwarded unchanged to snp_set_vmsa().
 *
 * If clearing the attribute fails, the page is deliberately leaked rather
 * than freed, and the error code is logged.
 */
+ static void snp_cleanup_vmsa (struct sev_es_save_area * vmsa , int apic_id )
1048
+ {
1049
+ int err ;
1050
+
1051
+ err = snp_set_vmsa (vmsa , NULL , apic_id , false);
1052
+ if (err )
1053
/* err is a signed int, so print it with %d to keep negative codes readable. */
+ pr_err ("clear VMSA page failed (%d), leaking page\n" , err );
1054
+ else
1055
+ free_page ((unsigned long )vmsa );
1056
+ }
1057
+
962
1058
static void set_pte_enc (pte_t * kpte , int level , void * va )
963
1059
{
964
1060
struct pte_enc_desc d = {
@@ -1005,7 +1101,8 @@ static void unshare_all_memory(void)
1005
1101
data = per_cpu (runtime_data , cpu );
1006
1102
ghcb = (unsigned long )& data -> ghcb_page ;
1007
1103
1008
- if (addr <= ghcb && ghcb <= addr + size ) {
1104
+ /* Handle the case of a huge page containing the GHCB page */
1105
+ if (addr <= ghcb && ghcb < addr + size ) {
1009
1106
skipped_addr = true;
1010
1107
break ;
1011
1108
}
@@ -1055,11 +1152,70 @@ void snp_kexec_begin(void)
1055
1152
pr_warn ("Failed to stop shared<->private conversions\n" );
1056
1153
}
1057
1154
1155
+ /*
1156
+ * Shut down all APs except the one handling kexec/kdump and clear
1157
+ * the VMSA tag on the APs' VMSA pages, as they are no longer being
1158
+ * used as VMSA pages.
+ *
+ * Walks every present CPU between get_cpu() and put_cpu(). CPUs without
+ * a per-CPU sev_vmsa pointer are skipped; the CPU running this function
+ * only has its VMSA page marked offline. The result of the AP destroy
+ * request is not checked before the VMSA cleanup is attempted.
1159
+ */
1160
+ static void shutdown_all_aps (void )
1161
+ {
1162
+ struct sev_es_save_area * vmsa ;
1163
+ int apic_id , this_cpu , cpu ;
1164
+
1165
+ this_cpu = get_cpu ();
1166
+
1167
+ /*
1168
+ * APs are already in the HLT loop when the enc_kexec_finish()
1169
+ * callback is invoked.
1170
+ */
1171
+ for_each_present_cpu (cpu ) {
1172
+ vmsa = per_cpu (sev_vmsa , cpu );
1173
+
1174
+ /*
1175
+ * The BSP or offlined APs do not have a guest-allocated VMSA,
1176
+ * so there is no VMSA tag to clear for them.
1177
+ */
1178
+ if (!vmsa )
1179
+ continue ;
1180
+
1181
+ /*
1182
+ * Cannot clear the VMSA tag for the currently running vCPU.
1183
+ */
1184
+ if (this_cpu == cpu ) {
1185
+ unsigned long pa ;
1186
+ struct page * p ;
1187
+
1188
+ pa = __pa (vmsa );
1189
+ /*
1190
+ * Mark the VMSA page of the running vCPU as offline
1191
+ * so that it is excluded and not touched by makedumpfile
1192
+ * while generating vmcore during kdump.
1193
+ */
1194
+ p = pfn_to_online_page (pa >> PAGE_SHIFT );
1195
+ if (p )
1196
+ __SetPageOffline (p );
1197
+ continue ;
1198
+ }
1199
+
1200
+ apic_id = cpuid_to_apicid [cpu ];
1201
+
1202
+ /*
1203
+ * Issue AP destroy to ensure the AP gets kicked out of guest mode
1204
+ * to allow using RMPADJUST to remove the VMSA tag on its
1205
+ * VMSA page.
1206
+ */
1207
+ vmgexit_ap_control (SVM_VMGEXIT_AP_DESTROY , vmsa , apic_id );
1208
+ snp_cleanup_vmsa (vmsa , apic_id );
1209
+ }
1210
+
1211
+ put_cpu ();
1212
+ }
1213
+
1058
1214
void snp_kexec_finish (void )
1059
1215
{
1060
1216
struct sev_es_runtime_data * data ;
1217
+ unsigned long size , addr ;
1061
1218
unsigned int level , cpu ;
1062
- unsigned long size ;
1063
1219
struct ghcb * ghcb ;
1064
1220
pte_t * pte ;
1065
1221
@@ -1069,6 +1225,8 @@ void snp_kexec_finish(void)
1069
1225
if (!IS_ENABLED (CONFIG_KEXEC_CORE ))
1070
1226
return ;
1071
1227
1228
+ shutdown_all_aps ();
1229
+
1072
1230
unshare_all_memory ();
1073
1231
1074
1232
/*
@@ -1085,54 +1243,11 @@ void snp_kexec_finish(void)
1085
1243
ghcb = & data -> ghcb_page ;
1086
1244
pte = lookup_address ((unsigned long )ghcb , & level );
1087
1245
size = page_level_size (level );
1088
- set_pte_enc (pte , level , (void * )ghcb );
1089
- snp_set_memory_private ((unsigned long )ghcb , (size / PAGE_SIZE ));
1090
- }
1091
- }
1092
-
1093
- static int snp_set_vmsa (void * va , void * caa , int apic_id , bool make_vmsa )
1094
- {
1095
- int ret ;
1096
-
1097
- if (snp_vmpl ) {
1098
- struct svsm_call call = {};
1099
- unsigned long flags ;
1100
-
1101
- local_irq_save (flags );
1102
-
1103
- call .caa = this_cpu_read (svsm_caa );
1104
- call .rcx = __pa (va );
1105
-
1106
- if (make_vmsa ) {
1107
- /* Protocol 0, Call ID 2 */
1108
- call .rax = SVSM_CORE_CALL (SVSM_CORE_CREATE_VCPU );
1109
- call .rdx = __pa (caa );
1110
- call .r8 = apic_id ;
1111
- } else {
1112
- /* Protocol 0, Call ID 3 */
1113
- call .rax = SVSM_CORE_CALL (SVSM_CORE_DELETE_VCPU );
1114
- }
1115
-
1116
- ret = svsm_perform_call_protocol (& call );
1117
-
1118
- local_irq_restore (flags );
1119
- } else {
1120
- /*
1121
- * If the kernel runs at VMPL0, it can change the VMSA
1122
- * bit for a page using the RMPADJUST instruction.
1123
- * However, for the instruction to succeed it must
1124
- * target the permissions of a lesser privileged (higher
1125
- * numbered) VMPL level, so use VMPL1.
1126
- */
1127
- u64 attrs = 1 ;
1128
-
1129
- if (make_vmsa )
1130
- attrs |= RMPADJUST_VMSA_PAGE_BIT ;
1131
-
1132
- ret = rmpadjust ((unsigned long )va , RMP_PG_SIZE_4K , attrs );
1246
+ /* Handle the case of a huge page containing the GHCB page */
1247
+ addr = (unsigned long )ghcb & page_level_mask (level );
1248
+ set_pte_enc (pte , level , (void * )addr );
1249
+ snp_set_memory_private (addr , (size / PAGE_SIZE ));
1133
1250
}
1134
-
1135
- return ret ;
1136
1251
}
1137
1252
1138
1253
#define __ATTR_BASE (SVM_SELECTOR_P_MASK | SVM_SELECTOR_S_MASK)
@@ -1166,24 +1281,10 @@ static void *snp_alloc_vmsa_page(int cpu)
1166
1281
return page_address (p + 1 );
1167
1282
}
1168
1283
1169
- static void snp_cleanup_vmsa (struct sev_es_save_area * vmsa , int apic_id )
1170
- {
1171
- int err ;
1172
-
1173
- err = snp_set_vmsa (vmsa , NULL , apic_id , false);
1174
- if (err )
1175
- pr_err ("clear VMSA page failed (%u), leaking page\n" , err );
1176
- else
1177
- free_page ((unsigned long )vmsa );
1178
- }
1179
-
1180
1284
static int wakeup_cpu_via_vmgexit (u32 apic_id , unsigned long start_ip )
1181
1285
{
1182
1286
struct sev_es_save_area * cur_vmsa , * vmsa ;
1183
- struct ghcb_state state ;
1184
1287
struct svsm_ca * caa ;
1185
- unsigned long flags ;
1186
- struct ghcb * ghcb ;
1187
1288
u8 sipi_vector ;
1188
1289
int cpu , ret ;
1189
1290
u64 cr4 ;
@@ -1297,33 +1398,7 @@ static int wakeup_cpu_via_vmgexit(u32 apic_id, unsigned long start_ip)
1297
1398
}
1298
1399
1299
1400
/* Issue VMGEXIT AP Creation NAE event */
1300
- local_irq_save (flags );
1301
-
1302
- ghcb = __sev_get_ghcb (& state );
1303
-
1304
- vc_ghcb_invalidate (ghcb );
1305
- ghcb_set_rax (ghcb , vmsa -> sev_features );
1306
- ghcb_set_sw_exit_code (ghcb , SVM_VMGEXIT_AP_CREATION );
1307
- ghcb_set_sw_exit_info_1 (ghcb ,
1308
- ((u64 )apic_id << 32 ) |
1309
- ((u64 )snp_vmpl << 16 ) |
1310
- SVM_VMGEXIT_AP_CREATE );
1311
- ghcb_set_sw_exit_info_2 (ghcb , __pa (vmsa ));
1312
-
1313
- sev_es_wr_ghcb_msr (__pa (ghcb ));
1314
- VMGEXIT ();
1315
-
1316
- if (!ghcb_sw_exit_info_1_is_valid (ghcb ) ||
1317
- lower_32_bits (ghcb -> save .sw_exit_info_1 )) {
1318
- pr_err ("SNP AP Creation error\n" );
1319
- ret = - EINVAL ;
1320
- }
1321
-
1322
- __sev_put_ghcb (& state );
1323
-
1324
- local_irq_restore (flags );
1325
-
1326
- /* Perform cleanup if there was an error */
1401
+ ret = vmgexit_ap_control (SVM_VMGEXIT_AP_CREATE , vmsa , apic_id );
1327
1402
if (ret ) {
1328
1403
snp_cleanup_vmsa (vmsa , apic_id );
1329
1404
vmsa = NULL ;
0 commit comments