Skip to content
This repository was archived by the owner on Nov 8, 2023. It is now read-only.

Commit 98896d8

Browse files
committed
Merge tag 'x86_cc_for_v6.11_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 confidential computing updates from Borislav Petkov:
 "Unrelated x86/cc changes queued here to avoid ugly cross-merges and
  conflicts:

   - Carve out CPU hotplug function declarations into a separate header
     with the goal to be able to use the lockdep assertions in a more
     flexible manner

   - As a result, refactor cacheinfo code after carving out a function
     to return the cache ID associated with a given cache level

   - Cleanups

  Add support to be able to kexec TDX guests:

   - Expand ACPI MADT CPU offlining support

   - Add machinery to prepare CoCo guests memory before kexec-ing into a
     new kernel

   - Cleanup, readjust and massage related code"

* tag 'x86_cc_for_v6.11_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (24 commits)
  ACPI: tables: Print MULTIPROC_WAKEUP when MADT is parsed
  x86/acpi: Add support for CPU offlining for ACPI MADT wakeup method
  x86/mm: Introduce kernel_ident_mapping_free()
  x86/smp: Add smp_ops.stop_this_cpu() callback
  x86/acpi: Do not attempt to bring up secondary CPUs in the kexec case
  x86/acpi: Rename fields in the acpi_madt_multiproc_wakeup structure
  x86/mm: Do not zap page table entries mapping unaccepted memory table during kdump
  x86/mm: Make e820__end_ram_pfn() cover E820_TYPE_ACPI ranges
  x86/tdx: Convert shared memory back to private on kexec
  x86/mm: Add callbacks to prepare encrypted memory for kexec
  x86/tdx: Account shared memory
  x86/mm: Return correct level from lookup_address() if pte is none
  x86/mm: Make x86_platform.guest.enc_status_change_*() return an error
  x86/kexec: Keep CR4.MCE set during kexec for TDX guest
  x86/relocate_kernel: Use named labels for less confusion
  cpu/hotplug, x86/acpi: Disable CPU offlining for ACPI MADT wakeup
  cpu/hotplug: Add support for declaring CPU offlining not supported
  x86/apic: Mark acpi_mp_wake_* variables as __ro_after_init
  x86/acpi: Extract ACPI MADT wakeup code into a separate file
  x86/kexec: Remove spurious unconditional JMP from from identity_mapped()
  ...
2 parents 181a984 + 16df359 commit 98896d8

34 files changed

+812
-226
lines changed

arch/x86/Kconfig

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,6 +1118,13 @@ config X86_LOCAL_APIC
11181118
depends on X86_64 || SMP || X86_32_NON_STANDARD || X86_UP_APIC || PCI_MSI
11191119
select IRQ_DOMAIN_HIERARCHY
11201120

1121+
config ACPI_MADT_WAKEUP
1122+
def_bool y
1123+
depends on X86_64
1124+
depends on ACPI
1125+
depends on SMP
1126+
depends on X86_LOCAL_APIC
1127+
11211128
config X86_IO_APIC
11221129
def_bool y
11231130
depends on X86_LOCAL_APIC || X86_UP_IOAPIC

arch/x86/coco/core.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ static bool noinstr intel_cc_platform_has(enum cc_attr attr)
2929
{
3030
switch (attr) {
3131
case CC_ATTR_GUEST_UNROLL_STRING_IO:
32-
case CC_ATTR_HOTPLUG_DISABLED:
3332
case CC_ATTR_GUEST_MEM_ENCRYPT:
3433
case CC_ATTR_MEM_ENCRYPT:
3534
return true;

arch/x86/coco/tdx/tdx.c

Lines changed: 112 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,13 +7,15 @@
77
#include <linux/cpufeature.h>
88
#include <linux/export.h>
99
#include <linux/io.h>
10+
#include <linux/kexec.h>
1011
#include <asm/coco.h>
1112
#include <asm/tdx.h>
1213
#include <asm/vmx.h>
1314
#include <asm/ia32.h>
1415
#include <asm/insn.h>
1516
#include <asm/insn-eval.h>
1617
#include <asm/pgtable.h>
18+
#include <asm/set_memory.h>
1719

1820
/* MMIO direction */
1921
#define EPT_READ 0
@@ -38,6 +40,8 @@
3840

3941
#define TDREPORT_SUBTYPE_0 0
4042

43+
static atomic_long_t nr_shared;
44+
4145
/* Called from __tdx_hypercall() for unrecoverable failure */
4246
noinstr void __noreturn __tdx_hypercall_failed(void)
4347
{
@@ -798,28 +802,124 @@ static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
798802
return true;
799803
}
800804

801-
static bool tdx_enc_status_change_prepare(unsigned long vaddr, int numpages,
802-
bool enc)
805+
static int tdx_enc_status_change_prepare(unsigned long vaddr, int numpages,
806+
bool enc)
803807
{
804808
/*
805809
* Only handle shared->private conversion here.
806810
* See the comment in tdx_early_init().
807811
*/
808-
if (enc)
809-
return tdx_enc_status_changed(vaddr, numpages, enc);
810-
return true;
812+
if (enc && !tdx_enc_status_changed(vaddr, numpages, enc))
813+
return -EIO;
814+
815+
return 0;
811816
}
812817

813-
static bool tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
818+
static int tdx_enc_status_change_finish(unsigned long vaddr, int numpages,
814819
bool enc)
815820
{
816821
/*
817822
* Only handle private->shared conversion here.
818823
* See the comment in tdx_early_init().
819824
*/
820-
if (!enc)
821-
return tdx_enc_status_changed(vaddr, numpages, enc);
822-
return true;
825+
if (!enc && !tdx_enc_status_changed(vaddr, numpages, enc))
826+
return -EIO;
827+
828+
if (enc)
829+
atomic_long_sub(numpages, &nr_shared);
830+
else
831+
atomic_long_add(numpages, &nr_shared);
832+
833+
return 0;
834+
}
835+
836+
/* Stop new private<->shared conversions */
837+
static void tdx_kexec_begin(void)
838+
{
839+
if (!IS_ENABLED(CONFIG_KEXEC_CORE))
840+
return;
841+
842+
/*
843+
* Crash kernel reaches here with interrupts disabled: can't wait for
844+
* conversions to finish.
845+
*
846+
* If race happened, just report and proceed.
847+
*/
848+
if (!set_memory_enc_stop_conversion())
849+
pr_warn("Failed to stop shared<->private conversions\n");
850+
}
851+
852+
/* Walk direct mapping and convert all shared memory back to private */
853+
static void tdx_kexec_finish(void)
854+
{
855+
unsigned long addr, end;
856+
long found = 0, shared;
857+
858+
if (!IS_ENABLED(CONFIG_KEXEC_CORE))
859+
return;
860+
861+
lockdep_assert_irqs_disabled();
862+
863+
addr = PAGE_OFFSET;
864+
end = PAGE_OFFSET + get_max_mapped();
865+
866+
while (addr < end) {
867+
unsigned long size;
868+
unsigned int level;
869+
pte_t *pte;
870+
871+
pte = lookup_address(addr, &level);
872+
size = page_level_size(level);
873+
874+
if (pte && pte_decrypted(*pte)) {
875+
int pages = size / PAGE_SIZE;
876+
877+
/*
878+
* Touching memory with shared bit set triggers implicit
879+
* conversion to shared.
880+
*
881+
* Make sure nobody touches the shared range from
882+
* now on.
883+
*/
884+
set_pte(pte, __pte(0));
885+
886+
/*
887+
* Memory encryption state persists across kexec.
888+
* If tdx_enc_status_changed() fails in the first
889+
* kernel, it leaves memory in an unknown state.
890+
*
891+
* If that memory remains shared, accessing it in the
892+
* *next* kernel through a private mapping will result
893+
* in an unrecoverable guest shutdown.
894+
*
895+
* The kdump kernel boot is not impacted as it uses
896+
* a pre-reserved memory range that is always private.
897+
* However, gathering crash information could lead to
898+
* a crash if it accesses unconverted memory through
899+
* a private mapping which is possible when accessing
900+
* that memory through /proc/vmcore, for example.
901+
*
902+
* In all cases, print error info in order to leave
903+
* enough bread crumbs for debugging.
904+
*/
905+
if (!tdx_enc_status_changed(addr, pages, true)) {
906+
pr_err("Failed to unshare range %#lx-%#lx\n",
907+
addr, addr + size);
908+
}
909+
910+
found += pages;
911+
}
912+
913+
addr += size;
914+
}
915+
916+
__flush_tlb_all();
917+
918+
shared = atomic_long_read(&nr_shared);
919+
if (shared != found) {
920+
pr_err("shared page accounting is off\n");
921+
pr_err("nr_shared = %ld, nr_found = %ld\n", shared, found);
922+
}
823923
}
824924

825925
void __init tdx_early_init(void)
@@ -881,6 +981,9 @@ void __init tdx_early_init(void)
881981
x86_platform.guest.enc_cache_flush_required = tdx_cache_flush_required;
882982
x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required;
883983

984+
x86_platform.guest.enc_kexec_begin = tdx_kexec_begin;
985+
x86_platform.guest.enc_kexec_finish = tdx_kexec_finish;
986+
884987
/*
885988
* TDX intercepts the RDMSR to read the X2APIC ID in the parallel
886989
* bringup low level code. That raises #VE which cannot be handled

arch/x86/hyperv/ivm.c

Lines changed: 10 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -523,9 +523,9 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[],
523523
* transition is complete, hv_vtom_set_host_visibility() marks the pages
524524
* as "present" again.
525525
*/
526-
static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc)
526+
static int hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc)
527527
{
528-
return !set_memory_np(kbuffer, pagecount);
528+
return set_memory_np(kbuffer, pagecount);
529529
}
530530

531531
/*
@@ -536,20 +536,19 @@ static bool hv_vtom_clear_present(unsigned long kbuffer, int pagecount, bool enc
536536
* with host. This function works as wrap of hv_mark_gpa_visibility()
537537
* with memory base and size.
538538
*/
539-
static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc)
539+
static int hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bool enc)
540540
{
541541
enum hv_mem_host_visibility visibility = enc ?
542542
VMBUS_PAGE_NOT_VISIBLE : VMBUS_PAGE_VISIBLE_READ_WRITE;
543543
u64 *pfn_array;
544544
phys_addr_t paddr;
545+
int i, pfn, err;
545546
void *vaddr;
546547
int ret = 0;
547-
bool result = true;
548-
int i, pfn;
549548

550549
pfn_array = kmalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
551550
if (!pfn_array) {
552-
result = false;
551+
ret = -ENOMEM;
553552
goto err_set_memory_p;
554553
}
555554

@@ -568,10 +567,8 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo
568567
if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) {
569568
ret = hv_mark_gpa_visibility(pfn, pfn_array,
570569
visibility);
571-
if (ret) {
572-
result = false;
570+
if (ret)
573571
goto err_free_pfn_array;
574-
}
575572
pfn = 0;
576573
}
577574
}
@@ -586,10 +583,11 @@ static bool hv_vtom_set_host_visibility(unsigned long kbuffer, int pagecount, bo
586583
* order to avoid leaving the memory range in a "broken" state. Setting
587584
* the PRESENT bits shouldn't fail, but return an error if it does.
588585
*/
589-
if (set_memory_p(kbuffer, pagecount))
590-
result = false;
586+
err = set_memory_p(kbuffer, pagecount);
587+
if (err && !ret)
588+
ret = err;
591589

592-
return result;
590+
return ret;
593591
}
594592

595593
static bool hv_vtom_tlb_flush_required(bool private)

arch/x86/include/asm/acpi.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,13 @@ static inline bool acpi_skip_set_wakeup_address(void)
7878

7979
#define acpi_skip_set_wakeup_address acpi_skip_set_wakeup_address
8080

81+
union acpi_subtable_headers;
82+
83+
int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
84+
const unsigned long end);
85+
86+
void asm_acpi_mp_play_dead(u64 reset_vector, u64 pgd_pa);
87+
8188
/*
8289
* Check if the CPU can handle C2 and deeper
8390
*/

arch/x86/include/asm/init.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
struct x86_mapping_info {
88
void *(*alloc_pgt_page)(void *); /* allocate buf for page table */
9+
void (*free_pgt_page)(void *, void *); /* free buf for page table */
910
void *context; /* context for alloc_pgt_page */
1011
unsigned long page_flag; /* page flag for PMD or PUD entry */
1112
unsigned long offset; /* ident mapping offset */
@@ -16,4 +17,6 @@ struct x86_mapping_info {
1617
int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
1718
unsigned long pstart, unsigned long pend);
1819

20+
void kernel_ident_mapping_free(struct x86_mapping_info *info, pgd_t *pgd);
21+
1922
#endif /* _ASM_X86_INIT_H */

arch/x86/include/asm/pgtable.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,11 @@ static inline int pte_young(pte_t pte)
140140
return pte_flags(pte) & _PAGE_ACCESSED;
141141
}
142142

143+
static inline bool pte_decrypted(pte_t pte)
144+
{
145+
return cc_mkdec(pte_val(pte)) == pte_val(pte);
146+
}
147+
143148
#define pmd_dirty pmd_dirty
144149
static inline bool pmd_dirty(pmd_t pmd)
145150
{

arch/x86/include/asm/pgtable_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -549,6 +549,7 @@ enum pg_level {
549549
PG_LEVEL_2M,
550550
PG_LEVEL_1G,
551551
PG_LEVEL_512G,
552+
PG_LEVEL_256T,
552553
PG_LEVEL_NUM
553554
};
554555

arch/x86/include/asm/set_memory.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,11 @@ int set_memory_wb(unsigned long addr, int numpages);
4949
int set_memory_np(unsigned long addr, int numpages);
5050
int set_memory_p(unsigned long addr, int numpages);
5151
int set_memory_4k(unsigned long addr, int numpages);
52+
53+
bool set_memory_enc_stop_conversion(void);
5254
int set_memory_encrypted(unsigned long addr, int numpages);
5355
int set_memory_decrypted(unsigned long addr, int numpages);
56+
5457
int set_memory_np_noalias(unsigned long addr, int numpages);
5558
int set_memory_nonglobal(unsigned long addr, int numpages);
5659
int set_memory_global(unsigned long addr, int numpages);

arch/x86/include/asm/smp.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ struct smp_ops {
3535
int (*cpu_disable)(void);
3636
void (*cpu_die)(unsigned int cpu);
3737
void (*play_dead)(void);
38+
void (*stop_this_cpu)(void);
3839

3940
void (*send_call_func_ipi)(const struct cpumask *mask);
4041
void (*send_call_func_single_ipi)(int cpu);

0 commit comments

Comments
 (0)