Skip to content

Commit cb1c9e0

Browse files
ardbiesheuvel authored and bp3tk0v committed
x86/efistub: Perform 4/5 level paging switch from the stub
In preparation for updating the EFI stub boot flow to avoid the bare metal decompressor code altogether, implement the support code for switching between 4 and 5 levels of paging before jumping to the kernel proper.

Reuse the newly refactored trampoline that the bare metal decompressor uses, but rely on EFI APIs to allocate 32-bit addressable memory and remap it with the appropriate permissions. Given that the bare metal decompressor will no longer call into the trampoline if the number of paging levels is already set correctly, it is no longer necessary to remove NX restrictions from the memory range where this trampoline may end up.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Link: https://lore.kernel.org/r/20230807162720.545787-17-ardb@kernel.org
1 parent 03dda95 commit cb1c9e0

File tree

6 files changed

+130
-26
lines changed

6 files changed

+130
-26
lines changed

drivers/firmware/efi/libstub/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ lib-$(CONFIG_EFI_GENERIC_STUB) += efi-stub.o string.o intrinsics.o systable.o \
8888
lib-$(CONFIG_ARM) += arm32-stub.o
8989
lib-$(CONFIG_ARM64) += arm64.o arm64-stub.o smbios.o
9090
lib-$(CONFIG_X86) += x86-stub.o
91+
lib-$(CONFIG_X86_64) += x86-5lvl.o
9192
lib-$(CONFIG_RISCV) += riscv.o riscv-stub.o
9293
lib-$(CONFIG_LOONGARCH) += loongarch.o loongarch-stub.o
9394

drivers/firmware/efi/libstub/efi-stub-helper.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@ efi_status_t efi_parse_options(char const *cmdline)
7373
efi_loglevel = CONSOLE_LOGLEVEL_QUIET;
7474
} else if (!strcmp(param, "noinitrd")) {
7575
efi_noinitrd = true;
76+
} else if (IS_ENABLED(CONFIG_X86_64) && !strcmp(param, "no5lvl")) {
77+
efi_no5lvl = true;
7678
} else if (!strcmp(param, "efi") && val) {
7779
efi_nochunk = parse_option_str(val, "nochunk");
7880
efi_novamap |= parse_option_str(val, "novamap");

drivers/firmware/efi/libstub/efistub.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#define EFI_ALLOC_LIMIT ULONG_MAX
3434
#endif
3535

36+
extern bool efi_no5lvl;
3637
extern bool efi_nochunk;
3738
extern bool efi_nokaslr;
3839
extern int efi_loglevel;
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
#include <linux/efi.h>
3+
4+
#include <asm/boot.h>
5+
#include <asm/desc.h>
6+
#include <asm/efi.h>
7+
8+
#include "efistub.h"
9+
#include "x86-stub.h"
10+
11+
bool efi_no5lvl;
12+
13+
static void (*la57_toggle)(void *cr3);
14+
15+
static const struct desc_struct gdt[] = {
16+
[GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
17+
[GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
18+
};
19+
20+
/*
21+
* Enabling (or disabling) 5 level paging is tricky, because it can only be
22+
* done from 32-bit mode with paging disabled. This means not only that the
23+
* code itself must be running from 32-bit addressable physical memory, but
24+
* also that the root page table must be 32-bit addressable, as programming
25+
* a 64-bit value into CR3 when running in 32-bit mode is not supported.
26+
*/
27+
efi_status_t efi_setup_5level_paging(void)
28+
{
29+
u8 tmpl_size = (u8 *)&trampoline_ljmp_imm_offset - (u8 *)&trampoline_32bit_src;
30+
efi_status_t status;
31+
u8 *la57_code;
32+
33+
if (!efi_is_64bit())
34+
return EFI_SUCCESS;
35+
36+
/* check for 5 level paging support */
37+
if (native_cpuid_eax(0) < 7 ||
38+
!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
39+
return EFI_SUCCESS;
40+
41+
/* allocate some 32-bit addressable memory for code and a page table */
42+
status = efi_allocate_pages(2 * PAGE_SIZE, (unsigned long *)&la57_code,
43+
U32_MAX);
44+
if (status != EFI_SUCCESS)
45+
return status;
46+
47+
la57_toggle = memcpy(la57_code, trampoline_32bit_src, tmpl_size);
48+
memset(la57_code + tmpl_size, 0x90, PAGE_SIZE - tmpl_size);
49+
50+
/*
51+
* To avoid the need to allocate a 32-bit addressable stack, the
52+
* trampoline uses a LJMP instruction to switch back to long mode.
53+
* LJMP takes an absolute destination address, which needs to be
54+
* fixed up at runtime.
55+
*/
56+
*(u32 *)&la57_code[trampoline_ljmp_imm_offset] += (unsigned long)la57_code;
57+
58+
efi_adjust_memory_range_protection((unsigned long)la57_toggle, PAGE_SIZE);
59+
60+
return EFI_SUCCESS;
61+
}
62+
63+
void efi_5level_switch(void)
64+
{
65+
bool want_la57 = IS_ENABLED(CONFIG_X86_5LEVEL) && !efi_no5lvl;
66+
bool have_la57 = native_read_cr4() & X86_CR4_LA57;
67+
bool need_toggle = want_la57 ^ have_la57;
68+
u64 *pgt = (void *)la57_toggle + PAGE_SIZE;
69+
u64 *cr3 = (u64 *)__native_read_cr3();
70+
u64 *new_cr3;
71+
72+
if (!la57_toggle || !need_toggle)
73+
return;
74+
75+
if (!have_la57) {
76+
/*
77+
* 5 level paging will be enabled, so a root level page needs
78+
* to be allocated from the 32-bit addressable physical region,
79+
* with its first entry referring to the existing hierarchy.
80+
*/
81+
new_cr3 = memset(pgt, 0, PAGE_SIZE);
82+
new_cr3[0] = (u64)cr3 | _PAGE_TABLE_NOENC;
83+
} else {
84+
/* take the new root table pointer from the current entry #0 */
85+
new_cr3 = (u64 *)(cr3[0] & PAGE_MASK);
86+
87+
/* copy the new root table if it is not 32-bit addressable */
88+
if ((u64)new_cr3 > U32_MAX)
89+
new_cr3 = memcpy(pgt, new_cr3, PAGE_SIZE);
90+
}
91+
92+
native_load_gdt(&(struct desc_ptr){ sizeof(gdt) - 1, (u64)gdt });
93+
94+
la57_toggle(new_cr3);
95+
}

drivers/firmware/efi/libstub/x86-stub.c

Lines changed: 14 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <asm/boot.h>
1818

1919
#include "efistub.h"
20+
#include "x86-stub.h"
2021

2122
/* Maximum physical address for 64-bit kernel with 4-level paging */
2223
#define MAXMEM_X86_64_4LEVEL (1ull << 46)
@@ -223,8 +224,8 @@ static void retrieve_apple_device_properties(struct boot_params *boot_params)
223224
}
224225
}
225226

226-
static void
227-
adjust_memory_range_protection(unsigned long start, unsigned long size)
227+
void efi_adjust_memory_range_protection(unsigned long start,
228+
unsigned long size)
228229
{
229230
efi_status_t status;
230231
efi_gcd_memory_space_desc_t desc;
@@ -278,35 +279,14 @@ adjust_memory_range_protection(unsigned long start, unsigned long size)
278279
}
279280
}
280281

281-
/*
282-
* Trampoline takes 2 pages and can be loaded in first megabyte of memory
283-
* with its end placed between 128k and 640k where BIOS might start.
284-
* (see arch/x86/boot/compressed/pgtable_64.c)
285-
*
286-
* We cannot find exact trampoline placement since memory map
287-
* can be modified by UEFI, and it can alter the computed address.
288-
*/
289-
290-
#define TRAMPOLINE_PLACEMENT_BASE ((128 - 8)*1024)
291-
#define TRAMPOLINE_PLACEMENT_SIZE (640*1024 - (128 - 8)*1024)
292-
293282
extern const u8 startup_32[], startup_64[];
294283

295284
static void
296285
setup_memory_protection(unsigned long image_base, unsigned long image_size)
297286
{
298-
/*
299-
* Allow execution of possible trampoline used
300-
* for switching between 4- and 5-level page tables
301-
* and relocated kernel image.
302-
*/
303-
304-
adjust_memory_range_protection(TRAMPOLINE_PLACEMENT_BASE,
305-
TRAMPOLINE_PLACEMENT_SIZE);
306-
307287
#ifdef CONFIG_64BIT
308288
if (image_base != (unsigned long)startup_32)
309-
adjust_memory_range_protection(image_base, image_size);
289+
efi_adjust_memory_range_protection(image_base, image_size);
310290
#else
311291
/*
312292
* Clear protection flags on a whole range of possible
@@ -316,8 +296,8 @@ setup_memory_protection(unsigned long image_base, unsigned long image_size)
316296
* need to remove possible protection on relocated image
317297
* itself disregarding further relocations.
318298
*/
319-
adjust_memory_range_protection(LOAD_PHYSICAL_ADDR,
320-
KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR);
299+
efi_adjust_memory_range_protection(LOAD_PHYSICAL_ADDR,
300+
KERNEL_IMAGE_SIZE - LOAD_PHYSICAL_ADDR);
321301
#endif
322302
}
323303

@@ -839,6 +819,12 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
839819
efi_dxe_table = NULL;
840820
}
841821

822+
status = efi_setup_5level_paging();
823+
if (status != EFI_SUCCESS) {
824+
efi_err("efi_setup_5level_paging() failed!\n");
825+
goto fail;
826+
}
827+
842828
/*
843829
* If the kernel isn't already loaded at a suitable address,
844830
* relocate it.
@@ -959,6 +945,8 @@ void __noreturn efi_stub_entry(efi_handle_t handle,
959945
goto fail;
960946
}
961947

948+
efi_5level_switch();
949+
962950
if (IS_ENABLED(CONFIG_X86_64))
963951
bzimage_addr += startup_64 - startup_32;
964952

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
/* SPDX-License-Identifier: GPL-2.0-only */
2+
3+
#include <linux/efi.h>
4+
5+
extern void trampoline_32bit_src(void *, bool);
6+
extern const u16 trampoline_ljmp_imm_offset;
7+
8+
void efi_adjust_memory_range_protection(unsigned long start,
9+
unsigned long size);
10+
11+
#ifdef CONFIG_X86_64
12+
efi_status_t efi_setup_5level_paging(void);
13+
void efi_5level_switch(void);
14+
#else
15+
static inline efi_status_t efi_setup_5level_paging(void) { return EFI_SUCCESS; }
16+
static inline void efi_5level_switch(void) {}
17+
#endif

0 commit comments

Comments
 (0)