Commit bd328aa

ardbiesheuvel authored and bp3tk0v committed
x86/decompressor: Avoid the need for a stack in the 32-bit trampoline
The 32-bit trampoline no longer uses the stack for anything except
performing a far return back to long mode, and preserving the caller's
stack pointer value. Currently, the trampoline stack is placed in the
same page that carries the trampoline code, which means this page must
be mapped writable and executable, and the stack is therefore
executable as well.

Replace the far return with a far jump, so that the return address can
be pre-calculated and patched into the code before it is called. This
removes the need for a 32-bit addressable stack entirely, and in a
later patch, this will be taken advantage of by removing writable
permissions from (and adding executable permissions to) the trampoline
code page when booting via the EFI stub.

Note that the value of RSP still needs to be preserved explicitly
across the switch into 32-bit mode, as the register may get truncated
to 32 bits.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Link: https://lore.kernel.org/r/20230807162720.545787-12-ardb@kernel.org
1 parent 918a7a0 commit bd328aa
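
[Illustrative note, not part of the commit] The pattern described in the message amounts to copying the trampoline template into a 32-bit addressable buffer and then rewriting the far jump's destination before the code ever runs, instead of computing a return address on a stack at run time. A minimal C sketch of that copy-and-patch pattern; install_trampoline(), buf_32bit and imm_offset are made-up names standing in for the kernel's actual interfaces:

#include <stdint.h>
#include <string.h>

/*
 * Hypothetical sketch only. The template stores the jump target as an
 * offset relative to its own start; adding the runtime placement of the
 * copy turns it into the absolute 32-bit address the far jump needs.
 */
static void *install_trampoline(void *buf_32bit, const void *tmpl,
                                size_t size, uint16_t imm_offset)
{
        uint8_t *code = memcpy(buf_32bit, tmpl, size);
        uint32_t target;

        memcpy(&target, code + imm_offset, sizeof(target)); /* link-time offset  */
        target += (uint32_t)(uintptr_t)code;                /* rebase to runtime */
        memcpy(code + imm_offset, &target, sizeof(target)); /* patch it back in  */

        return code;
}

The truncating cast is deliberate: the buffer is assumed to sit below 4 GiB, since the patched address must fit the 32-bit immediate of the far jump.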

File tree

3 files changed: 40 additions, 21 deletions


arch/x86/boot/compressed/head_64.S

Lines changed: 27 additions & 18 deletions
@@ -540,6 +540,7 @@ SYM_FUNC_END(.Lrelocated)
  * trampoline memory. A non-zero second argument (ESI) means that the
  * trampoline needs to enable 5-level paging.
  */
+        .section ".rodata", "a", @progbits
 SYM_CODE_START(trampoline_32bit_src)
         /*
          * Preserve live 64-bit registers on the stack: this is necessary
@@ -550,35 +551,34 @@ SYM_CODE_START(trampoline_32bit_src)
         pushq   %rbp
         pushq   %rbx
 
-        /* Set up 32-bit addressable stack and push the old RSP value */
-        leaq    (TRAMPOLINE_32BIT_STACK_END - 8)(%rcx), %rbx
-        movq    %rsp, (%rbx)
-        movq    %rbx, %rsp
-
-        /* Take the address of the trampoline exit code */
-        leaq    .Lret(%rip), %rbx
+        /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */
+        movq    %rsp, %rbx
+        shrq    $32, %rbx
 
         /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
         pushq   $__KERNEL32_CS
         leaq    0f(%rip), %rax
         pushq   %rax
         lretq
 
+        /*
+         * The 32-bit code below will do a far jump back to long mode and end
+         * up here after reconfiguring the number of paging levels. First, the
+         * stack pointer needs to be restored to its full 64-bit value before
+         * the callee save register contents can be popped from the stack.
+         */
 .Lret:
+        shlq    $32, %rbx
+        orq     %rbx, %rsp
+
         /* Restore the preserved 64-bit registers */
-        movq    (%rsp), %rsp
         popq    %rbx
         popq    %rbp
         popq    %r15
         retq
 
         .code32
 0:
-        /* Set up data and stack segments */
-        movl    $__KERNEL_DS, %eax
-        movl    %eax, %ds
-        movl    %eax, %ss
-
         /* Disable paging */
         movl    %cr0, %eax
         btrl    $X86_CR0_PG_BIT, %eax
@@ -633,25 +633,34 @@ SYM_CODE_START(trampoline_32bit_src)
 1:
         movl    %eax, %cr4
 
-        /* Prepare the stack for far return to Long Mode */
-        pushl   $__KERNEL_CS
-        pushl   %ebx
-
         /* Enable paging again. */
         movl    %cr0, %eax
         btsl    $X86_CR0_PG_BIT, %eax
         movl    %eax, %cr0
 
-        lret
+        /*
+         * Return to the 64-bit calling code using LJMP rather than LRET, to
+         * avoid the need for a 32-bit addressable stack. The destination
+         * address will be adjusted after the template code is copied into a
+         * 32-bit addressable buffer.
+         */
+.Ljmp:  ljmpl   $__KERNEL_CS, $(.Lret - trampoline_32bit_src)
 SYM_CODE_END(trampoline_32bit_src)
 
+/*
+ * This symbol is placed right after trampoline_32bit_src() so its address can
+ * be used to infer the size of the trampoline code.
+ */
+SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src)
+
 /*
  * The trampoline code has a size limit.
  * Make sure we fail to compile if the trampoline code grows
  * beyond TRAMPOLINE_32BIT_CODE_SIZE bytes.
  */
         .org    trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
 
+        .text
 SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
         /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
 1:
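
[Illustrative note, not part of the commit] The shrq/shlq/orq sequence above exists because only the low 32 bits of RSP can be relied upon to survive the excursion through 32-bit mode, so the high half is parked in %rbx, a register the 32-bit section leaves untouched, and OR'ed back in once the far jump lands on .Lret. A toy C model of that round trip:

#include <stdint.h>

/* Toy model of the RSP round trip; not kernel code. */
static uint64_t rsp_round_trip(uint64_t rsp)
{
        uint64_t high = rsp >> 32;        /* movq %rsp, %rbx; shrq $32, %rbx */
        uint32_t low  = (uint32_t)rsp;    /* what survives 32-bit mode       */

        /* Back in long mode: shlq $32, %rbx; orq %rbx, %rsp */
        return (high << 32) | low;
}

For example, with RSP = 0x0000000123456780 only 0x23456780 is guaranteed to remain after the mode switch, and the final OR restores the full 64-bit value.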

arch/x86/boot/compressed/pgtable.h

Lines changed: 2 additions & 2 deletions
@@ -8,13 +8,13 @@
 #define TRAMPOLINE_32BIT_CODE_OFFSET    PAGE_SIZE
 #define TRAMPOLINE_32BIT_CODE_SIZE      0xA0
 
-#define TRAMPOLINE_32BIT_STACK_END      TRAMPOLINE_32BIT_SIZE
-
 #ifndef __ASSEMBLER__
 
 extern unsigned long *trampoline_32bit;
 
 extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
 
+extern const u16 trampoline_ljmp_imm_offset;
+
 #endif /* __ASSEMBLER__ */
 #endif /* BOOT_COMPRESSED_PAGETABLE_H */

arch/x86/boot/compressed/pgtable_64.c

Lines changed: 11 additions & 1 deletion
@@ -109,6 +109,7 @@ static unsigned long find_trampoline_placement(void)
 struct paging_config paging_prepare(void *rmode)
 {
         struct paging_config paging_config = {};
+        void *tramp_code;
 
         /* Initialize boot_params. Required for cmdline_find_option_bool(). */
         boot_params = rmode;
@@ -148,9 +149,18 @@ struct paging_config paging_prepare(void *rmode)
         memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
 
         /* Copy trampoline code in place */
-        memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
+        tramp_code = memcpy(trampoline_32bit +
+                        TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
                 &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
 
+        /*
+         * Avoid the need for a stack in the 32-bit trampoline code, by using
+         * LJMP rather than LRET to return back to long mode. LJMP takes an
+         * immediate absolute address, which needs to be adjusted based on the
+         * placement of the trampoline.
+         */
+        *(u32 *)(tramp_code + trampoline_ljmp_imm_offset) += (unsigned long)tramp_code;
+
         /*
          * The code below prepares page table in trampoline memory.
          *
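
[Illustrative note, not part of the commit] The adjustment above works because ljmpl with an immediate operand, in the 32-bit operand-size form used here, is encoded as one opcode byte (0xEA) followed by a 4-byte offset and a 2-byte segment selector, so trampoline_ljmp_imm_offset (.Ljmp + 1 - trampoline_32bit_src) is the position of that 4-byte offset within the copied code. The assembler stores .Lret - trampoline_32bit_src there, and adding tramp_code rebases it to the absolute 32-bit address of .Lret inside the trampoline buffer. A small self-contained check of the same arithmetic, using made-up placement and selector values and assuming a little-endian host (as on x86):

#include <assert.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
        /*
         * Hypothetical bytes of the template's far jump: 0xEA opcode,
         * 32-bit offset (.Lret - trampoline_32bit_src, 0x30 here), then a
         * 16-bit code segment selector (0x10 used for illustration).
         */
        uint8_t ljmp[7] = { 0xEA, 0x30, 0x00, 0x00, 0x00, 0x10, 0x00 };
        uint16_t imm_offset = 1;          /* opcode is a single byte         */
        uint32_t tramp_code = 0x0007f000; /* made-up runtime placement       */
        uint32_t dest;

        memcpy(&dest, &ljmp[imm_offset], sizeof(dest));
        dest += tramp_code;               /* same adjustment as in the patch */
        memcpy(&ljmp[imm_offset], &dest, sizeof(dest));

        assert(dest == 0x0007f030);       /* absolute address of .Lret       */
        return 0;
}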
