Skip to content

Commit 40c4590

Browse files
amluto authored and KAGA-KOKO committed
x86/ptrace: Fix 32-bit PTRACE_SETREGS vs fsbase and gsbase
Debuggers expect that doing PTRACE_GETREGS, then poking at a tracee and maybe letting it run for a while, then doing PTRACE_SETREGS will put the tracee back where it was. In the specific case of a 32-bit tracer and tracee, the PTRACE_GETREGS/SETREGS data structure doesn't have fs_base or gs_base fields, so FSBASE and GSBASE are never stored anywhere. Everything used to still work because nonzero FS or GS would result in full reloads of the segment registers when the tracee resumes, and the bases associated with FS==0 or GS==0 are irrelevant to 32-bit code. Adding FSGSBASE support broke this: when FSGSBASE is enabled, FSBASE and GSBASE are now restored independently of FS and GS for all tasks when context-switched in. This means that, if a 32-bit tracer restores a previous state using PTRACE_SETREGS but the tracee's pre-restore and post-restore bases don't match, then the tracee is resumed with the wrong base. Fix it by explicitly loading the base when a 32-bit tracer pokes FS or GS on a 64-bit kernel. Also add a test case. Fixes: 6739034 ("x86/process/64: Use FSBSBASE in switch_to() if available") Signed-off-by: Andy Lutomirski <luto@kernel.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Link: https://lkml.kernel.org/r/229cc6a50ecbb701abd50fe4ddaf0eda888898cd.1593192140.git.luto@kernel.org
1 parent 8e25903 commit 40c4590

File tree

5 files changed

+280
-16
lines changed

5 files changed

+280
-16
lines changed

arch/x86/include/asm/fsgsbase.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ static inline void x86_fsbase_write_cpu(unsigned long fsbase)
7575

7676
extern unsigned long x86_gsbase_read_cpu_inactive(void);
7777
extern void x86_gsbase_write_cpu_inactive(unsigned long gsbase);
78+
extern unsigned long x86_fsgsbase_read_task(struct task_struct *task,
79+
unsigned short selector);
7880

7981
#endif /* CONFIG_X86_64 */
8082

arch/x86/kernel/process_64.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -347,8 +347,8 @@ static __always_inline void x86_fsgsbase_load(struct thread_struct *prev,
347347
}
348348
}
349349

350-
static unsigned long x86_fsgsbase_read_task(struct task_struct *task,
351-
unsigned short selector)
350+
unsigned long x86_fsgsbase_read_task(struct task_struct *task,
351+
unsigned short selector)
352352
{
353353
unsigned short idx = selector >> 3;
354354
unsigned long base;

arch/x86/kernel/ptrace.c

Lines changed: 30 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -281,17 +281,9 @@ static int set_segment_reg(struct task_struct *task,
281281
return -EIO;
282282

283283
/*
284-
* This function has some ABI oddities.
285-
*
286-
* A 32-bit ptracer probably expects that writing FS or GS will change
287-
* FSBASE or GSBASE respectively. In the absence of FSGSBASE support,
288-
* this code indeed has that effect. When FSGSBASE is added, this
289-
* will require a special case.
290-
*
291-
* For existing 64-bit ptracers, writing FS or GS *also* currently
292-
* changes the base if the selector is nonzero the next time the task
293-
* is run. This behavior may not be needed, and trying to preserve it
294-
* when FSGSBASE is added would be complicated at best.
284+
* Writes to FS and GS will change the stored selector. Whether
285+
* this changes the segment base as well depends on whether
286+
* FSGSBASE is enabled.
295287
*/
296288

297289
switch (offset) {
@@ -867,14 +859,39 @@ long arch_ptrace(struct task_struct *child, long request,
867859
static int putreg32(struct task_struct *child, unsigned regno, u32 value)
868860
{
869861
struct pt_regs *regs = task_pt_regs(child);
862+
int ret;
870863

871864
switch (regno) {
872865

873866
SEG32(cs);
874867
SEG32(ds);
875868
SEG32(es);
876-
SEG32(fs);
877-
SEG32(gs);
869+
870+
/*
871+
* A 32-bit ptracer on a 64-bit kernel expects that writing
872+
* FS or GS will also update the base. This is needed for
873+
* operations like PTRACE_SETREGS to fully restore a saved
874+
* CPU state.
875+
*/
876+
877+
case offsetof(struct user32, regs.fs):
878+
ret = set_segment_reg(child,
879+
offsetof(struct user_regs_struct, fs),
880+
value);
881+
if (ret == 0)
882+
child->thread.fsbase =
883+
x86_fsgsbase_read_task(child, value);
884+
return ret;
885+
886+
case offsetof(struct user32, regs.gs):
887+
ret = set_segment_reg(child,
888+
offsetof(struct user_regs_struct, gs),
889+
value);
890+
if (ret == 0)
891+
child->thread.gsbase =
892+
x86_fsgsbase_read_task(child, value);
893+
return ret;
894+
878895
SEG32(ss);
879896

880897
R32(ebx, bx);

tools/testing/selftests/x86/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
1313
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
1414
check_initial_reg_state sigreturn iopl ioperm \
1515
test_vdso test_vsyscall mov_ss_trap \
16-
syscall_arg_fault
16+
syscall_arg_fault fsgsbase_restore
1717
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
1818
test_FCMOV test_FCOMI test_FISTTP \
1919
vdso_restorer
Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/*
3+
* fsgsbase_restore.c, test ptrace vs fsgsbase
4+
* Copyright (c) 2020 Andy Lutomirski
5+
*
6+
* This test case simulates a tracer redirecting tracee execution to
7+
* a function and then restoring tracee state using PTRACE_GETREGS and
8+
* PTRACE_SETREGS. This is similar to what gdb does when doing
9+
* 'p func()'. The catch is that this test has the called function
10+
* modify a segment register. This makes sure that ptrace correctly
11+
* restores segment state when using PTRACE_SETREGS.
12+
*
13+
* This is not part of fsgsbase.c, because that test is 64-bit only.
14+
*/
15+
16+
#define _GNU_SOURCE
17+
#include <stdio.h>
18+
#include <stdlib.h>
19+
#include <stdbool.h>
20+
#include <string.h>
21+
#include <sys/syscall.h>
22+
#include <unistd.h>
23+
#include <err.h>
24+
#include <sys/user.h>
25+
#include <asm/prctl.h>
26+
#include <sys/prctl.h>
27+
#include <asm/ldt.h>
28+
#include <sys/mman.h>
29+
#include <stddef.h>
30+
#include <sys/ptrace.h>
31+
#include <sys/wait.h>
32+
#include <stdint.h>
33+
34+
#define EXPECTED_VALUE 0x1337f00d
35+
36+
#ifdef __x86_64__
37+
# define SEG "%gs"
38+
#else
39+
# define SEG "%fs"
40+
#endif
41+
42+
static unsigned int dereference_seg_base(void)
43+
{
44+
int ret;
45+
asm volatile ("mov %" SEG ":(0), %0" : "=rm" (ret));
46+
return ret;
47+
}
48+
49+
static void init_seg(void)
50+
{
51+
unsigned int *target = mmap(
52+
NULL, sizeof(unsigned int),
53+
PROT_READ | PROT_WRITE,
54+
MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
55+
if (target == MAP_FAILED)
56+
err(1, "mmap");
57+
58+
*target = EXPECTED_VALUE;
59+
60+
printf("\tsegment base address = 0x%lx\n", (unsigned long)target);
61+
62+
struct user_desc desc = {
63+
.entry_number = 0,
64+
.base_addr = (unsigned int)(uintptr_t)target,
65+
.limit = sizeof(unsigned int) - 1,
66+
.seg_32bit = 1,
67+
.contents = 0, /* Data, grow-up */
68+
.read_exec_only = 0,
69+
.limit_in_pages = 0,
70+
.seg_not_present = 0,
71+
.useable = 0
72+
};
73+
if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
74+
printf("\tusing LDT slot 0\n");
75+
asm volatile ("mov %0, %" SEG :: "rm" ((unsigned short)0x7));
76+
} else {
77+
/* No modify_ldt for us (configured out, perhaps) */
78+
79+
struct user_desc *low_desc = mmap(
80+
NULL, sizeof(desc),
81+
PROT_READ | PROT_WRITE,
82+
MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
83+
memcpy(low_desc, &desc, sizeof(desc));
84+
85+
low_desc->entry_number = -1;
86+
87+
/* 32-bit set_thread_area */
88+
long ret;
89+
asm volatile ("int $0x80"
90+
: "=a" (ret), "+m" (*low_desc)
91+
: "a" (243), "b" (low_desc)
92+
#ifdef __x86_64__
93+
: "r8", "r9", "r10", "r11"
94+
#endif
95+
);
96+
memcpy(&desc, low_desc, sizeof(desc));
97+
munmap(low_desc, sizeof(desc));
98+
99+
if (ret != 0) {
100+
printf("[NOTE]\tcould not create a segment -- can't test anything\n");
101+
exit(0);
102+
}
103+
printf("\tusing GDT slot %d\n", desc.entry_number);
104+
105+
unsigned short sel = (unsigned short)((desc.entry_number << 3) | 0x3);
106+
asm volatile ("mov %0, %" SEG :: "rm" (sel));
107+
}
108+
}
109+
110+
static void tracee_zap_segment(void)
111+
{
112+
/*
113+
* The tracer will redirect execution here. This is meant to
114+
* work like gdb's 'p func()' feature. The tricky bit is that
115+
* we modify a segment register in order to make sure that ptrace
116+
* can correctly restore segment registers.
117+
*/
118+
printf("\tTracee: in tracee_zap_segment()\n");
119+
120+
/*
121+
* Write a nonzero selector with base zero to the segment register.
122+
* Using a null selector would defeat the test on AMD pre-Zen2
123+
* CPUs, as such CPUs don't clear the base when loading a null
124+
* selector.
125+
*/
126+
unsigned short sel;
127+
asm volatile ("mov %%ss, %0\n\t"
128+
"mov %0, %" SEG
129+
: "=rm" (sel));
130+
131+
pid_t pid = getpid(), tid = syscall(SYS_gettid);
132+
133+
printf("\tTracee is going back to sleep\n");
134+
syscall(SYS_tgkill, pid, tid, SIGSTOP);
135+
136+
/* Should not get here. */
137+
while (true) {
138+
printf("[FAIL]\tTracee hit unreachable code\n");
139+
pause();
140+
}
141+
}
142+
143+
int main()
144+
{
145+
printf("\tSetting up a segment\n");
146+
init_seg();
147+
148+
unsigned int val = dereference_seg_base();
149+
if (val != EXPECTED_VALUE) {
150+
printf("[FAIL]\tseg[0] == %x; should be %x\n", val, EXPECTED_VALUE);
151+
return 1;
152+
}
153+
printf("[OK]\tThe segment points to the right place.\n");
154+
155+
pid_t chld = fork();
156+
if (chld < 0)
157+
err(1, "fork");
158+
159+
if (chld == 0) {
160+
prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0, 0);
161+
162+
if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
163+
err(1, "PTRACE_TRACEME");
164+
165+
pid_t pid = getpid(), tid = syscall(SYS_gettid);
166+
167+
printf("\tTracee will take a nap until signaled\n");
168+
syscall(SYS_tgkill, pid, tid, SIGSTOP);
169+
170+
printf("\tTracee was resumed. Will re-check segment.\n");
171+
172+
val = dereference_seg_base();
173+
if (val != EXPECTED_VALUE) {
174+
printf("[FAIL]\tseg[0] == %x; should be %x\n", val, EXPECTED_VALUE);
175+
exit(1);
176+
}
177+
178+
printf("[OK]\tThe segment points to the right place.\n");
179+
exit(0);
180+
}
181+
182+
int status;
183+
184+
/* Wait for SIGSTOP. */
185+
if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
186+
err(1, "waitpid");
187+
188+
struct user_regs_struct regs;
189+
190+
if (ptrace(PTRACE_GETREGS, chld, NULL, &regs) != 0)
191+
err(1, "PTRACE_GETREGS");
192+
193+
#ifdef __x86_64__
194+
printf("\tChild GS=0x%lx, GSBASE=0x%lx\n", (unsigned long)regs.gs, (unsigned long)regs.gs_base);
195+
#else
196+
printf("\tChild FS=0x%lx\n", (unsigned long)regs.xfs);
197+
#endif
198+
199+
struct user_regs_struct regs2 = regs;
200+
#ifdef __x86_64__
201+
regs2.rip = (unsigned long)tracee_zap_segment;
202+
regs2.rsp -= 128; /* Don't clobber the redzone. */
203+
#else
204+
regs2.eip = (unsigned long)tracee_zap_segment;
205+
#endif
206+
207+
printf("\tTracer: redirecting tracee to tracee_zap_segment()\n");
208+
if (ptrace(PTRACE_SETREGS, chld, NULL, &regs2) != 0)
209+
err(1, "PTRACE_GETREGS");
210+
if (ptrace(PTRACE_CONT, chld, NULL, NULL) != 0)
211+
err(1, "PTRACE_GETREGS");
212+
213+
/* Wait for SIGSTOP. */
214+
if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
215+
err(1, "waitpid");
216+
217+
printf("\tTracer: restoring tracee state\n");
218+
if (ptrace(PTRACE_SETREGS, chld, NULL, &regs) != 0)
219+
err(1, "PTRACE_GETREGS");
220+
if (ptrace(PTRACE_DETACH, chld, NULL, NULL) != 0)
221+
err(1, "PTRACE_GETREGS");
222+
223+
/* Wait for SIGSTOP. */
224+
if (waitpid(chld, &status, 0) != chld)
225+
err(1, "waitpid");
226+
227+
if (WIFSIGNALED(status)) {
228+
printf("[FAIL]\tTracee crashed\n");
229+
return 1;
230+
}
231+
232+
if (!WIFEXITED(status)) {
233+
printf("[FAIL]\tTracee stopped for an unexpected reason: %d\n", status);
234+
return 1;
235+
}
236+
237+
int exitcode = WEXITSTATUS(status);
238+
if (exitcode != 0) {
239+
printf("[FAIL]\tTracee reported failure\n");
240+
return 1;
241+
}
242+
243+
printf("[OK]\tAll is well.\n");
244+
return 0;
245+
}

0 commit comments

Comments
 (0)