Skip to content

Commit 05c6ca8

Browse files
committed
Merge tag 'x86-urgent-2022-06-19' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 fixes from Thomas Gleixner: - Make RESERVE_BRK() work again with older binutils. The recent 'simplification' broke that. - Make early #VE handling increment RIP when successful. - Make the #VE code consistent vs. the RIP adjustments and add comments. - Handle load_unaligned_zeropad() across page boundaries correctly in #VE when the second page is shared. * tag 'x86-urgent-2022-06-19' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/tdx: Handle load_unaligned_zeropad() page-cross to a shared page x86/tdx: Clarify RIP adjustments in #VE handler x86/tdx: Fix early #VE handling x86/mm: Fix RESERVE_BRK() for older binutils
2 parents 5d770f1 + 1e77696 commit 05c6ca8

File tree

4 files changed

+159
-75
lines changed

4 files changed

+159
-75
lines changed

arch/x86/coco/tdx/tdx.c

Lines changed: 136 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,51 @@ static u64 get_cc_mask(void)
124124
return BIT_ULL(gpa_width - 1);
125125
}
126126

127+
/*
128+
* The TDX module spec states that #VE may be injected for a limited set of
129+
* reasons:
130+
*
131+
* - Emulation of the architectural #VE injection on EPT violation;
132+
*
133+
* - As a result of guest TD execution of a disallowed instruction,
134+
* a disallowed MSR access, or CPUID virtualization;
135+
*
136+
* - A notification to the guest TD about anomalous behavior;
137+
*
138+
* The last one is opt-in and is not used by the kernel.
139+
*
140+
* The Intel Software Developer's Manual describes cases when instruction
141+
* length field can be used in section "Information for VM Exits Due to
142+
* Instruction Execution".
143+
*
144+
* For TDX, it ultimately means GET_VEINFO provides reliable instruction length
145+
* information if #VE occurred due to instruction execution, but not for EPT
146+
* violations.
147+
*/
148+
static int ve_instr_len(struct ve_info *ve)
149+
{
150+
switch (ve->exit_reason) {
151+
case EXIT_REASON_HLT:
152+
case EXIT_REASON_MSR_READ:
153+
case EXIT_REASON_MSR_WRITE:
154+
case EXIT_REASON_CPUID:
155+
case EXIT_REASON_IO_INSTRUCTION:
156+
/* It is safe to use ve->instr_len for #VE due instructions */
157+
return ve->instr_len;
158+
case EXIT_REASON_EPT_VIOLATION:
159+
/*
160+
* For EPT violations, ve->insn_len is not defined. For those,
161+
* the kernel must decode instructions manually and should not
162+
* be using this function.
163+
*/
164+
WARN_ONCE(1, "ve->instr_len is not defined for EPT violations");
165+
return 0;
166+
default:
167+
WARN_ONCE(1, "Unexpected #VE-type: %lld\n", ve->exit_reason);
168+
return ve->instr_len;
169+
}
170+
}
171+
127172
static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
128173
{
129174
struct tdx_hypercall_args args = {
@@ -147,7 +192,7 @@ static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
147192
return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
148193
}
149194

150-
static bool handle_halt(void)
195+
static int handle_halt(struct ve_info *ve)
151196
{
152197
/*
153198
* Since non safe halt is mainly used in CPU offlining
@@ -158,9 +203,9 @@ static bool handle_halt(void)
158203
const bool do_sti = false;
159204

160205
if (__halt(irq_disabled, do_sti))
161-
return false;
206+
return -EIO;
162207

163-
return true;
208+
return ve_instr_len(ve);
164209
}
165210

166211
void __cpuidle tdx_safe_halt(void)
@@ -180,7 +225,7 @@ void __cpuidle tdx_safe_halt(void)
180225
WARN_ONCE(1, "HLT instruction emulation failed\n");
181226
}
182227

183-
static bool read_msr(struct pt_regs *regs)
228+
static int read_msr(struct pt_regs *regs, struct ve_info *ve)
184229
{
185230
struct tdx_hypercall_args args = {
186231
.r10 = TDX_HYPERCALL_STANDARD,
@@ -194,14 +239,14 @@ static bool read_msr(struct pt_regs *regs)
194239
* (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>".
195240
*/
196241
if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
197-
return false;
242+
return -EIO;
198243

199244
regs->ax = lower_32_bits(args.r11);
200245
regs->dx = upper_32_bits(args.r11);
201-
return true;
246+
return ve_instr_len(ve);
202247
}
203248

204-
static bool write_msr(struct pt_regs *regs)
249+
static int write_msr(struct pt_regs *regs, struct ve_info *ve)
205250
{
206251
struct tdx_hypercall_args args = {
207252
.r10 = TDX_HYPERCALL_STANDARD,
@@ -215,10 +260,13 @@ static bool write_msr(struct pt_regs *regs)
215260
* can be found in TDX Guest-Host-Communication Interface
216261
* (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>".
217262
*/
218-
return !__tdx_hypercall(&args, 0);
263+
if (__tdx_hypercall(&args, 0))
264+
return -EIO;
265+
266+
return ve_instr_len(ve);
219267
}
220268

221-
static bool handle_cpuid(struct pt_regs *regs)
269+
static int handle_cpuid(struct pt_regs *regs, struct ve_info *ve)
222270
{
223271
struct tdx_hypercall_args args = {
224272
.r10 = TDX_HYPERCALL_STANDARD,
@@ -236,7 +284,7 @@ static bool handle_cpuid(struct pt_regs *regs)
236284
*/
237285
if (regs->ax < 0x40000000 || regs->ax > 0x4FFFFFFF) {
238286
regs->ax = regs->bx = regs->cx = regs->dx = 0;
239-
return true;
287+
return ve_instr_len(ve);
240288
}
241289

242290
/*
@@ -245,7 +293,7 @@ static bool handle_cpuid(struct pt_regs *regs)
245293
* (GHCI), section titled "VP.VMCALL<Instruction.CPUID>".
246294
*/
247295
if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
248-
return false;
296+
return -EIO;
249297

250298
/*
251299
* As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of
@@ -257,7 +305,7 @@ static bool handle_cpuid(struct pt_regs *regs)
257305
regs->cx = args.r14;
258306
regs->dx = args.r15;
259307

260-
return true;
308+
return ve_instr_len(ve);
261309
}
262310

263311
static bool mmio_read(int size, unsigned long addr, unsigned long *val)
@@ -283,45 +331,60 @@ static bool mmio_write(int size, unsigned long addr, unsigned long val)
283331
EPT_WRITE, addr, val);
284332
}
285333

286-
static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
334+
static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
287335
{
336+
unsigned long *reg, val, vaddr;
288337
char buffer[MAX_INSN_SIZE];
289-
unsigned long *reg, val;
290338
struct insn insn = {};
291339
enum mmio_type mmio;
292340
int size, extend_size;
293341
u8 extend_val = 0;
294342

295343
/* Only in-kernel MMIO is supported */
296344
if (WARN_ON_ONCE(user_mode(regs)))
297-
return false;
345+
return -EFAULT;
298346

299347
if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE))
300-
return false;
348+
return -EFAULT;
301349

302350
if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64))
303-
return false;
351+
return -EINVAL;
304352

305353
mmio = insn_decode_mmio(&insn, &size);
306354
if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED))
307-
return false;
355+
return -EINVAL;
308356

309357
if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
310358
reg = insn_get_modrm_reg_ptr(&insn, regs);
311359
if (!reg)
312-
return false;
360+
return -EINVAL;
313361
}
314362

315-
ve->instr_len = insn.length;
363+
/*
364+
* Reject EPT violation #VEs that split pages.
365+
*
366+
* MMIO accesses are supposed to be naturally aligned and therefore
367+
* never cross page boundaries. Seeing split page accesses indicates
368+
* a bug or a load_unaligned_zeropad() that stepped into an MMIO page.
369+
*
370+
* load_unaligned_zeropad() will recover using exception fixups.
371+
*/
372+
vaddr = (unsigned long)insn_get_addr_ref(&insn, regs);
373+
if (vaddr / PAGE_SIZE != (vaddr + size - 1) / PAGE_SIZE)
374+
return -EFAULT;
316375

317376
/* Handle writes first */
318377
switch (mmio) {
319378
case MMIO_WRITE:
320379
memcpy(&val, reg, size);
321-
return mmio_write(size, ve->gpa, val);
380+
if (!mmio_write(size, ve->gpa, val))
381+
return -EIO;
382+
return insn.length;
322383
case MMIO_WRITE_IMM:
323384
val = insn.immediate.value;
324-
return mmio_write(size, ve->gpa, val);
385+
if (!mmio_write(size, ve->gpa, val))
386+
return -EIO;
387+
return insn.length;
325388
case MMIO_READ:
326389
case MMIO_READ_ZERO_EXTEND:
327390
case MMIO_READ_SIGN_EXTEND:
@@ -334,15 +397,15 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
334397
* decoded or handled properly. It was likely not using io.h
335398
* helpers or accessed MMIO accidentally.
336399
*/
337-
return false;
400+
return -EINVAL;
338401
default:
339402
WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?");
340-
return false;
403+
return -EINVAL;
341404
}
342405

343406
/* Handle reads */
344407
if (!mmio_read(size, ve->gpa, &val))
345-
return false;
408+
return -EIO;
346409

347410
switch (mmio) {
348411
case MMIO_READ:
@@ -364,13 +427,13 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
364427
default:
365428
/* All other cases has to be covered with the first switch() */
366429
WARN_ON_ONCE(1);
367-
return false;
430+
return -EINVAL;
368431
}
369432

370433
if (extend_size)
371434
memset(reg, extend_val, extend_size);
372435
memcpy(reg, &val, size);
373-
return true;
436+
return insn.length;
374437
}
375438

376439
static bool handle_in(struct pt_regs *regs, int size, int port)
@@ -421,23 +484,28 @@ static bool handle_out(struct pt_regs *regs, int size, int port)
421484
*
422485
* Return the number of bytes RIP should be incremented (>=0) on success,
* or -errno on failure.
423486
*/
424-
static bool handle_io(struct pt_regs *regs, u32 exit_qual)
487+
static int handle_io(struct pt_regs *regs, struct ve_info *ve)
425488
{
489+
u32 exit_qual = ve->exit_qual;
426490
int size, port;
427-
bool in;
491+
bool in, ret;
428492

429493
if (VE_IS_IO_STRING(exit_qual))
430-
return false;
494+
return -EIO;
431495

432496
in = VE_IS_IO_IN(exit_qual);
433497
size = VE_GET_IO_SIZE(exit_qual);
434498
port = VE_GET_PORT_NUM(exit_qual);
435499

436500

437501
if (in)
438-
return handle_in(regs, size, port);
502+
ret = handle_in(regs, size, port);
439503
else
440-
return handle_out(regs, size, port);
504+
ret = handle_out(regs, size, port);
505+
if (!ret)
506+
return -EIO;
507+
508+
return ve_instr_len(ve);
441509
}
442510

443511
/*
@@ -447,13 +515,19 @@ static bool handle_io(struct pt_regs *regs, u32 exit_qual)
447515
__init bool tdx_early_handle_ve(struct pt_regs *regs)
{
	struct ve_info ve;
	int insn_len;

	tdx_get_ve_info(&ve);

	/* Only port I/O is handled this early in boot */
	if (ve.exit_reason != EXIT_REASON_IO_INSTRUCTION)
		return false;

	insn_len = handle_io(regs, &ve);
	if (insn_len < 0)
		return false;

	/* Skip past the emulated instruction */
	regs->ip += insn_len;
	return true;
}
458532

459533
void tdx_get_ve_info(struct ve_info *ve)
@@ -486,54 +560,65 @@ void tdx_get_ve_info(struct ve_info *ve)
486560
ve->instr_info = upper_32_bits(out.r10);
487561
}
488562

489-
/* Handle the user initiated #VE */
490-
static bool virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
563+
/*
564+
* Handle the user initiated #VE.
565+
*
566+
* On success, returns the number of bytes RIP should be incremented (>=0)
567+
* or -errno on error.
568+
*/
569+
static int virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
491570
{
492571
switch (ve->exit_reason) {
493572
case EXIT_REASON_CPUID:
494-
return handle_cpuid(regs);
573+
return handle_cpuid(regs, ve);
495574
default:
496575
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
497-
return false;
576+
return -EIO;
498577
}
499578
}
500579

501-
/* Handle the kernel #VE */
502-
static bool virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
580+
/*
581+
* Handle the kernel #VE.
582+
*
583+
* On success, returns the number of bytes RIP should be incremented (>=0)
584+
* or -errno on error.
585+
*/
586+
static int virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
503587
{
504588
switch (ve->exit_reason) {
505589
case EXIT_REASON_HLT:
506-
return handle_halt();
590+
return handle_halt(ve);
507591
case EXIT_REASON_MSR_READ:
508-
return read_msr(regs);
592+
return read_msr(regs, ve);
509593
case EXIT_REASON_MSR_WRITE:
510-
return write_msr(regs);
594+
return write_msr(regs, ve);
511595
case EXIT_REASON_CPUID:
512-
return handle_cpuid(regs);
596+
return handle_cpuid(regs, ve);
513597
case EXIT_REASON_EPT_VIOLATION:
514598
return handle_mmio(regs, ve);
515599
case EXIT_REASON_IO_INSTRUCTION:
516-
return handle_io(regs, ve->exit_qual);
600+
return handle_io(regs, ve);
517601
default:
518602
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
519-
return false;
603+
return -EIO;
520604
}
521605
}
522606

523607
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve)
524608
{
525-
bool ret;
609+
int insn_len;
526610

527611
if (user_mode(regs))
528-
ret = virt_exception_user(regs, ve);
612+
insn_len = virt_exception_user(regs, ve);
529613
else
530-
ret = virt_exception_kernel(regs, ve);
614+
insn_len = virt_exception_kernel(regs, ve);
615+
if (insn_len < 0)
616+
return false;
531617

532618
/* After successful #VE handling, move the IP */
533-
if (ret)
534-
regs->ip += ve->instr_len;
619+
regs->ip += insn_len;
535620

536-
return ret;
621+
return true;
537622
}
538623

539624
static bool tdx_tlb_flush_required(bool private)

0 commit comments

Comments
 (0)