Skip to content

Commit cdd8578

Browse files
kiryl authored and hansendc committed
x86/tdx: Clarify RIP adjustments in #VE handler
After successful #VE handling, tdx_handle_virt_exception() has to move RIP to the next instruction. The handler needs to know the length of the instruction. If the #VE happened due to instruction execution, the GET_VEINFO TDX module call provides info on the instruction in R10, including its length. For #VE due to EPT violation, the info in R10 is not populated and the kernel must decode the instruction manually to find out its length. Restructure the code to make it explicit that the instruction length depends on the type of #VE. Make individual #VE handlers return the instruction length on success or -errno on failure. [ dhansen: fix up changelog and comments ] Suggested-by: Dave Hansen <dave.hansen@intel.com> Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> Link: https://lkml.kernel.org/r/20220614120135.14812-3-kirill.shutemov@linux.intel.com
1 parent 60428d8 commit cdd8578

File tree

1 file changed

+123
-55
lines changed

1 file changed

+123
-55
lines changed

arch/x86/coco/tdx/tdx.c

Lines changed: 123 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,51 @@ static u64 get_cc_mask(void)
124124
return BIT_ULL(gpa_width - 1);
125125
}
126126

127+
/*
128+
* The TDX module spec states that #VE may be injected for a limited set of
129+
* reasons:
130+
*
131+
* - Emulation of the architectural #VE injection on EPT violation;
132+
*
133+
* - As a result of guest TD execution of a disallowed instruction,
134+
* a disallowed MSR access, or CPUID virtualization;
135+
*
136+
* - A notification to the guest TD about anomalous behavior;
137+
*
138+
* The last one is opt-in and is not used by the kernel.
139+
*
140+
* The Intel Software Developer's Manual describes cases when instruction
141+
* length field can be used in section "Information for VM Exits Due to
142+
* Instruction Execution".
143+
*
144+
* For TDX, it ultimately means GET_VEINFO provides reliable instruction length
145+
* information if #VE occurred due to instruction execution, but not for EPT
146+
* violations.
147+
*/
148+
static int ve_instr_len(struct ve_info *ve)
149+
{
150+
switch (ve->exit_reason) {
151+
case EXIT_REASON_HLT:
152+
case EXIT_REASON_MSR_READ:
153+
case EXIT_REASON_MSR_WRITE:
154+
case EXIT_REASON_CPUID:
155+
case EXIT_REASON_IO_INSTRUCTION:
156+
/* It is safe to use ve->instr_len for #VE due to instruction execution */
157+
return ve->instr_len;
158+
case EXIT_REASON_EPT_VIOLATION:
159+
/*
160+
* For EPT violations, ve->instr_len is not defined. For those,
161+
* the kernel must decode instructions manually and should not
162+
* be using this function.
163+
*/
164+
WARN_ONCE(1, "ve->instr_len is not defined for EPT violations");
165+
return 0;
166+
default: /* unknown exit reason: warn once, then still return ve->instr_len */
167+
WARN_ONCE(1, "Unexpected #VE-type: %lld\n", ve->exit_reason);
168+
return ve->instr_len;
169+
}
170+
}
171+
127172
static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
128173
{
129174
struct tdx_hypercall_args args = {
@@ -147,7 +192,7 @@ static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
147192
return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
148193
}
149194

150-
static bool handle_halt(void)
195+
static int handle_halt(struct ve_info *ve)
151196
{
152197
/*
153198
* Since non safe halt is mainly used in CPU offlining
@@ -158,9 +203,9 @@ static bool handle_halt(void)
158203
const bool do_sti = false;
159204

160205
if (__halt(irq_disabled, do_sti))
161-
return false;
206+
return -EIO;
162207

163-
return true;
208+
return ve_instr_len(ve);
164209
}
165210

166211
void __cpuidle tdx_safe_halt(void)
@@ -180,7 +225,7 @@ void __cpuidle tdx_safe_halt(void)
180225
WARN_ONCE(1, "HLT instruction emulation failed\n");
181226
}
182227

183-
static bool read_msr(struct pt_regs *regs)
228+
static int read_msr(struct pt_regs *regs, struct ve_info *ve)
184229
{
185230
struct tdx_hypercall_args args = {
186231
.r10 = TDX_HYPERCALL_STANDARD,
@@ -194,14 +239,14 @@ static bool read_msr(struct pt_regs *regs)
194239
* (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>".
195240
*/
196241
if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
197-
return false;
242+
return -EIO;
198243

199244
regs->ax = lower_32_bits(args.r11);
200245
regs->dx = upper_32_bits(args.r11);
201-
return true;
246+
return ve_instr_len(ve);
202247
}
203248

204-
static bool write_msr(struct pt_regs *regs)
249+
static int write_msr(struct pt_regs *regs, struct ve_info *ve)
205250
{
206251
struct tdx_hypercall_args args = {
207252
.r10 = TDX_HYPERCALL_STANDARD,
@@ -215,10 +260,13 @@ static bool write_msr(struct pt_regs *regs)
215260
* can be found in TDX Guest-Host-Communication Interface
216261
* (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>".
217262
*/
218-
return !__tdx_hypercall(&args, 0);
263+
if (__tdx_hypercall(&args, 0))
264+
return -EIO;
265+
266+
return ve_instr_len(ve);
219267
}
220268

221-
static bool handle_cpuid(struct pt_regs *regs)
269+
static int handle_cpuid(struct pt_regs *regs, struct ve_info *ve)
222270
{
223271
struct tdx_hypercall_args args = {
224272
.r10 = TDX_HYPERCALL_STANDARD,
@@ -236,7 +284,7 @@ static bool handle_cpuid(struct pt_regs *regs)
236284
*/
237285
if (regs->ax < 0x40000000 || regs->ax > 0x4FFFFFFF) {
238286
regs->ax = regs->bx = regs->cx = regs->dx = 0;
239-
return true;
287+
return ve_instr_len(ve);
240288
}
241289

242290
/*
@@ -245,7 +293,7 @@ static bool handle_cpuid(struct pt_regs *regs)
245293
* (GHCI), section titled "VP.VMCALL<Instruction.CPUID>".
246294
*/
247295
if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
248-
return false;
296+
return -EIO;
249297

250298
/*
251299
* As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of
@@ -257,7 +305,7 @@ static bool handle_cpuid(struct pt_regs *regs)
257305
regs->cx = args.r14;
258306
regs->dx = args.r15;
259307

260-
return true;
308+
return ve_instr_len(ve);
261309
}
262310

263311
static bool mmio_read(int size, unsigned long addr, unsigned long *val)
@@ -283,7 +331,7 @@ static bool mmio_write(int size, unsigned long addr, unsigned long val)
283331
EPT_WRITE, addr, val);
284332
}
285333

286-
static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
334+
static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
287335
{
288336
char buffer[MAX_INSN_SIZE];
289337
unsigned long *reg, val;
@@ -294,34 +342,36 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
294342

295343
/* Only in-kernel MMIO is supported */
296344
if (WARN_ON_ONCE(user_mode(regs)))
297-
return false;
345+
return -EFAULT;
298346

299347
if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE))
300-
return false;
348+
return -EFAULT;
301349

302350
if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64))
303-
return false;
351+
return -EINVAL;
304352

305353
mmio = insn_decode_mmio(&insn, &size);
306354
if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED))
307-
return false;
355+
return -EINVAL;
308356

309357
if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
310358
reg = insn_get_modrm_reg_ptr(&insn, regs);
311359
if (!reg)
312-
return false;
360+
return -EINVAL;
313361
}
314362

315-
ve->instr_len = insn.length;
316-
317363
/* Handle writes first */
318364
switch (mmio) {
319365
case MMIO_WRITE:
320366
memcpy(&val, reg, size);
321-
return mmio_write(size, ve->gpa, val);
367+
if (!mmio_write(size, ve->gpa, val))
368+
return -EIO;
369+
return insn.length;
322370
case MMIO_WRITE_IMM:
323371
val = insn.immediate.value;
324-
return mmio_write(size, ve->gpa, val);
372+
if (!mmio_write(size, ve->gpa, val))
373+
return -EIO;
374+
return insn.length;
325375
case MMIO_READ:
326376
case MMIO_READ_ZERO_EXTEND:
327377
case MMIO_READ_SIGN_EXTEND:
@@ -334,15 +384,15 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
334384
* decoded or handled properly. It was likely not using io.h
335385
* helpers or accessed MMIO accidentally.
336386
*/
337-
return false;
387+
return -EINVAL;
338388
default:
339389
WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?");
340-
return false;
390+
return -EINVAL;
341391
}
342392

343393
/* Handle reads */
344394
if (!mmio_read(size, ve->gpa, &val))
345-
return false;
395+
return -EIO;
346396

347397
switch (mmio) {
348398
case MMIO_READ:
@@ -364,13 +414,13 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
364414
default:
365415
/* All other cases has to be covered with the first switch() */
366416
WARN_ON_ONCE(1);
367-
return false;
417+
return -EINVAL;
368418
}
369419

370420
if (extend_size)
371421
memset(reg, extend_val, extend_size);
372422
memcpy(reg, &val, size);
373-
return true;
423+
return insn.length;
374424
}
375425

376426
static bool handle_in(struct pt_regs *regs, int size, int port)
@@ -421,23 +471,28 @@ static bool handle_out(struct pt_regs *regs, int size, int port)
421471
*
422472
* Returns the instruction length on success or -errno on failure.
423473
*/
424-
static bool handle_io(struct pt_regs *regs, u32 exit_qual)
474+
static int handle_io(struct pt_regs *regs, struct ve_info *ve)
425475
{
476+
u32 exit_qual = ve->exit_qual;
426477
int size, port;
427-
bool in;
478+
bool in, ret;
428479

429480
if (VE_IS_IO_STRING(exit_qual))
430-
return false;
481+
return -EIO;
431482

432483
in = VE_IS_IO_IN(exit_qual);
433484
size = VE_GET_IO_SIZE(exit_qual);
434485
port = VE_GET_PORT_NUM(exit_qual);
435486

436487

437488
if (in)
438-
return handle_in(regs, size, port);
489+
ret = handle_in(regs, size, port);
439490
else
440-
return handle_out(regs, size, port);
491+
ret = handle_out(regs, size, port);
492+
if (!ret)
493+
return -EIO;
494+
495+
return ve_instr_len(ve);
441496
}
442497

443498
/*
@@ -447,17 +502,19 @@ static bool handle_io(struct pt_regs *regs, u32 exit_qual)
447502
__init bool tdx_early_handle_ve(struct pt_regs *regs)
448503
{
449504
struct ve_info ve;
450-
bool ret;
505+
int insn_len;
451506

452507
tdx_get_ve_info(&ve);
453508

454509
if (ve.exit_reason != EXIT_REASON_IO_INSTRUCTION)
455510
return false;
456511

457-
ret = handle_io(regs, ve.exit_qual);
458-
if (ret)
459-
regs->ip += ve.instr_len;
460-
return ret;
512+
insn_len = handle_io(regs, &ve);
513+
if (insn_len < 0)
514+
return false;
515+
516+
regs->ip += insn_len;
517+
return true;
461518
}
462519

463520
void tdx_get_ve_info(struct ve_info *ve)
@@ -490,54 +547,65 @@ void tdx_get_ve_info(struct ve_info *ve)
490547
ve->instr_info = upper_32_bits(out.r10);
491548
}
492549

493-
/* Handle the user initiated #VE */
494-
static bool virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
550+
/*
551+
* Handle the user initiated #VE.
552+
*
553+
* On success, returns the number of bytes RIP should be incremented (>=0)
554+
* or -errno on error.
555+
*/
556+
static int virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
495557
{
496558
switch (ve->exit_reason) {
497559
case EXIT_REASON_CPUID:
498-
return handle_cpuid(regs);
560+
return handle_cpuid(regs, ve);
499561
default:
500562
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
501-
return false;
563+
return -EIO;
502564
}
503565
}
504566

505-
/* Handle the kernel #VE */
506-
static bool virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
567+
/*
568+
* Handle the kernel #VE.
569+
*
570+
* On success, returns the number of bytes RIP should be incremented (>=0)
571+
* or -errno on error.
572+
*/
573+
static int virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
507574
{
508575
switch (ve->exit_reason) {
509576
case EXIT_REASON_HLT:
510-
return handle_halt();
577+
return handle_halt(ve);
511578
case EXIT_REASON_MSR_READ:
512-
return read_msr(regs);
579+
return read_msr(regs, ve);
513580
case EXIT_REASON_MSR_WRITE:
514-
return write_msr(regs);
581+
return write_msr(regs, ve);
515582
case EXIT_REASON_CPUID:
516-
return handle_cpuid(regs);
583+
return handle_cpuid(regs, ve);
517584
case EXIT_REASON_EPT_VIOLATION:
518585
return handle_mmio(regs, ve);
519586
case EXIT_REASON_IO_INSTRUCTION:
520-
return handle_io(regs, ve->exit_qual);
587+
return handle_io(regs, ve);
521588
default:
522589
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
523-
return false;
590+
return -EIO;
524591
}
525592
}
526593

527594
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve)
528595
{
529-
bool ret;
596+
int insn_len;
530597

531598
if (user_mode(regs))
532-
ret = virt_exception_user(regs, ve);
599+
insn_len = virt_exception_user(regs, ve);
533600
else
534-
ret = virt_exception_kernel(regs, ve);
601+
insn_len = virt_exception_kernel(regs, ve);
602+
if (insn_len < 0)
603+
return false;
535604

536605
/* After successful #VE handling, move the IP */
537-
if (ret)
538-
regs->ip += ve->instr_len;
606+
regs->ip += insn_len;
539607

540-
return ret;
608+
return true;
541609
}
542610

543611
static bool tdx_tlb_flush_required(bool private)

0 commit comments

Comments
 (0)