@@ -124,6 +124,51 @@ static u64 get_cc_mask(void)
124
124
return BIT_ULL (gpa_width - 1 );
125
125
}
126
126
127
+ /*
128
+ * The TDX module spec states that #VE may be injected for a limited set of
129
+ * reasons:
130
+ *
131
+ * - Emulation of the architectural #VE injection on EPT violation;
132
+ *
133
+ * - As a result of guest TD execution of a disallowed instruction,
134
+ * a disallowed MSR access, or CPUID virtualization;
135
+ *
136
+ * - A notification to the guest TD about anomalous behavior;
137
+ *
138
+ * The last one is opt-in and is not used by the kernel.
139
+ *
140
+ * The Intel Software Developer's Manual describes the cases in which the
141
+ * instruction length field can be used in section "Information for VM Exits
142
+ * Due to Instruction Execution".
143
+ *
144
+ * For TDX, it ultimately means GET_VEINFO provides reliable instruction length
145
+ * information if #VE occurred due to instruction execution, but not for EPT
146
+ * violations.
+ *
+ * Returns ve->instr_len when ve->exit_reason is HLT, MSR read, MSR write,
+ * CPUID or an I/O instruction. For an EPT violation it warns once and
+ * returns 0. For any other exit reason it warns once and still returns
+ * ve->instr_len.
147
+ */
148
+ static int ve_instr_len (struct ve_info * ve )
149
+ {
150
+ switch (ve -> exit_reason ) {
151
+ case EXIT_REASON_HLT :
152
+ case EXIT_REASON_MSR_READ :
153
+ case EXIT_REASON_MSR_WRITE :
154
+ case EXIT_REASON_CPUID :
155
+ case EXIT_REASON_IO_INSTRUCTION :
156
+ /* It is safe to use ve->instr_len for #VE due to instructions */
157
+ return ve -> instr_len ;
158
+ case EXIT_REASON_EPT_VIOLATION :
159
+ /*
160
+ * For EPT violations, ve->instr_len is not defined. For those,
161
+ * the kernel must decode instructions manually and should not
162
+ * be using this function.
163
+ */
164
+ WARN_ONCE (1 , "ve->instr_len is not defined for EPT violations" );
165
+ return 0 ;
166
+ default :
167
+ WARN_ONCE (1 , "Unexpected #VE-type: %lld\n" , ve -> exit_reason );
168
+ return ve -> instr_len ;
169
+ }
170
+ }
171
+
127
172
static u64 __cpuidle __halt (const bool irq_disabled , const bool do_sti )
128
173
{
129
174
struct tdx_hypercall_args args = {
@@ -147,7 +192,7 @@ static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
147
192
return __tdx_hypercall (& args , do_sti ? TDX_HCALL_ISSUE_STI : 0 );
148
193
}
149
194
150
- static bool handle_halt (void )
195
+ static int handle_halt (struct ve_info * ve )
151
196
{
152
197
/*
153
198
* Since non safe halt is mainly used in CPU offlining
@@ -158,9 +203,9 @@ static bool handle_halt(void)
158
203
const bool do_sti = false;
159
204
160
205
if (__halt (irq_disabled , do_sti ))
161
- return false ;
206
+ return - EIO ;
162
207
163
- return true ;
208
+ return ve_instr_len ( ve ) ;
164
209
}
165
210
166
211
void __cpuidle tdx_safe_halt (void )
@@ -180,7 +225,7 @@ void __cpuidle tdx_safe_halt(void)
180
225
WARN_ONCE (1 , "HLT instruction emulation failed\n" );
181
226
}
182
227
183
- static bool read_msr (struct pt_regs * regs )
228
+ static int read_msr (struct pt_regs * regs , struct ve_info * ve )
184
229
{
185
230
struct tdx_hypercall_args args = {
186
231
.r10 = TDX_HYPERCALL_STANDARD ,
@@ -194,14 +239,14 @@ static bool read_msr(struct pt_regs *regs)
194
239
* (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>".
195
240
*/
196
241
if (__tdx_hypercall (& args , TDX_HCALL_HAS_OUTPUT ))
197
- return false ;
242
+ return - EIO ;
198
243
199
244
regs -> ax = lower_32_bits (args .r11 );
200
245
regs -> dx = upper_32_bits (args .r11 );
201
- return true ;
246
+ return ve_instr_len ( ve ) ;
202
247
}
203
248
204
- static bool write_msr (struct pt_regs * regs )
249
+ static int write_msr (struct pt_regs * regs , struct ve_info * ve )
205
250
{
206
251
struct tdx_hypercall_args args = {
207
252
.r10 = TDX_HYPERCALL_STANDARD ,
@@ -215,10 +260,13 @@ static bool write_msr(struct pt_regs *regs)
215
260
* can be found in TDX Guest-Host-Communication Interface
216
261
* (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>".
217
262
*/
218
- return !__tdx_hypercall (& args , 0 );
263
+ if (__tdx_hypercall (& args , 0 ))
264
+ return - EIO ;
265
+
266
+ return ve_instr_len (ve );
219
267
}
220
268
221
- static bool handle_cpuid (struct pt_regs * regs )
269
+ static int handle_cpuid (struct pt_regs * regs , struct ve_info * ve )
222
270
{
223
271
struct tdx_hypercall_args args = {
224
272
.r10 = TDX_HYPERCALL_STANDARD ,
@@ -236,7 +284,7 @@ static bool handle_cpuid(struct pt_regs *regs)
236
284
*/
237
285
if (regs -> ax < 0x40000000 || regs -> ax > 0x4FFFFFFF ) {
238
286
regs -> ax = regs -> bx = regs -> cx = regs -> dx = 0 ;
239
- return true ;
287
+ return ve_instr_len ( ve ) ;
240
288
}
241
289
242
290
/*
@@ -245,7 +293,7 @@ static bool handle_cpuid(struct pt_regs *regs)
245
293
* (GHCI), section titled "VP.VMCALL<Instruction.CPUID>".
246
294
*/
247
295
if (__tdx_hypercall (& args , TDX_HCALL_HAS_OUTPUT ))
248
- return false ;
296
+ return - EIO ;
249
297
250
298
/*
251
299
* As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of
@@ -257,7 +305,7 @@ static bool handle_cpuid(struct pt_regs *regs)
257
305
regs -> cx = args .r14 ;
258
306
regs -> dx = args .r15 ;
259
307
260
- return true ;
308
+ return ve_instr_len ( ve ) ;
261
309
}
262
310
263
311
static bool mmio_read (int size , unsigned long addr , unsigned long * val )
@@ -283,7 +331,7 @@ static bool mmio_write(int size, unsigned long addr, unsigned long val)
283
331
EPT_WRITE , addr , val );
284
332
}
285
333
286
- static bool handle_mmio (struct pt_regs * regs , struct ve_info * ve )
334
+ static int handle_mmio (struct pt_regs * regs , struct ve_info * ve )
287
335
{
288
336
char buffer [MAX_INSN_SIZE ];
289
337
unsigned long * reg , val ;
@@ -294,34 +342,36 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
294
342
295
343
/* Only in-kernel MMIO is supported */
296
344
if (WARN_ON_ONCE (user_mode (regs )))
297
- return false ;
345
+ return - EFAULT ;
298
346
299
347
if (copy_from_kernel_nofault (buffer , (void * )regs -> ip , MAX_INSN_SIZE ))
300
- return false ;
348
+ return - EFAULT ;
301
349
302
350
if (insn_decode (& insn , buffer , MAX_INSN_SIZE , INSN_MODE_64 ))
303
- return false ;
351
+ return - EINVAL ;
304
352
305
353
mmio = insn_decode_mmio (& insn , & size );
306
354
if (WARN_ON_ONCE (mmio == MMIO_DECODE_FAILED ))
307
- return false ;
355
+ return - EINVAL ;
308
356
309
357
if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS ) {
310
358
reg = insn_get_modrm_reg_ptr (& insn , regs );
311
359
if (!reg )
312
- return false ;
360
+ return - EINVAL ;
313
361
}
314
362
315
- ve -> instr_len = insn .length ;
316
-
317
363
/* Handle writes first */
318
364
switch (mmio ) {
319
365
case MMIO_WRITE :
320
366
memcpy (& val , reg , size );
321
- return mmio_write (size , ve -> gpa , val );
367
+ if (!mmio_write (size , ve -> gpa , val ))
368
+ return - EIO ;
369
+ return insn .length ;
322
370
case MMIO_WRITE_IMM :
323
371
val = insn .immediate .value ;
324
- return mmio_write (size , ve -> gpa , val );
372
+ if (!mmio_write (size , ve -> gpa , val ))
373
+ return - EIO ;
374
+ return insn .length ;
325
375
case MMIO_READ :
326
376
case MMIO_READ_ZERO_EXTEND :
327
377
case MMIO_READ_SIGN_EXTEND :
@@ -334,15 +384,15 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
334
384
* decoded or handled properly. It was likely not using io.h
335
385
* helpers or accessed MMIO accidentally.
336
386
*/
337
- return false ;
387
+ return - EINVAL ;
338
388
default :
339
389
WARN_ONCE (1 , "Unknown insn_decode_mmio() decode value?" );
340
- return false ;
390
+ return - EINVAL ;
341
391
}
342
392
343
393
/* Handle reads */
344
394
if (!mmio_read (size , ve -> gpa , & val ))
345
- return false ;
395
+ return - EIO ;
346
396
347
397
switch (mmio ) {
348
398
case MMIO_READ :
@@ -364,13 +414,13 @@ static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
364
414
default :
365
415
/* All other cases have to be covered by the first switch() */
366
416
WARN_ON_ONCE (1 );
367
- return false ;
417
+ return - EINVAL ;
368
418
}
369
419
370
420
if (extend_size )
371
421
memset (reg , extend_val , extend_size );
372
422
memcpy (reg , & val , size );
373
- return true ;
423
+ return insn . length ;
374
424
}
375
425
376
426
static bool handle_in (struct pt_regs * regs , int size , int port )
@@ -421,23 +471,28 @@ static bool handle_out(struct pt_regs *regs, int size, int port)
421
471
*
422
472
* Return the number of bytes RIP should be incremented (>=0) on success or -errno on failure.
423
473
*/
424
- static bool handle_io (struct pt_regs * regs , u32 exit_qual )
474
+ static int handle_io (struct pt_regs * regs , struct ve_info * ve )
425
475
{
476
+ u32 exit_qual = ve -> exit_qual ;
426
477
int size , port ;
427
- bool in ;
478
+ bool in , ret ;
428
479
429
480
if (VE_IS_IO_STRING (exit_qual ))
430
- return false ;
481
+ return - EIO ; /* exit qualification flags string I/O: not handled here */
431
482
432
483
in = VE_IS_IO_IN (exit_qual );
433
484
size = VE_GET_IO_SIZE (exit_qual );
434
485
port = VE_GET_PORT_NUM (exit_qual );
435
486
436
487
437
488
if (in )
438
- return handle_in (regs , size , port );
489
+ ret = handle_in (regs , size , port );
439
490
else
440
- return handle_out (regs , size , port );
491
+ ret = handle_out (regs , size , port );
492
+ if (!ret )
493
+ return - EIO ; /* handle_in()/handle_out() reported failure */
494
+
495
+ return ve_instr_len (ve ); /* success: hand back the length the caller adds to regs->ip */
441
496
}
442
497
443
498
/*
@@ -447,17 +502,19 @@ static bool handle_io(struct pt_regs *regs, u32 exit_qual)
447
502
__init bool tdx_early_handle_ve (struct pt_regs * regs )
448
503
{
449
504
struct ve_info ve ;
450
- bool ret ;
505
+ int insn_len ; /* bytes to advance regs->ip by, or a negative errno from handle_io() */
451
506
452
507
tdx_get_ve_info (& ve );
453
508
454
509
if (ve .exit_reason != EXIT_REASON_IO_INSTRUCTION )
455
510
return false; /* any #VE other than an I/O instruction is left unhandled */
456
511
457
- ret = handle_io (regs , ve .exit_qual );
458
- if (ret )
459
- regs -> ip += ve .instr_len ;
460
- return ret ;
512
+ insn_len = handle_io (regs , & ve );
513
+ if (insn_len < 0 )
514
+ return false; /* emulation failed: regs->ip is left unchanged */
515
+
516
+ regs -> ip += insn_len ; /* step over the emulated instruction */
517
+ return true;
461
518
}
462
519
463
520
void tdx_get_ve_info (struct ve_info * ve )
@@ -490,54 +547,65 @@ void tdx_get_ve_info(struct ve_info *ve)
490
547
ve -> instr_info = upper_32_bits (out .r10 );
491
548
}
492
549
493
- /* Handle the user initiated #VE */
494
- static bool virt_exception_user (struct pt_regs * regs , struct ve_info * ve )
550
+ /*
551
+ * Handle the user initiated #VE.
552
+ *
+ * Only EXIT_REASON_CPUID is handled; any other exit reason is logged
+ * with pr_warn() and fails with -EIO.
+ *
553
+ * On success, returns the number of bytes by which RIP should be
554
+ * incremented (>=0); on error, returns -errno.
555
+ */
556
+ static int virt_exception_user (struct pt_regs * regs , struct ve_info * ve )
495
557
{
496
558
switch (ve -> exit_reason ) {
497
559
case EXIT_REASON_CPUID :
498
- return handle_cpuid (regs );
560
+ return handle_cpuid (regs , ve );
499
561
default :
500
562
pr_warn ("Unexpected #VE: %lld\n" , ve -> exit_reason );
501
- return false ;
563
+ return - EIO ;
502
564
}
503
565
}
504
566
505
- /* Handle the kernel #VE */
506
- static bool virt_exception_kernel (struct pt_regs * regs , struct ve_info * ve )
567
+ /*
568
+ * Handle the kernel #VE.
569
+ *
+ * Dispatches on ve->exit_reason to the matching handler and passes its
+ * return value through unchanged. An exit reason with no handler is
+ * logged with pr_warn() and fails with -EIO.
+ *
570
+ * On success, returns the number of bytes by which RIP should be
571
+ * incremented (>=0); on error, returns -errno.
572
+ */
573
+ static int virt_exception_kernel (struct pt_regs * regs , struct ve_info * ve )
507
574
{
508
575
switch (ve -> exit_reason ) {
509
576
case EXIT_REASON_HLT :
510
- return handle_halt ();
577
+ return handle_halt (ve );
511
578
case EXIT_REASON_MSR_READ :
512
- return read_msr (regs );
579
+ return read_msr (regs , ve );
513
580
case EXIT_REASON_MSR_WRITE :
514
- return write_msr (regs );
581
+ return write_msr (regs , ve );
515
582
case EXIT_REASON_CPUID :
516
- return handle_cpuid (regs );
583
+ return handle_cpuid (regs , ve );
517
584
case EXIT_REASON_EPT_VIOLATION :
518
585
return handle_mmio (regs , ve ); /* MMIO: length comes from the decoded instruction, not ve->instr_len */
519
586
case EXIT_REASON_IO_INSTRUCTION :
520
- return handle_io (regs , ve -> exit_qual );
587
+ return handle_io (regs , ve );
521
588
default :
522
589
pr_warn ("Unexpected #VE: %lld\n" , ve -> exit_reason );
523
- return false ;
590
+ return - EIO ;
524
591
}
525
592
}
526
593
527
594
/*
 * Dispatch a #VE to virt_exception_user() or virt_exception_kernel()
 * depending on user_mode(regs). If the handler returns a negative value,
 * return false without touching regs->ip; otherwise advance regs->ip by
 * the returned length and return true.
 */
bool tdx_handle_virt_exception (struct pt_regs * regs , struct ve_info * ve )
528
595
{
529
- bool ret ;
596
+ int insn_len ; /* bytes to advance regs->ip by, or a negative errno */
530
597
531
598
if (user_mode (regs ))
532
- ret = virt_exception_user (regs , ve );
599
+ insn_len = virt_exception_user (regs , ve );
533
600
else
534
- ret = virt_exception_kernel (regs , ve );
601
+ insn_len = virt_exception_kernel (regs , ve );
602
+ if (insn_len < 0 )
603
+ return false; /* handler failed: regs->ip is left unchanged */
535
604
536
605
/* After successful #VE handling, move the IP by the length the handler returned */
537
- if (ret )
538
- regs -> ip += ve -> instr_len ;
606
+ regs -> ip += insn_len ;
539
607
540
- return ret ;
608
+ return true ;
541
609
}
542
610
543
611
static bool tdx_tlb_flush_required (bool private )
0 commit comments