Skip to content

Commit 26a62b7

Browse files
aik authored and mpe committed
KVM: PPC: Fix TCE handling for VFIO
The LoPAPR spec defines a guest visible IOMMU with a variable page size. Currently QEMU advertises 4K, 64K, 2M, 16MB pages, a Linux VM picks the biggest (16MB). In the case of a passed though PCI device, there is a hardware IOMMU which does not support all pages sizes from the above - P8 cannot do 2MB and P9 cannot do 16MB. So for each emulated 16M IOMMU page we may create several smaller mappings ("TCEs") in the hardware IOMMU. The code wrongly uses the emulated TCE index instead of hardware TCE index in error handling. The problem is easier to see on POWER8 with multi-level TCE tables (when only the first level is preallocated) as hash mode uses real mode TCE hypercalls handlers. The kernel starts using indirect tables when VMs get bigger than 128GB (depends on the max page order). The very first real mode hcall is going to fail with H_TOO_HARD as in the real mode we cannot allocate memory for TCEs (we can in the virtual mode) but on the way out the code attempts to clear hardware TCEs using emulated TCE indexes which corrupts random kernel memory because it_offset==1<<59 is subtracted from those indexes and the resulting index is out of the TCE table bounds. This fixes kvmppc_clear_tce() to use the correct TCE indexes. While at it, this fixes TCE cache invalidation which uses emulated TCE indexes instead of the hardware ones. This went unnoticed as 64bit DMA is used these days and VMs map all RAM in one go and only then do DMA and this is when the TCE cache gets populated. Potentially this could slow down mapping, however normally 16MB emulated pages are backed by 64K hardware pages so it is one write to the "TCE Kill" per 256 updates which is not that bad considering the size of the cache (1024 TCEs or so). 
Fixes: ca1fc48 ("KVM: PPC: Book3S: Allow backing bigger guest IOMMU pages with smaller physical pages") Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> Tested-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Frederic Barrat <fbarrat@linux.ibm.com> Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au> Link: https://lore.kernel.org/r/20220420050840.328223-1-aik@ozlabs.ru
1 parent d2b9be1 commit 26a62b7

File tree

2 files changed

+45
-44
lines changed

2 files changed

+45
-44
lines changed

arch/powerpc/kvm/book3s_64_vio.c

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -420,13 +420,19 @@ static void kvmppc_tce_put(struct kvmppc_spapr_tce_table *stt,
420420
tbl[idx % TCES_PER_PAGE] = tce;
421421
}
422422

423-
static void kvmppc_clear_tce(struct mm_struct *mm, struct iommu_table *tbl,
424-
unsigned long entry)
423+
static void kvmppc_clear_tce(struct mm_struct *mm, struct kvmppc_spapr_tce_table *stt,
424+
struct iommu_table *tbl, unsigned long entry)
425425
{
426-
unsigned long hpa = 0;
427-
enum dma_data_direction dir = DMA_NONE;
426+
unsigned long i;
427+
unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
428+
unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift);
429+
430+
for (i = 0; i < subpages; ++i) {
431+
unsigned long hpa = 0;
432+
enum dma_data_direction dir = DMA_NONE;
428433

429-
iommu_tce_xchg_no_kill(mm, tbl, entry, &hpa, &dir);
434+
iommu_tce_xchg_no_kill(mm, tbl, io_entry + i, &hpa, &dir);
435+
}
430436
}
431437

432438
static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -485,6 +491,8 @@ static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
485491
break;
486492
}
487493

494+
iommu_tce_kill(tbl, io_entry, subpages);
495+
488496
return ret;
489497
}
490498

@@ -544,6 +552,8 @@ static long kvmppc_tce_iommu_map(struct kvm *kvm,
544552
break;
545553
}
546554

555+
iommu_tce_kill(tbl, io_entry, subpages);
556+
547557
return ret;
548558
}
549559

@@ -590,10 +600,9 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
590600
ret = kvmppc_tce_iommu_map(vcpu->kvm, stt, stit->tbl,
591601
entry, ua, dir);
592602

593-
iommu_tce_kill(stit->tbl, entry, 1);
594603

595604
if (ret != H_SUCCESS) {
596-
kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
605+
kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry);
597606
goto unlock_exit;
598607
}
599608
}
@@ -669,13 +678,13 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
669678
*/
670679
if (get_user(tce, tces + i)) {
671680
ret = H_TOO_HARD;
672-
goto invalidate_exit;
681+
goto unlock_exit;
673682
}
674683
tce = be64_to_cpu(tce);
675684

676685
if (kvmppc_tce_to_ua(vcpu->kvm, tce, &ua)) {
677686
ret = H_PARAMETER;
678-
goto invalidate_exit;
687+
goto unlock_exit;
679688
}
680689

681690
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -684,19 +693,15 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
684693
iommu_tce_direction(tce));
685694

686695
if (ret != H_SUCCESS) {
687-
kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl,
688-
entry);
689-
goto invalidate_exit;
696+
kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl,
697+
entry + i);
698+
goto unlock_exit;
690699
}
691700
}
692701

693702
kvmppc_tce_put(stt, entry + i, tce);
694703
}
695704

696-
invalidate_exit:
697-
list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
698-
iommu_tce_kill(stit->tbl, entry, npages);
699-
700705
unlock_exit:
701706
srcu_read_unlock(&vcpu->kvm->srcu, idx);
702707

@@ -735,20 +740,16 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
735740
continue;
736741

737742
if (ret == H_TOO_HARD)
738-
goto invalidate_exit;
743+
return ret;
739744

740745
WARN_ON_ONCE(1);
741-
kvmppc_clear_tce(vcpu->kvm->mm, stit->tbl, entry);
746+
kvmppc_clear_tce(vcpu->kvm->mm, stt, stit->tbl, entry + i);
742747
}
743748
}
744749

745750
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
746751
kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
747752

748-
invalidate_exit:
749-
list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
750-
iommu_tce_kill(stit->tbl, ioba >> stt->page_shift, npages);
751-
752753
return ret;
753754
}
754755
EXPORT_SYMBOL_GPL(kvmppc_h_stuff_tce);

arch/powerpc/kvm/book3s_64_vio_hv.c

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -247,13 +247,19 @@ static void iommu_tce_kill_rm(struct iommu_table *tbl,
247247
tbl->it_ops->tce_kill(tbl, entry, pages, true);
248248
}
249249

250-
static void kvmppc_rm_clear_tce(struct kvm *kvm, struct iommu_table *tbl,
251-
unsigned long entry)
250+
static void kvmppc_rm_clear_tce(struct kvm *kvm, struct kvmppc_spapr_tce_table *stt,
251+
struct iommu_table *tbl, unsigned long entry)
252252
{
253-
unsigned long hpa = 0;
254-
enum dma_data_direction dir = DMA_NONE;
253+
unsigned long i;
254+
unsigned long subpages = 1ULL << (stt->page_shift - tbl->it_page_shift);
255+
unsigned long io_entry = entry << (stt->page_shift - tbl->it_page_shift);
256+
257+
for (i = 0; i < subpages; ++i) {
258+
unsigned long hpa = 0;
259+
enum dma_data_direction dir = DMA_NONE;
255260

256-
iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, entry, &hpa, &dir);
261+
iommu_tce_xchg_no_kill_rm(kvm->mm, tbl, io_entry + i, &hpa, &dir);
262+
}
257263
}
258264

259265
static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
@@ -316,6 +322,8 @@ static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm,
316322
break;
317323
}
318324

325+
iommu_tce_kill_rm(tbl, io_entry, subpages);
326+
319327
return ret;
320328
}
321329

@@ -379,6 +387,8 @@ static long kvmppc_rm_tce_iommu_map(struct kvm *kvm,
379387
break;
380388
}
381389

390+
iommu_tce_kill_rm(tbl, io_entry, subpages);
391+
382392
return ret;
383393
}
384394

@@ -420,10 +430,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
420430
ret = kvmppc_rm_tce_iommu_map(vcpu->kvm, stt,
421431
stit->tbl, entry, ua, dir);
422432

423-
iommu_tce_kill_rm(stit->tbl, entry, 1);
424-
425433
if (ret != H_SUCCESS) {
426-
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
434+
kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry);
427435
return ret;
428436
}
429437
}
@@ -561,7 +569,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
561569
ua = 0;
562570
if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua)) {
563571
ret = H_PARAMETER;
564-
goto invalidate_exit;
572+
goto unlock_exit;
565573
}
566574

567575
list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
@@ -570,19 +578,15 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
570578
iommu_tce_direction(tce));
571579

572580
if (ret != H_SUCCESS) {
573-
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl,
574-
entry);
575-
goto invalidate_exit;
581+
kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl,
582+
entry + i);
583+
goto unlock_exit;
576584
}
577585
}
578586

579587
kvmppc_rm_tce_put(stt, entry + i, tce);
580588
}
581589

582-
invalidate_exit:
583-
list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
584-
iommu_tce_kill_rm(stit->tbl, entry, npages);
585-
586590
unlock_exit:
587591
if (!prereg)
588592
arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock);
@@ -620,20 +624,16 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
620624
continue;
621625

622626
if (ret == H_TOO_HARD)
623-
goto invalidate_exit;
627+
return ret;
624628

625629
WARN_ON_ONCE_RM(1);
626-
kvmppc_rm_clear_tce(vcpu->kvm, stit->tbl, entry);
630+
kvmppc_rm_clear_tce(vcpu->kvm, stt, stit->tbl, entry + i);
627631
}
628632
}
629633

630634
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
631635
kvmppc_rm_tce_put(stt, ioba >> stt->page_shift, tce_value);
632636

633-
invalidate_exit:
634-
list_for_each_entry_lockless(stit, &stt->iommu_tables, next)
635-
iommu_tce_kill_rm(stit->tbl, ioba >> stt->page_shift, npages);
636-
637637
return ret;
638638
}
639639

0 commit comments

Comments
 (0)