Skip to content

Commit 9bd8d7d

Browse files
committed
Merge branch kvm-arm64/vfio-normal-nc into kvmarm/next
* kvm-arm64/vfio-normal-nc:
  : Normal-NC support for vfio-pci @ stage-2, courtesy of Ankit Agrawal
  :
  : KVM's policy to date has been that any and all MMIO mapping at stage-2
  : is treated as Device-nGnRE. This is primarily done due to concerns of
  : the guest triggering uncontainable failures in the system if they manage
  : to tickle the device / memory system the wrong way, though this is
  : unnecessarily restrictive for devices that can be reasoned as 'safe'.
  :
  : Unsurprisingly, the Device-* mapping can really hurt the performance of
  : assigned devices that can handle Gathering, and can be an outright
  : correctness issue if the guest driver does unaligned accesses.
  :
  : Rather than opening the floodgates to the full ecosystem of devices that
  : can be exposed to VMs, take the conservative approach and allow PCI
  : devices to be mapped as Normal-NC since it has been determined to be
  : 'safe'.
  vfio: Convey kvm that the vfio-pci device is wc safe
  KVM: arm64: Set io memory s2 pte as normalnc for vfio pci device
  mm: Introduce new flag to indicate wc safe
  KVM: arm64: Introduce new flag for non-cacheable IO memory

Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
2 parents 8dbc411 + a39d3a9 commit 9bd8d7d

File tree

6 files changed

+65
-10
lines changed

6 files changed

+65
-10
lines changed

arch/arm64/include/asm/kvm_pgtable.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,7 @@ enum kvm_pgtable_stage2_flags {
197197
* @KVM_PGTABLE_PROT_W: Write permission.
198198
* @KVM_PGTABLE_PROT_R: Read permission.
199199
* @KVM_PGTABLE_PROT_DEVICE: Device attributes.
200+
* @KVM_PGTABLE_PROT_NORMAL_NC: Normal noncacheable attributes.
200201
* @KVM_PGTABLE_PROT_SW0: Software bit 0.
201202
* @KVM_PGTABLE_PROT_SW1: Software bit 1.
202203
* @KVM_PGTABLE_PROT_SW2: Software bit 2.
@@ -208,6 +209,7 @@ enum kvm_pgtable_prot {
208209
KVM_PGTABLE_PROT_R = BIT(2),
209210

210211
KVM_PGTABLE_PROT_DEVICE = BIT(3),
212+
KVM_PGTABLE_PROT_NORMAL_NC = BIT(4),
211213

212214
KVM_PGTABLE_PROT_SW0 = BIT(55),
213215
KVM_PGTABLE_PROT_SW1 = BIT(56),

arch/arm64/include/asm/memory.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,13 +173,15 @@
173173
* Memory types for Stage-2 translation
174174
*/
175175
#define MT_S2_NORMAL 0xf
176+
#define MT_S2_NORMAL_NC 0x5
176177
#define MT_S2_DEVICE_nGnRE 0x1
177178

178179
/*
179180
* Memory types for Stage-2 translation when ID_AA64MMFR2_EL1.FWB is 0001
180181
* Stage-2 enforces Normal-WB and Device-nGnRE
181182
*/
182183
#define MT_S2_FWB_NORMAL 6
184+
#define MT_S2_FWB_NORMAL_NC 5
183185
#define MT_S2_FWB_DEVICE_nGnRE 1
184186

185187
#ifdef CONFIG_ARM64_4K_PAGES

arch/arm64/kvm/hyp/pgtable.c

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -717,15 +717,29 @@ void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
717717
static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
718718
kvm_pte_t *ptep)
719719
{
720-
bool device = prot & KVM_PGTABLE_PROT_DEVICE;
721-
kvm_pte_t attr = device ? KVM_S2_MEMATTR(pgt, DEVICE_nGnRE) :
722-
KVM_S2_MEMATTR(pgt, NORMAL);
720+
kvm_pte_t attr;
723721
u32 sh = KVM_PTE_LEAF_ATTR_LO_S2_SH_IS;
724722

723+
switch (prot & (KVM_PGTABLE_PROT_DEVICE |
724+
KVM_PGTABLE_PROT_NORMAL_NC)) {
725+
case KVM_PGTABLE_PROT_DEVICE | KVM_PGTABLE_PROT_NORMAL_NC:
726+
return -EINVAL;
727+
case KVM_PGTABLE_PROT_DEVICE:
728+
if (prot & KVM_PGTABLE_PROT_X)
729+
return -EINVAL;
730+
attr = KVM_S2_MEMATTR(pgt, DEVICE_nGnRE);
731+
break;
732+
case KVM_PGTABLE_PROT_NORMAL_NC:
733+
if (prot & KVM_PGTABLE_PROT_X)
734+
return -EINVAL;
735+
attr = KVM_S2_MEMATTR(pgt, NORMAL_NC);
736+
break;
737+
default:
738+
attr = KVM_S2_MEMATTR(pgt, NORMAL);
739+
}
740+
725741
if (!(prot & KVM_PGTABLE_PROT_X))
726742
attr |= KVM_PTE_LEAF_ATTR_HI_S2_XN;
727-
else if (device)
728-
return -EINVAL;
729743

730744
if (prot & KVM_PGTABLE_PROT_R)
731745
attr |= KVM_PTE_LEAF_ATTR_LO_S2_S2AP_R;

arch/arm64/kvm/mmu.c

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1381,7 +1381,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
13811381
int ret = 0;
13821382
bool write_fault, writable, force_pte = false;
13831383
bool exec_fault, mte_allowed;
1384-
bool device = false;
1384+
bool device = false, vfio_allow_any_uc = false;
13851385
unsigned long mmu_seq;
13861386
struct kvm *kvm = vcpu->kvm;
13871387
struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
@@ -1472,6 +1472,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
14721472
gfn = fault_ipa >> PAGE_SHIFT;
14731473
mte_allowed = kvm_vma_mte_allowed(vma);
14741474

1475+
vfio_allow_any_uc = vma->vm_flags & VM_ALLOW_ANY_UNCACHED;
1476+
14751477
/* Don't use the VMA after the unlock -- it may have vanished */
14761478
vma = NULL;
14771479

@@ -1557,10 +1559,14 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
15571559
if (exec_fault)
15581560
prot |= KVM_PGTABLE_PROT_X;
15591561

1560-
if (device)
1561-
prot |= KVM_PGTABLE_PROT_DEVICE;
1562-
else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC))
1562+
if (device) {
1563+
if (vfio_allow_any_uc)
1564+
prot |= KVM_PGTABLE_PROT_NORMAL_NC;
1565+
else
1566+
prot |= KVM_PGTABLE_PROT_DEVICE;
1567+
} else if (cpus_have_final_cap(ARM64_HAS_CACHE_DIC)) {
15631568
prot |= KVM_PGTABLE_PROT_X;
1569+
}
15641570

15651571
/*
15661572
* Under the premise of getting a FSC_PERM fault, we just need to relax

drivers/vfio/pci/vfio_pci_core.c

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1862,8 +1862,25 @@ int vfio_pci_core_mmap(struct vfio_device *core_vdev, struct vm_area_struct *vma
18621862
/*
18631863
* See remap_pfn_range(), called from vfio_pci_fault() but we can't
18641864
* change vm_flags within the fault handler. Set them now.
1865+
*
1866+
* VM_ALLOW_ANY_UNCACHED: The VMA flag is implemented for ARM64,
1867+
* allowing KVM stage 2 device mapping attributes to use Normal-NC
1868+
* rather than DEVICE_nGnRE, which allows guest mappings
1869+
* supporting write-combining attributes (WC). ARM does not
1870+
* architecturally guarantee this is safe, and indeed some MMIO
1871+
* regions like the GICv2 VCPU interface can trigger uncontained
1872+
* faults if Normal-NC is used.
1873+
*
1874+
* To safely use VFIO in KVM the platform must guarantee full
1875+
* safety in the guest where no action taken against a MMIO
1876+
* mapping can trigger an uncontained failure. The assumption is
1877+
* that most VFIO PCI platforms support this for both mapping types,
1878+
* at least in common flows, based on some expectations of how
1879+
* PCI IP is integrated. Hence VM_ALLOW_ANY_UNCACHED is set in
1880+
* the VMA flags.
18651881
*/
1866-
vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP);
1882+
vm_flags_set(vma, VM_ALLOW_ANY_UNCACHED | VM_IO | VM_PFNMAP |
1883+
VM_DONTEXPAND | VM_DONTDUMP);
18671884
vma->vm_ops = &vfio_pci_mmap_ops;
18681885

18691886
return 0;

include/linux/mm.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,6 +391,20 @@ extern unsigned int kobjsize(const void *objp);
391391
# define VM_UFFD_MINOR VM_NONE
392392
#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
393393

394+
/*
395+
* This flag is used to connect VFIO to arch specific KVM code. It
396+
* indicates that the memory under this VMA is safe for use with any
397+
* non-cacheable memory type inside KVM. Some VFIO devices, on some
398+
* platforms, are thought to be unsafe and can cause machine crashes
399+
* if KVM does not lock down the memory type.
400+
*/
401+
#ifdef CONFIG_64BIT
402+
#define VM_ALLOW_ANY_UNCACHED_BIT 39
403+
#define VM_ALLOW_ANY_UNCACHED BIT(VM_ALLOW_ANY_UNCACHED_BIT)
404+
#else
405+
#define VM_ALLOW_ANY_UNCACHED VM_NONE
406+
#endif
407+
394408
/* Bits set in the VMA until the stack is in its final location */
395409
#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
396410

0 commit comments

Comments
 (0)