Skip to content

Commit 67e4fe3

Browse files
committed
iommu/arm-smmu-v3: Use S2FWB for NESTED domains
Force Write Back (FWB) changes how the S2 IOPTE's MemAttr field works. When S2FWB is supported and enabled, the IOPTE will force cacheable access to IOMMU_CACHE memory when nesting with an S1 and deny cacheable access when !IOMMU_CACHE. When using a single stage of translation, a simple S2 domain, it doesn't change things for PCI devices, as it is just a different encoding for the existing mapping of the IOMMU protection flags to cacheability attributes. For non-PCI it also changes the combining rules when incoming transactions have inconsistent attributes. However, when used with a nested S1, FWB has the effect of preventing the guest from choosing a MemAttr in its S1 that would cause ordinary DMA to bypass the cache. Consistent with KVM, we wish to deny the guest the ability to become incoherent with cached memory the hypervisor believes is cacheable, so we don't have to flush it. Allow NESTED domains to be created if the SMMU has S2FWB support and use S2FWB for NESTING_PARENTS. This is an additional option to CANWBS. Link: https://patch.msgid.link/r/10-v4-9e99b76f3518+3a8-smmuv3_nesting_jgg@nvidia.com Reviewed-by: Nicolin Chen <nicolinc@nvidia.com> Reviewed-by: Kevin Tian <kevin.tian@intel.com> Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com> Reviewed-by: Donald Dutile <ddutile@redhat.com> Tested-by: Nicolin Chen <nicolinc@nvidia.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
1 parent 1e8be08 commit 67e4fe3

File tree

5 files changed

+38
-9
lines changed

5 files changed

+38
-9
lines changed

drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -220,9 +220,12 @@ struct iommufd_viommu *arm_vsmmu_alloc(struct device *dev,
220220
* Must support some way to prevent the VM from bypassing the cache
221221
* because VFIO currently does not do any cache maintenance. canwbs
222222
* indicates the device is fully coherent and no cache maintenance is
223-
* ever required, even for PCI No-Snoop.
223+
* ever required, even for PCI No-Snoop. S2FWB means the S1 can't make
224+
* things non-coherent using the memattr, but No-Snoop behavior is not
225+
* affected.
224226
*/
225-
if (!arm_smmu_master_canwbs(master))
227+
if (!arm_smmu_master_canwbs(master) &&
228+
!(smmu->features & ARM_SMMU_FEAT_S2FWB))
226229
return ERR_PTR(-EOPNOTSUPP);
227230

228231
vsmmu = iommufd_viommu_alloc(ictx, struct arm_vsmmu, core,

drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1046,7 +1046,8 @@ void arm_smmu_get_ste_used(const __le64 *ent, __le64 *used_bits)
10461046
/* S2 translates */
10471047
if (cfg & BIT(1)) {
10481048
used_bits[1] |=
1049-
cpu_to_le64(STRTAB_STE_1_EATS | STRTAB_STE_1_SHCFG);
1049+
cpu_to_le64(STRTAB_STE_1_S2FWB | STRTAB_STE_1_EATS |
1050+
STRTAB_STE_1_SHCFG);
10501051
used_bits[2] |=
10511052
cpu_to_le64(STRTAB_STE_2_S2VMID | STRTAB_STE_2_VTCR |
10521053
STRTAB_STE_2_S2AA64 | STRTAB_STE_2_S2ENDI |
@@ -1654,6 +1655,8 @@ void arm_smmu_make_s2_domain_ste(struct arm_smmu_ste *target,
16541655
FIELD_PREP(STRTAB_STE_1_EATS,
16551656
ats_enabled ? STRTAB_STE_1_EATS_TRANS : 0));
16561657

1658+
if (pgtbl_cfg->quirks & IO_PGTABLE_QUIRK_ARM_S2FWB)
1659+
target->data[1] |= cpu_to_le64(STRTAB_STE_1_S2FWB);
16571660
if (smmu->features & ARM_SMMU_FEAT_ATTR_TYPES_OVR)
16581661
target->data[1] |= cpu_to_le64(FIELD_PREP(STRTAB_STE_1_SHCFG,
16591662
STRTAB_STE_1_SHCFG_INCOMING));
@@ -2472,6 +2475,9 @@ static int arm_smmu_domain_finalise(struct arm_smmu_domain *smmu_domain,
24722475
pgtbl_cfg.oas = smmu->oas;
24732476
fmt = ARM_64_LPAE_S2;
24742477
finalise_stage_fn = arm_smmu_domain_finalise_s2;
2478+
if ((smmu->features & ARM_SMMU_FEAT_S2FWB) &&
2479+
(flags & IOMMU_HWPT_ALLOC_NEST_PARENT))
2480+
pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_ARM_S2FWB;
24752481
break;
24762482
default:
24772483
return -EINVAL;

drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ struct arm_smmu_device;
5858
#define IDR1_SIDSIZE GENMASK(5, 0)
5959

6060
#define ARM_SMMU_IDR3 0xc
61+
#define IDR3_FWB (1 << 8)
6162
#define IDR3_RIL (1 << 10)
6263

6364
#define ARM_SMMU_IDR5 0x14
@@ -265,6 +266,7 @@ static inline u32 arm_smmu_strtab_l2_idx(u32 sid)
265266
#define STRTAB_STE_1_S1COR GENMASK_ULL(5, 4)
266267
#define STRTAB_STE_1_S1CSH GENMASK_ULL(7, 6)
267268

269+
#define STRTAB_STE_1_S2FWB (1UL << 25)
268270
#define STRTAB_STE_1_S1STALLD (1UL << 27)
269271

270272
#define STRTAB_STE_1_EATS GENMASK_ULL(29, 28)
@@ -740,6 +742,7 @@ struct arm_smmu_device {
740742
#define ARM_SMMU_FEAT_ATTR_TYPES_OVR (1 << 20)
741743
#define ARM_SMMU_FEAT_HA (1 << 21)
742744
#define ARM_SMMU_FEAT_HD (1 << 22)
745+
#define ARM_SMMU_FEAT_S2FWB (1 << 23)
743746
u32 features;
744747

745748
#define ARM_SMMU_OPT_SKIP_PREFETCH (1 << 0)

drivers/iommu/io-pgtable-arm.c

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,18 @@
106106
#define ARM_LPAE_PTE_HAP_FAULT (((arm_lpae_iopte)0) << 6)
107107
#define ARM_LPAE_PTE_HAP_READ (((arm_lpae_iopte)1) << 6)
108108
#define ARM_LPAE_PTE_HAP_WRITE (((arm_lpae_iopte)2) << 6)
109+
/*
110+
* For !FWB these encode:
111+
* 1111 = Normal outer write back cachable / Inner Write Back Cachable
112+
* Permit S1 to override
113+
* 0101 = Normal Non-cachable / Inner Non-cachable
114+
* 0001 = Device / Device-nGnRE
115+
* For S2FWB these encode:
116+
* 0110 Force Normal Write Back
117+
* 0101 Normal* is forced Normal-NC, Device unchanged
118+
* 0001 Force Device-nGnRE
119+
*/
120+
#define ARM_LPAE_PTE_MEMATTR_FWB_WB (((arm_lpae_iopte)0x6) << 2)
109121
#define ARM_LPAE_PTE_MEMATTR_OIWB (((arm_lpae_iopte)0xf) << 2)
110122
#define ARM_LPAE_PTE_MEMATTR_NC (((arm_lpae_iopte)0x5) << 2)
111123
#define ARM_LPAE_PTE_MEMATTR_DEV (((arm_lpae_iopte)0x1) << 2)
@@ -458,12 +470,16 @@ static arm_lpae_iopte arm_lpae_prot_to_pte(struct arm_lpae_io_pgtable *data,
458470
*/
459471
if (data->iop.fmt == ARM_64_LPAE_S2 ||
460472
data->iop.fmt == ARM_32_LPAE_S2) {
461-
if (prot & IOMMU_MMIO)
473+
if (prot & IOMMU_MMIO) {
462474
pte |= ARM_LPAE_PTE_MEMATTR_DEV;
463-
else if (prot & IOMMU_CACHE)
464-
pte |= ARM_LPAE_PTE_MEMATTR_OIWB;
465-
else
475+
} else if (prot & IOMMU_CACHE) {
476+
if (data->iop.cfg.quirks & IO_PGTABLE_QUIRK_ARM_S2FWB)
477+
pte |= ARM_LPAE_PTE_MEMATTR_FWB_WB;
478+
else
479+
pte |= ARM_LPAE_PTE_MEMATTR_OIWB;
480+
} else {
466481
pte |= ARM_LPAE_PTE_MEMATTR_NC;
482+
}
467483
} else {
468484
if (prot & IOMMU_MMIO)
469485
pte |= (ARM_LPAE_MAIR_ATTR_IDX_DEV
@@ -1035,8 +1051,7 @@ arm_64_lpae_alloc_pgtable_s2(struct io_pgtable_cfg *cfg, void *cookie)
10351051
struct arm_lpae_io_pgtable *data;
10361052
typeof(&cfg->arm_lpae_s2_cfg.vtcr) vtcr = &cfg->arm_lpae_s2_cfg.vtcr;
10371053

1038-
/* The NS quirk doesn't apply at stage 2 */
1039-
if (cfg->quirks)
1054+
if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_S2FWB))
10401055
return NULL;
10411056

10421057
data = arm_lpae_alloc_pgtable(cfg);

include/linux/io-pgtable.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ struct io_pgtable_cfg {
8787
* attributes set in the TCR for a non-coherent page-table walker.
8888
*
8989
* IO_PGTABLE_QUIRK_ARM_HD: Enables dirty tracking in stage 1 pagetable.
90+
* IO_PGTABLE_QUIRK_ARM_S2FWB: Use the FWB format for the MemAttrs bits
9091
*/
9192
#define IO_PGTABLE_QUIRK_ARM_NS BIT(0)
9293
#define IO_PGTABLE_QUIRK_NO_PERMS BIT(1)
@@ -95,6 +96,7 @@ struct io_pgtable_cfg {
9596
#define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5)
9697
#define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6)
9798
#define IO_PGTABLE_QUIRK_ARM_HD BIT(7)
99+
#define IO_PGTABLE_QUIRK_ARM_S2FWB BIT(8)
98100
unsigned long quirks;
99101
unsigned long pgsize_bitmap;
100102
unsigned int ias;

0 commit comments

Comments
 (0)