
Commit f35f22c

jpemartins authored and jgunthorpe committed
iommu/vt-d: Access/Dirty bit support for SS domains
IOMMU advertises Access/Dirty bits for the second-stage page table if the
extended capability DMAR register reports it (ECAP, mnemonic ECAP.SSADS).
The first-stage table is compatible with the CPU page table, so A/D bits
are implicitly supported there. Relevant Intel IOMMU SDM references: for
the first-stage table, "3.6.2 Accessed, Extended Accessed, and Dirty
Flags"; for the second-stage table, "3.7.2 Accessed and Dirty Flags".

First-stage page tables track A/D bits unconditionally, so enabling dirty
tracking on them needs no control bits and simply returns 0. To use SSADS,
set bit 9 (SSADE) in the scalable-mode PASID table entry and flush the
IOTLB via pasid_flush_caches(), following the manual. Relevant SDM refs:

"3.7.2 Accessed and Dirty Flags"
"6.5.3.3 Guidance to Software for Invalidations,
 Table 23. Guidance to Software for Invalidations"

The PTE dirty bit is located in bit 9 and is cached in the IOTLB, so the
IOTLB must be flushed to make sure the IOMMU attempts to set the dirty bit
again. Note that iommu_dirty_bitmap_record() adds the IOVA to iotlb_gather,
so the caller of the iommu op flushes the IOTLB. The relevant manual
coverage of the hardware translation is chapter 6, with special mention of:

"6.2.3.1 Scalable-Mode PASID-Table Entry Programming Considerations"
"6.2.4 IOTLB"

Select IOMMUFD_DRIVER only if IOMMUFD is enabled, given that IOMMU dirty
tracking requires IOMMUFD.

Link: https://lore.kernel.org/r/20231024135109.73787-13-joao.m.martins@oracle.com
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
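
For orientation, the sketch below (not part of this commit) shows how a
consumer such as the iommufd core is expected to drive the two new
dirty_ops: harvest the dirty bits, then flush the IOTLB ranges gathered by
iommu_dirty_bitmap_record(). It assumes the iommu-core helpers
iommu_iotlb_gather_init(), iommu_dirty_bitmap_init() and iommu_iotlb_sync()
from the same kernel series; error handling and the iova_bitmap plumbing
are elided.

/*
 * Illustrative sketch only: how a caller might drive domain->dirty_ops.
 */
static int sketch_collect_dirty(struct iommu_domain *domain,
				struct iova_bitmap *bitmap,
				unsigned long iova, size_t size)
{
	struct iommu_iotlb_gather gather;
	struct iommu_dirty_bitmap dirty;
	int ret;

	iommu_iotlb_gather_init(&gather);
	iommu_dirty_bitmap_init(&dirty, bitmap, &gather);

	/* Harvest dirty PTEs; dirtied IOVAs are recorded into @bitmap */
	ret = domain->dirty_ops->read_and_clear_dirty(domain, iova, size,
						      0, &dirty);

	/*
	 * The dirty bit is cached in the IOTLB: flush the gathered ranges
	 * so the IOMMU attempts to set the bit again on further writes.
	 */
	iommu_iotlb_sync(domain, &gather);
	return ret;
}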
1 parent 421a511 commit f35f22c

File tree

5 files changed: 232 additions, 1 deletion


drivers/iommu/intel/Kconfig

Lines changed: 1 addition & 0 deletions
@@ -15,6 +15,7 @@ config INTEL_IOMMU
 	select DMA_OPS
 	select IOMMU_API
 	select IOMMU_IOVA
+	select IOMMUFD_DRIVER if IOMMUFD
 	select NEED_DMA_MAP_STATE
 	select DMAR_TABLE
 	select SWIOTLB

drivers/iommu/intel/iommu.c

Lines changed: 102 additions & 1 deletion
@@ -300,6 +300,7 @@ static int iommu_skip_te_disable;
 #define IDENTMAP_AZALIA		4
 
 const struct iommu_ops intel_iommu_ops;
+const struct iommu_dirty_ops intel_dirty_ops;
 
 static bool translation_pre_enabled(struct intel_iommu *iommu)
 {
@@ -4079,8 +4080,10 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags)
 {
 	struct iommu_domain *domain;
 	struct intel_iommu *iommu;
+	bool dirty_tracking;
 
-	if (flags & (~IOMMU_HWPT_ALLOC_NEST_PARENT))
+	if (flags &
+	    (~(IOMMU_HWPT_ALLOC_NEST_PARENT | IOMMU_HWPT_ALLOC_DIRTY_TRACKING)))
 		return ERR_PTR(-EOPNOTSUPP);
 
 	iommu = device_to_iommu(dev, NULL, NULL);
@@ -4090,6 +4093,10 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags)
 	if ((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) && !ecap_nest(iommu->ecap))
 		return ERR_PTR(-EOPNOTSUPP);
 
+	dirty_tracking = (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING);
+	if (dirty_tracking && !ssads_supported(iommu))
+		return ERR_PTR(-EOPNOTSUPP);
+
 	/*
 	 * domain_alloc_user op needs to fully initialize a domain
 	 * before return, so uses iommu_domain_alloc() here for
@@ -4098,6 +4105,15 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags)
 	domain = iommu_domain_alloc(dev->bus);
 	if (!domain)
 		domain = ERR_PTR(-ENOMEM);
+
+	if (!IS_ERR(domain) && dirty_tracking) {
+		if (to_dmar_domain(domain)->use_first_level) {
+			iommu_domain_free(domain);
+			return ERR_PTR(-EOPNOTSUPP);
+		}
+		domain->dirty_ops = &intel_dirty_ops;
+	}
+
 	return domain;
 }
 
@@ -4121,6 +4137,9 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
 	if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
 		return -EINVAL;
 
+	if (domain->dirty_ops && !ssads_supported(iommu))
+		return -EINVAL;
+
 	/* check if this iommu agaw is sufficient for max mapped address */
 	addr_width = agaw_to_width(iommu->agaw);
 	if (addr_width > cap_mgaw(iommu->cap))
@@ -4375,6 +4394,8 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
 		return dmar_platform_optin();
 	case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
 		return ecap_sc_support(info->iommu->ecap);
+	case IOMMU_CAP_DIRTY_TRACKING:
+		return ssads_supported(info->iommu);
 	default:
 		return false;
 	}
@@ -4772,6 +4793,9 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
 	if (!pasid_supported(iommu) || dev_is_real_dma_subdevice(dev))
 		return -EOPNOTSUPP;
 
+	if (domain->dirty_ops)
+		return -EINVAL;
+
 	if (context_copied(iommu, info->bus, info->devfn))
 		return -EBUSY;
 
@@ -4830,6 +4854,83 @@ static void *intel_iommu_hw_info(struct device *dev, u32 *length, u32 *type)
 	return vtd;
 }
 
+static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
+					  bool enable)
+{
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+	struct device_domain_info *info;
+	int ret;
+
+	spin_lock(&dmar_domain->lock);
+	if (dmar_domain->dirty_tracking == enable)
+		goto out_unlock;
+
+	list_for_each_entry(info, &dmar_domain->devices, link) {
+		ret = intel_pasid_setup_dirty_tracking(info->iommu,
+						       info->domain, info->dev,
+						       IOMMU_NO_PASID, enable);
+		if (ret)
+			goto err_unwind;
+	}
+
+	dmar_domain->dirty_tracking = enable;
+out_unlock:
+	spin_unlock(&dmar_domain->lock);
+
+	return 0;
+
+err_unwind:
+	list_for_each_entry(info, &dmar_domain->devices, link)
+		intel_pasid_setup_dirty_tracking(info->iommu, dmar_domain,
+						 info->dev, IOMMU_NO_PASID,
+						 dmar_domain->dirty_tracking);
+	spin_unlock(&dmar_domain->lock);
+	return ret;
+}
+
+static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain,
+					    unsigned long iova, size_t size,
+					    unsigned long flags,
+					    struct iommu_dirty_bitmap *dirty)
+{
+	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
+	unsigned long end = iova + size - 1;
+	unsigned long pgsize;
+
+	/*
+	 * IOMMUFD core calls into a dirty tracking disabled domain without an
+	 * IOVA bitmap set in order to clean dirty bits in all PTEs that might
+	 * have occurred when we stopped dirty tracking. This ensures that we
+	 * never inherit dirtied bits from a previous cycle.
+	 */
+	if (!dmar_domain->dirty_tracking && dirty->bitmap)
+		return -EINVAL;
+
+	do {
+		struct dma_pte *pte;
+		int lvl = 0;
+
+		pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &lvl,
+				     GFP_ATOMIC);
+		pgsize = level_size(lvl) << VTD_PAGE_SHIFT;
+		if (!pte || !dma_pte_present(pte)) {
+			iova += pgsize;
+			continue;
+		}
+
+		if (dma_sl_pte_test_and_clear_dirty(pte, flags))
+			iommu_dirty_bitmap_record(dirty, iova, pgsize);
+		iova += pgsize;
+	} while (iova < end);
+
+	return 0;
+}
+
+const struct iommu_dirty_ops intel_dirty_ops = {
+	.set_dirty_tracking = intel_iommu_set_dirty_tracking,
+	.read_and_clear_dirty = intel_iommu_read_and_clear_dirty,
+};
+
 const struct iommu_ops intel_iommu_ops = {
 	.capable = intel_iommu_capable,
 	.hw_info = intel_iommu_hw_info,
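
For completeness, the driver ops above surface to userspace through the
iommufd UAPI merged alongside this series. The following is a hedged
userspace sketch, not an authoritative reference: the ioctl names, struct
layouts and flags (IOMMU_HWPT_SET_DIRTY_TRACKING, IOMMU_HWPT_GET_DIRTY_BITMAP,
IOMMU_HWPT_DIRTY_TRACKING_ENABLE) are recalled from
include/uapi/linux/iommufd.h of that series and should be checked against
the header in your tree.

#include <stdbool.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/iommufd.h>

/* Toggle dirty tracking on a HWPT that was allocated with
 * IOMMU_HWPT_ALLOC_DIRTY_TRACKING. */
static int hwpt_set_dirty_tracking(int iommufd, uint32_t hwpt_id, bool enable)
{
	struct iommu_hwpt_set_dirty_tracking cmd = {
		.size = sizeof(cmd),
		.flags = enable ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0,
		.hwpt_id = hwpt_id,
	};

	return ioctl(iommufd, IOMMU_HWPT_SET_DIRTY_TRACKING, &cmd);
}

/* Read (and by default clear) dirty state for an IOVA range; @bitmap
 * holds one bit per @page_size unit. */
static int hwpt_get_dirty_bitmap(int iommufd, uint32_t hwpt_id, uint64_t iova,
				 uint64_t length, uint64_t page_size,
				 uint64_t *bitmap)
{
	struct iommu_hwpt_get_dirty_bitmap cmd = {
		.size = sizeof(cmd),
		.hwpt_id = hwpt_id,
		.iova = iova,
		.length = length,
		.page_size = page_size,
		.data = (uintptr_t)bitmap,
	};

	return ioctl(iommufd, IOMMU_HWPT_GET_DIRTY_BITMAP, &cmd);
}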

drivers/iommu/intel/iommu.h

Lines changed: 16 additions & 0 deletions
@@ -48,6 +48,9 @@
 #define DMA_FL_PTE_DIRTY	BIT_ULL(6)
 #define DMA_FL_PTE_XD		BIT_ULL(63)
 
+#define DMA_SL_PTE_DIRTY_BIT	9
+#define DMA_SL_PTE_DIRTY	BIT_ULL(DMA_SL_PTE_DIRTY_BIT)
+
 #define ADDR_WIDTH_5LEVEL	(57)
 #define ADDR_WIDTH_4LEVEL	(48)
 
@@ -539,6 +542,8 @@ enum {
 #define sm_supported(iommu)	(intel_iommu_sm && ecap_smts((iommu)->ecap))
 #define pasid_supported(iommu)	(sm_supported(iommu) &&			\
 				 ecap_pasid((iommu)->ecap))
+#define ssads_supported(iommu)	(sm_supported(iommu) &&			\
+				 ecap_slads((iommu)->ecap))
 
 struct pasid_entry;
 struct pasid_state_entry;
@@ -592,6 +597,7 @@ struct dmar_domain {
 					 * otherwise, goes through the second
 					 * level.
 					 */
+	u8 dirty_tracking:1;		/* Dirty tracking is enabled */
 
 	spinlock_t lock;		/* Protect device tracking lists */
 	struct list_head devices;	/* all devices' list */
@@ -781,6 +787,16 @@ static inline bool dma_pte_present(struct dma_pte *pte)
 	return (pte->val & 3) != 0;
 }
 
+static inline bool dma_sl_pte_test_and_clear_dirty(struct dma_pte *pte,
+						   unsigned long flags)
+{
+	if (flags & IOMMU_DIRTY_NO_CLEAR)
+		return (pte->val & DMA_SL_PTE_DIRTY) != 0;
+
+	return test_and_clear_bit(DMA_SL_PTE_DIRTY_BIT,
+				  (unsigned long *)&pte->val);
+}
+
 static inline bool dma_pte_superpage(struct dma_pte *pte)
 {
 	return (pte->val & DMA_PTE_LARGE_PAGE);
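
As a standalone illustration of the dma_sl_pte_test_and_clear_dirty()
semantics, here is a hypothetical userspace analogue: the clear must be
atomic because the IOMMU may set A/D bits in the PTE concurrently, while
IOMMU_DIRTY_NO_CLEAR only peeks at the bit (the flag's value below is
assumed for the demo). The kernel's test_and_clear_bit() is modeled with
C11 atomic_fetch_and().

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define DMA_SL_PTE_DIRTY_BIT	9
#define DMA_SL_PTE_DIRTY	(1ULL << DMA_SL_PTE_DIRTY_BIT)
#define IOMMU_DIRTY_NO_CLEAR	(1 << 0)	/* value assumed for illustration */

/* Userspace analogue of dma_sl_pte_test_and_clear_dirty() */
static int pte_test_and_clear_dirty(_Atomic uint64_t *pteval,
				    unsigned long flags)
{
	if (flags & IOMMU_DIRTY_NO_CLEAR)	/* peek without clearing */
		return (atomic_load(pteval) & DMA_SL_PTE_DIRTY) != 0;

	/* Atomically clear the bit and report whether it was set */
	return (atomic_fetch_and(pteval, ~DMA_SL_PTE_DIRTY) &
		DMA_SL_PTE_DIRTY) != 0;
}

int main(void)
{
	_Atomic uint64_t pte = 0x1000 | DMA_SL_PTE_DIRTY;

	printf("first read:  %d\n", pte_test_and_clear_dirty(&pte, 0)); /* 1 */
	printf("second read: %d\n", pte_test_and_clear_dirty(&pte, 0)); /* 0 */
	return 0;
}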

drivers/iommu/intel/pasid.c

Lines changed: 109 additions & 0 deletions
@@ -277,6 +277,11 @@ static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
 	WRITE_ONCE(*ptr, (old & ~mask) | bits);
 }
 
+static inline u64 pasid_get_bits(u64 *ptr)
+{
+	return READ_ONCE(*ptr);
+}
+
 /*
  * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode
  * PASID entry.
@@ -335,6 +340,36 @@ static inline void pasid_set_fault_enable(struct pasid_entry *pe)
 	pasid_set_bits(&pe->val[0], 1 << 1, 0);
 }
 
+/*
+ * Enable second level A/D bits by setting the SLADE (Second Level
+ * Access Dirty Enable) field (Bit 9) of a scalable mode PASID
+ * entry.
+ */
+static inline void pasid_set_ssade(struct pasid_entry *pe)
+{
+	pasid_set_bits(&pe->val[0], 1 << 9, 1 << 9);
+}
+
+/*
+ * Disable second level A/D bits by clearing the SLADE (Second Level
+ * Access Dirty Enable) field (Bit 9) of a scalable mode PASID
+ * entry.
+ */
+static inline void pasid_clear_ssade(struct pasid_entry *pe)
+{
+	pasid_set_bits(&pe->val[0], 1 << 9, 0);
+}
+
+/*
+ * Checks if second level A/D bits specifically the SLADE (Second Level
+ * Access Dirty Enable) field (Bit 9) of a scalable mode PASID
+ * entry is set.
+ */
+static inline bool pasid_get_ssade(struct pasid_entry *pe)
+{
+	return pasid_get_bits(&pe->val[0]) & (1 << 9);
+}
+
 /*
  * Setup the WPE(Write Protect Enable) field (Bit 132) of a
  * scalable mode PASID entry.
@@ -627,6 +662,8 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
 	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
 	pasid_set_fault_enable(pte);
 	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
+	if (domain->dirty_tracking)
+		pasid_set_ssade(pte);
 
 	pasid_set_present(pte);
 	spin_unlock(&iommu->lock);
@@ -636,6 +673,78 @@
 	return 0;
 }
 
+/*
+ * Set up dirty tracking on a second only or nested translation type.
+ */
+int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
+				     struct dmar_domain *domain,
+				     struct device *dev, u32 pasid,
+				     bool enabled)
+{
+	struct pasid_entry *pte;
+	u16 did, pgtt;
+
+	spin_lock(&iommu->lock);
+
+	pte = intel_pasid_get_entry(dev, pasid);
+	if (!pte) {
+		spin_unlock(&iommu->lock);
+		dev_err_ratelimited(
+			dev, "Failed to get pasid entry of PASID %d\n", pasid);
+		return -ENODEV;
+	}
+
+	did = domain_id_iommu(domain, iommu);
+	pgtt = pasid_pte_get_pgtt(pte);
+	if (pgtt != PASID_ENTRY_PGTT_SL_ONLY &&
+	    pgtt != PASID_ENTRY_PGTT_NESTED) {
+		spin_unlock(&iommu->lock);
+		dev_err_ratelimited(
+			dev,
+			"Dirty tracking not supported on translation type %d\n",
+			pgtt);
+		return -EOPNOTSUPP;
+	}
+
+	if (pasid_get_ssade(pte) == enabled) {
+		spin_unlock(&iommu->lock);
+		return 0;
+	}
+
+	if (enabled)
+		pasid_set_ssade(pte);
+	else
+		pasid_clear_ssade(pte);
+	spin_unlock(&iommu->lock);
+
+	if (!ecap_coherent(iommu->ecap))
+		clflush_cache_range(pte, sizeof(*pte));
+
+	/*
+	 * From VT-d spec table 25 "Guidance to Software for Invalidations":
+	 *
+	 * - PASID-selective-within-Domain PASID-cache invalidation
+	 *   If (PGTT=SS or Nested)
+	 *    - Domain-selective IOTLB invalidation
+	 *   Else
+	 *    - PASID-selective PASID-based IOTLB invalidation
+	 * - If (pasid is RID_PASID)
+	 *    - Global Device-TLB invalidation to affected functions
+	 *   Else
+	 *    - PASID-based Device-TLB invalidation (with S=1 and
+	 *      Addr[63:12]=0x7FFFFFFF_FFFFF) to affected functions
+	 */
+	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
+
+	iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
+
+	/* Device IOTLB doesn't need to be flushed in caching mode. */
+	if (!cap_caching_mode(iommu->cap))
+		devtlb_invalidation_with_pasid(iommu, dev, pasid);
+
+	return 0;
+}
+
 /*
  * Set up the scalable mode pasid entry for passthrough translation type.
  */
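
To make the SSADE helpers concrete: pasid_set_ssade() and
pasid_clear_ssade() are the same read-modify-write primitive with different
@bits written under the bit-9 mask. A hypothetical userspace model of that
pattern follows (the kernel additionally wraps the store in WRITE_ONCE()):

#include <stdint.h>
#include <stdio.h>

/* Model of pasid_set_bits(): replace the @mask field of one 64-bit word
 * of a PASID entry with @bits. */
static void pasid_set_bits(uint64_t *ptr, uint64_t mask, uint64_t bits)
{
	*ptr = (*ptr & ~mask) | bits;
}

int main(void)
{
	uint64_t val0 = 0;

	pasid_set_bits(&val0, 1 << 9, 1 << 9);	/* pasid_set_ssade()   */
	printf("after set:   %#llx\n", (unsigned long long)val0); /* 0x200 */

	pasid_set_bits(&val0, 1 << 9, 0);	/* pasid_clear_ssade() */
	printf("after clear: %#llx\n", (unsigned long long)val0); /* 0 */
	return 0;
}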

drivers/iommu/intel/pasid.h

Lines changed: 4 additions & 0 deletions
@@ -106,6 +106,10 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
 int intel_pasid_setup_second_level(struct intel_iommu *iommu,
 				   struct dmar_domain *domain,
 				   struct device *dev, u32 pasid);
+int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
+				     struct dmar_domain *domain,
+				     struct device *dev, u32 pasid,
+				     bool enabled);
 int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
 				   struct dmar_domain *domain,
 				   struct device *dev, u32 pasid);
