
Commit e2c0150

Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd.git

Conflicts:
        drivers/iommu/iommufd/selftest.c
        include/linux/iommu.h

2 parents: 00141e2 + 03476e6

35 files changed, 2694 insertions(+), 211 deletions(-)

drivers/iommu/Kconfig (4 additions, 0 deletions)

@@ -7,6 +7,10 @@ config IOMMU_IOVA
 config IOMMU_API
         bool
 
+config IOMMUFD_DRIVER
+        bool
+        default n
+
 menuconfig IOMMU_SUPPORT
         bool "IOMMU Hardware Support"
         depends on MMU

drivers/iommu/amd/Kconfig (1 addition, 0 deletions)

@@ -10,6 +10,7 @@ config AMD_IOMMU
         select IOMMU_API
         select IOMMU_IOVA
         select IOMMU_IO_PGTABLE
+        select IOMMUFD_DRIVER if IOMMUFD
         depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE
         help
           With this option you can enable support for AMD IOMMU hardware in

drivers/iommu/amd/amd_iommu_types.h (12 additions, 0 deletions)

@@ -97,7 +97,9 @@
 #define FEATURE_GATS_MASK (3ULL)
 #define FEATURE_GAM_VAPIC BIT_ULL(21)
 #define FEATURE_GIOSUP BIT_ULL(48)
+#define FEATURE_HASUP BIT_ULL(49)
 #define FEATURE_EPHSUP BIT_ULL(50)
+#define FEATURE_HDSUP BIT_ULL(52)
 #define FEATURE_SNP BIT_ULL(63)
 
 #define FEATURE_PASID_SHIFT 32
@@ -212,6 +214,7 @@
 /* macros and definitions for device table entries */
 #define DEV_ENTRY_VALID 0x00
 #define DEV_ENTRY_TRANSLATION 0x01
+#define DEV_ENTRY_HAD 0x07
 #define DEV_ENTRY_PPR 0x34
 #define DEV_ENTRY_IR 0x3d
 #define DEV_ENTRY_IW 0x3e
@@ -370,10 +373,16 @@
 #define PTE_LEVEL_PAGE_SIZE(level) \
         (1ULL << (12 + (9 * (level))))
 
+/*
+ * The IOPTE dirty bit
+ */
+#define IOMMU_PTE_HD_BIT (6)
+
 /*
  * Bit value definition for I/O PTE fields
  */
 #define IOMMU_PTE_PR BIT_ULL(0)
+#define IOMMU_PTE_HD BIT_ULL(IOMMU_PTE_HD_BIT)
 #define IOMMU_PTE_U BIT_ULL(59)
 #define IOMMU_PTE_FC BIT_ULL(60)
 #define IOMMU_PTE_IR BIT_ULL(61)
@@ -384,6 +393,7 @@
  */
 #define DTE_FLAG_V BIT_ULL(0)
 #define DTE_FLAG_TV BIT_ULL(1)
+#define DTE_FLAG_HAD (3ULL << 7)
 #define DTE_FLAG_GIOV BIT_ULL(54)
 #define DTE_FLAG_GV BIT_ULL(55)
 #define DTE_GLX_SHIFT (56)
@@ -413,6 +423,7 @@
 
 #define IOMMU_PAGE_MASK (((1ULL << 52) - 1) & ~0xfffULL)
 #define IOMMU_PTE_PRESENT(pte) ((pte) & IOMMU_PTE_PR)
+#define IOMMU_PTE_DIRTY(pte) ((pte) & IOMMU_PTE_HD)
 #define IOMMU_PTE_PAGE(pte) (iommu_phys_to_virt((pte) & IOMMU_PAGE_MASK))
 #define IOMMU_PTE_MODE(pte) (((pte) >> 9) & 0x07)
 
@@ -553,6 +564,7 @@ struct protection_domain {
         int nid; /* Node ID */
         u64 *gcr3_tbl; /* Guest CR3 table */
         unsigned long flags; /* flags to find out type of domain */
+        bool dirty_tracking; /* dirty tracking is enabled in the domain */
         unsigned dev_cnt; /* devices assigned to this domain */
         unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
 };
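To make the new definitions concrete: DTE_FLAG_HAD sets the two bits at positions 8:7 of the device-table entry (the HAD field), and IOMMU_PTE_HD_BIT is bit 6 of each I/O PTE. A minimal, hypothetical sketch (not part of this commit; the helper name is invented) of how the PTE macros compose:

        /* Illustration only -- assumes amd_iommu_types.h is in scope. */
        static inline bool example_iopte_is_dirty(u64 pte)
        {
                /* Only a present PTE carries a meaningful hardware-dirty bit. */
                return IOMMU_PTE_PRESENT(pte) && IOMMU_PTE_DIRTY(pte);
        }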

drivers/iommu/amd/io_pgtable.c (68 additions, 0 deletions)

@@ -486,6 +486,73 @@ static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned lo
         return (__pte & ~offset_mask) | (iova & offset_mask);
 }
 
+static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size,
+                                     unsigned long flags)
+{
+        bool test_only = flags & IOMMU_DIRTY_NO_CLEAR;
+        bool dirty = false;
+        int i, count;
+
+        /*
+         * 2.2.3.2 Host Dirty Support
+         * When a non-default page size is used, software must OR the
+         * Dirty bits in all of the replicated host PTEs used to map
+         * the page. The IOMMU does not guarantee the Dirty bits are
+         * set in all of the replicated PTEs. Any portion of the page
+         * may have been written even if the Dirty bit is set in only
+         * one of the replicated PTEs.
+         */
+        count = PAGE_SIZE_PTE_COUNT(size);
+        for (i = 0; i < count && test_only; i++) {
+                if (test_bit(IOMMU_PTE_HD_BIT, (unsigned long *)&ptep[i])) {
+                        dirty = true;
+                        break;
+                }
+        }
+
+        for (i = 0; i < count && !test_only; i++) {
+                if (test_and_clear_bit(IOMMU_PTE_HD_BIT,
+                                       (unsigned long *)&ptep[i])) {
+                        dirty = true;
+                }
+        }
+
+        return dirty;
+}
+
+static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops,
+                                         unsigned long iova, size_t size,
+                                         unsigned long flags,
+                                         struct iommu_dirty_bitmap *dirty)
+{
+        struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+        unsigned long end = iova + size - 1;
+
+        do {
+                unsigned long pgsize = 0;
+                u64 *ptep, pte;
+
+                ptep = fetch_pte(pgtable, iova, &pgsize);
+                if (ptep)
+                        pte = READ_ONCE(*ptep);
+                if (!ptep || !IOMMU_PTE_PRESENT(pte)) {
+                        pgsize = pgsize ?: PTE_LEVEL_PAGE_SIZE(0);
+                        iova += pgsize;
+                        continue;
+                }
+
+                /*
+                 * Mark the whole IOVA range as dirty even if only one of
+                 * the replicated PTEs were marked dirty.
+                 */
+                if (pte_test_and_clear_dirty(ptep, pgsize, flags))
+                        iommu_dirty_bitmap_record(dirty, iova, pgsize);
+                iova += pgsize;
+        } while (iova < end);
+
+        return 0;
+}
+
 /*
  * ----------------------------------------------------
  */
@@ -527,6 +594,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
         pgtable->iop.ops.map_pages = iommu_v1_map_pages;
         pgtable->iop.ops.unmap_pages = iommu_v1_unmap_pages;
         pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
+        pgtable->iop.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty;
 
         return &pgtable->iop;
 }
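For orientation, a rough sketch (not part of this diff) of how a caller might drive the new read_and_clear_dirty page-table op. The iommu_dirty_bitmap helpers are assumed from the core IOMMU changes merged alongside this driver code (include/linux/iommu.h is one of the conflicted files above), and the function name is hypothetical:

        /* Illustration only; a real caller would also flush the IOTLB afterwards. */
        #include <linux/iommu.h>
        #include <linux/io-pgtable.h>

        static int example_collect_dirty(struct io_pgtable_ops *ops,
                                         struct iova_bitmap *bitmap,
                                         unsigned long iova, size_t size)
        {
                struct iommu_iotlb_gather gather;
                struct iommu_dirty_bitmap dirty;

                iommu_iotlb_gather_init(&gather);
                iommu_dirty_bitmap_init(&dirty, bitmap, &gather);

                /* Pass IOMMU_DIRTY_NO_CLEAR instead of 0 to report without clearing the HD bits. */
                return ops->read_and_clear_dirty(ops, iova, size, 0, &dirty);
        }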

drivers/iommu/amd/iommu.c (144 additions, 3 deletions)

@@ -37,6 +37,7 @@
 #include <asm/iommu.h>
 #include <asm/gart.h>
 #include <asm/dma.h>
+#include <uapi/linux/iommufd.h>
 
 #include "amd_iommu.h"
 #include "../dma-iommu.h"
@@ -63,6 +64,7 @@ LIST_HEAD(hpet_map);
 LIST_HEAD(acpihid_map);
 
 const struct iommu_ops amd_iommu_ops;
+const struct iommu_dirty_ops amd_dirty_ops;
 
 int amd_iommu_max_glx_val = -1;
 
@@ -1738,6 +1740,9 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid,
         if (ppr)
                 pte_root |= 1ULL << DEV_ENTRY_PPR;
 
+        if (domain->dirty_tracking)
+                pte_root |= DTE_FLAG_HAD;
+
         if (domain->flags & PD_IOMMUV2_MASK) {
                 u64 gcr3 = iommu_virt_to_phys(domain->gcr3_tbl);
                 u64 glx = domain->glx;
@@ -2195,28 +2200,79 @@ static inline u64 dma_max_address(void)
         return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1);
 }
 
-static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
+static bool amd_iommu_hd_support(struct amd_iommu *iommu)
 {
+        return iommu && (iommu->features & FEATURE_HDSUP);
+}
+
+static struct iommu_domain *do_iommu_domain_alloc(unsigned int type,
+                                                  struct device *dev, u32 flags)
+{
+        bool dirty_tracking = flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
         struct protection_domain *domain;
+        struct amd_iommu *iommu = NULL;
+
+        if (dev) {
+                iommu = rlookup_amd_iommu(dev);
+                if (!iommu)
+                        return ERR_PTR(-ENODEV);
+        }
 
         /*
          * Since DTE[Mode]=0 is prohibited on SNP-enabled system,
          * default to use IOMMU_DOMAIN_DMA[_FQ].
          */
         if (amd_iommu_snp_en && (type == IOMMU_DOMAIN_IDENTITY))
-                return NULL;
+                return ERR_PTR(-EINVAL);
+
+        if (dirty_tracking && !amd_iommu_hd_support(iommu))
+                return ERR_PTR(-EOPNOTSUPP);
 
         domain = protection_domain_alloc(type);
         if (!domain)
-                return NULL;
+                return ERR_PTR(-ENOMEM);
 
         domain->domain.geometry.aperture_start = 0;
         domain->domain.geometry.aperture_end = dma_max_address();
         domain->domain.geometry.force_aperture = true;
 
+        if (iommu) {
+                domain->domain.type = type;
+                domain->domain.pgsize_bitmap = iommu->iommu.ops->pgsize_bitmap;
+                domain->domain.ops = iommu->iommu.ops->default_domain_ops;
+
+                if (dirty_tracking)
+                        domain->domain.dirty_ops = &amd_dirty_ops;
+        }
+
         return &domain->domain;
 }
 
+static struct iommu_domain *amd_iommu_domain_alloc(unsigned int type)
+{
+        struct iommu_domain *domain;
+
+        domain = do_iommu_domain_alloc(type, NULL, 0);
+        if (IS_ERR(domain))
+                return NULL;
+
+        return domain;
+}
+
+static struct iommu_domain *
+amd_iommu_domain_alloc_user(struct device *dev, u32 flags,
+                            struct iommu_domain *parent,
+                            const struct iommu_user_data *user_data)
+
+{
+        unsigned int type = IOMMU_DOMAIN_UNMANAGED;
+
+        if ((flags & ~IOMMU_HWPT_ALLOC_DIRTY_TRACKING) || parent || user_data)
+                return ERR_PTR(-EOPNOTSUPP);
+
+        return do_iommu_domain_alloc(type, dev, flags);
+}
+
 static void amd_iommu_domain_free(struct iommu_domain *dom)
 {
         struct protection_domain *domain;
@@ -2253,6 +2309,13 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
 
         dev_data->defer_attach = false;
 
+        /*
+         * Restrict to devices with compatible IOMMU hardware support
+         * when enforcement of dirty tracking is enabled.
+         */
+        if (dom->dirty_ops && !amd_iommu_hd_support(iommu))
+                return -EINVAL;
+
         if (dev_data->domain)
                 detach_device(dev);
 
@@ -2372,13 +2435,85 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
                 return true;
         case IOMMU_CAP_DEFERRED_FLUSH:
                 return true;
+        case IOMMU_CAP_DIRTY_TRACKING: {
+                struct amd_iommu *iommu = rlookup_amd_iommu(dev);
+
+                return amd_iommu_hd_support(iommu);
+        }
         default:
                 break;
         }
 
         return false;
 }
 
+static int amd_iommu_set_dirty_tracking(struct iommu_domain *domain,
+                                        bool enable)
+{
+        struct protection_domain *pdomain = to_pdomain(domain);
+        struct dev_table_entry *dev_table;
+        struct iommu_dev_data *dev_data;
+        bool domain_flush = false;
+        struct amd_iommu *iommu;
+        unsigned long flags;
+        u64 pte_root;
+
+        spin_lock_irqsave(&pdomain->lock, flags);
+        if (!(pdomain->dirty_tracking ^ enable)) {
+                spin_unlock_irqrestore(&pdomain->lock, flags);
+                return 0;
+        }
+
+        list_for_each_entry(dev_data, &pdomain->dev_list, list) {
+                iommu = rlookup_amd_iommu(dev_data->dev);
+                if (!iommu)
+                        continue;
+
+                dev_table = get_dev_table(iommu);
+                pte_root = dev_table[dev_data->devid].data[0];
+
+                pte_root = (enable ? pte_root | DTE_FLAG_HAD :
+                                     pte_root & ~DTE_FLAG_HAD);
+
+                /* Flush device DTE */
+                dev_table[dev_data->devid].data[0] = pte_root;
+                device_flush_dte(dev_data);
+                domain_flush = true;
+        }
+
+        /* Flush IOTLB to mark IOPTE dirty on the next translation(s) */
+        if (domain_flush) {
+                amd_iommu_domain_flush_tlb_pde(pdomain);
+                amd_iommu_domain_flush_complete(pdomain);
+        }
+        pdomain->dirty_tracking = enable;
+        spin_unlock_irqrestore(&pdomain->lock, flags);
+
+        return 0;
+}
+
+static int amd_iommu_read_and_clear_dirty(struct iommu_domain *domain,
+                                          unsigned long iova, size_t size,
+                                          unsigned long flags,
+                                          struct iommu_dirty_bitmap *dirty)
+{
+        struct protection_domain *pdomain = to_pdomain(domain);
+        struct io_pgtable_ops *ops = &pdomain->iop.iop.ops;
+        unsigned long lflags;
+
+        if (!ops || !ops->read_and_clear_dirty)
+                return -EOPNOTSUPP;
+
+        spin_lock_irqsave(&pdomain->lock, lflags);
+        if (!pdomain->dirty_tracking && dirty->bitmap) {
+                spin_unlock_irqrestore(&pdomain->lock, lflags);
+                return -EINVAL;
+        }
+        spin_unlock_irqrestore(&pdomain->lock, lflags);
+
+        return ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
+}
+
 static void amd_iommu_get_resv_regions(struct device *dev,
                                        struct list_head *head)
 {
@@ -2500,9 +2635,15 @@ static bool amd_iommu_enforce_cache_coherency(struct iommu_domain *domain)
         return true;
 }
 
+const struct iommu_dirty_ops amd_dirty_ops = {
+        .set_dirty_tracking = amd_iommu_set_dirty_tracking,
+        .read_and_clear_dirty = amd_iommu_read_and_clear_dirty,
+};
+
 const struct iommu_ops amd_iommu_ops = {
         .capable = amd_iommu_capable,
         .domain_alloc = amd_iommu_domain_alloc,
+        .domain_alloc_user = amd_iommu_domain_alloc_user,
         .probe_device = amd_iommu_probe_device,
         .release_device = amd_iommu_release_device,
         .probe_finalize = amd_iommu_probe_finalize,
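Putting the AMD pieces together, they are meant to be exercised through the generic dirty_ops interface: a domain allocated via domain_alloc_user() with IOMMU_HWPT_ALLOC_DIRTY_TRACKING gets amd_dirty_ops, set_dirty_tracking() flips DTE_FLAG_HAD on every attached device, and read_and_clear_dirty() walks the v1 page table. A hedged sketch of that flow (illustrative only; the function name and bitmap plumbing are assumptions, while the ops hooks and IOMMU_CAP_DIRTY_TRACKING come from this diff):

        /* Illustration only -- not part of this commit. */
        #include <linux/iommu.h>

        static int example_track_and_harvest(struct device *dev,
                                             struct iommu_domain *domain,
                                             struct iova_bitmap *bitmap,
                                             unsigned long iova, size_t size)
        {
                struct iommu_iotlb_gather gather;
                struct iommu_dirty_bitmap dirty;
                int ret;

                /* amd_iommu_capable() answers this based on FEATURE_HDSUP. */
                if (!device_iommu_capable(dev, IOMMU_CAP_DIRTY_TRACKING))
                        return -EOPNOTSUPP;

                /* dirty_ops is only set when the domain was allocated with
                 * IOMMU_HWPT_ALLOC_DIRTY_TRACKING. */
                if (!domain->dirty_ops)
                        return -EOPNOTSUPP;

                /* Sets DTE_FLAG_HAD in each attached DTE and flushes the IOTLB. */
                ret = domain->dirty_ops->set_dirty_tracking(domain, true);
                if (ret)
                        return ret;

                /* Harvest and clear dirty bits, then flush so the cleared
                 * PTEs are re-fetched by the IOMMU. */
                iommu_iotlb_gather_init(&gather);
                iommu_dirty_bitmap_init(&dirty, bitmap, &gather);
                ret = domain->dirty_ops->read_and_clear_dirty(domain, iova, size,
                                                              0, &dirty);
                iommu_iotlb_sync(domain, &gather);
                return ret;
        }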

drivers/iommu/intel/Kconfig (1 addition, 0 deletions)

@@ -15,6 +15,7 @@ config INTEL_IOMMU
         select DMA_OPS
         select IOMMU_API
         select IOMMU_IOVA
+        select IOMMUFD_DRIVER if IOMMUFD
         select NEED_DMA_MAP_STATE
         select DMAR_TABLE
         select SWIOTLB
