Skip to content

Commit b9a60d6

Browse files
jpemartinsjgunthorpe
authored andcommitted
iommufd: Add IOMMU_HWPT_GET_DIRTY_BITMAP
Connect a hw_pagetable to the IOMMU core dirty tracking read_and_clear_dirty iommu domain op. It exposes all of the functionality for the UAPI to read the dirtied IOVAs while clearing the Dirty bits from the PTEs. In doing so, add an IO pagetable API iopt_read_and_clear_dirty_data() that performs the reading of dirty IOPTEs for a given IOVA range and then copies them back to the userspace bitmap. Underneath it uses the IOMMU domain kernel API which will read the dirty bits, as well as atomically clearing the IOPTE dirty bit and flushing the IOTLB at the end. The IOVA bitmaps usage takes care of the iteration of the bitmaps user pages efficiently and without copies. Within the iterator function we iterate over io-pagetable contiguous areas that have been mapped. Contrary to the past incarnation of a similar interface in VFIO, the IOVA range to be scanned is tied to the bitmap size, thus the application needs to pass an appropriately sized bitmap address taking into account the iova range being passed *and* page size ... as opposed to allowing bitmap-iova != iova. Link: https://lore.kernel.org/r/20231024135109.73787-8-joao.m.martins@oracle.com Signed-off-by: Joao Martins <joao.m.martins@oracle.com> Reviewed-by: Jason Gunthorpe <jgg@nvidia.com> Reviewed-by: Kevin Tian <kevin.tian@intel.com> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
1 parent e2a4b29 commit b9a60d6

File tree

5 files changed

+184
-0
lines changed

5 files changed

+184
-0
lines changed

drivers/iommu/iommufd/hw_pagetable.c

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,3 +220,25 @@ int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd)
220220
iommufd_put_object(&hwpt->obj);
221221
return rc;
222222
}
223+
224+
int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd)
225+
{
226+
struct iommu_hwpt_get_dirty_bitmap *cmd = ucmd->cmd;
227+
struct iommufd_hw_pagetable *hwpt;
228+
struct iommufd_ioas *ioas;
229+
int rc = -EOPNOTSUPP;
230+
231+
if ((cmd->flags || cmd->__reserved))
232+
return -EOPNOTSUPP;
233+
234+
hwpt = iommufd_get_hwpt(ucmd, cmd->hwpt_id);
235+
if (IS_ERR(hwpt))
236+
return PTR_ERR(hwpt);
237+
238+
ioas = hwpt->ioas;
239+
rc = iopt_read_and_clear_dirty_data(&ioas->iopt, hwpt->domain,
240+
cmd->flags, cmd);
241+
242+
iommufd_put_object(&hwpt->obj);
243+
return rc;
244+
}

drivers/iommu/iommufd/io_pagetable.c

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <linux/err.h>
1616
#include <linux/slab.h>
1717
#include <linux/errno.h>
18+
#include <uapi/linux/iommufd.h>
1819

1920
#include "io_pagetable.h"
2021
#include "double_span.h"
@@ -412,6 +413,118 @@ int iopt_map_user_pages(struct iommufd_ctx *ictx, struct io_pagetable *iopt,
412413
return 0;
413414
}
414415

416+
/*
 * Context handed to __iommu_read_and_clear_dirty() through the opaque
 * argument of iova_bitmap_for_each().
 */
struct iova_bitmap_fn_arg {
	/* io_pagetable whose mapped areas are iterated */
	struct io_pagetable *iopt;
	/* domain whose dirty_ops->read_and_clear_dirty() is invoked */
	struct iommu_domain *domain;
	/* dirty bitmap state forwarded to read_and_clear_dirty() */
	struct iommu_dirty_bitmap *dirty;
};
421+
422+
/*
 * iova_bitmap_for_each() callback: read and clear the dirty bits for
 * [iova, iova + length - 1].
 *
 * The range is walked one contiguous io_pagetable area at a time and each
 * piece is passed to the domain's read_and_clear_dirty() op. @opaque is a
 * struct iova_bitmap_fn_arg.
 *
 * Returns the first non-zero value returned by read_and_clear_dirty(),
 * -EINVAL if the contiguous-area walk did not complete over the requested
 * range, or 0 on success.
 */
static int __iommu_read_and_clear_dirty(struct iova_bitmap *bitmap,
					unsigned long iova, size_t length,
					void *opaque)
{
	struct iova_bitmap_fn_arg *arg = opaque;
	struct iommu_domain *domain = arg->domain;
	const struct iommu_dirty_ops *ops = domain->dirty_ops;
	unsigned long end_iova = iova + length - 1;
	struct iopt_area_contig_iter iter;
	struct iopt_area *area;

	iopt_for_each_contig_area(&iter, area, arg->iopt, iova, end_iova) {
		/* Clamp this piece to whichever ends first: area or request. */
		unsigned long area_end = iopt_area_last_iova(area);
		unsigned long chunk_end = min(end_iova, area_end);
		size_t chunk_len = chunk_end - iter.cur_iova + 1;
		int rc;

		rc = ops->read_and_clear_dirty(domain, iter.cur_iova,
					       chunk_len, 0, arg->dirty);
		if (rc)
			return rc;
	}

	return iopt_area_contig_done(&iter) ? 0 : -EINVAL;
}
449+
450+
static int
451+
iommu_read_and_clear_dirty(struct iommu_domain *domain,
452+
struct io_pagetable *iopt, unsigned long flags,
453+
struct iommu_hwpt_get_dirty_bitmap *bitmap)
454+
{
455+
const struct iommu_dirty_ops *ops = domain->dirty_ops;
456+
struct iommu_iotlb_gather gather;
457+
struct iommu_dirty_bitmap dirty;
458+
struct iova_bitmap_fn_arg arg;
459+
struct iova_bitmap *iter;
460+
int ret = 0;
461+
462+
if (!ops || !ops->read_and_clear_dirty)
463+
return -EOPNOTSUPP;
464+
465+
iter = iova_bitmap_alloc(bitmap->iova, bitmap->length,
466+
bitmap->page_size,
467+
u64_to_user_ptr(bitmap->data));
468+
if (IS_ERR(iter))
469+
return -ENOMEM;
470+
471+
iommu_dirty_bitmap_init(&dirty, iter, &gather);
472+
473+
arg.iopt = iopt;
474+
arg.domain = domain;
475+
arg.dirty = &dirty;
476+
iova_bitmap_for_each(iter, &arg, __iommu_read_and_clear_dirty);
477+
478+
iommu_iotlb_sync(domain, &gather);
479+
iova_bitmap_free(iter);
480+
481+
return ret;
482+
}
483+
484+
int iommufd_check_iova_range(struct io_pagetable *iopt,
485+
struct iommu_hwpt_get_dirty_bitmap *bitmap)
486+
{
487+
size_t iommu_pgsize = iopt->iova_alignment;
488+
u64 last_iova;
489+
490+
if (check_add_overflow(bitmap->iova, bitmap->length - 1, &last_iova))
491+
return -EOVERFLOW;
492+
493+
if (bitmap->iova > ULONG_MAX || last_iova > ULONG_MAX)
494+
return -EOVERFLOW;
495+
496+
if ((bitmap->iova & (iommu_pgsize - 1)) ||
497+
((last_iova + 1) & (iommu_pgsize - 1)))
498+
return -EINVAL;
499+
500+
if (!bitmap->page_size)
501+
return -EINVAL;
502+
503+
if ((bitmap->iova & (bitmap->page_size - 1)) ||
504+
((last_iova + 1) & (bitmap->page_size - 1)))
505+
return -EINVAL;
506+
507+
return 0;
508+
}
509+
510+
int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
511+
struct iommu_domain *domain,
512+
unsigned long flags,
513+
struct iommu_hwpt_get_dirty_bitmap *bitmap)
514+
{
515+
int ret;
516+
517+
ret = iommufd_check_iova_range(iopt, bitmap);
518+
if (ret)
519+
return ret;
520+
521+
down_read(&iopt->iova_rwsem);
522+
ret = iommu_read_and_clear_dirty(domain, iopt, flags, bitmap);
523+
up_read(&iopt->iova_rwsem);
524+
525+
return ret;
526+
}
527+
415528
static int iopt_clear_dirty_data(struct io_pagetable *iopt,
416529
struct iommu_domain *domain)
417530
{

drivers/iommu/iommufd/iommufd_private.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
#include <linux/xarray.h>
99
#include <linux/refcount.h>
1010
#include <linux/uaccess.h>
11+
#include <linux/iommu.h>
12+
#include <linux/iova_bitmap.h>
1113
#include <uapi/linux/iommufd.h>
1214

1315
struct iommu_domain;
@@ -71,6 +73,10 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova,
7173
unsigned long length, unsigned long *unmapped);
7274
int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped);
7375

76+
int iopt_read_and_clear_dirty_data(struct io_pagetable *iopt,
77+
struct iommu_domain *domain,
78+
unsigned long flags,
79+
struct iommu_hwpt_get_dirty_bitmap *bitmap);
7480
int iopt_set_dirty_tracking(struct io_pagetable *iopt,
7581
struct iommu_domain *domain, bool enable);
7682

@@ -226,6 +232,8 @@ int iommufd_option_rlimit_mode(struct iommu_option *cmd,
226232
struct iommufd_ctx *ictx);
227233

228234
int iommufd_vfio_ioas(struct iommufd_ucmd *ucmd);
235+
int iommufd_check_iova_range(struct io_pagetable *iopt,
236+
struct iommu_hwpt_get_dirty_bitmap *bitmap);
229237

230238
/*
231239
* A HW pagetable is called an iommu_domain inside the kernel. This user object
@@ -252,6 +260,8 @@ iommufd_get_hwpt(struct iommufd_ucmd *ucmd, u32 id)
252260
struct iommufd_hw_pagetable, obj);
253261
}
254262
int iommufd_hwpt_set_dirty_tracking(struct iommufd_ucmd *ucmd);
263+
int iommufd_hwpt_get_dirty_bitmap(struct iommufd_ucmd *ucmd);
264+
255265
struct iommufd_hw_pagetable *
256266
iommufd_hw_pagetable_alloc(struct iommufd_ctx *ictx, struct iommufd_ioas *ioas,
257267
struct iommufd_device *idev, u32 flags,

drivers/iommu/iommufd/main.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,7 @@ union ucmd_buffer {
307307
struct iommu_destroy destroy;
308308
struct iommu_hw_info info;
309309
struct iommu_hwpt_alloc hwpt;
310+
struct iommu_hwpt_get_dirty_bitmap get_dirty_bitmap;
310311
struct iommu_hwpt_set_dirty_tracking set_dirty_tracking;
311312
struct iommu_ioas_alloc alloc;
312313
struct iommu_ioas_allow_iovas allow_iovas;
@@ -343,6 +344,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
343344
__reserved),
344345
IOCTL_OP(IOMMU_HWPT_ALLOC, iommufd_hwpt_alloc, struct iommu_hwpt_alloc,
345346
__reserved),
347+
IOCTL_OP(IOMMU_HWPT_GET_DIRTY_BITMAP, iommufd_hwpt_get_dirty_bitmap,
348+
struct iommu_hwpt_get_dirty_bitmap, data),
346349
IOCTL_OP(IOMMU_HWPT_SET_DIRTY_TRACKING, iommufd_hwpt_set_dirty_tracking,
347350
struct iommu_hwpt_set_dirty_tracking, __reserved),
348351
IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl,
@@ -555,5 +558,6 @@ MODULE_ALIAS_MISCDEV(VFIO_MINOR);
555558
MODULE_ALIAS("devname:vfio/vfio");
556559
#endif
557560
MODULE_IMPORT_NS(IOMMUFD_INTERNAL);
561+
MODULE_IMPORT_NS(IOMMUFD);
558562
MODULE_DESCRIPTION("I/O Address Space Management for passthrough devices");
559563
MODULE_LICENSE("GPL");

include/uapi/linux/iommufd.h

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ enum {
4848
IOMMUFD_CMD_HWPT_ALLOC,
4949
IOMMUFD_CMD_GET_HW_INFO,
5050
IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING,
51+
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP,
5152
};
5253

5354
/**
@@ -481,4 +482,38 @@ struct iommu_hwpt_set_dirty_tracking {
481482
};
482483
#define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \
483484
IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING)
485+
486+
/**
487+
* struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP)
488+
* @size: sizeof(struct iommu_hwpt_get_dirty_bitmap)
489+
* @hwpt_id: HW pagetable ID that represents the IOMMU domain
490+
* @flags: Must be zero
491+
* @__reserved: Must be 0
492+
* @iova: base IOVA of the bitmap first bit
493+
* @length: IOVA range size
494+
* @page_size: page size granularity of each bit in the bitmap
495+
* @data: bitmap where to set the dirty bits. The bitmap bits each
496+
* represent a page_size which you deviate from an arbitrary iova.
497+
*
498+
* Checking a given IOVA is dirty:
499+
*
500+
* data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64))
501+
*
502+
* Walk the IOMMU pagetables for a given IOVA range to return a bitmap
503+
* with the dirty IOVAs. In doing so it will also by default clear any
504+
* dirty bit metadata set in the IOPTE.
505+
*/
506+
struct iommu_hwpt_get_dirty_bitmap {
507+
__u32 size;
508+
__u32 hwpt_id;
509+
__u32 flags;
510+
__u32 __reserved;
511+
__aligned_u64 iova;
512+
__aligned_u64 length;
513+
__aligned_u64 page_size;
514+
__aligned_u64 data;
515+
};
516+
#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \
517+
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP)
518+
484519
#endif

0 commit comments

Comments
 (0)