@@ -103,9 +103,9 @@ struct vfio_dma {
 struct vfio_batch {
 	struct page		**pages;	/* for pin_user_pages_remote */
 	struct page		*fallback_page;	/* if pages alloc fails */
-	int			capacity;	/* length of pages array */
-	int			size;		/* of batch currently */
-	int			offset;		/* of next entry in pages */
+	unsigned int		capacity;	/* length of pages array */
+	unsigned int		size;		/* of batch currently */
+	unsigned int		offset;		/* of next entry in pages */
 };
 
 struct vfio_iommu_group {
@@ -471,12 +471,12 @@ static int put_pfn(unsigned long pfn, int prot)
 
 #define VFIO_BATCH_MAX_CAPACITY (PAGE_SIZE / sizeof(struct page *))
 
-static void vfio_batch_init(struct vfio_batch *batch)
+static void __vfio_batch_init(struct vfio_batch *batch, bool single)
 {
 	batch->size = 0;
 	batch->offset = 0;
 
-	if (unlikely(disable_hugepages))
+	if (single || unlikely(disable_hugepages))
 		goto fallback;
 
 	batch->pages = (struct page **) __get_free_page(GFP_KERNEL);
@@ -491,6 +491,16 @@ static void vfio_batch_init(struct vfio_batch *batch)
 	batch->capacity = 1;
 }
 
+static void vfio_batch_init(struct vfio_batch *batch)
+{
+	__vfio_batch_init(batch, false);
+}
+
+static void vfio_batch_init_single(struct vfio_batch *batch)
+{
+	__vfio_batch_init(batch, true);
+}
+
 static void vfio_batch_unpin(struct vfio_batch *batch, struct vfio_dma *dma)
 {
 	while (batch->size) {
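As a rough illustration of the init split above, here is a stand-alone user-space sketch, not the kernel code: malloc() stands in for __get_free_page(), the page size is hard-coded, and the struct is trimmed to the fields these hunks touch. The point is that vfio_batch_init_single() forces the capacity-1 fallback path, so single-page callers never allocate a full page of pointers.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define MODEL_PAGE_SIZE 4096UL

struct batch_model {
	void **pages;		/* either the allocated array or &fallback */
	void *fallback;		/* single-slot storage for the capacity-1 case */
	unsigned int capacity;
	unsigned int size;
	unsigned int offset;
};

static void batch_model_init(struct batch_model *b, bool single)
{
	b->size = 0;
	b->offset = 0;

	/* Single-page users skip the full-page allocation entirely. */
	if (!single) {
		b->pages = malloc(MODEL_PAGE_SIZE);
		if (b->pages) {
			b->capacity = MODEL_PAGE_SIZE / sizeof(void *);
			return;
		}
	}

	/* Fall back to a capacity of one, mirroring __vfio_batch_init(). */
	b->pages = &b->fallback;
	b->capacity = 1;
}

int main(void)
{
	struct batch_model multi, one;

	batch_model_init(&multi, false);	/* 512 entries on a 4K "page" */
	batch_model_init(&one, true);		/* always capacity 1 */
	printf("multi=%u single=%u\n", multi.capacity, one.capacity);
	if (multi.pages != &multi.fallback)
		free(multi.pages);
	return 0;
}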
@@ -510,7 +520,7 @@ static void vfio_batch_fini(struct vfio_batch *batch)
 
 static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
 			    unsigned long vaddr, unsigned long *pfn,
-			    bool write_fault)
+			    unsigned long *addr_mask, bool write_fault)
 {
 	struct follow_pfnmap_args args = { .vma = vma, .address = vaddr };
 	int ret;
@@ -534,36 +544,46 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
 		return ret;
 	}
 
-	if (write_fault && !args.writable)
+	if (write_fault && !args.writable) {
 		ret = -EFAULT;
-	else
+	} else {
 		*pfn = args.pfn;
+		*addr_mask = args.addr_mask;
+	}
 
 	follow_pfnmap_end(&args);
 	return ret;
 }
 
 /*
  * Returns the positive number of pfns successfully obtained or a negative
- * error code.
+ * error code. The initial pfn is stored in the pfn arg. For page-backed
+ * pfns, the provided batch is also updated to indicate the filled pages and
+ * initial offset. For VM_PFNMAP pfns, only the returned number of pfns and
+ * returned initial pfn are provided; subsequent pfns are contiguous.
  */
-static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
-			  long npages, int prot, unsigned long *pfn,
-			  struct page **pages)
+static long vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
+			   unsigned long npages, int prot, unsigned long *pfn,
+			   struct vfio_batch *batch)
 {
+	unsigned long pin_pages = min_t(unsigned long, npages, batch->capacity);
 	struct vm_area_struct *vma;
 	unsigned int flags = 0;
-	int ret;
+	long ret;
 
 	if (prot & IOMMU_WRITE)
 		flags |= FOLL_WRITE;
 
 	mmap_read_lock(mm);
-	ret = pin_user_pages_remote(mm, vaddr, npages, flags | FOLL_LONGTERM,
-				    pages, NULL);
+	ret = pin_user_pages_remote(mm, vaddr, pin_pages, flags | FOLL_LONGTERM,
+				    batch->pages, NULL);
 	if (ret > 0) {
-		*pfn = page_to_pfn(pages[0]);
+		*pfn = page_to_pfn(batch->pages[0]);
+		batch->size = ret;
+		batch->offset = 0;
 		goto done;
+	} else if (!ret) {
+		ret = -EFAULT;
 	}
 
 	vaddr = untagged_addr_remote(mm, vaddr);
@@ -572,15 +592,22 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
 	vma = vma_lookup(mm, vaddr);
 
 	if (vma && vma->vm_flags & VM_PFNMAP) {
-		ret = follow_fault_pfn(vma, mm, vaddr, pfn, prot & IOMMU_WRITE);
+		unsigned long addr_mask;
+
+		ret = follow_fault_pfn(vma, mm, vaddr, pfn, &addr_mask,
+				       prot & IOMMU_WRITE);
 		if (ret == -EAGAIN)
 			goto retry;
 
 		if (!ret) {
-			if (is_invalid_reserved_pfn(*pfn))
-				ret = 1;
-			else
+			if (is_invalid_reserved_pfn(*pfn)) {
+				unsigned long epfn;
+
+				epfn = (*pfn | (~addr_mask >> PAGE_SHIFT)) + 1;
+				ret = min_t(long, npages, epfn - *pfn);
+			} else {
 				ret = -EFAULT;
+			}
 		}
 	}
 done:
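The epfn computation above bounds the returned count by the end of the physical mapping that follow_fault_pfn() reported via addr_mask. Below is a stand-alone sketch of that arithmetic; the 2MB mapping size, PAGE_SHIFT of 12, and the example pfn are assumptions chosen for illustration, not values from the patch.

#include <stdio.h>

#define PAGE_SHIFT 12UL

int main(void)
{
	/* Assume the fault resolved inside a 2MB (PMD-sized) pfnmap mapping,
	 * so the low 21 address bits are not part of the mapping's base. */
	unsigned long addr_mask = ~((1UL << 21) - 1);
	unsigned long pfn = 0x100a3;	/* pfn landed mid-way into the 2MB range */
	unsigned long npages = 4096;	/* caller asked for this many */

	/* epfn is the first pfn past the end of the contiguous mapping. */
	unsigned long epfn = (pfn | (~addr_mask >> PAGE_SHIFT)) + 1;
	unsigned long got = npages < epfn - pfn ? npages : epfn - pfn;

	/* 2MB / 4KB = 512 pfns per mapping; pfn 0x100a3 is 0xa3 = 163 in,
	 * so 512 - 163 = 349 contiguous pfns remain. */
	printf("contiguous pfns returned: %lu\n", got);	/* prints 349 */
	return 0;
}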
@@ -594,7 +621,7 @@ static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
  * first page and all consecutive pages with the same locking.
  */
 static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
-				  long npage, unsigned long *pfn_base,
+				  unsigned long npage, unsigned long *pfn_base,
 				  unsigned long limit, struct vfio_batch *batch)
 {
 	unsigned long pfn;
@@ -616,32 +643,42 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 		*pfn_base = 0;
 	}
 
+	if (unlikely(disable_hugepages))
+		npage = 1;
+
 	while (npage) {
 		if (!batch->size) {
 			/* Empty batch, so refill it. */
-			long req_pages = min_t(long, npage, batch->capacity);
-
-			ret = vaddr_get_pfns(mm, vaddr, req_pages, dma->prot,
-					     &pfn, batch->pages);
+			ret = vaddr_get_pfns(mm, vaddr, npage, dma->prot,
+					     &pfn, batch);
 			if (ret < 0)
 				goto unpin_out;
 
-			batch->size = ret;
-			batch->offset = 0;
-
 			if (!*pfn_base) {
 				*pfn_base = pfn;
 				rsvd = is_invalid_reserved_pfn(*pfn_base);
 			}
+
+			/* Handle pfnmap */
+			if (!batch->size) {
+				if (pfn != *pfn_base + pinned || !rsvd)
+					goto out;
+
+				pinned += ret;
+				npage -= ret;
+				vaddr += (PAGE_SIZE * ret);
+				iova += (PAGE_SIZE * ret);
+				continue;
+			}
 		}
 
 		/*
-		 * pfn is preset for the first iteration of this inner loop and
-		 * updated at the end to handle a VM_PFNMAP pfn. In that case,
-		 * batch->pages isn't valid (there's no struct page), so allow
-		 * batch->pages to be touched only when there's more than one
-		 * pfn to check, which guarantees the pfns are from a
-		 * !VM_PFNMAP vma.
+		 * pfn is preset for the first iteration of this inner loop
+		 * due to the fact that vaddr_get_pfns() needs to provide the
+		 * initial pfn for pfnmaps. Therefore to reduce redundancy,
+		 * the next pfn is fetched at the end of the loop.
+		 * A PageReserved() page could still qualify as page backed
+		 * and rsvd here, and therefore continues to use the batch.
		 */
 		while (true) {
 			if (pfn != *pfn_base + pinned ||
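The new "Handle pfnmap" branch above advances the pin accounting by the whole contiguous run that vaddr_get_pfns() reported (an empty batch after a successful refill means the pfns have no struct pages), instead of walking one pfn per iteration. A toy stand-alone model of that bookkeeping follows; the pfn, counts, and addresses are made-up values continuing the example in the previous sketch.

#include <stdio.h>

#define PAGE_SIZE 4096UL

int main(void)
{
	unsigned long pfn_base = 0x100a3, pinned = 0, npage = 349;
	unsigned long vaddr = 0x7f0000000000, iova = 0x80000000;
	unsigned long pfn = pfn_base;	/* initial pfn reported by the refill */
	long ret = 349;			/* contiguous pfnmap pfns reported */
	int rsvd = 1;			/* is_invalid_reserved_pfn() was true */

	/* Only a run that extends the current contiguous reserved pinning
	 * is consumed; otherwise the kernel code jumps to the out label. */
	if (pfn == pfn_base + pinned && rsvd) {
		pinned += ret;
		npage  -= ret;
		vaddr  += PAGE_SIZE * ret;
		iova   += PAGE_SIZE * ret;
	}
	printf("pinned=%lu npage=%lu vaddr=%#lx iova=%#lx\n",
	       pinned, npage, vaddr, iova);
	return 0;
}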
@@ -676,21 +713,12 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 
 			pfn = page_to_pfn(batch->pages[batch->offset]);
 		}
-
-		if (unlikely(disable_hugepages))
-			break;
 	}
 
 out:
 	ret = vfio_lock_acct(dma, lock_acct, false);
 
 unpin_out:
-	if (batch->size == 1 && !batch->offset) {
-		/* May be a VM_PFNMAP pfn, which the batch can't remember. */
-		put_pfn(pfn, dma->prot);
-		batch->size = 0;
-	}
-
 	if (ret < 0) {
 		if (pinned && !rsvd) {
 			for (pfn = *pfn_base; pinned; pfn++, pinned--)
@@ -705,7 +733,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
 }
 
 static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
-				    unsigned long pfn, long npage,
+				    unsigned long pfn, unsigned long npage,
 				    bool do_accounting)
 {
 	long unlocked = 0, locked = 0;
@@ -728,15 +756,17 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
 static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
 				  unsigned long *pfn_base, bool do_accounting)
 {
-	struct page *pages[1];
+	struct vfio_batch batch;
 	struct mm_struct *mm;
 	int ret;
 
 	mm = dma->mm;
 	if (!mmget_not_zero(mm))
 		return -ENODEV;
 
-	ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages);
+	vfio_batch_init_single(&batch);
+
+	ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, &batch);
 	if (ret != 1)
 		goto out;
 
@@ -755,6 +785,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
 	}
 
 out:
+	vfio_batch_fini(&batch);
 	mmput(mm);
 	return ret;
 }