@@ -23,18 +23,25 @@ static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
 static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
                               const struct mmu_notifier_range *range,
                               unsigned long cur_seq);
+static bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
+                                 const struct mmu_notifier_range *range,
+                                 unsigned long cur_seq);
 static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
                             struct tid_group *grp,
                             unsigned int start, u16 count,
                             u32 *tidlist, unsigned int *tididx,
                             unsigned int *pmapped);
-static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
-                              struct tid_group **grp);
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo);
+static void __clear_tid_node(struct hfi1_filedata *fd,
+                             struct tid_rb_node *node);
 static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);
 
 static const struct mmu_interval_notifier_ops tid_mn_ops = {
         .invalidate = tid_rb_invalidate,
 };
+static const struct mmu_interval_notifier_ops tid_cover_ops = {
+        .invalidate = tid_cover_invalidate,
+};
 
 /*
  * Initialize context and file private data needed for Expected
@@ -253,53 +260,65 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
                      tididx = 0, mapped, mapped_pages = 0;
         u32 *tidlist = NULL;
         struct tid_user_buf *tidbuf;
+        unsigned long mmu_seq = 0;
 
         if (!PAGE_ALIGNED(tinfo->vaddr))
                 return -EINVAL;
+        if (tinfo->length == 0)
+                return -EINVAL;
 
         tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL);
         if (!tidbuf)
                 return -ENOMEM;
 
+        mutex_init(&tidbuf->cover_mutex);
         tidbuf->vaddr = tinfo->vaddr;
         tidbuf->length = tinfo->length;
         tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets),
                                 GFP_KERNEL);
         if (!tidbuf->psets) {
-                kfree(tidbuf);
-                return -ENOMEM;
+                ret = -ENOMEM;
+                goto fail_release_mem;
+        }
+
+        if (fd->use_mn) {
+                ret = mmu_interval_notifier_insert(
+                        &tidbuf->notifier, current->mm,
+                        tidbuf->vaddr, tidbuf->npages * PAGE_SIZE,
+                        &tid_cover_ops);
+                if (ret)
+                        goto fail_release_mem;
+                mmu_seq = mmu_interval_read_begin(&tidbuf->notifier);
         }
 
         pinned = pin_rcv_pages(fd, tidbuf);
         if (pinned <= 0) {
-                kfree(tidbuf->psets);
-                kfree(tidbuf);
-                return pinned;
+                ret = (pinned < 0) ? pinned : -ENOSPC;
+                goto fail_unpin;
         }
 
         /* Find sets of physically contiguous pages */
         tidbuf->n_psets = find_phys_blocks(tidbuf, pinned);
 
-        /*
-         * We don't need to access this under a lock since tid_used is per
-         * process and the same process cannot be in hfi1_user_exp_rcv_clear()
-         * and hfi1_user_exp_rcv_setup() at the same time.
-         */
+        /* Reserve the number of expected tids to be used. */
         spin_lock(&fd->tid_lock);
         if (fd->tid_used + tidbuf->n_psets > fd->tid_limit)
                 pageset_count = fd->tid_limit - fd->tid_used;
         else
                 pageset_count = tidbuf->n_psets;
+        fd->tid_used += pageset_count;
         spin_unlock(&fd->tid_lock);
 
-        if (!pageset_count)
-                goto bail;
+        if (!pageset_count) {
+                ret = -ENOSPC;
+                goto fail_unreserve;
+        }
 
         ngroups = pageset_count / dd->rcv_entries.group_size;
         tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL);
         if (!tidlist) {
                 ret = -ENOMEM;
-                goto nomem;
+                goto fail_unreserve;
         }
 
         tididx = 0;
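
The mmu_interval_notifier_insert()/mmu_interval_read_begin() pair added above is the kernel's collision-retry pattern for pinning user pages: register the notifier and snapshot its sequence before pinning, then re-check the sequence once setup is done (the mmu_interval_read_retry() in a later hunk) to detect a racing invalidation. A minimal sketch of the pattern, assuming hypothetical pin_user_range()/unpin_user_range() helpers in place of the driver's own pinning code:

/*
 * Sketch of the collision-retry pattern, not the driver's code.
 * pin_user_range()/unpin_user_range() are hypothetical stand-ins;
 * the mmu_interval_* calls are the real API from <linux/mmu_notifier.h>.
 */
#include <linux/mmu_notifier.h>
#include <linux/sched.h>

static int pin_user_range(unsigned long vaddr, unsigned long len);    /* hypothetical */
static void unpin_user_range(unsigned long vaddr, unsigned long len); /* hypothetical */

static int pin_with_invalidate_check(struct mmu_interval_notifier *mni,
                                     unsigned long vaddr, unsigned long len,
                                     const struct mmu_interval_notifier_ops *ops)
{
        unsigned long seq;
        int ret;

        /* register interest in [vaddr, vaddr + len) before pinning */
        ret = mmu_interval_notifier_insert(mni, current->mm, vaddr, len, ops);
        if (ret)
                return ret;

        seq = mmu_interval_read_begin(mni);     /* snapshot the sequence */

        ret = pin_user_range(vaddr, len);
        if (ret)
                goto out_remove;

        /* an invalidation since read_begin() means the pages may be stale */
        if (mmu_interval_read_retry(mni, seq)) {
                unpin_user_range(vaddr, len);
                ret = -EBUSY;
        }

out_remove:
        if (ret)
                mmu_interval_notifier_remove(mni);
        return ret;     /* on success the caller removes the notifier later */
}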
@@ -395,43 +414,78 @@ int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
         }
 unlock:
         mutex_unlock(&uctxt->exp_mutex);
-nomem:
         hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx,
                   mapped_pages, ret);
-        if (tididx) {
-                spin_lock(&fd->tid_lock);
-                fd->tid_used += tididx;
-                spin_unlock(&fd->tid_lock);
-                tinfo->tidcnt = tididx;
-                tinfo->length = mapped_pages * PAGE_SIZE;
-
-                if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
-                                 tidlist, sizeof(tidlist[0]) * tididx)) {
-                        /*
-                         * On failure to copy to the user level, we need to undo
-                         * everything done so far so we don't leak resources.
-                         */
-                        tinfo->tidlist = (unsigned long)&tidlist;
-                        hfi1_user_exp_rcv_clear(fd, tinfo);
-                        tinfo->tidlist = 0;
-                        ret = -EFAULT;
-                        goto bail;
+
+        /* fail if nothing was programmed, set error if none provided */
+        if (tididx == 0) {
+                if (ret >= 0)
+                        ret = -ENOSPC;
+                goto fail_unreserve;
+        }
+
+        /* adjust reserved tid_used to actual count */
+        spin_lock(&fd->tid_lock);
+        fd->tid_used -= pageset_count - tididx;
+        spin_unlock(&fd->tid_lock);
+
+        /* unpin all pages not covered by a TID */
+        unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages,
+                        false);
+
+        if (fd->use_mn) {
+                /* check for an invalidate during setup */
+                bool fail = false;
+
+                mutex_lock(&tidbuf->cover_mutex);
+                fail = mmu_interval_read_retry(&tidbuf->notifier, mmu_seq);
+                mutex_unlock(&tidbuf->cover_mutex);
+
+                if (fail) {
+                        ret = -EBUSY;
+                        goto fail_unprogram;
                 }
         }
 
-        /*
-         * If not everything was mapped (due to insufficient RcvArray entries,
-         * for example), unpin all unmapped pages so we can pin them next time.
-         */
-        if (mapped_pages != pinned)
-                unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages,
-                                (pinned - mapped_pages), false);
-bail:
+        tinfo->tidcnt = tididx;
+        tinfo->length = mapped_pages * PAGE_SIZE;
+
+        if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
+                         tidlist, sizeof(tidlist[0]) * tididx)) {
+                ret = -EFAULT;
+                goto fail_unprogram;
+        }
+
+        if (fd->use_mn)
+                mmu_interval_notifier_remove(&tidbuf->notifier);
+        kfree(tidbuf->pages);
         kfree(tidbuf->psets);
+        kfree(tidbuf);
         kfree(tidlist);
+        return 0;
+
+fail_unprogram:
+        /* unprogram, unmap, and unpin all allocated TIDs */
+        tinfo->tidlist = (unsigned long)tidlist;
+        hfi1_user_exp_rcv_clear(fd, tinfo);
+        tinfo->tidlist = 0;
+        pinned = 0;             /* nothing left to unpin */
+        pageset_count = 0;      /* nothing left reserved */
fail_unreserve:
+        spin_lock(&fd->tid_lock);
+        fd->tid_used -= pageset_count;
+        spin_unlock(&fd->tid_lock);
+fail_unpin:
+        if (fd->use_mn)
+                mmu_interval_notifier_remove(&tidbuf->notifier);
+        if (pinned > 0)
+                unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false);
+fail_release_mem:
         kfree(tidbuf->pages);
+        kfree(tidbuf->psets);
         kfree(tidbuf);
-        return ret > 0 ? 0 : ret;
+        kfree(tidlist);
+        return ret;
 }
 
 int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
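
The rewritten exit path replaces the old bail/nomem labels with a stacked unwind ladder: each failure site jumps to the label that undoes only the state acquired so far, and control falls through the labels below it in reverse acquisition order. Schematically (the alloc/pin/program steps and their undo functions here are illustrative placeholders, not the driver's names):

/*
 * Schematic of the fail_unprogram/fail_unreserve/fail_unpin/
 * fail_release_mem ladder above; all functions are placeholders.
 */
static int alloc_bufs(void);    /* placeholder */
static int pin_bufs(void);      /* placeholder */
static int program_hw(void);    /* placeholder */
static void unpin_bufs(void);   /* placeholder */
static void free_bufs(void);    /* placeholder */

static int setup_with_unwind(void)
{
        int ret;

        ret = alloc_bufs();
        if (ret)
                return ret;
        ret = pin_bufs();
        if (ret)
                goto fail_free;
        ret = program_hw();
        if (ret)
                goto fail_unpin;
        return 0;

fail_unpin:
        unpin_bufs();           /* later steps are unwound first ... */
fail_free:
        free_bufs();            /* ... then control falls through */
        return ret;
}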
@@ -452,7 +506,7 @@ int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
 
         mutex_lock(&uctxt->exp_mutex);
         for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
-                ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL);
+                ret = unprogram_rcvarray(fd, tidinfo[tididx]);
                 if (ret) {
                         hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
                                   ret);
@@ -706,6 +760,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
         }
 
         node->fdata = fd;
+        mutex_init(&node->invalidate_mutex);
         node->phys = page_to_phys(pages[0]);
         node->npages = npages;
         node->rcventry = rcventry;
@@ -721,11 +776,6 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
                                            &tid_mn_ops);
                 if (ret)
                         goto out_unmap;
-                /*
-                 * FIXME: This is in the wrong order, the notifier should be
-                 * established before the pages are pinned by pin_rcv_pages.
-                 */
-                mmu_interval_read_begin(&node->notifier);
         }
         fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;
 
@@ -745,8 +795,7 @@ static int set_rcvarray_entry(struct hfi1_filedata *fd,
         return -EFAULT;
 }
 
-static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
-                              struct tid_group **grp)
+static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo)
 {
         struct hfi1_ctxtdata *uctxt = fd->uctxt;
         struct hfi1_devdata *dd = uctxt->dd;
@@ -769,33 +818,41 @@ static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
         if (!node || node->rcventry != (uctxt->expected_base + rcventry))
                 return -EBADF;
 
-        if (grp)
-                *grp = node->grp;
-
         if (fd->use_mn)
                 mmu_interval_notifier_remove(&node->notifier);
         cacheless_tid_rb_remove(fd, node);
 
         return 0;
 }
 
-static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
+static void __clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
 {
         struct hfi1_ctxtdata *uctxt = fd->uctxt;
         struct hfi1_devdata *dd = uctxt->dd;
 
+        mutex_lock(&node->invalidate_mutex);
+        if (node->freed)
+                goto done;
+        node->freed = true;
+
         trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry,
                                  node->npages,
                                  node->notifier.interval_tree.start, node->phys,
                                  node->dma_addr);
 
-        /*
-         * Make sure device has seen the write before we unpin the
-         * pages.
-         */
+        /* Make sure device has seen the write before pages are unpinned */
         hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0);
 
         unpin_rcv_pages(fd, NULL, node, 0, node->npages, true);
+done:
+        mutex_unlock(&node->invalidate_mutex);
+}
+
+static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
+{
+        struct hfi1_ctxtdata *uctxt = fd->uctxt;
+
+        __clear_tid_node(fd, node);
 
         node->grp->used--;
         node->grp->map &= ~(1 << (node->rcventry - node->grp->base));
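
Splitting clear_tid_node() around __clear_tid_node() lets both the normal teardown path and the MMU invalidate callback (next hunks) share one idempotent core: invalidate_mutex plus the freed flag guarantee that the RcvArray flush and page unpin happen exactly once, whichever side runs first. Reduced to its essentials, the guard looks like this (a sketch; only the mutex-plus-flag shape is taken from the diff):

/* Once-only teardown guard, as used by __clear_tid_node() above. */
#include <linux/mutex.h>

struct teardown_guard {
        struct mutex lock;
        bool freed;
};

static void teardown_once(struct teardown_guard *g)
{
        mutex_lock(&g->lock);
        if (g->freed)
                goto done;      /* the other path already tore this down */
        g->freed = true;

        /* ... flush hardware state, then unpin the pages ... */

done:
        mutex_unlock(&g->lock);
}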
@@ -854,10 +911,16 @@ static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
         if (node->freed)
                 return true;
 
+        /* take action only if unmapping */
+        if (range->event != MMU_NOTIFY_UNMAP)
+                return true;
+
         trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt,
                                  node->notifier.interval_tree.start,
                                  node->rcventry, node->npages, node->dma_addr);
-        node->freed = true;
+
+        /* clear the hardware rcvarray entry */
+        __clear_tid_node(fdata, node);
 
         spin_lock(&fdata->invalid_lock);
         if (fdata->invalid_tid_idx < uctxt->expected_count) {
@@ -887,6 +950,23 @@ static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
         return true;
 }
 
+static bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
+                                 const struct mmu_notifier_range *range,
+                                 unsigned long cur_seq)
+{
+        struct tid_user_buf *tidbuf =
+                container_of(mni, struct tid_user_buf, notifier);
+
+        /* take action only if unmapping */
+        if (range->event == MMU_NOTIFY_UNMAP) {
+                mutex_lock(&tidbuf->cover_mutex);
+                mmu_interval_set_seq(mni, cur_seq);
+                mutex_unlock(&tidbuf->cover_mutex);
+        }
+
+        return true;
+}
+
 static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
                                     struct tid_rb_node *tnode)
 {
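
tid_cover_invalidate() is the producer half of the sequence handshake consumed in hfi1_user_exp_rcv_setup(): bumping the sequence with mmu_interval_set_seq() under cover_mutex makes the later mmu_interval_read_retry() against the mmu_interval_read_begin() snapshot return true, so setup backs out with -EBUSY instead of publishing TIDs whose pages were unmapped mid-setup. Condensed, the two sides pair up like this (fragments lifted from the hunks above, not standalone code):

/* producer: invalidate callback marks the covered range as changed */
mutex_lock(&tidbuf->cover_mutex);
mmu_interval_set_seq(mni, cur_seq);
mutex_unlock(&tidbuf->cover_mutex);

/* consumer: setup re-checks the snapshot taken before pinning */
mutex_lock(&tidbuf->cover_mutex);
fail = mmu_interval_read_retry(&tidbuf->notifier, mmu_seq);
mutex_unlock(&tidbuf->cover_mutex);

Holding cover_mutex on both sides orders the sequence bump against the check, so an invalidation cannot slip in between the retry test and the decision to publish the TID list.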