|
19 | 19 | #include "xfs_reflink.h"
|
20 | 20 | #include "xfs_errortag.h"
|
21 | 21 | #include "xfs_error.h"
|
| 22 | +#include "xfs_icache.h" |
22 | 23 |
|
23 | 24 | struct xfs_writepage_ctx {
|
24 | 25 | struct iomap_writepage_ctx ctx;
|
@@ -533,7 +534,39 @@ xfs_vm_swap_activate(
|
533 | 534 | struct file *swap_file,
|
534 | 535 | sector_t *span)
|
535 | 536 | {
|
536 |
| - sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev; |
| 537 | + struct xfs_inode *ip = XFS_I(file_inode(swap_file)); |
| 538 | + |
| 539 | + /* |
| 540 | + * Swap file activation can race against concurrent shared extent |
| 541 | + * removal in files that have been cloned. If this happens, |
| 542 | + * iomap_swapfile_iter() can fail because it encounters a shared |
| 543 | + * extent even though an operation is in progress to remove those |
| 544 | + * shared extents. |
| 545 | + * |
| 546 | + * This race becomes problematic when we defer extent removal |
| 547 | + * operations beyond the end of a syscall (i.e. use async background |
| 548 | + * processing algorithms). Users think the extents are no longer |
| 549 | + * shared, but iomap_swapfile_iter() still sees them as shared |
| 550 | + * because the refcountbt entries for the extents being removed have |
| 551 | + * not yet been updated. Hence the swapon call fails unexpectedly. |
| 552 | + * |
| 553 | + * The race condition is currently most obvious from the unlink() |
| 554 | + * operation as extent removal is deferred until after the last |
| 555 | + * reference to the inode goes away. We then process the extent |
| 556 | + * removal asynchronously, which triggers the "syscall completed but |
| 557 | + * work not done" condition mentioned above. To close this race |
| 558 | + * window, we need to flush any pending inodegc operations to ensure |
| 559 | + * they have updated the refcountbt records before we try to map the |
| 560 | + * swapfile. |
| 561 | + */ |
| 562 | + xfs_inodegc_flush(ip->i_mount); |
| 563 | + |
| 564 | + /* |
| 565 | + * Direct the swap code to the correct block device when this file |
| 566 | + * sits on the RT device. |
| 567 | + */ |
| 568 | + sis->bdev = xfs_inode_buftarg(ip)->bt_bdev; |
| 569 | + |
537 | 570 | return iomap_swapfile_activate(sis, swap_file, span,
|
538 | 571 | &xfs_read_iomap_ops);
|
539 | 572 | }
|
|
0 commit comments