@@ -9078,9 +9078,9 @@ static ssize_t btrfs_encoded_read_inline(
 }
 
 struct btrfs_encoded_read_private {
-	wait_queue_head_t wait;
+	struct completion done;
 	void *uring_ctx;
-	atomic_t pending;
+	refcount_t pending_refs;
 	blk_status_t status;
 };
 
@@ -9099,14 +9099,14 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
 		 */
 		WRITE_ONCE(priv->status, bbio->bio.bi_status);
 	}
-	if (atomic_dec_and_test(&priv->pending)) {
+	if (refcount_dec_and_test(&priv->pending_refs)) {
 		int err = blk_status_to_errno(READ_ONCE(priv->status));
 
 		if (priv->uring_ctx) {
 			btrfs_uring_read_extent_endio(priv->uring_ctx, err);
 			kfree(priv);
 		} else {
-			wake_up(&priv->wait);
+			complete(&priv->done);
 		}
 	}
 	bio_put(&bbio->bio);
@@ -9126,8 +9126,8 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
 	if (!priv)
 		return -ENOMEM;
 
-	init_waitqueue_head(&priv->wait);
-	atomic_set(&priv->pending, 1);
+	init_completion(&priv->done);
+	refcount_set(&priv->pending_refs, 1);
 	priv->status = 0;
 	priv->uring_ctx = uring_ctx;
 
@@ -9140,7 +9140,7 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
 		size_t bytes = min_t(u64, disk_io_size, PAGE_SIZE);
 
 		if (bio_add_page(&bbio->bio, pages[i], bytes, 0) < bytes) {
-			atomic_inc(&priv->pending);
+			refcount_inc(&priv->pending_refs);
 			btrfs_submit_bbio(bbio, 0);
 
 			bbio = btrfs_bio_alloc(BIO_MAX_VECS, REQ_OP_READ, fs_info,
@@ -9155,11 +9155,11 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
 		disk_io_size -= bytes;
 	} while (disk_io_size);
 
-	atomic_inc(&priv->pending);
+	refcount_inc(&priv->pending_refs);
 	btrfs_submit_bbio(bbio, 0);
 
 	if (uring_ctx) {
-		if (atomic_dec_return(&priv->pending) == 0) {
+		if (refcount_dec_and_test(&priv->pending_refs)) {
 			ret = blk_status_to_errno(READ_ONCE(priv->status));
 			btrfs_uring_read_extent_endio(uring_ctx, ret);
 			kfree(priv);
@@ -9168,8 +9168,8 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
 
 		return -EIOCBQUEUED;
 	} else {
-		if (atomic_dec_return(&priv->pending) != 0)
-			io_wait_event(priv->wait, !atomic_read(&priv->pending));
+		if (!refcount_dec_and_test(&priv->pending_refs))
+			wait_for_completion_io(&priv->done);
 		/* See btrfs_encoded_read_endio() for ordering. */
 		ret = blk_status_to_errno(READ_ONCE(priv->status));
 		kfree(priv);
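
These six hunks are one mechanical conversion: the open-coded atomic_t counter plus wait queue becomes the idiomatic refcount_t plus struct completion pair. The submitter holds one reference, every in-flight bio holds one, and whichever context drops the count to zero either completes the io_uring request or fires the completion; refcount_dec_and_test() gives release ordering on the drop and acquire ordering to the thread that observes zero, which is what keeps the READ_ONCE(priv->status) after the wait safe. A minimal userspace analogue of the pattern, with pthreads standing in for struct completion (names here are illustrative, not kernel APIs):

/* completion_refcount.c: build with `cc -pthread completion_refcount.c` */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_IOS 4

struct read_private {
	atomic_int pending_refs;	/* stands in for refcount_t */
	pthread_mutex_t lock;		/* lock + cond + done emulate struct completion */
	pthread_cond_t cond;
	int done;
};

/* Drop one reference; the final drop signals the waiter, like complete(). */
static void put_ref(struct read_private *priv)
{
	if (atomic_fetch_sub(&priv->pending_refs, 1) == 1) {
		pthread_mutex_lock(&priv->lock);
		priv->done = 1;
		pthread_cond_signal(&priv->cond);
		pthread_mutex_unlock(&priv->lock);
	}
}

/* Each "bio" completion drops the reference taken at submission time. */
static void *io_endio(void *arg)
{
	put_ref(arg);
	return NULL;
}

int main(void)
{
	struct read_private priv = {
		.pending_refs = 1,	/* refcount_set(..., 1): the submitter's ref */
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.cond = PTHREAD_COND_INITIALIZER,
	};
	pthread_t ios[NR_IOS];

	for (int i = 0; i < NR_IOS; i++) {
		atomic_fetch_add(&priv.pending_refs, 1);	/* refcount_inc() */
		pthread_create(&ios[i], NULL, io_endio, &priv);
	}

	put_ref(&priv);			/* drop the submitter's reference */

	pthread_mutex_lock(&priv.lock);	/* wait_for_completion_io() */
	while (!priv.done)
		pthread_cond_wait(&priv.cond, &priv.lock);
	pthread_mutex_unlock(&priv.lock);

	for (int i = 0; i < NR_IOS; i++)
		pthread_join(ios[i], NULL);
	puts("all in-flight reads drained");
	return 0;
}

The submitter's own initial reference is what makes the scheme race-free: the count cannot reach zero while bios are still being added to the batch.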
@@ -9799,15 +9799,25 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
 	struct extent_state *cached_state = NULL;
-	struct extent_map *em = NULL;
 	struct btrfs_chunk_map *map = NULL;
 	struct btrfs_device *device = NULL;
 	struct btrfs_swap_info bsi = {
 		.lowest_ppage = (sector_t)-1ULL,
 	};
+	struct btrfs_backref_share_check_ctx *backref_ctx = NULL;
+	struct btrfs_path *path = NULL;
 	int ret = 0;
 	u64 isize;
-	u64 start;
+	u64 prev_extent_end = 0;
+
+	/*
+	 * Acquire the inode's mmap lock to prevent races with memory mapped
+	 * writes, as they could happen after we flush delalloc below and before
+	 * we lock the extent range further below. The inode was already locked
+	 * up in the call chain.
+	 */
+	btrfs_assert_inode_locked(BTRFS_I(inode));
+	down_write(&BTRFS_I(inode)->i_mmap_lock);
 
 	/*
 	 * If the swap file was just created, make sure delalloc is done. If the
@@ -9816,22 +9826,32 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 	 */
 	ret = btrfs_wait_ordered_range(BTRFS_I(inode), 0, (u64)-1);
 	if (ret)
-		return ret;
+		goto out_unlock_mmap;
 
 	/*
 	 * The inode is locked, so these flags won't change after we check them.
 	 */
 	if (BTRFS_I(inode)->flags & BTRFS_INODE_COMPRESS) {
 		btrfs_warn(fs_info, "swapfile must not be compressed");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out_unlock_mmap;
 	}
 	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW)) {
 		btrfs_warn(fs_info, "swapfile must not be copy-on-write");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out_unlock_mmap;
 	}
 	if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
 		btrfs_warn(fs_info, "swapfile must not be checksummed");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out_unlock_mmap;
+	}
+
+	path = btrfs_alloc_path();
+	backref_ctx = btrfs_alloc_backref_share_check_ctx();
+	if (!path || !backref_ctx) {
+		ret = -ENOMEM;
+		goto out_unlock_mmap;
 	}
 
 	/*
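
Every early return between down_write() and the old exit sites now funnels through out_unlock_mmap: any path taken after the mmap lock is acquired must pair with up_write(), and btrfs_free_path()/btrfs_free_backref_share_ctx() both tolerate NULL, so one label serves failures before and after the allocations. A generic sketch of that goto-cleanup ladder, with a pthread rwlock and malloc standing in for the btrfs lock and helpers (stand-ins, not the real APIs):

/* goto_ladder.c: acquire in order, release in reverse, one exit path */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_rwlock_t i_mmap_lock = PTHREAD_RWLOCK_INITIALIZER;

static int activate(void)
{
	void *path = NULL;		/* placeholder for btrfs_alloc_path() */
	void *backref_ctx = NULL;	/* placeholder for the backref context */
	int ret = 0;

	pthread_rwlock_wrlock(&i_mmap_lock);	/* down_write(&...->i_mmap_lock) */

	path = malloc(64);
	backref_ctx = malloc(64);
	if (!path || !backref_ctx) {
		ret = -ENOMEM;
		goto out_unlock_mmap;	/* one of the two may still be NULL */
	}

	/* ... flag validation and extent walking would happen here ... */

out_unlock_mmap:
	pthread_rwlock_unlock(&i_mmap_lock);	/* up_write() */
	free(backref_ctx);	/* free(NULL) is a no-op, like btrfs_free_path(NULL) */
	free(path);
	return ret;
}

int main(void)
{
	int ret = activate();

	if (ret)
		fprintf(stderr, "activate failed: %d\n", ret);
	return ret ? 1 : 0;
}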
@@ -9846,7 +9866,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 	if (!btrfs_exclop_start(fs_info, BTRFS_EXCLOP_SWAP_ACTIVATE)) {
 		btrfs_warn(fs_info,
 			   "cannot activate swapfile while exclusive operation is running");
-		return -EBUSY;
+		ret = -EBUSY;
+		goto out_unlock_mmap;
 	}
 
 	/*
@@ -9860,7 +9881,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 		btrfs_exclop_finish(fs_info);
 		btrfs_warn(fs_info,
 			   "cannot activate swapfile because snapshot creation is in progress");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto out_unlock_mmap;
 	}
 	/*
 	 * Snapshots can create extents which require COW even if NODATACOW is
@@ -9881,32 +9903,48 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 		btrfs_warn(fs_info,
 			   "cannot activate swapfile because subvolume %llu is being deleted",
 			   btrfs_root_id(root));
-		return -EPERM;
+		ret = -EPERM;
+		goto out_unlock_mmap;
 	}
 	atomic_inc(&root->nr_swapfiles);
 	spin_unlock(&root->root_item_lock);
 
 	isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
 
 	lock_extent(io_tree, 0, isize - 1, &cached_state);
-	start = 0;
-	while (start < isize) {
-		u64 logical_block_start, physical_block_start;
+	while (prev_extent_end < isize) {
+		struct btrfs_key key;
+		struct extent_buffer *leaf;
+		struct btrfs_file_extent_item *ei;
 		struct btrfs_block_group *bg;
-		u64 len = isize - start;
+		u64 logical_block_start;
+		u64 physical_block_start;
+		u64 extent_gen;
+		u64 disk_bytenr;
+		u64 len;
 
-		em = btrfs_get_extent(BTRFS_I(inode), NULL, start, len);
-		if (IS_ERR(em)) {
-			ret = PTR_ERR(em);
+		key.objectid = btrfs_ino(BTRFS_I(inode));
+		key.type = BTRFS_EXTENT_DATA_KEY;
+		key.offset = prev_extent_end;
+
+		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+		if (ret < 0)
 			goto out;
-		}
 
-		if (em->disk_bytenr == EXTENT_MAP_HOLE) {
+		/*
+		 * If key not found it means we have an implicit hole (NO_HOLES
+		 * is enabled).
+		 */
+		if (ret > 0) {
 			btrfs_warn(fs_info, "swapfile must not have holes");
 			ret = -EINVAL;
 			goto out;
 		}
-		if (em->disk_bytenr == EXTENT_MAP_INLINE) {
+
+		leaf = path->nodes[0];
+		ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item);
+
+		if (btrfs_file_extent_type(leaf, ei) == BTRFS_FILE_EXTENT_INLINE) {
 			/*
 			 * It's unlikely we'll ever actually find ourselves
 			 * here, as a file small enough to fit inline won't be
@@ -9918,23 +9956,45 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 			ret = -EINVAL;
 			goto out;
 		}
-		if (extent_map_is_compressed(em)) {
+
+		if (btrfs_file_extent_compression(leaf, ei) != BTRFS_COMPRESS_NONE) {
 			btrfs_warn(fs_info, "swapfile must not be compressed");
 			ret = -EINVAL;
 			goto out;
 		}
 
-		logical_block_start = extent_map_block_start(em) + (start - em->start);
-		len = min(len, em->len - (start - em->start));
-		free_extent_map(em);
-		em = NULL;
+		disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, ei);
+		if (disk_bytenr == 0) {
+			btrfs_warn(fs_info, "swapfile must not have holes");
+			ret = -EINVAL;
+			goto out;
+		}
+
+		logical_block_start = disk_bytenr + btrfs_file_extent_offset(leaf, ei);
+		extent_gen = btrfs_file_extent_generation(leaf, ei);
+		prev_extent_end = btrfs_file_extent_end(path);
 
-		ret = can_nocow_extent(inode, start, &len, NULL, false, true);
+		if (prev_extent_end > isize)
+			len = isize - key.offset;
+		else
+			len = btrfs_file_extent_num_bytes(leaf, ei);
+
+		backref_ctx->curr_leaf_bytenr = leaf->start;
+
+		/*
+		 * Don't need the path anymore, release to avoid deadlocks when
+		 * calling btrfs_is_data_extent_shared() because when joining a
+		 * transaction it can block waiting for the current one's commit
+		 * which in turn may be trying to lock the same leaf to flush
+		 * delayed items for example.
+		 */
+		btrfs_release_path(path);
+
+		ret = btrfs_is_data_extent_shared(BTRFS_I(inode), disk_bytenr,
+						  extent_gen, backref_ctx);
 		if (ret < 0) {
 			goto out;
-		} else if (ret) {
-			ret = 0;
-		} else {
+		} else if (ret > 0) {
 			btrfs_warn(fs_info,
 				   "swapfile must not be copy-on-write");
 			ret = -EINVAL;
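
The rewritten loop walks the file extent items directly (btrfs_search_slot() on BTRFS_EXTENT_DATA_KEY) instead of going through cached extent maps, and rejects everything a swapfile cannot tolerate: implicit NO_HOLES gaps, zero disk_bytenr holes, inline extents, compression, and extents shared with snapshots or reflinks via btrfs_is_data_extent_shared(), the same backref walk fiemap uses to set its SHARED bit. Roughly the same per-extent checks can be reproduced from userspace with the FIEMAP ioctl; this is a sketch for intuition only, not how swapon(2) works internally:

/* fiemap_swapcheck.c: approximate the per-extent swapfile checks from userspace */
#include <fcntl.h>
#include <linux/fiemap.h>
#include <linux/fs.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>

#define MAX_EXTENTS 128	/* one batch is enough for a demo; a real tool
			 * would loop until it sees FIEMAP_EXTENT_LAST */

int main(int argc, char **argv)
{
	struct fiemap *fm;
	__u64 expected = 0;
	int fd, ok = 1;

	if (argc != 2 || (fd = open(argv[1], O_RDONLY)) < 0) {
		fprintf(stderr, "usage: %s <file>\n", argv[0]);
		return 1;
	}

	fm = calloc(1, sizeof(*fm) + MAX_EXTENTS * sizeof(struct fiemap_extent));
	fm->fm_length = ~0ULL;			/* map the whole file */
	fm->fm_flags = FIEMAP_FLAG_SYNC;	/* flush delalloc first */
	fm->fm_extent_count = MAX_EXTENTS;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0) {
		perror("FS_IOC_FIEMAP");
		return 1;
	}

	for (__u32 i = 0; i < fm->fm_mapped_extents; i++) {
		struct fiemap_extent *fe = &fm->fm_extents[i];

		if (fe->fe_logical > expected) {	/* gap == hole */
			printf("hole before offset %llu\n",
			       (unsigned long long)fe->fe_logical);
			ok = 0;
		}
		if (fe->fe_flags & FIEMAP_EXTENT_DATA_INLINE) {
			puts("inline extent");
			ok = 0;
		}
		if (fe->fe_flags & FIEMAP_EXTENT_ENCODED) {
			puts("compressed/encoded extent");
			ok = 0;
		}
		if (fe->fe_flags & FIEMAP_EXTENT_SHARED) {
			puts("shared extent (writing would COW)");
			ok = 0;
		}
		expected = fe->fe_logical + fe->fe_length;
	}
	/* a trailing hole (expected < file size) would also disqualify it */
	puts(ok ? "no obvious swapfile blockers" : "not usable as a swapfile");
	return ok ? 0 : 1;
}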
@@ -9969,7 +10029,6 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 
 		physical_block_start = (map->stripes[0].physical +
 					(logical_block_start - map->start));
-		len = min(len, map->chunk_len - (logical_block_start - map->start));
 		btrfs_free_chunk_map(map);
 		map = NULL;
 
@@ -10010,20 +10069,23 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 				if (ret)
 					goto out;
 			}
-			bsi.start = start;
+			bsi.start = key.offset;
 			bsi.block_start = physical_block_start;
 			bsi.block_len = len;
 		}
 
-		start += len;
+		if (fatal_signal_pending(current)) {
+			ret = -EINTR;
+			goto out;
+		}
+
+		cond_resched();
 	}
 
 	if (bsi.block_len)
 		ret = btrfs_add_swap_extent(sis, &bsi);
 
 out:
-	if (!IS_ERR_OR_NULL(em))
-		free_extent_map(em);
 	if (!IS_ERR_OR_NULL(map))
 		btrfs_free_chunk_map(map);
 
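
Replacing start += len with prev_extent_end = btrfs_file_extent_end(path) advances the walk by whole extent items, and because a badly fragmented file can make this loop very long, it now carries an explicit cancellation point (fatal_signal_pending() returning -EINTR) and a cond_resched(). The userspace shape of that idiom, as an illustration only:

/* cancel_point.c: userspace shape of fatal_signal_pending() + cond_resched() */
#include <sched.h>
#include <signal.h>
#include <stdio.h>

static volatile sig_atomic_t interrupted;

static void on_sigint(int sig)
{
	(void)sig;
	interrupted = 1;
}

int main(void)
{
	struct sigaction sa = { .sa_handler = on_sigint };

	sigemptyset(&sa.sa_mask);
	sigaction(SIGINT, &sa, NULL);

	for (long i = 0; i < 100000000L; i++) {
		/* the moral equivalent of fatal_signal_pending(current) */
		if (interrupted) {
			fprintf(stderr, "bailing out at iteration %ld\n", i);
			return 1;	/* the kernel loop returns -EINTR */
		}
		/* per-extent work would go here */
		if ((i & 0xfffff) == 0)
			sched_yield();	/* a rough stand-in for cond_resched() */
	}
	puts("finished");
	return 0;
}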
@@ -10036,6 +10098,10 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
 
 	btrfs_exclop_finish(fs_info);
 
+out_unlock_mmap:
+	up_write(&BTRFS_I(inode)->i_mmap_lock);
+	btrfs_free_backref_share_ctx(backref_ctx);
+	btrfs_free_path(path);
 	if (ret)
 		return ret;
 