@@ -802,8 +802,7 @@ static noinline void folios_trunc(folios *fs, struct folio **fi)
 static int __bch2_buffered_write(struct bch_inode_info *inode,
				 struct address_space *mapping,
				 struct iov_iter *iter,
-				 loff_t pos, unsigned len,
-				 bool inode_locked)
+				 loff_t pos, unsigned len)
 {
	struct bch_fs *c = inode->v.i_sb->s_fs_info;
	struct bch2_folio_reservation res;
@@ -827,15 +826,6 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
 
	BUG_ON(!fs.nr);
 
-	/*
-	 * If we're not using the inode lock, we need to lock all the folios for
-	 * atomiticity of writes vs. other writes:
-	 */
-	if (!inode_locked && folio_end_pos(darray_last(fs)) < end) {
-		ret = -BCH_ERR_need_inode_lock;
-		goto out;
-	}
-
	f = darray_first(fs);
	if (pos != folio_pos(f) && !folio_test_uptodate(f)) {
		ret = bch2_read_single_folio(f, mapping);
@@ -932,10 +922,8 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
	end = pos + copied;
 
	spin_lock(&inode->v.i_lock);
-	if (end > inode->v.i_size) {
-		BUG_ON(!inode_locked);
+	if (end > inode->v.i_size)
		i_size_write(&inode->v, end);
-	}
	spin_unlock(&inode->v.i_lock);
 
	f_pos = pos;
@@ -979,68 +967,12 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct bch_inode_info *inode = file_bch_inode(file);
-	loff_t pos;
-	bool inode_locked = false;
-	ssize_t written = 0, written2 = 0, ret = 0;
-
-	/*
-	 * We don't take the inode lock unless i_size will be changing. Folio
-	 * locks provide exclusion with other writes, and the pagecache add lock
-	 * provides exclusion with truncate and hole punching.
-	 *
-	 * There is one nasty corner case where atomicity would be broken
-	 * without great care: when copying data from userspace to the page
-	 * cache, we do that with faults disable - a page fault would recurse
-	 * back into the filesystem, taking filesystem locks again, and
-	 * deadlock; so it's done with faults disabled, and we fault in the user
-	 * buffer when we aren't holding locks.
-	 *
-	 * If we do part of the write, but we then race and in the userspace
-	 * buffer have been evicted and are no longer resident, then we have to
-	 * drop our folio locks to re-fault them in, breaking write atomicity.
-	 *
-	 * To fix this, we restart the write from the start, if we weren't
-	 * holding the inode lock.
-	 *
-	 * There is another wrinkle after that; if we restart the write from the
-	 * start, and then get an unrecoverable error, we _cannot_ claim to
-	 * userspace that we did not write data we actually did - so we must
-	 * track (written2) the most we ever wrote.
-	 */
-
-	if ((iocb->ki_flags & IOCB_APPEND) ||
-	    (iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v))) {
-		inode_lock(&inode->v);
-		inode_locked = true;
-	}
-
-	ret = generic_write_checks(iocb, iter);
-	if (ret <= 0)
-		goto unlock;
-
-	ret = file_remove_privs_flags(file, !inode_locked ? IOCB_NOWAIT : 0);
-	if (ret) {
-		if (!inode_locked) {
-			inode_lock(&inode->v);
-			inode_locked = true;
-			ret = file_remove_privs_flags(file, 0);
-		}
-		if (ret)
-			goto unlock;
-	}
-
-	ret = file_update_time(file);
-	if (ret)
-		goto unlock;
-
-	pos = iocb->ki_pos;
+	loff_t pos = iocb->ki_pos;
+	ssize_t written = 0;
+	int ret = 0;
 
	bch2_pagecache_add_get(inode);
 
-	if (!inode_locked &&
-	    (iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v)))
-		goto get_inode_lock;
-
	do {
		unsigned offset = pos & (PAGE_SIZE - 1);
		unsigned bytes = iov_iter_count(iter);
@@ -1065,17 +997,12 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
			}
		}
 
-		if (unlikely(bytes != iov_iter_count(iter) && !inode_locked))
-			goto get_inode_lock;
-
		if (unlikely(fatal_signal_pending(current))) {
			ret = -EINTR;
			break;
		}
 
-		ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes, inode_locked);
-		if (ret == -BCH_ERR_need_inode_lock)
-			goto get_inode_lock;
+		ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes);
		if (unlikely(ret < 0))
			break;
 
@@ -1096,46 +1023,50 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
		}
		pos += ret;
		written += ret;
-		written2 = max(written, written2);
-
-		if (ret != bytes && !inode_locked)
-			goto get_inode_lock;
		ret = 0;
 
		balance_dirty_pages_ratelimited(mapping);
-
-		if (0) {
-get_inode_lock:
-			bch2_pagecache_add_put(inode);
-			inode_lock(&inode->v);
-			inode_locked = true;
-			bch2_pagecache_add_get(inode);
-
-			iov_iter_revert(iter, written);
-			pos -= written;
-			written = 0;
-			ret = 0;
-		}
	} while (iov_iter_count(iter));
-	bch2_pagecache_add_put(inode);
-unlock:
-	if (inode_locked)
-		inode_unlock(&inode->v);
 
-	iocb->ki_pos += written;
+	bch2_pagecache_add_put(inode);
 
-	ret = max(written, written2) ?: ret;
-	if (ret > 0)
-		ret = generic_write_sync(iocb, ret);
-	return ret;
+	return written ? written : ret;
 }
 
-ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
-	ssize_t ret = iocb->ki_flags & IOCB_DIRECT
-		? bch2_direct_write(iocb, iter)
-		: bch2_buffered_write(iocb, iter);
+	struct file *file = iocb->ki_filp;
+	struct bch_inode_info *inode = file_bch_inode(file);
+	ssize_t ret;
+
+	if (iocb->ki_flags & IOCB_DIRECT) {
+		ret = bch2_direct_write(iocb, from);
+		goto out;
+	}
+
+	inode_lock(&inode->v);
+
+	ret = generic_write_checks(iocb, from);
+	if (ret <= 0)
+		goto unlock;
+
+	ret = file_remove_privs(file);
+	if (ret)
+		goto unlock;
+
+	ret = file_update_time(file);
+	if (ret)
+		goto unlock;
+
+	ret = bch2_buffered_write(iocb, from);
+	if (likely(ret > 0))
+		iocb->ki_pos += ret;
+unlock:
+	inode_unlock(&inode->v);
 
+	if (ret > 0)
+		ret = generic_write_sync(iocb, ret);
+out:
	return bch2_err_class(ret);
 }
 
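
To make the atomicity reasoning in the deleted comment concrete, here is a minimal userspace model of the restart scheme this change removes. This is not bcachefs code: copy_chunk_nofault(), model_buffered_write(), and the faulted flag are stand-ins invented for the sketch. A copy done with faults disabled can come up short when the user buffer is no longer resident; without the inode lock, the only way to keep the write atomic is to revert and restart from the beginning under the lock, while written2 remembers the most that was ever written so an eventual error cannot under-report progress to userspace.

/* Userspace sketch only -- models the pre-revert retry scheme, not bcachefs. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

/* Pretend nofault copy: comes up short once, modeling an evicted user buffer. */
static size_t copy_chunk_nofault(char *dst, const char *src, size_t len)
{
	static bool faulted;

	if (!faulted && len > 4) {
		faulted = true;
		memcpy(dst, src, 4);
		return 4;	/* short copy: caller must fault the buffer in */
	}
	memcpy(dst, src, len);
	return len;
}

static ssize_t model_buffered_write(char *dst, const char *src, size_t len,
				    bool inode_locked)
{
	ssize_t written = 0, written2 = 0;

	while ((size_t) written < len) {
		size_t want = len - written;
		size_t n = copy_chunk_nofault(dst + written, src + written, want);

		written += n;
		if (written > written2)
			written2 = written;	/* most we ever wrote */

		/*
		 * Short copy without the inode lock: the folio locks would
		 * have to be dropped to fault the buffer in, so atomicity
		 * demands restarting from the start, this time holding the
		 * lock -- this models the get_inode_lock: path being removed.
		 */
		if (n < want && !inode_locked) {
			inode_locked = true;	/* inode_lock() */
			written = 0;		/* iov_iter_revert() */
		}
	}
	return written > written2 ? written : written2;
}

int main(void)
{
	char dst[16];
	ssize_t n = model_buffered_write(dst, "hello, world", 12, false);

	printf("wrote %zd bytes: %.12s\n", n, dst);
	return 0;
}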
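For comparison, a sketch of the locking after this change, again as a standalone userspace model rather than bcachefs code: pthread mutexes stand in for inode_lock() and the pagecache add lock, and a memcpy into a flat buffer stands in for the folio machinery. Because bch2_write_iter() now takes the inode lock unconditionally around the write checks and the whole buffered write, the chunk loop never has to restart, which is why __bch2_buffered_write() loses its inode_locked parameter above.

/* Userspace sketch only -- models the post-revert locking, not bcachefs. */
#include <pthread.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

struct fake_inode {
	pthread_mutex_t i_lock;		/* stands in for inode_lock() */
	pthread_mutex_t pagecache_add;	/* stands in for bch2_pagecache_add_get() */
	char data[4096];
	size_t i_size;
};

/* Stand-in for __bch2_buffered_write(): copy one chunk, report bytes copied. */
static ssize_t fake_buffered_write_chunk(struct fake_inode *inode,
					 const char *buf, size_t pos, size_t len)
{
	memcpy(inode->data + pos, buf, len);
	if (pos + len > inode->i_size)
		inode->i_size = pos + len;
	return (ssize_t) len;
}

/*
 * Stand-in for bch2_write_iter() after the revert: the inode lock is held
 * across the entire write, so the chunk loop needs no restart path.
 */
static ssize_t fake_write_iter(struct fake_inode *inode, const char *buf,
			       size_t pos, size_t len)
{
	ssize_t written = 0;

	pthread_mutex_lock(&inode->i_lock);		/* inode_lock(&inode->v) */
	pthread_mutex_lock(&inode->pagecache_add);	/* bch2_pagecache_add_get() */

	while (len) {
		size_t chunk = len > 1024 ? 1024 : len;
		ssize_t ret = fake_buffered_write_chunk(inode, buf, pos, chunk);

		if (ret < 0)
			break;
		pos += ret;
		buf += ret;
		len -= ret;
		written += ret;
	}

	pthread_mutex_unlock(&inode->pagecache_add);
	pthread_mutex_unlock(&inode->i_lock);		/* inode_unlock(&inode->v) */
	return written;
}

int main(void)
{
	struct fake_inode inode = {
		.i_lock		= PTHREAD_MUTEX_INITIALIZER,
		.pagecache_add	= PTHREAD_MUTEX_INITIALIZER,
	};
	ssize_t n = fake_write_iter(&inode, "hello, world", 0, 12);

	printf("wrote %zd bytes, i_size now %zu\n", n, inode.i_size);
	return 0;
}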