 
 #include "../internal.h"
 
+#define IOEND_BATCH_SIZE        4096
+
 /*
  * Structure allocated for each folio when block size < folio size
  * to track sub-folio uptodate status and I/O completions.
@@ -1039,7 +1041,7 @@ static void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
  * state, release holds on bios, and finally free up memory.  Do not use the
  * ioend after this.
  */
-static void
+static u32
 iomap_finish_ioend(struct iomap_ioend *ioend, int error)
 {
         struct inode *inode = ioend->io_inode;
@@ -1048,6 +1050,7 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
         u64 start = bio->bi_iter.bi_sector;
         loff_t offset = ioend->io_offset;
         bool quiet = bio_flagged(bio, BIO_QUIET);
+        u32 folio_count = 0;
 
         for (bio = &ioend->io_inline_bio; bio; bio = next) {
                 struct folio_iter fi;
@@ -1062,9 +1065,11 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
                 next = bio->bi_private;
 
                 /* walk all folios in bio, ending page IO on them */
-                bio_for_each_folio_all(fi, bio)
+                bio_for_each_folio_all(fi, bio) {
                         iomap_finish_folio_write(inode, fi.folio, fi.length,
                                         error);
+                        folio_count++;
+                }
                 bio_put(bio);
         }
         /* The ioend has been freed by bio_put() */
@@ -1074,20 +1079,36 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
 "%s: writeback error on inode %lu, offset %lld, sector %llu",
                         inode->i_sb->s_id, inode->i_ino, offset, start);
         }
+        return folio_count;
 }
 
+/*
+ * Ioend completion routine for merged bios. This can only be called from task
+ * contexts as merged ioends can be of unbound length. Hence we have to break up
+ * the writeback completions into manageable chunks to avoid long scheduler
+ * holdoffs. We aim to keep scheduler holdoffs down below 10ms so that we get
+ * good batch processing throughput without creating adverse scheduler latency
+ * conditions.
+ */
 void
 iomap_finish_ioends(struct iomap_ioend *ioend, int error)
 {
         struct list_head tmp;
+        u32 completions;
+
+        might_sleep();
 
         list_replace_init(&ioend->io_list, &tmp);
-        iomap_finish_ioend(ioend, error);
+        completions = iomap_finish_ioend(ioend, error);
 
         while (!list_empty(&tmp)) {
+                if (completions > IOEND_BATCH_SIZE * 8) {
+                        cond_resched();
+                        completions = 0;
+                }
                 ioend = list_first_entry(&tmp, struct iomap_ioend, io_list);
                 list_del_init(&ioend->io_list);
-                iomap_finish_ioend(ioend, error);
+                completions += iomap_finish_ioend(ioend, error);
         }
 }
 EXPORT_SYMBOL_GPL(iomap_finish_ioends);
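
The comment added above iomap_finish_ioends() describes the batching scheme: merged ioends can be arbitrarily long, so the completion path counts finished folios and yields the CPU roughly every IOEND_BATCH_SIZE * 8 completions. Below is a minimal userspace sketch of that pattern, not the kernel code itself; the chunk type, finish_chunk() helper and BATCH constant are illustrative stand-ins, and sched_yield() stands in for the kernel's cond_resched().

/*
 * Userspace sketch of the completion-batching pattern: walk an unbounded
 * list in chunks and yield the CPU between chunks so one completion run
 * cannot hog the scheduler. All names here are hypothetical.
 */
#include <sched.h>

#define BATCH 4096

struct chunk {
        struct chunk *next;
        unsigned int nr_items;
};

/* Pretend to complete one chunk; report how many items it covered. */
static unsigned int finish_chunk(struct chunk *c)
{
        return c->nr_items;
}

void finish_all(struct chunk *head)
{
        unsigned int completions = 0;

        for (struct chunk *c = head; c; c = c->next) {
                if (completions > BATCH * 8) {
                        sched_yield();  /* kernel code uses cond_resched() */
                        completions = 0;
                }
                completions += finish_chunk(c);
        }
}
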
@@ -1108,6 +1129,18 @@ iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next)
                 return false;
         if (ioend->io_offset + ioend->io_size != next->io_offset)
                 return false;
+        /*
+         * Do not merge physically discontiguous ioends. The filesystem
+         * completion functions will have to iterate the physical
+         * discontiguities even if we merge the ioends at a logical level, so
+         * we don't gain anything by merging physical discontiguities here.
+         *
+         * We cannot use bio->bi_iter.bi_sector here as it is modified during
+         * submission so does not point to the start sector of the bio at
+         * completion.
+         */
+        if (ioend->io_sector + (ioend->io_size >> 9) != next->io_sector)
+                return false;
         return true;
 }
 
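
The new merge check compares physical placement, not just file offsets: io_size is in bytes, so shifting right by 9 converts it to 512-byte sectors, and two ioends remain mergeable only when the first ends at exactly the sector where the second starts. A small sketch of that arithmetic follows, using a stripped-down stand-in for struct iomap_ioend rather than the real structure.

/* Toy model of the physical-contiguity test; the struct is illustrative. */
#include <stdbool.h>
#include <stdint.h>

#define SECTOR_SHIFT 9

struct toy_ioend {
        uint64_t io_sector;     /* start sector of the first bio */
        uint64_t io_size;       /* length of the ioend in bytes */
};

static bool physically_contiguous(const struct toy_ioend *a,
                                  const struct toy_ioend *b)
{
        /* bytes >> 9 gives 512-byte sectors; end of a must equal start of b */
        return a->io_sector + (a->io_size >> SECTOR_SHIFT) == b->io_sector;
}
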
@@ -1209,8 +1242,10 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
         ioend->io_flags = wpc->iomap.flags;
         ioend->io_inode = inode;
         ioend->io_size = 0;
+        ioend->io_folios = 0;
         ioend->io_offset = offset;
         ioend->io_bio = bio;
+        ioend->io_sector = sector;
         return ioend;
 }
 
@@ -1251,6 +1286,13 @@ iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset,
                 return false;
         if (sector != bio_end_sector(wpc->ioend->io_bio))
                 return false;
+        /*
+         * Limit ioend bio chain lengths to minimise IO completion latency. This
+         * also prevents long tight loops ending page writeback on all the
+         * folios in the ioend.
+         */
+        if (wpc->ioend->io_folios >= IOEND_BATCH_SIZE)
+                return false;
         return true;
 }
 
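
The cap above bounds how many folios a single ioend can accumulate, which in turn bounds the folio-completion loop in iomap_finish_ioend(). A toy version of the check is sketched below; the builder struct and helper are hypothetical stand-ins for the real writepage context, shown only to make the cutoff explicit.

/* Illustrative only: refuse to grow a batch past the per-ioend folio cap. */
#include <stdbool.h>
#include <stdint.h>

#define IOEND_BATCH_SIZE 4096

struct toy_builder {
        uint32_t io_folios;     /* folios already attached to this batch */
};

static bool can_add_folio(const struct toy_builder *b)
{
        return b->io_folios < IOEND_BATCH_SIZE;
}
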
@@ -1335,6 +1377,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
                                         &submit_list);
                 count++;
         }
+        if (count)
+                wpc->ioend->io_folios++;
 
         WARN_ON_ONCE(!wpc->ioend && !list_empty(&submit_list));
         WARN_ON_ONCE(!folio_test_locked(folio));