Skip to content

Commit 524446e

Browse files
committed
Merge tag 'iomap-5.17-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull iomap fix from Darrick Wong: "A single bugfix for iomap. The fix should eliminate occasional complaints about stall warnings when a lot of writeback IO completes all at once and we have to then go clearing status on a large number of folios. Summary: - Limit the length of ioend chains in writeback so that we don't trip the softlockup watchdog and to limit long tail latency on clearing PageWriteback" * tag 'iomap-5.17-fixes-1' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: xfs, iomap: limit individual ioend chain lengths in writeback
2 parents 0457e51 + ebb7fb1 commit 524446e

File tree

3 files changed

+65
-5
lines changed

3 files changed

+65
-5
lines changed

fs/iomap/buffered-io.c

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121

2222
#include "../internal.h"
2323

24+
#define IOEND_BATCH_SIZE 4096
25+
2426
/*
2527
* Structure allocated for each folio when block size < folio size
2628
* to track sub-folio uptodate status and I/O completions.
@@ -1039,7 +1041,7 @@ static void iomap_finish_folio_write(struct inode *inode, struct folio *folio,
10391041
* state, release holds on bios, and finally free up memory. Do not use the
10401042
* ioend after this.
10411043
*/
1042-
static void
1044+
static u32
10431045
iomap_finish_ioend(struct iomap_ioend *ioend, int error)
10441046
{
10451047
struct inode *inode = ioend->io_inode;
@@ -1048,6 +1050,7 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
10481050
u64 start = bio->bi_iter.bi_sector;
10491051
loff_t offset = ioend->io_offset;
10501052
bool quiet = bio_flagged(bio, BIO_QUIET);
1053+
u32 folio_count = 0;
10511054

10521055
for (bio = &ioend->io_inline_bio; bio; bio = next) {
10531056
struct folio_iter fi;
@@ -1062,9 +1065,11 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
10621065
next = bio->bi_private;
10631066

10641067
/* walk all folios in bio, ending page IO on them */
1065-
bio_for_each_folio_all(fi, bio)
1068+
bio_for_each_folio_all(fi, bio) {
10661069
iomap_finish_folio_write(inode, fi.folio, fi.length,
10671070
error);
1071+
folio_count++;
1072+
}
10681073
bio_put(bio);
10691074
}
10701075
/* The ioend has been freed by bio_put() */
@@ -1074,20 +1079,36 @@ iomap_finish_ioend(struct iomap_ioend *ioend, int error)
10741079
"%s: writeback error on inode %lu, offset %lld, sector %llu",
10751080
inode->i_sb->s_id, inode->i_ino, offset, start);
10761081
}
1082+
return folio_count;
10771083
}
10781084

1085+
/*
1086+
* Ioend completion routine for merged bios. This can only be called from task
1087+
* contexts as merged ioends can be of unbound length. Hence we have to break up
1088+
* the writeback completions into manageable chunks to avoid long scheduler
1089+
* holdoffs. We aim to keep scheduler holdoffs down below 10ms so that we get
1090+
* good batch processing throughput without creating adverse scheduler latency
1091+
* conditions.
1092+
*/
10791093
void
10801094
iomap_finish_ioends(struct iomap_ioend *ioend, int error)
10811095
{
10821096
struct list_head tmp;
1097+
u32 completions;
1098+
1099+
might_sleep();
10831100

10841101
list_replace_init(&ioend->io_list, &tmp);
1085-
iomap_finish_ioend(ioend, error);
1102+
completions = iomap_finish_ioend(ioend, error);
10861103

10871104
while (!list_empty(&tmp)) {
1105+
if (completions > IOEND_BATCH_SIZE * 8) {
1106+
cond_resched();
1107+
completions = 0;
1108+
}
10881109
ioend = list_first_entry(&tmp, struct iomap_ioend, io_list);
10891110
list_del_init(&ioend->io_list);
1090-
iomap_finish_ioend(ioend, error);
1111+
completions += iomap_finish_ioend(ioend, error);
10911112
}
10921113
}
10931114
EXPORT_SYMBOL_GPL(iomap_finish_ioends);
@@ -1108,6 +1129,18 @@ iomap_ioend_can_merge(struct iomap_ioend *ioend, struct iomap_ioend *next)
11081129
return false;
11091130
if (ioend->io_offset + ioend->io_size != next->io_offset)
11101131
return false;
1132+
/*
1133+
* Do not merge physically discontiguous ioends. The filesystem
1134+
* completion functions will have to iterate the physical
1135+
* discontiguities even if we merge the ioends at a logical level, so
1136+
* we don't gain anything by merging physical discontiguities here.
1137+
*
1138+
* We cannot use bio->bi_iter.bi_sector here as it is modified during
1139+
* submission so does not point to the start sector of the bio at
1140+
* completion.
1141+
*/
1142+
if (ioend->io_sector + (ioend->io_size >> 9) != next->io_sector)
1143+
return false;
11111144
return true;
11121145
}
11131146

@@ -1209,8 +1242,10 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
12091242
ioend->io_flags = wpc->iomap.flags;
12101243
ioend->io_inode = inode;
12111244
ioend->io_size = 0;
1245+
ioend->io_folios = 0;
12121246
ioend->io_offset = offset;
12131247
ioend->io_bio = bio;
1248+
ioend->io_sector = sector;
12141249
return ioend;
12151250
}
12161251

@@ -1251,6 +1286,13 @@ iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t offset,
12511286
return false;
12521287
if (sector != bio_end_sector(wpc->ioend->io_bio))
12531288
return false;
1289+
/*
1290+
* Limit ioend bio chain lengths to minimise IO completion latency. This
1291+
* also prevents long tight loops ending page writeback on all the
1292+
* folios in the ioend.
1293+
*/
1294+
if (wpc->ioend->io_folios >= IOEND_BATCH_SIZE)
1295+
return false;
12541296
return true;
12551297
}
12561298

@@ -1335,6 +1377,8 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
13351377
&submit_list);
13361378
count++;
13371379
}
1380+
if (count)
1381+
wpc->ioend->io_folios++;
13381382

13391383
WARN_ON_ONCE(!wpc->ioend && !list_empty(&submit_list));
13401384
WARN_ON_ONCE(!folio_test_locked(folio));

fs/xfs/xfs_aops.c

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,20 @@ xfs_end_ioend(
136136
memalloc_nofs_restore(nofs_flag);
137137
}
138138

139-
/* Finish all pending io completions. */
139+
/*
140+
* Finish all pending IO completions that require transactional modifications.
141+
*
142+
* We try to merge physical and logically contiguous ioends before completion to
143+
* minimise the number of transactions we need to perform during IO completion.
144+
* Both unwritten extent conversion and COW remapping need to iterate and modify
145+
* one physical extent at a time, so we gain nothing by merging physically
146+
* discontiguous extents here.
147+
*
148+
* The ioend chain length that we can be processing here is largely unbound in
149+
* length and we may have to perform significant amounts of work on each ioend
150+
* to complete it. Hence we have to be careful about holding the CPU for too
151+
* long in this loop.
152+
*/
140153
void
141154
xfs_end_io(
142155
struct work_struct *work)
@@ -157,6 +170,7 @@ xfs_end_io(
157170
list_del_init(&ioend->io_list);
158171
iomap_ioend_try_merge(ioend, &tmp);
159172
xfs_end_ioend(ioend);
173+
cond_resched();
160174
}
161175
}
162176

include/linux/iomap.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -263,9 +263,11 @@ struct iomap_ioend {
263263
struct list_head io_list; /* next ioend in chain */
264264
u16 io_type;
265265
u16 io_flags; /* IOMAP_F_* */
266+
u32 io_folios; /* folios added to ioend */
266267
struct inode *io_inode; /* file being written to */
267268
size_t io_size; /* size of the extent */
268269
loff_t io_offset; /* offset in the file */
270+
sector_t io_sector; /* start sector of ioend */
269271
struct bio *io_bio; /* bio being built */
270272
struct bio io_inline_bio; /* MUST BE LAST! */
271273
};

0 commit comments

Comments
 (0)