
Commit 410bb2c

Christoph Hellwig authored and Christian Brauner committed
iomap: submit ioends immediately
Currently the writeback code delays submitting full ioends until we reach
the end of the folio.  The reason for that is that otherwise the end I/O
handler could clear the writeback bit before we've even finished submitting
all I/O for the folio.

Add a bias to ifs->write_bytes_pending while we are submitting I/O for a
folio so that it never reaches zero until all I/O is completed, to prevent
the early writeback bit clearing, and remove the now superfluous
submit_list.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20231207072710.176093-13-hch@lst.de
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Christian Brauner <brauner@kernel.org>
1 parent f525152 commit 410bb2c
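The heart of the change is the bias on ifs->write_bytes_pending: the counter is held above zero for the entire submission pass, so a completion that races with submission can never drop it to zero and clear the folio's writeback bit early. Below is a minimal standalone C sketch of that counting pattern, assuming a userspace model rather than the kernel code itself; the names pending, submit_blocks and complete_block are illustrative only.

#include <stdatomic.h>
#include <stdio.h>

static atomic_int pending;              /* models ifs->write_bytes_pending */

/* Completion side: whoever drops the counter to zero "ends writeback". */
static void complete_block(void)
{
    if (atomic_fetch_sub(&pending, 1) == 1)
        printf("writeback bit cleared\n");
}

static void submit_blocks(int nblocks)
{
    /* Bias the counter so racing completions can never reach zero. */
    atomic_fetch_add(&pending, 1);

    for (int i = 0; i < nblocks; i++) {
        atomic_fetch_add(&pending, 1);  /* account one block */
        /* ... submit the block I/O; its completion may run at any time ... */
        complete_block();               /* even an instant completion ... */
    }                                   /* ... cannot zero the counter here */

    /* Submission done: drop the bias; the last drop ends writeback. */
    complete_block();
}

int main(void)
{
    submit_blocks(4);
    return 0;
}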

File tree: 1 file changed, +76 −86 lines

fs/iomap/buffered-io.c

Lines changed: 76 additions & 86 deletions
@@ -1610,30 +1610,34 @@ static void iomap_writepage_end_bio(struct bio *bio)
  * Submit the final bio for an ioend.
  *
  * If @error is non-zero, it means that we have a situation where some part of
- * the submission process has failed after we've marked pages for writeback
- * and unlocked them. In this situation, we need to fail the bio instead of
- * submitting it. This typically only happens on a filesystem shutdown.
+ * the submission process has failed after we've marked pages for writeback.
+ * We cannot cancel ioend directly in that case, so call the bio end I/O handler
+ * with the error status here to run the normal I/O completion handler to clear
+ * the writeback bit and let the file system process the errors.
  */
-static int
-iomap_submit_ioend(struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend,
-		int error)
+static int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error)
 {
+	if (!wpc->ioend)
+		return error;
+
+	/*
+	 * Let the file systems prepare the I/O submission and hook in an I/O
+	 * completion handler.  This also needs to happen in case after a
+	 * failure happened so that the file system end I/O handler gets called
+	 * to clean up.
+	 */
 	if (wpc->ops->prepare_ioend)
-		error = wpc->ops->prepare_ioend(ioend, error);
+		error = wpc->ops->prepare_ioend(wpc->ioend, error);
+
 	if (error) {
-		/*
-		 * If we're failing the IO now, just mark the ioend with an
-		 * error and finish it. This will run IO completion immediately
-		 * as there is only one reference to the ioend at this point in
-		 * time.
-		 */
-		ioend->io_bio.bi_status = errno_to_blk_status(error);
-		bio_endio(&ioend->io_bio);
-		return error;
+		wpc->ioend->io_bio.bi_status = errno_to_blk_status(error);
+		bio_endio(&wpc->ioend->io_bio);
+	} else {
+		submit_bio(&wpc->ioend->io_bio);
 	}
 
-	submit_bio(&ioend->io_bio);
-	return 0;
+	wpc->ioend = NULL;
+	return error;
 }
 
 static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc,
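The reworked iomap_submit_ioend() above no longer special-cases failure: whether the bio is submitted or failed on the spot, the same end-I/O handler runs and performs the cleanup. A small C sketch of that single-completion-path shape, using a made-up request type (request, end_io and submit_or_fail are illustrative names, not kernel API):

#include <stdio.h>

struct request {
    int status;                        /* models bio->bi_status */
    void (*end_io)(struct request *);  /* models the bio end_io handler */
};

/* All cleanup lives here, so it runs for success and failure alike. */
static void end_io(struct request *rq)
{
    if (rq->status)
        printf("completion: error %d, clear writeback, report error\n",
               rq->status);
    else
        printf("completion: success, clear writeback\n");
}

static int submit_or_fail(struct request *rq, int error)
{
    if (error) {
        /* Can't cancel: fail the request through the normal handler. */
        rq->status = error;
        rq->end_io(rq);
    } else {
        /* Hand off to the "block layer"; end_io runs on completion. */
        rq->end_io(rq);
    }
    return error;
}

int main(void)
{
    struct request ok = { .end_io = end_io };
    struct request bad = { .end_io = end_io };

    submit_or_fail(&ok, 0);
    submit_or_fail(&bad, -5 /* e.g. -EIO */);
    return 0;
}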
@@ -1687,19 +1691,28 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos)
 /*
  * Test to see if we have an existing ioend structure that we could append to
  * first; otherwise finish off the current ioend and start another.
+ *
+ * If a new ioend is created and cached, the old ioend is submitted to the block
+ * layer instantly.  Batching optimisations are provided by higher level block
+ * plugging.
+ *
+ * At the end of a writeback pass, there will be a cached ioend remaining on the
+ * writepage context that the caller will need to submit.
  */
-static void iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
+static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct folio *folio,
-		struct inode *inode, loff_t pos, struct list_head *iolist)
+		struct inode *inode, loff_t pos)
 {
 	struct iomap_folio_state *ifs = folio->private;
 	unsigned len = i_blocksize(inode);
 	size_t poff = offset_in_folio(folio, pos);
+	int error;
 
 	if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos)) {
 new_ioend:
-		if (wpc->ioend)
-			list_add(&wpc->ioend->io_list, iolist);
+		error = iomap_submit_ioend(wpc, 0);
+		if (error)
+			return error;
 		wpc->ioend = iomap_alloc_ioend(wpc, wbc, inode, pos);
 	}
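With this hunk, iomap_add_to_ioend() submits the cached ioend itself as soon as the new block cannot be appended, instead of parking it on a list for later. A rough userspace C sketch of the underlying "cache one builder, flush on discontinuity" pattern, assuming illustrative names (extent, flush_extent, add_block) that are not part of the iomap API:

#include <stdbool.h>
#include <stdio.h>

struct extent {
    long start, len;
    bool in_use;
};

static struct extent cached;    /* models wpc->ioend */

/* Submit the cached extent immediately; batching is left to lower layers. */
static void flush_extent(void)
{
    if (!cached.in_use)
        return;
    printf("submit extent [%ld, +%ld)\n", cached.start, cached.len);
    cached.in_use = false;
}

/* Append pos to the cached extent, or flush it and start a new one. */
static void add_block(long pos, long len)
{
    if (!cached.in_use || pos != cached.start + cached.len) {
        flush_extent();
        cached = (struct extent){ .start = pos, .len = 0, .in_use = true };
    }
    cached.len += len;
}

int main(void)
{
    long blocks[] = { 0, 4096, 8192, 20480, 24576 };

    for (unsigned i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++)
        add_block(blocks[i], 4096);
    flush_extent();     /* like iomap_writepages(), the caller submits the last one */
    return 0;
}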

@@ -1710,12 +1723,12 @@ static void iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
 		atomic_add(len, &ifs->write_bytes_pending);
 	wpc->ioend->io_size += len;
 	wbc_account_cgroup_owner(wbc, &folio->page, len);
+	return 0;
 }
 
 static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct folio *folio,
-		struct inode *inode, u64 pos, unsigned *count,
-		struct list_head *submit_list)
+		struct inode *inode, u64 pos, unsigned *count)
 {
 	int error;
@@ -1732,8 +1745,9 @@ static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
 	case IOMAP_HOLE:
 		break;
 	default:
-		iomap_add_to_ioend(wpc, wbc, folio, inode, pos, submit_list);
-		(*count)++;
+		error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos);
+		if (!error)
+			(*count)++;
 	}
 
 fail:
@@ -1809,35 +1823,21 @@ static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode,
 	return true;
 }
 
-/*
- * We implement an immediate ioend submission policy here to avoid needing to
- * chain multiple ioends and hence nest mempool allocations which can violate
- * the forward progress guarantees we need to provide. The current ioend we're
- * adding blocks to is cached in the writepage context, and if the new block
- * doesn't append to the cached ioend, it will create a new ioend and cache that
- * instead.
- *
- * If a new ioend is created and cached, the old ioend is returned and queued
- * locally for submission once the entire page is processed or an error has been
- * detected. While ioends are submitted immediately after they are completed,
- * batching optimisations are provided by higher level block plugging.
- *
- * At the end of a writeback pass, there will be a cached ioend remaining on the
- * writepage context that the caller will need to submit.
- */
 static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct folio *folio)
 {
 	struct iomap_folio_state *ifs = folio->private;
 	struct inode *inode = folio->mapping->host;
-	struct iomap_ioend *ioend, *next;
 	unsigned len = i_blocksize(inode);
 	unsigned nblocks = i_blocks_per_folio(inode, folio);
 	u64 pos = folio_pos(folio);
 	u64 end_pos = pos + folio_size(folio);
 	unsigned count = 0;
 	int error = 0, i;
-	LIST_HEAD(submit_list);
+
+	WARN_ON_ONCE(!folio_test_locked(folio));
+	WARN_ON_ONCE(folio_test_dirty(folio));
+	WARN_ON_ONCE(folio_test_writeback(folio));
 
 	trace_iomap_writepage(inode, pos, folio_size(folio));
 
@@ -1847,12 +1847,27 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	}
 	WARN_ON_ONCE(end_pos <= pos);
 
-	if (!ifs && nblocks > 1) {
-		ifs = ifs_alloc(inode, folio, 0);
-		iomap_set_range_dirty(folio, 0, end_pos - pos);
+	if (nblocks > 1) {
+		if (!ifs) {
+			ifs = ifs_alloc(inode, folio, 0);
+			iomap_set_range_dirty(folio, 0, end_pos - pos);
+		}
+
+		/*
+		 * Keep the I/O completion handler from clearing the writeback
+		 * bit until we have submitted all blocks by adding a bias to
+		 * ifs->write_bytes_pending, which is dropped after submitting
+		 * all blocks.
+		 */
+		WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending) != 0);
+		atomic_inc(&ifs->write_bytes_pending);
 	}
 
-	WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) != 0);
+	/*
+	 * Set the writeback bit ASAP, as the I/O completion for the single
+	 * block per folio case can hit as soon as we're submitting the bio.
+	 */
+	folio_start_writeback(folio);
 
 	/*
 	 * Walk through the folio to find areas to write back. If we
@@ -1863,18 +1878,13 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 		if (ifs && !ifs_block_is_dirty(folio, ifs, i))
 			continue;
 		error = iomap_writepage_map_blocks(wpc, wbc, folio, inode, pos,
-				&count, &submit_list);
+				&count);
 		if (error)
 			break;
 	}
 	if (count)
 		wpc->nr_folios++;
 
-	WARN_ON_ONCE(!wpc->ioend && !list_empty(&submit_list));
-	WARN_ON_ONCE(!folio_test_locked(folio));
-	WARN_ON_ONCE(folio_test_writeback(folio));
-	WARN_ON_ONCE(folio_test_dirty(folio));
-
 	/*
 	 * We can have dirty bits set past end of file in page_mkwrite path
 	 * while mapping the last partial folio. Hence it's better to clear
@@ -1883,38 +1893,20 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	iomap_clear_range_dirty(folio, 0, folio_size(folio));
 
 	/*
-	 * If the page hasn't been added to the ioend, it won't be affected by
-	 * I/O completion and we must unlock it now.
+	 * Usually the writeback bit is cleared by the I/O completion handler.
+	 * But we may end up either not actually writing any blocks, or (when
+	 * there are multiple blocks in a folio) all I/O might have finished
+	 * already at this point.  In that case we need to clear the writeback
+	 * bit ourselves right after unlocking the page.
 	 */
-	if (error && !count) {
-		folio_unlock(folio);
-		goto done;
-	}
-
-	folio_start_writeback(folio);
 	folio_unlock(folio);
-
-	/*
-	 * Preserve the original error if there was one; catch
-	 * submission errors here and propagate into subsequent ioend
-	 * submissions.
-	 */
-	list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
-		int error2;
-
-		list_del_init(&ioend->io_list);
-		error2 = iomap_submit_ioend(wpc, ioend, error);
-		if (error2 && !error)
-			error = error2;
+	if (ifs) {
+		if (atomic_dec_and_test(&ifs->write_bytes_pending))
+			folio_end_writeback(folio);
+	} else {
+		if (!count)
+			folio_end_writeback(folio);
 	}
-
-	/*
-	 * We can end up here with no error and nothing to write only if we race
-	 * with a partial page truncate on a sub-page block sized filesystem.
-	 */
-	if (!count)
-		folio_end_writeback(folio);
-done:
 	mapping_set_error(inode->i_mapping, error);
 	return error;
 }
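After the dirty bits are cleared and the folio is unlocked, the hunk above decides who ends writeback: with a per-folio ifs the submitter drops its bias and ends writeback only if every block I/O has already completed; without one, it ends writeback only when no block was submitted at all. A compact C sketch of that decision, assuming userspace stand-ins (has_ifs, pending and count are illustrative, not the kernel code):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Decide whether the submitter must clear the "writeback bit" itself. */
static void finish_folio(bool has_ifs, atomic_int *pending, unsigned count)
{
    if (has_ifs) {
        /* Drop the submission bias; last one out ends writeback. */
        if (atomic_fetch_sub(pending, 1) == 1)
            printf("submitter ends writeback (all I/O done)\n");
    } else if (count == 0) {
        /* Single-block folio and nothing was submitted at all. */
        printf("submitter ends writeback (nothing written)\n");
    }
    /* Otherwise the I/O completion handler ends writeback later. */
}

int main(void)
{
    atomic_int done = 1;              /* only the bias is left: all I/O completed */
    atomic_int busy = 3;              /* bias plus two blocks still in flight */

    finish_folio(true, &done, 2);     /* -> submitter ends writeback */
    finish_folio(true, &busy, 2);     /* -> completion handler will end it */
    finish_folio(false, NULL, 0);     /* -> no blocks written at all */
    return 0;
}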
@@ -1942,9 +1934,7 @@ iomap_writepages(struct address_space *mapping, struct writeback_control *wbc,
 
 	wpc->ops = ops;
 	ret = write_cache_pages(mapping, wbc, iomap_do_writepage, wpc);
-	if (!wpc->ioend)
-		return ret;
-	return iomap_submit_ioend(wpc, wpc->ioend, ret);
+	return iomap_submit_ioend(wpc, ret);
 }
 EXPORT_SYMBOL_GPL(iomap_writepages);
 