
Commit 30deff8

Christoph Hellwig authored and Christian Brauner committed
iomap: map multiple blocks at a time
The ->map_blocks interface returns a valid range for writeback, but we
still call back into it for every block, which is a bit inefficient.

Change iomap_writepage_map to use the valid range in the map until the
end of the folio or the dirty range inside the folio instead of calling
back into it for every block.

Note that the range is not used over folio boundaries, as we need to be
able to check the mapping sequence count under the folio lock.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/r/20231207072710.176093-14-hch@lst.de
Signed-off-by: Christian Brauner <brauner@kernel.org>
1 parent 410bb2c commit 30deff8
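The shape of the change is easiest to see in miniature. Below is a small, self-contained userspace C model of the new walk; it is not the kernel code, and every name in it (find_dirty_range, map_blocks, BLOCKS_PER_FOLIO, the dirty array) is a simplified stand-in. The point it illustrates: instead of asking for a mapping once per block, writeback first pulls a contiguous dirty range out of the per-block state and then consumes the returned mapping across that range, only asking again when the range outruns the mapping.

/* Simplified userspace model of the "map multiple blocks" writeback walk.
 * All types and helpers here are hypothetical stand-ins, not kernel APIs.
 */
#include <stdbool.h>
#include <stdio.h>

#define BLOCKS_PER_FOLIO 16u

/* Stand-in for the per-folio dirty bitmap kept in the folio state. */
static bool dirty[BLOCKS_PER_FOLIO] = {
	[1] = true, [2] = true, [3] = true, [9] = true, [10] = true,
};

/* Find the next contiguous dirty range at or after *start_blk.
 * Returns its length in blocks, or 0 if nothing further is dirty.
 */
static unsigned find_dirty_range(unsigned *start_blk)
{
	unsigned blk = *start_blk, nblks = 0;

	while (blk < BLOCKS_PER_FOLIO && !dirty[blk])
		blk++;
	while (blk + nblks < BLOCKS_PER_FOLIO && dirty[blk + nblks])
		nblks++;
	*start_blk = blk;
	return nblks;
}

/* Stand-in for the mapping lookup: pretend extents are aligned to 8-block
 * boundaries, so a mapping covers the blocks up to the next boundary.
 */
static unsigned map_blocks(unsigned blk)
{
	unsigned mapped = 8 - (blk % 8);

	printf("map_blocks called at block %u, mapping covers %u blocks\n",
	       blk, mapped);
	return mapped;
}

int main(void)
{
	unsigned blk = 0, nblks;

	/* One mapping call per dirty range (or per mapping boundary),
	 * instead of one call per dirty block as before.
	 */
	while ((nblks = find_dirty_range(&blk))) {
		while (nblks) {
			unsigned map_len = map_blocks(blk);

			if (map_len > nblks)
				map_len = nblks;
			printf("  write back blocks [%u, %u)\n",
			       blk, blk + map_len);
			blk += map_len;
			nblks -= map_len;
		}
	}
	return 0;
}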

File tree

2 files changed: +88, -35 lines changed


fs/iomap/buffered-io.c

Lines changed: 81 additions & 35 deletions
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /*
  * Copyright (C) 2010 Red Hat, Inc.
- * Copyright (C) 2016-2019 Christoph Hellwig.
+ * Copyright (C) 2016-2023 Christoph Hellwig.
  */
 #include <linux/module.h>
 #include <linux/compiler.h>
@@ -95,6 +95,44 @@ static inline bool ifs_block_is_dirty(struct folio *folio,
 	return test_bit(block + blks_per_folio, ifs->state);
 }
 
+static unsigned ifs_find_dirty_range(struct folio *folio,
+		struct iomap_folio_state *ifs, u64 *range_start, u64 range_end)
+{
+	struct inode *inode = folio->mapping->host;
+	unsigned start_blk =
+		offset_in_folio(folio, *range_start) >> inode->i_blkbits;
+	unsigned end_blk = min_not_zero(
+		offset_in_folio(folio, range_end) >> inode->i_blkbits,
+		i_blocks_per_folio(inode, folio));
+	unsigned nblks = 1;
+
+	while (!ifs_block_is_dirty(folio, ifs, start_blk))
+		if (++start_blk == end_blk)
+			return 0;
+
+	while (start_blk + nblks < end_blk) {
+		if (!ifs_block_is_dirty(folio, ifs, start_blk + nblks))
+			break;
+		nblks++;
+	}
+
+	*range_start = folio_pos(folio) + (start_blk << inode->i_blkbits);
+	return nblks << inode->i_blkbits;
+}
+
+static unsigned iomap_find_dirty_range(struct folio *folio, u64 *range_start,
+		u64 range_end)
+{
+	struct iomap_folio_state *ifs = folio->private;
+
+	if (*range_start >= range_end)
+		return 0;
+
+	if (ifs)
+		return ifs_find_dirty_range(folio, ifs, range_start, range_end);
+	return range_end - *range_start;
+}
+
 static void ifs_clear_range_dirty(struct folio *folio,
 		struct iomap_folio_state *ifs, size_t off, size_t len)
 {
@@ -1701,10 +1739,9 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos)
  */
 static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct folio *folio,
-		struct inode *inode, loff_t pos)
+		struct inode *inode, loff_t pos, unsigned len)
 {
 	struct iomap_folio_state *ifs = folio->private;
-	unsigned len = i_blocksize(inode);
 	size_t poff = offset_in_folio(folio, pos);
 	int error;
 
@@ -1728,29 +1765,41 @@ static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
 
 static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct folio *folio,
-		struct inode *inode, u64 pos, unsigned *count)
+		struct inode *inode, u64 pos, unsigned dirty_len,
+		unsigned *count)
 {
 	int error;
 
-	error = wpc->ops->map_blocks(wpc, inode, pos);
-	if (error)
-		goto fail;
-	trace_iomap_writepage_map(inode, &wpc->iomap);
-
-	switch (wpc->iomap.type) {
-	case IOMAP_INLINE:
-		WARN_ON_ONCE(1);
-		error = -EIO;
-		break;
-	case IOMAP_HOLE:
-		break;
-	default:
-		error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos);
-		if (!error)
-			(*count)++;
-	}
+	do {
+		unsigned map_len;
+
+		error = wpc->ops->map_blocks(wpc, inode, pos);
+		if (error)
+			break;
+		trace_iomap_writepage_map(inode, &wpc->iomap);
+
+		map_len = min_t(u64, dirty_len,
+			wpc->iomap.offset + wpc->iomap.length - pos);
+		WARN_ON_ONCE(!folio->private && map_len < dirty_len);
+
+		switch (wpc->iomap.type) {
+		case IOMAP_INLINE:
+			WARN_ON_ONCE(1);
+			error = -EIO;
+			break;
+		case IOMAP_HOLE:
+			break;
+		default:
+			error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos,
+					map_len);
+			if (!error)
+				(*count)++;
+			break;
+		}
+		dirty_len -= map_len;
+		pos += map_len;
+	} while (dirty_len && !error);
 
-fail:
 	/*
 	 * We cannot cancel the ioend directly here on error. We may have
 	 * already set other pages under writeback and hence we have to run I/O
@@ -1817,7 +1866,7 @@ static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode,
 		 * beyond i_size.
 		 */
 		folio_zero_segment(folio, poff, folio_size(folio));
-		*end_pos = isize;
+		*end_pos = round_up(isize, i_blocksize(inode));
 	}
 
 	return true;
@@ -1828,12 +1877,11 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 {
 	struct iomap_folio_state *ifs = folio->private;
 	struct inode *inode = folio->mapping->host;
-	unsigned len = i_blocksize(inode);
-	unsigned nblocks = i_blocks_per_folio(inode, folio);
 	u64 pos = folio_pos(folio);
 	u64 end_pos = pos + folio_size(folio);
 	unsigned count = 0;
-	int error = 0, i;
+	int error = 0;
+	u32 rlen;
 
 	WARN_ON_ONCE(!folio_test_locked(folio));
 	WARN_ON_ONCE(folio_test_dirty(folio));
@@ -1847,7 +1895,7 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	}
 	WARN_ON_ONCE(end_pos <= pos);
 
-	if (nblocks > 1) {
+	if (i_blocks_per_folio(inode, folio) > 1) {
 		if (!ifs) {
 			ifs = ifs_alloc(inode, folio, 0);
 			iomap_set_range_dirty(folio, 0, end_pos - pos);
@@ -1870,18 +1918,16 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	folio_start_writeback(folio);
 
 	/*
-	 * Walk through the folio to find areas to write back. If we
-	 * run off the end of the current map or find the current map
-	 * invalid, grab a new one.
+	 * Walk through the folio to find dirty areas to write back.
 	 */
-	for (i = 0; i < nblocks && pos < end_pos; i++, pos += len) {
-		if (ifs && !ifs_block_is_dirty(folio, ifs, i))
-			continue;
-		error = iomap_writepage_map_blocks(wpc, wbc, folio, inode, pos,
-				&count);
+	while ((rlen = iomap_find_dirty_range(folio, &pos, end_pos))) {
+		error = iomap_writepage_map_blocks(wpc, wbc, folio, inode,
+				pos, rlen, &count);
 		if (error)
 			break;
+		pos += rlen;
 	}
+
 	if (count)
 		wpc->nr_folios++;
 
include/linux/iomap.h

Lines changed: 7 additions & 0 deletions
@@ -309,6 +309,13 @@ struct iomap_writeback_ops {
 	/*
 	 * Required, maps the blocks so that writeback can be performed on
 	 * the range starting at offset.
+	 *
+	 * Can return arbitrarily large regions, but we need to call into it at
+	 * least once per folio to allow the file systems to synchronize with
+	 * the write path that could be invalidating mappings.
+	 *
+	 * An existing mapping from a previous call to this method can be reused
+	 * by the file system if it is still valid.
 	 */
 	int (*map_blocks)(struct iomap_writepage_ctx *wpc, struct inode *inode,
 			loff_t offset);
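To make the reuse rule in that comment concrete, here is a rough, self-contained userspace C sketch of how a ->map_blocks implementation might cache a mapping in the writeback context and revalidate it before reusing it. Every type and helper in it (struct wb_ctx, lookup_extent, the fs_seq cookie) is a hypothetical placeholder for whatever revalidation scheme a given filesystem uses; it is not a kernel API.

/* Sketch of the "reuse a still-valid mapping" rule with stand-in types. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct mapping {		/* stand-in for the cached writeback mapping */
	uint64_t offset;	/* file offset the mapping starts at */
	uint64_t length;	/* length of the mapping in bytes */
};

struct wb_ctx {			/* stand-in for the writeback context */
	struct mapping map;
	uint32_t cookie;	/* validity cookie sampled at lookup time */
	bool cached;
};

static uint32_t fs_seq = 1;	/* bumped whenever the extent map changes */

/* Hypothetical extent lookup: map 64k-aligned regions around 'pos'. */
static struct mapping lookup_extent(uint64_t pos)
{
	struct mapping m = { .offset = pos & ~0xffffULL, .length = 0x10000 };

	printf("fresh lookup for pos %#llx\n", (unsigned long long)pos);
	return m;
}

static int map_blocks(struct wb_ctx *wpc, uint64_t pos)
{
	/* Reuse the cached mapping if it still covers pos and is still valid. */
	if (wpc->cached &&
	    pos >= wpc->map.offset &&
	    pos < wpc->map.offset + wpc->map.length &&
	    wpc->cookie == fs_seq)
		return 0;

	wpc->map = lookup_extent(pos);
	wpc->cookie = fs_seq;
	wpc->cached = true;
	return 0;
}

int main(void)
{
	struct wb_ctx wpc = { 0 };

	map_blocks(&wpc, 0x1000);	/* fresh lookup */
	map_blocks(&wpc, 0x2000);	/* reused: same region, still valid */
	fs_seq++;			/* simulate a racing extent map change */
	map_blocks(&wpc, 0x3000);	/* cookie mismatch forces a new lookup */
	return 0;
}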
