@@ -1610,30 +1610,34 @@ static void iomap_writepage_end_bio(struct bio *bio)
  * Submit the final bio for an ioend.
  *
  * If @error is non-zero, it means that we have a situation where some part of
- * the submission process has failed after we've marked pages for writeback
- * and unlocked them. In this situation, we need to fail the bio instead of
- * submitting it. This typically only happens on a filesystem shutdown.
+ * the submission process has failed after we've marked pages for writeback.
+ * We cannot cancel the ioend directly in that case, so call the bio end I/O
+ * handler with the error status here to run the normal I/O completion handler
+ * to clear the writeback bit and let the file system process the errors.
  */
-static int
-iomap_submit_ioend(struct iomap_writepage_ctx *wpc, struct iomap_ioend *ioend,
-		int error)
+static int iomap_submit_ioend(struct iomap_writepage_ctx *wpc, int error)
 {
+	if (!wpc->ioend)
+		return error;
+
+	/*
+	 * Let the file systems prepare the I/O submission and hook in an I/O
+	 * completion handler. This also needs to happen in case of a
+	 * failure so that the file system end I/O handler gets called
+	 * to clean up.
+	 */
 	if (wpc->ops->prepare_ioend)
-		error = wpc->ops->prepare_ioend(ioend, error);
+		error = wpc->ops->prepare_ioend(wpc->ioend, error);
+
 	if (error) {
-		/*
-		 * If we're failing the IO now, just mark the ioend with an
-		 * error and finish it. This will run IO completion immediately
-		 * as there is only one reference to the ioend at this point in
-		 * time.
-		 */
-		ioend->io_bio.bi_status = errno_to_blk_status(error);
-		bio_endio(&ioend->io_bio);
-		return error;
+		wpc->ioend->io_bio.bi_status = errno_to_blk_status(error);
+		bio_endio(&wpc->ioend->io_bio);
+	} else {
+		submit_bio(&wpc->ioend->io_bio);
 	}
 
-	submit_bio(&ioend->io_bio);
-	return 0;
+	wpc->ioend = NULL;
+	return error;
 }
 
 static struct iomap_ioend *iomap_alloc_ioend(struct iomap_writepage_ctx *wpc,
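As context for the hunk above: the reworked iomap_submit_ioend() always operates on the single ioend cached in the writepage context, and an earlier error is routed through the normal bio end-I/O handler rather than being cancelled, so the usual completion work (clearing the writeback bit, letting the file system see the error) still runs. The standalone C sketch below models that control flow only; the model_* names are invented for the illustration and are not kernel APIs.

#include <stdio.h>
#include <stdlib.h>

struct model_ioend {
	int status;			/* 0 or a negative errno */
};

struct model_wpc {
	struct model_ioend *ioend;	/* at most one cached ioend */
};

/* Stand-in for the bio end_io handler: it would clear the writeback bit
 * and let the file system look at the status, on success and on error. */
static void model_end_io(struct model_ioend *ioend)
{
	printf("end_io: status=%d\n", ioend->status);
	free(ioend);
}

/* Mirrors the new calling convention: if an ioend is cached, either mark
 * it failed or "submit" it, always through the same completion handler,
 * then drop the cache and hand the error back to the caller. */
static int model_submit_ioend(struct model_wpc *wpc, int error)
{
	if (!wpc->ioend)
		return error;

	if (error)
		wpc->ioend->status = error;
	model_end_io(wpc->ioend);	/* a real submission goes to the block
					   layer and completes asynchronously */
	wpc->ioend = NULL;
	return error;
}

int main(void)
{
	struct model_wpc wpc = { .ioend = calloc(1, sizeof(*wpc.ioend)) };

	if (!wpc.ioend)
		return 1;
	return model_submit_ioend(&wpc, 0) ? 1 : 0;
}

Compiled as ordinary C, this prints the completion status once and leaves the context with no cached ioend, mirroring the "submit or fail, then clear wpc->ioend" shape of the new function.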
@@ -1687,19 +1691,28 @@ static bool iomap_can_add_to_ioend(struct iomap_writepage_ctx *wpc, loff_t pos)
 /*
  * Test to see if we have an existing ioend structure that we could append to
  * first; otherwise finish off the current ioend and start another.
+ *
+ * If a new ioend is created and cached, the old ioend is submitted to the block
+ * layer instantly. Batching optimisations are provided by higher level block
+ * plugging.
+ *
+ * At the end of a writeback pass, there will be a cached ioend remaining on the
+ * writepage context that the caller will need to submit.
  */
-static void iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
+static int iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct folio *folio,
-		struct inode *inode, loff_t pos, struct list_head *iolist)
+		struct inode *inode, loff_t pos)
 {
 	struct iomap_folio_state *ifs = folio->private;
 	unsigned len = i_blocksize(inode);
 	size_t poff = offset_in_folio(folio, pos);
+	int error;
 
 	if (!wpc->ioend || !iomap_can_add_to_ioend(wpc, pos)) {
 new_ioend:
-		if (wpc->ioend)
-			list_add(&wpc->ioend->io_list, iolist);
+		error = iomap_submit_ioend(wpc, 0);
+		if (error)
+			return error;
 		wpc->ioend = iomap_alloc_ioend(wpc, wbc, inode, pos);
 	}
 
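The policy spelled out in the comment above — cache one open ioend, submit it the moment the next block cannot be appended, and leave the final one for the caller to submit after the pass — can be modelled in a few lines of plain C. The toy_* names below are invented for the illustration; batching across these immediate submissions comes from block-layer plugging at a higher level, which the sketch does not model.

#include <stdio.h>

struct toy_wpc {
	long start;	/* first block of the cached ioend */
	long blocks;	/* number of blocks accumulated so far, 0 = none */
};

static void toy_submit(struct toy_wpc *wpc)
{
	if (wpc->blocks)
		printf("submit ioend: blocks %ld..%ld\n",
		       wpc->start, wpc->start + wpc->blocks - 1);
	wpc->blocks = 0;
}

static void toy_add_block(struct toy_wpc *wpc, long blkno)
{
	/* Not contiguous with the cached ioend: submit it right away. */
	if (wpc->blocks && blkno != wpc->start + wpc->blocks)
		toy_submit(wpc);
	if (!wpc->blocks)
		wpc->start = blkno;
	wpc->blocks++;
}

int main(void)
{
	struct toy_wpc wpc = { 0 };
	long dirty[] = { 10, 11, 12, 40, 41 };	/* two dirty extents */

	for (size_t i = 0; i < sizeof(dirty) / sizeof(dirty[0]); i++)
		toy_add_block(&wpc, dirty[i]);
	toy_submit(&wpc);	/* end of the writeback pass */
	return 0;
}

Running it prints one "submit" line per extent: the discontiguity at block 40 flushes the first ioend immediately, and the trailing toy_submit() call plays the role of the final iomap_submit_ioend() in iomap_writepages().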
@@ -1710,12 +1723,12 @@ static void iomap_add_to_ioend(struct iomap_writepage_ctx *wpc,
 		atomic_add(len, &ifs->write_bytes_pending);
 	wpc->ioend->io_size += len;
 	wbc_account_cgroup_owner(wbc, &folio->page, len);
+	return 0;
 }
 
 static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct folio *folio,
-		struct inode *inode, u64 pos, unsigned *count,
-		struct list_head *submit_list)
+		struct inode *inode, u64 pos, unsigned *count)
 {
 	int error;
 
@@ -1732,8 +1745,9 @@ static int iomap_writepage_map_blocks(struct iomap_writepage_ctx *wpc,
 	case IOMAP_HOLE:
 		break;
 	default:
-		iomap_add_to_ioend(wpc, wbc, folio, inode, pos, submit_list);
-		(*count)++;
+		error = iomap_add_to_ioend(wpc, wbc, folio, inode, pos);
+		if (!error)
+			(*count)++;
 	}
 
 fail:
@@ -1809,35 +1823,21 @@ static bool iomap_writepage_handle_eof(struct folio *folio, struct inode *inode,
 	return true;
 }
 
-/*
- * We implement an immediate ioend submission policy here to avoid needing to
- * chain multiple ioends and hence nest mempool allocations which can violate
- * the forward progress guarantees we need to provide. The current ioend we're
- * adding blocks to is cached in the writepage context, and if the new block
- * doesn't append to the cached ioend, it will create a new ioend and cache that
- * instead.
- *
- * If a new ioend is created and cached, the old ioend is returned and queued
- * locally for submission once the entire page is processed or an error has been
- * detected. While ioends are submitted immediately after they are completed,
- * batching optimisations are provided by higher level block plugging.
- *
- * At the end of a writeback pass, there will be a cached ioend remaining on the
- * writepage context that the caller will need to submit.
- */
 static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 		struct writeback_control *wbc, struct folio *folio)
 {
 	struct iomap_folio_state *ifs = folio->private;
 	struct inode *inode = folio->mapping->host;
-	struct iomap_ioend *ioend, *next;
 	unsigned len = i_blocksize(inode);
 	unsigned nblocks = i_blocks_per_folio(inode, folio);
 	u64 pos = folio_pos(folio);
 	u64 end_pos = pos + folio_size(folio);
 	unsigned count = 0;
 	int error = 0, i;
-	LIST_HEAD(submit_list);
+
+	WARN_ON_ONCE(!folio_test_locked(folio));
+	WARN_ON_ONCE(folio_test_dirty(folio));
+	WARN_ON_ONCE(folio_test_writeback(folio));
 
 	trace_iomap_writepage(inode, pos, folio_size(folio));
 
@@ -1847,12 +1847,27 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	}
 	WARN_ON_ONCE(end_pos <= pos);
 
-	if (!ifs && nblocks > 1) {
-		ifs = ifs_alloc(inode, folio, 0);
-		iomap_set_range_dirty(folio, 0, end_pos - pos);
+	if (nblocks > 1) {
+		if (!ifs) {
+			ifs = ifs_alloc(inode, folio, 0);
+			iomap_set_range_dirty(folio, 0, end_pos - pos);
+		}
+
+		/*
+		 * Keep the I/O completion handler from clearing the writeback
+		 * bit until we have submitted all blocks by adding a bias to
+		 * ifs->write_bytes_pending, which is dropped after submitting
+		 * all blocks.
+		 */
+		WARN_ON_ONCE(atomic_read(&ifs->write_bytes_pending) != 0);
+		atomic_inc(&ifs->write_bytes_pending);
 	}
 
-	WARN_ON_ONCE(ifs && atomic_read(&ifs->write_bytes_pending) != 0);
+	/*
+	 * Set the writeback bit ASAP, as the I/O completion for the single
+	 * block per folio case can happen as soon as we submit the bio.
+	 */
+	folio_start_writeback(folio);
 
 	/*
 	 * Walk through the folio to find areas to write back. If we
@@ -1863,18 +1878,13 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 		if (ifs && !ifs_block_is_dirty(folio, ifs, i))
 			continue;
 		error = iomap_writepage_map_blocks(wpc, wbc, folio, inode, pos,
-				&count, &submit_list);
+				&count);
 		if (error)
 			break;
 	}
 	if (count)
 		wpc->nr_folios++;
 
-	WARN_ON_ONCE(!wpc->ioend && !list_empty(&submit_list));
-	WARN_ON_ONCE(!folio_test_locked(folio));
-	WARN_ON_ONCE(folio_test_writeback(folio));
-	WARN_ON_ONCE(folio_test_dirty(folio));
-
 	/*
 	 * We can have dirty bits set past end of file in page_mkwrite path
 	 * while mapping the last partial folio. Hence it's better to clear
@@ -1883,38 +1893,20 @@ static int iomap_writepage_map(struct iomap_writepage_ctx *wpc,
 	iomap_clear_range_dirty(folio, 0, folio_size(folio));
 
 	/*
-	 * If the page hasn't been added to the ioend, it won't be affected by
-	 * I/O completion and we must unlock it now.
+	 * Usually the writeback bit is cleared by the I/O completion handler.
+	 * But we may end up either not actually writing any blocks, or (when
+	 * there are multiple blocks in a folio) all I/O might have finished
+	 * already at this point. In that case we need to clear the writeback
+	 * bit ourselves right after unlocking the page.
 	 */
-	if (error && !count) {
-		folio_unlock(folio);
-		goto done;
-	}
-
-	folio_start_writeback(folio);
 	folio_unlock(folio);
-
-	/*
-	 * Preserve the original error if there was one; catch
-	 * submission errors here and propagate into subsequent ioend
-	 * submissions.
-	 */
-	list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
-		int error2;
-
-		list_del_init(&ioend->io_list);
-		error2 = iomap_submit_ioend(wpc, ioend, error);
-		if (error2 && !error)
-			error = error2;
+	if (ifs) {
+		if (atomic_dec_and_test(&ifs->write_bytes_pending))
+			folio_end_writeback(folio);
+	} else {
+		if (!count)
+			folio_end_writeback(folio);
 	}
-
-	/*
-	 * We can end up here with no error and nothing to write only if we race
-	 * with a partial page truncate on a sub-page block sized filesystem.
-	 */
-	if (!count)
-		folio_end_writeback(folio);
-done:
 	mapping_set_error(inode->i_mapping, error);
 	return error;
 }
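The write_bytes_pending bias added and dropped in the hunks above can be seen in isolation in the short standard-C model below (the names are invented and the program is single-threaded, so the atomics only illustrate the counting protocol, not the kernel implementation): each submitted block adds its length and its completion subtracts it, and without the extra count of one taken up front, a completion that runs immediately could hit zero and end writeback while later blocks of the same folio are still being queued.

#include <stdatomic.h>
#include <stdio.h>

static atomic_int write_bytes_pending;

static void end_writeback(void)
{
	puts("end writeback");
}

/* I/O completion for one block's worth of bytes. */
static void block_io_done(int len)
{
	if (atomic_fetch_sub(&write_bytes_pending, len) == len)
		end_writeback();	/* count reached zero */
}

int main(void)
{
	int blksz = 1024;

	atomic_fetch_add(&write_bytes_pending, 1);	/* take the bias */

	for (int i = 0; i < 4; i++) {
		atomic_fetch_add(&write_bytes_pending, blksz);
		block_io_done(blksz);	/* completion may run right away,
					   but the bias keeps the count > 0 */
	}

	/* All blocks submitted: drop the bias; if every completion already
	 * ran, end writeback here, exactly once. */
	if (atomic_fetch_sub(&write_bytes_pending, 1) == 1)
		end_writeback();
	return 0;
}

Dropping the bias after the last block ends writeback exactly once when all I/O has already finished, which is why iomap_writepage_map() does the atomic_dec_and_test()/folio_end_writeback() pair itself after unlocking the folio.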
@@ -1942,9 +1934,7 @@ iomap_writepages(struct address_space *mapping, struct writeback_control *wbc,
 
 	wpc->ops = ops;
 	ret = write_cache_pages(mapping, wbc, iomap_do_writepage, wpc);
-	if (!wpc->ioend)
-		return ret;
-	return iomap_submit_ioend(wpc, wpc->ioend, ret);
+	return iomap_submit_ioend(wpc, ret);
 }
 
 EXPORT_SYMBOL_GPL(iomap_writepages);