@@ -29,11 +29,6 @@ struct kmem_cache *xfs_buf_cache;
 /*
  * Locking orders
  *
- * xfs_buf_ioacct_inc:
- * xfs_buf_ioacct_dec:
- *	b_sema (caller holds)
- *	  b_lock
- *
  * xfs_buf_stale:
  *	b_sema (caller holds)
  *	  b_lock
@@ -81,51 +76,6 @@ xfs_buf_vmap_len(
 	return (bp->b_page_count * PAGE_SIZE);
 }

-/*
- * Bump the I/O in flight count on the buftarg if we haven't yet done so for
- * this buffer. The count is incremented once per buffer (per hold cycle)
- * because the corresponding decrement is deferred to buffer release. Buffers
- * can undergo I/O multiple times in a hold-release cycle and per buffer I/O
- * tracking adds unnecessary overhead. This is used for sychronization purposes
- * with unmount (see xfs_buftarg_drain()), so all we really need is a count of
- * in-flight buffers.
- *
- * Buffers that are never released (e.g., superblock, iclog buffers) must set
- * the XBF_NO_IOACCT flag before I/O submission. Otherwise, the buftarg count
- * never reaches zero and unmount hangs indefinitely.
- */
-static inline void
-xfs_buf_ioacct_inc(
-	struct xfs_buf	*bp)
-{
-	if (bp->b_flags & XBF_NO_IOACCT)
-		return;
-
-	ASSERT(bp->b_flags & XBF_ASYNC);
-	spin_lock(&bp->b_lock);
-	if (!(bp->b_state & XFS_BSTATE_IN_FLIGHT)) {
-		bp->b_state |= XFS_BSTATE_IN_FLIGHT;
-		percpu_counter_inc(&bp->b_target->bt_io_count);
-	}
-	spin_unlock(&bp->b_lock);
-}
-
-/*
- * Clear the in-flight state on a buffer about to be released to the LRU or
- * freed and unaccount from the buftarg.
- */
-static inline void
-__xfs_buf_ioacct_dec(
-	struct xfs_buf	*bp)
-{
-	lockdep_assert_held(&bp->b_lock);
-
-	if (bp->b_state & XFS_BSTATE_IN_FLIGHT) {
-		bp->b_state &= ~XFS_BSTATE_IN_FLIGHT;
-		percpu_counter_dec(&bp->b_target->bt_io_count);
-	}
-}
-
 /*
  * When we mark a buffer stale, we remove the buffer from the LRU and clear the
  * b_lru_ref count so that the buffer is freed immediately when the buffer
@@ -149,15 +99,7 @@ xfs_buf_stale(
 	 */
 	bp->b_flags &= ~_XBF_DELWRI_Q;

-	/*
-	 * Once the buffer is marked stale and unlocked, a subsequent lookup
-	 * could reset b_flags. There is no guarantee that the buffer is
-	 * unaccounted (released to LRU) before that occurs. Drop in-flight
-	 * status now to preserve accounting consistency.
-	 */
 	spin_lock(&bp->b_lock);
-	__xfs_buf_ioacct_dec(bp);
-
 	atomic_set(&bp->b_lru_ref, 0);
 	if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
 	    (list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru)))
@@ -794,18 +736,13 @@ xfs_buf_get_map(

 int
 _xfs_buf_read(
-	struct xfs_buf		*bp,
-	xfs_buf_flags_t		flags)
+	struct xfs_buf		*bp)
 {
-	ASSERT(!(flags & XBF_WRITE));
 	ASSERT(bp->b_maps[0].bm_bn != XFS_BUF_DADDR_NULL);

 	bp->b_flags &= ~(XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD | XBF_DONE);
-	bp->b_flags |= flags & (XBF_READ | XBF_ASYNC | XBF_READ_AHEAD);
-
+	bp->b_flags |= XBF_READ;
 	xfs_buf_submit(bp);
-	if (flags & XBF_ASYNC)
-		return 0;
 	return xfs_buf_iowait(bp);
 }

@@ -857,6 +794,8 @@ xfs_buf_read_map(
 	struct xfs_buf		*bp;
 	int			error;

+	ASSERT(!(flags & (XBF_WRITE | XBF_ASYNC | XBF_READ_AHEAD)));
+
 	flags |= XBF_READ;
 	*bpp = NULL;

@@ -870,21 +809,11 @@ xfs_buf_read_map(
 		/* Initiate the buffer read and wait. */
 		XFS_STATS_INC(target->bt_mount, xb_get_read);
 		bp->b_ops = ops;
-		error = _xfs_buf_read(bp, flags);
-
-		/* Readahead iodone already dropped the buffer, so exit. */
-		if (flags & XBF_ASYNC)
-			return 0;
+		error = _xfs_buf_read(bp);
 	} else {
 		/* Buffer already read; all we need to do is check it. */
 		error = xfs_buf_reverify(bp, ops);

-		/* Readahead already finished; drop the buffer and exit. */
-		if (flags & XBF_ASYNC) {
-			xfs_buf_relse(bp);
-			return 0;
-		}
-
 		/* We do not want read in the flags */
 		bp->b_flags &= ~XBF_READ;
 		ASSERT(bp->b_ops != NULL || ops == NULL);
@@ -936,6 +865,7 @@ xfs_buf_readahead_map(
 	int			nmaps,
 	const struct xfs_buf_ops *ops)
 {
+	const xfs_buf_flags_t	flags = XBF_READ | XBF_ASYNC | XBF_READ_AHEAD;
 	struct xfs_buf		*bp;

 	/*
@@ -945,9 +875,21 @@ xfs_buf_readahead_map(
 	if (xfs_buftarg_is_mem(target))
 		return;

-	xfs_buf_read_map(target, map, nmaps,
-		     XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
-		     __this_address);
+	if (xfs_buf_get_map(target, map, nmaps, flags | XBF_TRYLOCK, &bp))
+		return;
+	trace_xfs_buf_readahead(bp, 0, _RET_IP_);
+
+	if (bp->b_flags & XBF_DONE) {
+		xfs_buf_reverify(bp, ops);
+		xfs_buf_relse(bp);
+		return;
+	}
+	XFS_STATS_INC(target->bt_mount, xb_get_read);
+	bp->b_ops = ops;
+	bp->b_flags &= ~(XBF_WRITE | XBF_DONE);
+	bp->b_flags |= flags;
+	percpu_counter_inc(&target->bt_readahead_count);
+	xfs_buf_submit(bp);
 }

 /*
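The hunk above is the heart of the rework: readahead no longer goes through xfs_buf_read_map(), and the buffer is accounted to the new bt_readahead_count before submission instead of the old catch-all bt_io_count. The counter's lifecycle is: increment once per readahead buffer right before xfs_buf_submit(), decrement in I/O completion when XBF_READ_AHEAD is set (see the __xfs_buf_ioend() hunk further down), and drain in xfs_buftarg_wait() at unmount. The sketch below is not part of the patch; it only illustrates that counter lifecycle with the generic percpu_counter API, using hypothetical demo_* names.

/*
 * Illustrative sketch only (not from the patch): lifecycle of a readahead
 * in-flight counter, assuming a stripped-down "demo_target" in place of
 * struct xfs_buftarg.
 */
#include <linux/percpu_counter.h>
#include <linux/delay.h>
#include <linux/gfp.h>

struct demo_target {
	struct percpu_counter	ra_count;	/* in-flight readahead buffers */
};

static int demo_target_init(struct demo_target *tp)
{
	/* starts at zero, as bt_readahead_count does in xfs_init_buftarg() */
	return percpu_counter_init(&tp->ra_count, 0, GFP_KERNEL);
}

static void demo_readahead_submit(struct demo_target *tp)
{
	/* accounted once per buffer, immediately before async submission */
	percpu_counter_inc(&tp->ra_count);
	/* ... submit the asynchronous read here ... */
}

static void demo_readahead_done(struct demo_target *tp)
{
	/* I/O completion drops the count; blocking reads are never counted */
	percpu_counter_dec(&tp->ra_count);
}

static void demo_target_drain(struct demo_target *tp)
{
	/* unmount-style drain: wait until every readahead has completed */
	while (percpu_counter_sum(&tp->ra_count))
		msleep(100);
	percpu_counter_destroy(&tp->ra_count);
}

Because only readahead buffers are counted, buffers that are never released no longer need the XBF_NO_IOACCT special case, which is why xfs_buf_get_uncached() below stops filtering the allocation flags.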
@@ -1003,10 +945,12 @@ xfs_buf_get_uncached(
 	struct xfs_buf		*bp;
 	DEFINE_SINGLE_BUF_MAP(map, XFS_BUF_DADDR_NULL, numblks);

+	/* there are currently no valid flags for xfs_buf_get_uncached */
+	ASSERT(flags == 0);
+
 	*bpp = NULL;

-	/* flags might contain irrelevant bits, pass only what we care about */
-	error = _xfs_buf_alloc(target, &map, 1, flags & XBF_NO_IOACCT, &bp);
+	error = _xfs_buf_alloc(target, &map, 1, flags, &bp);
 	if (error)
 		return error;

@@ -1060,7 +1004,6 @@ xfs_buf_rele_uncached(
 		spin_unlock(&bp->b_lock);
 		return;
 	}
-	__xfs_buf_ioacct_dec(bp);
 	spin_unlock(&bp->b_lock);
 	xfs_buf_free(bp);
 }
@@ -1079,20 +1022,12 @@ xfs_buf_rele_cached(
 	spin_lock(&bp->b_lock);
 	ASSERT(bp->b_hold >= 1);
 	if (bp->b_hold > 1) {
-		/*
-		 * Drop the in-flight state if the buffer is already on the LRU
-		 * and it holds the only reference. This is racy because we
-		 * haven't acquired the pag lock, but the use of _XBF_IN_FLIGHT
-		 * ensures the decrement occurs only once per-buf.
-		 */
-		if (--bp->b_hold == 1 && !list_empty(&bp->b_lru))
-			__xfs_buf_ioacct_dec(bp);
+		bp->b_hold--;
 		goto out_unlock;
 	}

 	/* we are asked to drop the last reference */
-	__xfs_buf_ioacct_dec(bp);
-	if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
+	if (atomic_read(&bp->b_lru_ref)) {
 		/*
 		 * If the buffer is added to the LRU, keep the reference to the
 		 * buffer for the LRU and clear the (now stale) dispose list
@@ -1345,6 +1280,7 @@ xfs_buf_ioend_handle_error(
 resubmit:
 	xfs_buf_ioerror(bp, 0);
 	bp->b_flags |= (XBF_DONE | XBF_WRITE_FAIL);
+	reinit_completion(&bp->b_iowait);
 	xfs_buf_submit(bp);
 	return true;
 out_stale:
@@ -1355,8 +1291,9 @@ xfs_buf_ioend_handle_error(
 	return false;
 }

-static void
-xfs_buf_ioend(
+/* returns false if the caller needs to resubmit the I/O, else true */
+static bool
+__xfs_buf_ioend(
 	struct xfs_buf	*bp)
 {
 	trace_xfs_buf_iodone(bp, _RET_IP_);
@@ -1369,14 +1306,16 @@ xfs_buf_ioend(
 			bp->b_ops->verify_read(bp);
 		if (!bp->b_error)
 			bp->b_flags |= XBF_DONE;
+		if (bp->b_flags & XBF_READ_AHEAD)
+			percpu_counter_dec(&bp->b_target->bt_readahead_count);
 	} else {
 		if (!bp->b_error) {
 			bp->b_flags &= ~XBF_WRITE_FAIL;
 			bp->b_flags |= XBF_DONE;
 		}

 		if (unlikely(bp->b_error) && xfs_buf_ioend_handle_error(bp))
-			return;
+			return false;

 		/* clear the retry state */
 		bp->b_last_error = 0;
@@ -1397,7 +1336,15 @@ xfs_buf_ioend(

 	bp->b_flags &= ~(XBF_READ | XBF_WRITE | XBF_READ_AHEAD |
 			 _XBF_LOGRECOVERY);
+	return true;
+}

+static void
+xfs_buf_ioend(
+	struct xfs_buf	*bp)
+{
+	if (!__xfs_buf_ioend(bp))
+		return;
 	if (bp->b_flags & XBF_ASYNC)
 		xfs_buf_relse(bp);
 	else
@@ -1411,15 +1358,8 @@ xfs_buf_ioend_work(
 	struct xfs_buf		*bp =
 		container_of(work, struct xfs_buf, b_ioend_work);

-	xfs_buf_ioend(bp);
-}
-
-static void
-xfs_buf_ioend_async(
-	struct xfs_buf	*bp)
-{
-	INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
-	queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work);
+	if (__xfs_buf_ioend(bp))
+		xfs_buf_relse(bp);
 }

 void
@@ -1491,7 +1431,13 @@ xfs_buf_bio_end_io(
 	    XFS_TEST_ERROR(false, bp->b_mount, XFS_ERRTAG_BUF_IOERROR))
 		xfs_buf_ioerror(bp, -EIO);

-	xfs_buf_ioend_async(bp);
+	if (bp->b_flags & XBF_ASYNC) {
+		INIT_WORK(&bp->b_ioend_work, xfs_buf_ioend_work);
+		queue_work(bp->b_mount->m_buf_workqueue, &bp->b_ioend_work);
+	} else {
+		complete(&bp->b_iowait);
+	}
+
 	bio_put(bio);
 }

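With xfs_buf_ioend_async() folded into the bio completion handler above, the split becomes explicit: asynchronous buffers defer their completion processing to the per-mount buffer workqueue (a bio end_io callback runs in an atomic context, so verifier and iodone work cannot run there), while synchronous buffers merely wake the submitter sleeping in xfs_buf_iowait(). The sketch below, not part of the patch and using hypothetical demo_* names, shows that dispatch pattern in isolation.

/*
 * Illustrative sketch only (not from the patch): dispatching I/O completion
 * work from an atomic end_io context, assuming a private workqueue.
 */
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/workqueue.h>
#include <linux/completion.h>
#include <linux/errno.h>

struct demo_io {
	struct work_struct	work;
	struct completion	done;
	bool			async;
};

static struct workqueue_struct *demo_wq;

static void demo_work_fn(struct work_struct *work)
{
	struct demo_io *io = container_of(work, struct demo_io, work);

	/* heavy completion processing (verification, callbacks) runs here,
	 * in process context */
	(void)io;
}

static int demo_setup(void)
{
	demo_wq = alloc_workqueue("demo_endio", WQ_MEM_RECLAIM, 0);
	return demo_wq ? 0 : -ENOMEM;
}

/* called from an atomic completion context, e.g. a bio ->bi_end_io handler */
static void demo_endio(struct demo_io *io)
{
	if (io->async) {
		/* punt the heavy lifting to process context */
		INIT_WORK(&io->work, demo_work_fn);
		queue_work(demo_wq, &io->work);
	} else {
		/* the synchronous submitter finishes the work after waking */
		complete(&io->done);
	}
}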
@@ -1568,9 +1514,11 @@ xfs_buf_iowait(
 {
 	ASSERT(!(bp->b_flags & XBF_ASYNC));

-	trace_xfs_buf_iowait(bp, _RET_IP_);
-	wait_for_completion(&bp->b_iowait);
-	trace_xfs_buf_iowait_done(bp, _RET_IP_);
+	do {
+		trace_xfs_buf_iowait(bp, _RET_IP_);
+		wait_for_completion(&bp->b_iowait);
+		trace_xfs_buf_iowait_done(bp, _RET_IP_);
+	} while (!__xfs_buf_ioend(bp));

 	return bp->b_error;
 }
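Since xfs_buf_ioend_handle_error() may re-arm b_iowait with reinit_completion() and resubmit the buffer (see the resubmit hunk earlier), a synchronous waiter can be woken by a completion that only marks a failed intermediate attempt. The loop above therefore keeps waiting until __xfs_buf_ioend() reports that no resubmission took place. The sketch below, not part of the patch and using hypothetical demo_* names, shows that wait-and-retry handshake with the completion API.

/*
 * Illustrative sketch only (not from the patch): a synchronous submitter that
 * keeps waiting while the completion side re-arms and resubmits the request.
 */
#include <linux/types.h>
#include <linux/completion.h>

struct demo_req {
	struct completion	iowait;
	int			error;
	int			retries_left;
};

/* completion side: returns false if the request was re-armed and resubmitted */
static bool demo_finish(struct demo_req *rq)
{
	if (rq->error && rq->retries_left > 0) {
		rq->retries_left--;
		rq->error = 0;
		reinit_completion(&rq->iowait);	/* re-arm before resubmitting */
		/* ... resubmit the I/O; its end_io calls complete(&rq->iowait) ... */
		return false;
	}
	return true;
}

/* submitting side: sleep until demo_finish() says the I/O really ended */
static int demo_submit_and_wait(struct demo_req *rq)
{
	init_completion(&rq->iowait);
	/* ... submit the I/O; its end_io calls complete(&rq->iowait) ... */
	do {
		wait_for_completion(&rq->iowait);
	} while (!demo_finish(rq));

	return rq->error;
}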
@@ -1648,9 +1596,6 @@ xfs_buf_submit(
 	 */
 	bp->b_error = 0;

-	if (bp->b_flags & XBF_ASYNC)
-		xfs_buf_ioacct_inc(bp);
-
 	if ((bp->b_flags & XBF_WRITE) && !xfs_buf_verify_write(bp)) {
 		xfs_force_shutdown(bp->b_mount, SHUTDOWN_CORRUPT_INCORE);
 		xfs_buf_ioend(bp);
@@ -1776,9 +1721,8 @@ xfs_buftarg_wait(
 	struct xfs_buftarg	*btp)
 {
 	/*
-	 * First wait on the buftarg I/O count for all in-flight buffers to be
-	 * released. This is critical as new buffers do not make the LRU until
-	 * they are released.
+	 * First wait for all in-flight readahead buffers to be released. This is
+	 * critical as new buffers do not make the LRU until they are released.
 	 *
 	 * Next, flush the buffer workqueue to ensure all completion processing
 	 * has finished. Just waiting on buffer locks is not sufficient for
@@ -1787,7 +1731,7 @@ xfs_buftarg_wait(
 	 * all reference counts have been dropped before we start walking the
 	 * LRU list.
 	 */
-	while (percpu_counter_sum(&btp->bt_io_count))
+	while (percpu_counter_sum(&btp->bt_readahead_count))
 		delay(100);
 	flush_workqueue(btp->bt_mount->m_buf_workqueue);
 }
@@ -1904,8 +1848,8 @@ xfs_destroy_buftarg(
 	struct xfs_buftarg	*btp)
 {
 	shrinker_free(btp->bt_shrinker);
-	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
-	percpu_counter_destroy(&btp->bt_io_count);
+	ASSERT(percpu_counter_sum(&btp->bt_readahead_count) == 0);
+	percpu_counter_destroy(&btp->bt_readahead_count);
 	list_lru_destroy(&btp->bt_lru);
 }

@@ -1959,7 +1903,7 @@ xfs_init_buftarg(

 	if (list_lru_init(&btp->bt_lru))
 		return -ENOMEM;
-	if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
+	if (percpu_counter_init(&btp->bt_readahead_count, 0, GFP_KERNEL))
 		goto out_destroy_lru;

 	btp->bt_shrinker =
@@ -1973,7 +1917,7 @@ xfs_init_buftarg(
 	return 0;

 out_destroy_io_count:
-	percpu_counter_destroy(&btp->bt_io_count);
+	percpu_counter_destroy(&btp->bt_readahead_count);
 out_destroy_lru:
 	list_lru_destroy(&btp->bt_lru);
 	return -ENOMEM;