Skip to content

Commit 20855e4

Browse files
committed
Merge tag 'xfs-5.19-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull xfs fixes from Darrick Wong: "This fixes some stalling problems and corrects the last of the problems (I hope) observed during testing of the new atomic xattr update feature. - Fix statfs blocking on background inode gc workers - Fix some broken inode lock assertion code - Fix xattr leaf buffer leaks when cancelling a deferred xattr update operation - Clean up xattr recovery to make it easier to understand. - Fix xattr leaf block verifiers tripping over empty blocks. - Remove complicated and error prone xattr leaf block bholding mess. - Fix a bug where an rt extent crossing EOF was treated as "posteof" blocks and cleaned unnecessarily. - Fix a UAF when log shutdown races with unmount" * tag 'xfs-5.19-fixes-4' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: xfs: prevent a UAF when log IO errors race with unmount xfs: dont treat rt extents beyond EOF as eofblocks to be cleared xfs: don't hold xattr leaf buffers across transaction rolls xfs: empty xattr leaf header blocks are not corruption xfs: clean up the end of xfs_attri_item_recover xfs: always free xattri_leaf_bp when cancelling a deferred op xfs: use invalidate_lock to check the state of mmap_lock xfs: factor out the common lock flags assert xfs: introduce xfs_inodegc_push() xfs: bound maximum wait time for inodegc work
2 parents 69cb6c6 + 7561cea commit 20855e4

14 files changed

+130
-131
lines changed

fs/xfs/libxfs/xfs_attr.c

Lines changed: 9 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ STATIC int xfs_attr_shortform_addname(xfs_da_args_t *args);
5050
STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
5151
STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
5252
STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp);
53-
STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args, struct xfs_buf *bp);
53+
STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args);
5454

5555
/*
5656
* Internal routines when attribute list is more than one block.
@@ -393,16 +393,10 @@ xfs_attr_sf_addname(
393393
* It won't fit in the shortform, transform to a leaf block. GROT:
394394
* another possible req'mt for a double-split btree op.
395395
*/
396-
error = xfs_attr_shortform_to_leaf(args, &attr->xattri_leaf_bp);
396+
error = xfs_attr_shortform_to_leaf(args);
397397
if (error)
398398
return error;
399399

400-
/*
401-
* Prevent the leaf buffer from being unlocked so that a concurrent AIL
402-
* push cannot grab the half-baked leaf buffer and run into problems
403-
* with the write verifier.
404-
*/
405-
xfs_trans_bhold(args->trans, attr->xattri_leaf_bp);
406400
attr->xattri_dela_state = XFS_DAS_LEAF_ADD;
407401
out:
408402
trace_xfs_attr_sf_addname_return(attr->xattri_dela_state, args->dp);
@@ -447,11 +441,9 @@ xfs_attr_leaf_addname(
447441

448442
/*
449443
* Use the leaf buffer we may already hold locked as a result of
450-
* a sf-to-leaf conversion. The held buffer is no longer valid
451-
* after this call, regardless of the result.
444+
* a sf-to-leaf conversion.
452445
*/
453-
error = xfs_attr_leaf_try_add(args, attr->xattri_leaf_bp);
454-
attr->xattri_leaf_bp = NULL;
446+
error = xfs_attr_leaf_try_add(args);
455447

456448
if (error == -ENOSPC) {
457449
error = xfs_attr3_leaf_to_node(args);
@@ -497,8 +489,6 @@ xfs_attr_node_addname(
497489
struct xfs_da_args *args = attr->xattri_da_args;
498490
int error;
499491

500-
ASSERT(!attr->xattri_leaf_bp);
501-
502492
error = xfs_attr_node_addname_find_attr(attr);
503493
if (error)
504494
return error;
@@ -1215,24 +1205,14 @@ xfs_attr_restore_rmt_blk(
12151205
*/
12161206
STATIC int
12171207
xfs_attr_leaf_try_add(
1218-
struct xfs_da_args *args,
1219-
struct xfs_buf *bp)
1208+
struct xfs_da_args *args)
12201209
{
1210+
struct xfs_buf *bp;
12211211
int error;
12221212

1223-
/*
1224-
* If the caller provided a buffer to us, it is locked and held in
1225-
* the transaction because it just did a shortform to leaf conversion.
1226-
* Hence we don't need to read it again. Otherwise read in the leaf
1227-
* buffer.
1228-
*/
1229-
if (bp) {
1230-
xfs_trans_bhold_release(args->trans, bp);
1231-
} else {
1232-
error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
1233-
if (error)
1234-
return error;
1235-
}
1213+
error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
1214+
if (error)
1215+
return error;
12361216

12371217
/*
12381218
* Look up the xattr name to set the insertion point for the new xattr.

fs/xfs/libxfs/xfs_attr.h

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -515,11 +515,6 @@ struct xfs_attr_intent {
515515
*/
516516
struct xfs_attri_log_nameval *xattri_nameval;
517517

518-
/*
519-
* Used by xfs_attr_set to hold a leaf buffer across a transaction roll
520-
*/
521-
struct xfs_buf *xattri_leaf_bp;
522-
523518
/* Used to keep track of current state of delayed operation */
524519
enum xfs_delattr_state xattri_dela_state;
525520

fs/xfs/libxfs/xfs_attr_leaf.c

Lines changed: 19 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -289,6 +289,23 @@ xfs_attr3_leaf_verify_entry(
289289
return NULL;
290290
}
291291

292+
/*
293+
* Validate an attribute leaf block.
294+
*
295+
* Empty leaf blocks can occur under the following circumstances:
296+
*
297+
* 1. setxattr adds a new extended attribute to a file;
298+
* 2. The file has zero existing attributes;
299+
* 3. The attribute is too large to fit in the attribute fork;
300+
* 4. The attribute is small enough to fit in a leaf block;
301+
* 5. A log flush occurs after committing the transaction that creates
302+
* the (empty) leaf block; and
303+
* 6. The filesystem goes down after the log flush but before the new
304+
* attribute can be committed to the leaf block.
305+
*
306+
* Hence we need to ensure that we don't fail the validation purely
307+
* because the leaf is empty.
308+
*/
292309
static xfs_failaddr_t
293310
xfs_attr3_leaf_verify(
294311
struct xfs_buf *bp)
@@ -310,15 +327,6 @@ xfs_attr3_leaf_verify(
310327
if (fa)
311328
return fa;
312329

313-
/*
314-
* Empty leaf blocks should never occur; they imply the existence of a
315-
* software bug that needs fixing. xfs_repair also flags them as a
316-
* corruption that needs fixing, so we should never let these go to
317-
* disk.
318-
*/
319-
if (ichdr.count == 0)
320-
return __this_address;
321-
322330
/*
323331
* firstused is the block offset of the first name info structure.
324332
* Make sure it doesn't go off the block or crash into the header.
@@ -922,14 +930,10 @@ xfs_attr_shortform_getvalue(
922930
return -ENOATTR;
923931
}
924932

925-
/*
926-
* Convert from using the shortform to the leaf. On success, return the
927-
* buffer so that we can keep it locked until we're totally done with it.
928-
*/
933+
/* Convert from using the shortform to the leaf format. */
929934
int
930935
xfs_attr_shortform_to_leaf(
931-
struct xfs_da_args *args,
932-
struct xfs_buf **leaf_bp)
936+
struct xfs_da_args *args)
933937
{
934938
struct xfs_inode *dp;
935939
struct xfs_attr_shortform *sf;
@@ -991,7 +995,6 @@ xfs_attr_shortform_to_leaf(
991995
sfe = xfs_attr_sf_nextentry(sfe);
992996
}
993997
error = 0;
994-
*leaf_bp = bp;
995998
out:
996999
kmem_free(tmpbuffer);
9971000
return error;

fs/xfs/libxfs/xfs_attr_leaf.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,7 @@ void xfs_attr_shortform_create(struct xfs_da_args *args);
4949
void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
5050
int xfs_attr_shortform_lookup(struct xfs_da_args *args);
5151
int xfs_attr_shortform_getvalue(struct xfs_da_args *args);
52-
int xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
53-
struct xfs_buf **leaf_bp);
52+
int xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
5453
int xfs_attr_sf_removename(struct xfs_da_args *args);
5554
int xfs_attr_sf_findname(struct xfs_da_args *args,
5655
struct xfs_attr_sf_entry **sfep,

fs/xfs/xfs_attr_item.c

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -576,7 +576,7 @@ xfs_attri_item_recover(
576576
struct xfs_trans_res tres;
577577
struct xfs_attri_log_format *attrp;
578578
struct xfs_attri_log_nameval *nv = attrip->attri_nameval;
579-
int error, ret = 0;
579+
int error;
580580
int total;
581581
int local;
582582
struct xfs_attrd_log_item *done_item = NULL;
@@ -655,29 +655,32 @@ xfs_attri_item_recover(
655655
xfs_ilock(ip, XFS_ILOCK_EXCL);
656656
xfs_trans_ijoin(tp, ip, 0);
657657

658-
ret = xfs_xattri_finish_update(attr, done_item);
659-
if (ret == -EAGAIN) {
660-
/* There's more work to do, so add it to this transaction */
658+
error = xfs_xattri_finish_update(attr, done_item);
659+
if (error == -EAGAIN) {
660+
/*
661+
* There's more work to do, so add the intent item to this
662+
* transaction so that we can continue it later.
663+
*/
661664
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_ATTR, &attr->xattri_list);
662-
} else
663-
error = ret;
665+
error = xfs_defer_ops_capture_and_commit(tp, capture_list);
666+
if (error)
667+
goto out_unlock;
664668

669+
xfs_iunlock(ip, XFS_ILOCK_EXCL);
670+
xfs_irele(ip);
671+
return 0;
672+
}
665673
if (error) {
666674
xfs_trans_cancel(tp);
667675
goto out_unlock;
668676
}
669677

670678
error = xfs_defer_ops_capture_and_commit(tp, capture_list);
671-
672679
out_unlock:
673-
if (attr->xattri_leaf_bp)
674-
xfs_buf_relse(attr->xattri_leaf_bp);
675-
676680
xfs_iunlock(ip, XFS_ILOCK_EXCL);
677681
xfs_irele(ip);
678682
out:
679-
if (ret != -EAGAIN)
680-
xfs_attr_free_item(attr);
683+
xfs_attr_free_item(attr);
681684
return error;
682685
}
683686

fs/xfs/xfs_bmap_util.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -686,6 +686,8 @@ xfs_can_free_eofblocks(
686686
* forever.
687687
*/
688688
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
689+
if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1)
690+
end_fsb = roundup_64(end_fsb, mp->m_sb.sb_rextsize);
689691
last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
690692
if (last_fsb <= end_fsb)
691693
return false;

fs/xfs/xfs_icache.c

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -440,7 +440,7 @@ xfs_inodegc_queue_all(
440440
for_each_online_cpu(cpu) {
441441
gc = per_cpu_ptr(mp->m_inodegc, cpu);
442442
if (!llist_empty(&gc->list))
443-
queue_work_on(cpu, mp->m_inodegc_wq, &gc->work);
443+
mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
444444
}
445445
}
446446

@@ -1841,8 +1841,8 @@ void
18411841
xfs_inodegc_worker(
18421842
struct work_struct *work)
18431843
{
1844-
struct xfs_inodegc *gc = container_of(work, struct xfs_inodegc,
1845-
work);
1844+
struct xfs_inodegc *gc = container_of(to_delayed_work(work),
1845+
struct xfs_inodegc, work);
18461846
struct llist_node *node = llist_del_all(&gc->list);
18471847
struct xfs_inode *ip, *n;
18481848

@@ -1862,19 +1862,29 @@ xfs_inodegc_worker(
18621862
}
18631863

18641864
/*
1865-
* Force all currently queued inode inactivation work to run immediately and
1866-
* wait for the work to finish.
1865+
* Expedite all pending inodegc work to run immediately. This does not wait for
1866+
* completion of the work.
18671867
*/
18681868
void
1869-
xfs_inodegc_flush(
1869+
xfs_inodegc_push(
18701870
struct xfs_mount *mp)
18711871
{
18721872
if (!xfs_is_inodegc_enabled(mp))
18731873
return;
1874+
trace_xfs_inodegc_push(mp, __return_address);
1875+
xfs_inodegc_queue_all(mp);
1876+
}
18741877

1878+
/*
1879+
* Force all currently queued inode inactivation work to run immediately and
1880+
* wait for the work to finish.
1881+
*/
1882+
void
1883+
xfs_inodegc_flush(
1884+
struct xfs_mount *mp)
1885+
{
1886+
xfs_inodegc_push(mp);
18751887
trace_xfs_inodegc_flush(mp, __return_address);
1876-
1877-
xfs_inodegc_queue_all(mp);
18781888
flush_workqueue(mp->m_inodegc_wq);
18791889
}
18801890

@@ -2014,6 +2024,7 @@ xfs_inodegc_queue(
20142024
struct xfs_inodegc *gc;
20152025
int items;
20162026
unsigned int shrinker_hits;
2027+
unsigned long queue_delay = 1;
20172028

20182029
trace_xfs_inode_set_need_inactive(ip);
20192030
spin_lock(&ip->i_flags_lock);
@@ -2025,19 +2036,26 @@ xfs_inodegc_queue(
20252036
items = READ_ONCE(gc->items);
20262037
WRITE_ONCE(gc->items, items + 1);
20272038
shrinker_hits = READ_ONCE(gc->shrinker_hits);
2028-
put_cpu_ptr(gc);
20292039

2030-
if (!xfs_is_inodegc_enabled(mp))
2040+
/*
2041+
* We queue the work while holding the current CPU so that the work
2042+
* is scheduled to run on this CPU.
2043+
*/
2044+
if (!xfs_is_inodegc_enabled(mp)) {
2045+
put_cpu_ptr(gc);
20312046
return;
2032-
2033-
if (xfs_inodegc_want_queue_work(ip, items)) {
2034-
trace_xfs_inodegc_queue(mp, __return_address);
2035-
queue_work(mp->m_inodegc_wq, &gc->work);
20362047
}
20372048

2049+
if (xfs_inodegc_want_queue_work(ip, items))
2050+
queue_delay = 0;
2051+
2052+
trace_xfs_inodegc_queue(mp, __return_address);
2053+
mod_delayed_work(mp->m_inodegc_wq, &gc->work, queue_delay);
2054+
put_cpu_ptr(gc);
2055+
20382056
if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) {
20392057
trace_xfs_inodegc_throttle(mp, __return_address);
2040-
flush_work(&gc->work);
2058+
flush_delayed_work(&gc->work);
20412059
}
20422060
}
20432061

@@ -2054,7 +2072,7 @@ xfs_inodegc_cpu_dead(
20542072
unsigned int count = 0;
20552073

20562074
dead_gc = per_cpu_ptr(mp->m_inodegc, dead_cpu);
2057-
cancel_work_sync(&dead_gc->work);
2075+
cancel_delayed_work_sync(&dead_gc->work);
20582076

20592077
if (llist_empty(&dead_gc->list))
20602078
return;
@@ -2073,12 +2091,12 @@ xfs_inodegc_cpu_dead(
20732091
llist_add_batch(first, last, &gc->list);
20742092
count += READ_ONCE(gc->items);
20752093
WRITE_ONCE(gc->items, count);
2076-
put_cpu_ptr(gc);
20772094

20782095
if (xfs_is_inodegc_enabled(mp)) {
20792096
trace_xfs_inodegc_queue(mp, __return_address);
2080-
queue_work(mp->m_inodegc_wq, &gc->work);
2097+
mod_delayed_work(mp->m_inodegc_wq, &gc->work, 0);
20812098
}
2099+
put_cpu_ptr(gc);
20822100
}
20832101

20842102
/*
@@ -2173,7 +2191,7 @@ xfs_inodegc_shrinker_scan(
21732191
unsigned int h = READ_ONCE(gc->shrinker_hits);
21742192

21752193
WRITE_ONCE(gc->shrinker_hits, h + 1);
2176-
queue_work_on(cpu, mp->m_inodegc_wq, &gc->work);
2194+
mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
21772195
no_items = false;
21782196
}
21792197
}

fs/xfs/xfs_icache.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ void xfs_blockgc_stop(struct xfs_mount *mp);
7676
void xfs_blockgc_start(struct xfs_mount *mp);
7777

7878
void xfs_inodegc_worker(struct work_struct *work);
79+
void xfs_inodegc_push(struct xfs_mount *mp);
7980
void xfs_inodegc_flush(struct xfs_mount *mp);
8081
void xfs_inodegc_stop(struct xfs_mount *mp);
8182
void xfs_inodegc_start(struct xfs_mount *mp);

0 commit comments

Comments
 (0)