Skip to content

Commit 088aea3

Browse files
committed
Revert "btrfs: turn delayed_nodes_tree into an XArray"
This reverts commit 253bf57. Revert the xarray conversion: there is a problem with a potential sleep-inside-spinlock [1] when calling xa_insert, which triggers a GFP_NOFS allocation. The radix tree used the preloading mechanism to avoid sleeping, but this is not available in xarray. Converting the spinlock to a mutex is possible, but at the time of rc6 that is riskier than a clean revert. [1] https://lore.kernel.org/linux-btrfs/cover.1657097693.git.fdmanana@suse.com/ Reported-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 5b8418b commit 088aea3

File tree

4 files changed

+50
-44
lines changed

4 files changed

+50
-44
lines changed

fs/btrfs/ctree.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1222,10 +1222,10 @@ struct btrfs_root {
12221222
struct rb_root inode_tree;
12231223

12241224
/*
1225-
* Xarray that keeps track of delayed nodes of every inode, protected
1226-
* by inode_lock
1225+
* radix tree that keeps track of delayed nodes of every inode,
1226+
* protected by inode_lock
12271227
*/
1228-
struct xarray delayed_nodes;
1228+
struct radix_tree_root delayed_nodes_tree;
12291229
/*
12301230
* right now this just gets used so that a root has its own devid
12311231
* for stat. It may be used for more later

fs/btrfs/delayed-inode.c

Lines changed: 45 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
7878
}
7979

8080
spin_lock(&root->inode_lock);
81-
node = xa_load(&root->delayed_nodes, ino);
81+
node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
8282

8383
if (node) {
8484
if (btrfs_inode->delayed_node) {
@@ -90,17 +90,17 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
9090

9191
/*
9292
* It's possible that we're racing into the middle of removing
93-
* this node from the xarray. In this case, the refcount
93+
* this node from the radix tree. In this case, the refcount
9494
* was zero and it should never go back to one. Just return
95-
* NULL like it was never in the xarray at all; our release
95+
* NULL like it was never in the radix at all; our release
9696
* function is in the process of removing it.
9797
*
9898
* Some implementations of refcount_inc refuse to bump the
9999
* refcount once it has hit zero. If we don't do this dance
100100
* here, refcount_inc() may decide to just WARN_ONCE() instead
101101
* of actually bumping the refcount.
102102
*
103-
* If this node is properly in the xarray, we want to bump the
103+
* If this node is properly in the radix, we want to bump the
104104
* refcount twice, once for the inode and once for this get
105105
* operation.
106106
*/
@@ -128,30 +128,36 @@ static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
128128
u64 ino = btrfs_ino(btrfs_inode);
129129
int ret;
130130

131-
do {
132-
node = btrfs_get_delayed_node(btrfs_inode);
133-
if (node)
134-
return node;
131+
again:
132+
node = btrfs_get_delayed_node(btrfs_inode);
133+
if (node)
134+
return node;
135135

136-
node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
137-
if (!node)
138-
return ERR_PTR(-ENOMEM);
139-
btrfs_init_delayed_node(node, root, ino);
136+
node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
137+
if (!node)
138+
return ERR_PTR(-ENOMEM);
139+
btrfs_init_delayed_node(node, root, ino);
140140

141-
/* Cached in the inode and can be accessed */
142-
refcount_set(&node->refs, 2);
141+
/* cached in the btrfs inode and can be accessed */
142+
refcount_set(&node->refs, 2);
143143

144-
spin_lock(&root->inode_lock);
145-
ret = xa_insert(&root->delayed_nodes, ino, node, GFP_NOFS);
146-
if (ret) {
147-
spin_unlock(&root->inode_lock);
148-
kmem_cache_free(delayed_node_cache, node);
149-
if (ret != -EBUSY)
150-
return ERR_PTR(ret);
151-
}
152-
} while (ret);
144+
ret = radix_tree_preload(GFP_NOFS);
145+
if (ret) {
146+
kmem_cache_free(delayed_node_cache, node);
147+
return ERR_PTR(ret);
148+
}
149+
150+
spin_lock(&root->inode_lock);
151+
ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
152+
if (ret == -EEXIST) {
153+
spin_unlock(&root->inode_lock);
154+
kmem_cache_free(delayed_node_cache, node);
155+
radix_tree_preload_end();
156+
goto again;
157+
}
153158
btrfs_inode->delayed_node = node;
154159
spin_unlock(&root->inode_lock);
160+
radix_tree_preload_end();
155161

156162
return node;
157163
}
@@ -270,7 +276,8 @@ static void __btrfs_release_delayed_node(
270276
* back up. We can delete it now.
271277
*/
272278
ASSERT(refcount_read(&delayed_node->refs) == 0);
273-
xa_erase(&root->delayed_nodes, delayed_node->inode_id);
279+
radix_tree_delete(&root->delayed_nodes_tree,
280+
delayed_node->inode_id);
274281
spin_unlock(&root->inode_lock);
275282
kmem_cache_free(delayed_node_cache, delayed_node);
276283
}
@@ -1863,35 +1870,34 @@ void btrfs_kill_delayed_inode_items(struct btrfs_inode *inode)
18631870

18641871
void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
18651872
{
1866-
unsigned long index = 0;
1867-
struct btrfs_delayed_node *delayed_node;
1873+
u64 inode_id = 0;
18681874
struct btrfs_delayed_node *delayed_nodes[8];
1875+
int i, n;
18691876

18701877
while (1) {
1871-
int n = 0;
1872-
18731878
spin_lock(&root->inode_lock);
1874-
if (xa_empty(&root->delayed_nodes)) {
1879+
n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
1880+
(void **)delayed_nodes, inode_id,
1881+
ARRAY_SIZE(delayed_nodes));
1882+
if (!n) {
18751883
spin_unlock(&root->inode_lock);
1876-
return;
1884+
break;
18771885
}
18781886

1879-
xa_for_each_start(&root->delayed_nodes, index, delayed_node, index) {
1887+
inode_id = delayed_nodes[n - 1]->inode_id + 1;
1888+
for (i = 0; i < n; i++) {
18801889
/*
18811890
* Don't increase refs in case the node is dead and
18821891
* about to be removed from the tree in the loop below
18831892
*/
1884-
if (refcount_inc_not_zero(&delayed_node->refs)) {
1885-
delayed_nodes[n] = delayed_node;
1886-
n++;
1887-
}
1888-
if (n >= ARRAY_SIZE(delayed_nodes))
1889-
break;
1893+
if (!refcount_inc_not_zero(&delayed_nodes[i]->refs))
1894+
delayed_nodes[i] = NULL;
18901895
}
1891-
index++;
18921896
spin_unlock(&root->inode_lock);
18931897

1894-
for (int i = 0; i < n; i++) {
1898+
for (i = 0; i < n; i++) {
1899+
if (!delayed_nodes[i])
1900+
continue;
18951901
__btrfs_kill_delayed_node(delayed_nodes[i]);
18961902
btrfs_release_delayed_node(delayed_nodes[i]);
18971903
}

fs/btrfs/disk-io.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1159,7 +1159,7 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info,
11591159
root->nr_delalloc_inodes = 0;
11601160
root->nr_ordered_extents = 0;
11611161
root->inode_tree = RB_ROOT;
1162-
xa_init_flags(&root->delayed_nodes, GFP_ATOMIC);
1162+
INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
11631163

11641164
btrfs_init_root_block_rsv(root);
11651165

fs/btrfs/inode.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3910,7 +3910,7 @@ static int btrfs_read_locked_inode(struct inode *inode,
39103910
* cache.
39113911
*
39123912
* This is required for both inode re-read from disk and delayed inode
3913-
* in the delayed_nodes xarray.
3913+
* in delayed_nodes_tree.
39143914
*/
39153915
if (BTRFS_I(inode)->last_trans == fs_info->generation)
39163916
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,

0 commit comments

Comments
 (0)