Skip to content

Commit b939bcd

Browse files
committed
Merge tag 'realtime-groups-6.13_2024-11-05' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into staging-merge
xfs: shard the realtime section [v5.5 06/10] Right now, the realtime section uses a single pair of metadata inodes to store the free space information. This presents a scalability problem since every thread trying to allocate or free rt extents has to lock these files. Solve this problem by sharding the realtime section into separate realtime allocation groups. While we're at it, define a superblock to be stamped into the start of the rt section. This enables utilities such as blkid to identify block devices containing realtime sections, and avoids the situation where anything written into block 0 of the realtime extent can be misinterpreted as file data. The best advantage for rtgroups will become evident later when we get to adding rmap and reflink to the realtime volume, since the geometry constraints are the same for rt groups and AGs. Hence we can reuse all that code directly. This is a very large patchset, but it catches us up with 20 years of technical debt that has accumulated. With a bit of luck, this should all go splendidly. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
2 parents cb288c9 + a3315d1 commit b939bcd

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+2699
-512
lines changed

fs/xfs/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ xfs-y += $(addprefix scrub/, \
191191
xfs-$(CONFIG_XFS_ONLINE_SCRUB_STATS) += scrub/stats.o
192192

193193
xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
194+
rgsuper.o \
194195
rtbitmap.o \
195196
rtsummary.o \
196197
)

fs/xfs/libxfs/xfs_ag.c

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -205,9 +205,10 @@ xfs_update_last_ag_size(
205205

206206
if (!pag)
207207
return -EFSCORRUPTED;
208-
pag->block_count = __xfs_ag_block_count(mp, prev_agcount - 1,
209-
mp->m_sb.sb_agcount, mp->m_sb.sb_dblocks);
210-
__xfs_agino_range(mp, pag->block_count, &pag->agino_min,
208+
pag_group(pag)->xg_block_count = __xfs_ag_block_count(mp,
209+
prev_agcount - 1, mp->m_sb.sb_agcount,
210+
mp->m_sb.sb_dblocks);
211+
__xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
211212
&pag->agino_max);
212213
xfs_perag_rele(pag);
213214
return 0;
@@ -241,9 +242,10 @@ xfs_perag_alloc(
241242
/*
242243
* Pre-calculated geometry
243244
*/
244-
pag->block_count = __xfs_ag_block_count(mp, index, agcount, dblocks);
245-
pag->min_block = XFS_AGFL_BLOCK(mp);
246-
__xfs_agino_range(mp, pag->block_count, &pag->agino_min,
245+
pag_group(pag)->xg_block_count = __xfs_ag_block_count(mp, index, agcount,
246+
dblocks);
247+
pag_group(pag)->xg_min_gbno = XFS_AGFL_BLOCK(mp) + 1;
248+
__xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
247249
&pag->agino_max);
248250

249251
error = xfs_group_insert(mp, pag_group(pag), index, XG_TYPE_AG);
@@ -852,8 +854,8 @@ xfs_ag_shrink_space(
852854
}
853855

854856
/* Update perag geometry */
855-
pag->block_count -= delta;
856-
__xfs_agino_range(mp, pag->block_count, &pag->agino_min,
857+
pag_group(pag)->xg_block_count -= delta;
858+
__xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
857859
&pag->agino_max);
858860

859861
xfs_ialloc_log_agi(*tpp, agibp, XFS_AGI_LENGTH);
@@ -924,8 +926,8 @@ xfs_ag_extend_space(
924926
return error;
925927

926928
/* Update perag geometry */
927-
pag->block_count = be32_to_cpu(agf->agf_length);
928-
__xfs_agino_range(mp, pag->block_count, &pag->agino_min,
929+
pag_group(pag)->xg_block_count = be32_to_cpu(agf->agf_length);
930+
__xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
929931
&pag->agino_max);
930932
return 0;
931933
}

fs/xfs/libxfs/xfs_ag.h

Lines changed: 2 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,6 @@ struct xfs_perag {
6161
struct xfs_ag_resv pag_rmapbt_resv;
6262

6363
/* Precalculated geometry info */
64-
xfs_agblock_t block_count;
65-
xfs_agblock_t min_block;
6664
xfs_agino_t agino_min;
6765
xfs_agino_t agino_max;
6866

@@ -220,11 +218,7 @@ void xfs_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno,
220218
static inline bool
221219
xfs_verify_agbno(struct xfs_perag *pag, xfs_agblock_t agbno)
222220
{
223-
if (agbno >= pag->block_count)
224-
return false;
225-
if (agbno <= pag->min_block)
226-
return false;
227-
return true;
221+
return xfs_verify_gbno(pag_group(pag), agbno);
228222
}
229223

230224
static inline bool
@@ -233,13 +227,7 @@ xfs_verify_agbext(
233227
xfs_agblock_t agbno,
234228
xfs_agblock_t len)
235229
{
236-
if (agbno + len <= agbno)
237-
return false;
238-
239-
if (!xfs_verify_agbno(pag, agbno))
240-
return false;
241-
242-
return xfs_verify_agbno(pag, agbno + len - 1);
230+
return xfs_verify_gbext(pag_group(pag), agbno, len);
243231
}
244232

245233
/*

fs/xfs/libxfs/xfs_alloc.c

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2648,8 +2648,17 @@ xfs_defer_extent_free(
26482648
ASSERT(!isnullstartblock(bno));
26492649
ASSERT(!(free_flags & ~XFS_FREE_EXTENT_ALL_FLAGS));
26502650

2651-
if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
2652-
return -EFSCORRUPTED;
2651+
if (free_flags & XFS_FREE_EXTENT_REALTIME) {
2652+
if (type != XFS_AG_RESV_NONE) {
2653+
ASSERT(type == XFS_AG_RESV_NONE);
2654+
return -EFSCORRUPTED;
2655+
}
2656+
if (XFS_IS_CORRUPT(mp, !xfs_verify_rtbext(mp, bno, len)))
2657+
return -EFSCORRUPTED;
2658+
} else {
2659+
if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
2660+
return -EFSCORRUPTED;
2661+
}
26532662

26542663
xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
26552664
GFP_KERNEL | __GFP_NOFAIL);
@@ -2658,6 +2667,8 @@ xfs_defer_extent_free(
26582667
xefi->xefi_agresv = type;
26592668
if (free_flags & XFS_FREE_EXTENT_SKIP_DISCARD)
26602669
xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
2670+
if (free_flags & XFS_FREE_EXTENT_REALTIME)
2671+
xefi->xefi_flags |= XFS_EFI_REALTIME;
26612672
if (oinfo) {
26622673
ASSERT(oinfo->oi_offset == 0);
26632674

fs/xfs/libxfs/xfs_alloc.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,11 @@ int xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
237237
/* Don't issue a discard for the blocks freed. */
238238
#define XFS_FREE_EXTENT_SKIP_DISCARD (1U << 0)
239239

240-
#define XFS_FREE_EXTENT_ALL_FLAGS (XFS_FREE_EXTENT_SKIP_DISCARD)
240+
/* Free blocks on the realtime device. */
241+
#define XFS_FREE_EXTENT_REALTIME (1U << 1)
242+
243+
#define XFS_FREE_EXTENT_ALL_FLAGS (XFS_FREE_EXTENT_SKIP_DISCARD | \
244+
XFS_FREE_EXTENT_REALTIME)
241245

242246
/*
243247
* List of extents to be free "later".
@@ -257,6 +261,12 @@ struct xfs_extent_free_item {
257261
#define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */
258262
#define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */
259263
#define XFS_EFI_CANCELLED (1U << 3) /* dont actually free the space */
264+
#define XFS_EFI_REALTIME (1U << 4) /* freeing realtime extent */
265+
266+
static inline bool xfs_efi_is_realtime(const struct xfs_extent_free_item *xefi)
267+
{
268+
return xefi->xefi_flags & XFS_EFI_REALTIME;
269+
}
260270

261271
struct xfs_alloc_autoreap {
262272
struct xfs_defer_pending *dfp;

fs/xfs/libxfs/xfs_bmap.c

Lines changed: 69 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
#include "xfs_bmap_item.h"
4141
#include "xfs_symlink_remote.h"
4242
#include "xfs_inode_util.h"
43+
#include "xfs_rtgroup.h"
4344

4445
struct kmem_cache *xfs_bmap_intent_cache;
4546

@@ -1426,6 +1427,24 @@ xfs_bmap_last_offset(
14261427
* Extent tree manipulation functions used during allocation.
14271428
*/
14281429

1430+
static inline bool
1431+
xfs_bmap_same_rtgroup(
1432+
struct xfs_inode *ip,
1433+
int whichfork,
1434+
struct xfs_bmbt_irec *left,
1435+
struct xfs_bmbt_irec *right)
1436+
{
1437+
struct xfs_mount *mp = ip->i_mount;
1438+
1439+
if (xfs_ifork_is_realtime(ip, whichfork) && xfs_has_rtgroups(mp)) {
1440+
if (xfs_rtb_to_rgno(mp, left->br_startblock) !=
1441+
xfs_rtb_to_rgno(mp, right->br_startblock))
1442+
return false;
1443+
}
1444+
1445+
return true;
1446+
}
1447+
14291448
/*
14301449
* Convert a delayed allocation to a real allocation.
14311450
*/
@@ -1495,7 +1514,8 @@ xfs_bmap_add_extent_delay_real(
14951514
LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
14961515
LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
14971516
LEFT.br_state == new->br_state &&
1498-
LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
1517+
LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
1518+
xfs_bmap_same_rtgroup(bma->ip, whichfork, &LEFT, new))
14991519
state |= BMAP_LEFT_CONTIG;
15001520

15011521
/*
@@ -1519,7 +1539,8 @@ xfs_bmap_add_extent_delay_real(
15191539
(BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
15201540
BMAP_RIGHT_FILLING) ||
15211541
LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
1522-
<= XFS_MAX_BMBT_EXTLEN))
1542+
<= XFS_MAX_BMBT_EXTLEN) &&
1543+
xfs_bmap_same_rtgroup(bma->ip, whichfork, new, &RIGHT))
15231544
state |= BMAP_RIGHT_CONTIG;
15241545

15251546
error = 0;
@@ -2064,7 +2085,8 @@ xfs_bmap_add_extent_unwritten_real(
20642085
LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
20652086
LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
20662087
LEFT.br_state == new->br_state &&
2067-
LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
2088+
LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
2089+
xfs_bmap_same_rtgroup(ip, whichfork, &LEFT, new))
20682090
state |= BMAP_LEFT_CONTIG;
20692091

20702092
/*
@@ -2088,7 +2110,8 @@ xfs_bmap_add_extent_unwritten_real(
20882110
(BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
20892111
BMAP_RIGHT_FILLING) ||
20902112
LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
2091-
<= XFS_MAX_BMBT_EXTLEN))
2113+
<= XFS_MAX_BMBT_EXTLEN) &&
2114+
xfs_bmap_same_rtgroup(ip, whichfork, new, &RIGHT))
20922115
state |= BMAP_RIGHT_CONTIG;
20932116

20942117
/*
@@ -2597,15 +2620,17 @@ xfs_bmap_add_extent_hole_delay(
25972620
*/
25982621
if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
25992622
left.br_startoff + left.br_blockcount == new->br_startoff &&
2600-
left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
2623+
left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
2624+
xfs_bmap_same_rtgroup(ip, whichfork, &left, new))
26012625
state |= BMAP_LEFT_CONTIG;
26022626

26032627
if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
26042628
new->br_startoff + new->br_blockcount == right.br_startoff &&
26052629
new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
26062630
(!(state & BMAP_LEFT_CONTIG) ||
26072631
(left.br_blockcount + new->br_blockcount +
2608-
right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)))
2632+
right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)) &&
2633+
xfs_bmap_same_rtgroup(ip, whichfork, new, &right))
26092634
state |= BMAP_RIGHT_CONTIG;
26102635

26112636
/*
@@ -2748,7 +2773,8 @@ xfs_bmap_add_extent_hole_real(
27482773
left.br_startoff + left.br_blockcount == new->br_startoff &&
27492774
left.br_startblock + left.br_blockcount == new->br_startblock &&
27502775
left.br_state == new->br_state &&
2751-
left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
2776+
left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
2777+
xfs_bmap_same_rtgroup(ip, whichfork, &left, new))
27522778
state |= BMAP_LEFT_CONTIG;
27532779

27542780
if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
@@ -2758,7 +2784,8 @@ xfs_bmap_add_extent_hole_real(
27582784
new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
27592785
(!(state & BMAP_LEFT_CONTIG) ||
27602786
left.br_blockcount + new->br_blockcount +
2761-
right.br_blockcount <= XFS_MAX_BMBT_EXTLEN))
2787+
right.br_blockcount <= XFS_MAX_BMBT_EXTLEN) &&
2788+
xfs_bmap_same_rtgroup(ip, whichfork, new, &right))
27622789
state |= BMAP_RIGHT_CONTIG;
27632790

27642791
error = 0;
@@ -3124,8 +3151,15 @@ xfs_bmap_adjacent_valid(
31243151
struct xfs_mount *mp = ap->ip->i_mount;
31253152

31263153
if (XFS_IS_REALTIME_INODE(ap->ip) &&
3127-
(ap->datatype & XFS_ALLOC_USERDATA))
3128-
return x < mp->m_sb.sb_rblocks;
3154+
(ap->datatype & XFS_ALLOC_USERDATA)) {
3155+
if (!xfs_has_rtgroups(mp))
3156+
return x < mp->m_sb.sb_rblocks;
3157+
3158+
return xfs_rtb_to_rgno(mp, x) == xfs_rtb_to_rgno(mp, y) &&
3159+
xfs_rtb_to_rgno(mp, x) < mp->m_sb.sb_rgcount &&
3160+
xfs_rtb_to_rtx(mp, x) < mp->m_sb.sb_rgextents;
3161+
3162+
}
31293163

31303164
return XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) &&
31313165
XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount &&
@@ -5356,9 +5390,11 @@ xfs_bmap_del_extent_real(
53565390
* If we need to, add to list of extents to delete.
53575391
*/
53585392
if (!(bflags & XFS_BMAPI_REMAP)) {
5393+
bool isrt = xfs_ifork_is_realtime(ip, whichfork);
5394+
53595395
if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
53605396
xfs_refcount_decrease_extent(tp, del);
5361-
} else if (xfs_ifork_is_realtime(ip, whichfork)) {
5397+
} else if (isrt && !xfs_has_rtgroups(mp)) {
53625398
error = xfs_bmap_free_rtblocks(tp, del);
53635399
} else {
53645400
unsigned int efi_flags = 0;
@@ -5367,6 +5403,19 @@ xfs_bmap_del_extent_real(
53675403
del->br_state == XFS_EXT_UNWRITTEN)
53685404
efi_flags |= XFS_FREE_EXTENT_SKIP_DISCARD;
53695405

5406+
/*
5407+
* Historically, we did not use EFIs to free realtime
5408+
* extents. However, when reverse mapping is enabled,
5409+
* we must maintain the same order of operations as the
5410+
* data device, which is: Remove the file mapping,
5411+
* remove the reverse mapping, and then free the
5412+
* blocks. Reflink for realtime volumes requires the
5413+
* same sort of ordering. Both features rely on
5414+
* rtgroups, so let's gate rt EFI usage on rtgroups.
5415+
*/
5416+
if (isrt)
5417+
efi_flags |= XFS_FREE_EXTENT_REALTIME;
5418+
53705419
error = xfs_free_extent_later(tp, del->br_startblock,
53715420
del->br_blockcount, NULL,
53725421
XFS_AG_RESV_NONE, efi_flags);
@@ -5715,6 +5764,8 @@ xfs_bunmapi(
57155764
*/
57165765
STATIC bool
57175766
xfs_bmse_can_merge(
5767+
struct xfs_inode *ip,
5768+
int whichfork,
57185769
struct xfs_bmbt_irec *left, /* preceding extent */
57195770
struct xfs_bmbt_irec *got, /* current extent to shift */
57205771
xfs_fileoff_t shift) /* shift fsb */
@@ -5730,7 +5781,8 @@ xfs_bmse_can_merge(
57305781
if ((left->br_startoff + left->br_blockcount != startoff) ||
57315782
(left->br_startblock + left->br_blockcount != got->br_startblock) ||
57325783
(left->br_state != got->br_state) ||
5733-
(left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN))
5784+
(left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN) ||
5785+
!xfs_bmap_same_rtgroup(ip, whichfork, left, got))
57345786
return false;
57355787

57365788
return true;
@@ -5766,7 +5818,7 @@ xfs_bmse_merge(
57665818
blockcount = left->br_blockcount + got->br_blockcount;
57675819

57685820
xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
5769-
ASSERT(xfs_bmse_can_merge(left, got, shift));
5821+
ASSERT(xfs_bmse_can_merge(ip, whichfork, left, got, shift));
57705822

57715823
new = *left;
57725824
new.br_blockcount = blockcount;
@@ -5928,7 +5980,8 @@ xfs_bmap_collapse_extents(
59285980
goto del_cursor;
59295981
}
59305982

5931-
if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
5983+
if (xfs_bmse_can_merge(ip, whichfork, &prev, &got,
5984+
offset_shift_fsb)) {
59325985
error = xfs_bmse_merge(tp, ip, whichfork,
59335986
offset_shift_fsb, &icur, &got, &prev,
59345987
cur, &logflags);
@@ -6064,7 +6117,8 @@ xfs_bmap_insert_extents(
60646117
* never find mergeable extents in this scenario. Check anyways
60656118
* and warn if we encounter two extents that could be one.
60666119
*/
6067-
if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
6120+
if (xfs_bmse_can_merge(ip, whichfork, &got, &next,
6121+
offset_shift_fsb))
60686122
WARN_ON_ONCE(1);
60696123
}
60706124

fs/xfs/libxfs/xfs_defer.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -846,6 +846,12 @@ xfs_defer_add(
846846

847847
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
848848

849+
if (!ops->finish_item) {
850+
ASSERT(ops->finish_item != NULL);
851+
xfs_force_shutdown(tp->t_mountp, SHUTDOWN_CORRUPT_INCORE);
852+
return NULL;
853+
}
854+
849855
dfp = xfs_defer_find_last(tp, ops);
850856
if (!dfp || !xfs_defer_can_append(dfp, ops))
851857
dfp = xfs_defer_alloc(&tp->t_dfops, ops);

fs/xfs/libxfs/xfs_defer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ extern const struct xfs_defer_op_type xfs_refcount_update_defer_type;
7171
extern const struct xfs_defer_op_type xfs_rmap_update_defer_type;
7272
extern const struct xfs_defer_op_type xfs_extent_free_defer_type;
7373
extern const struct xfs_defer_op_type xfs_agfl_free_defer_type;
74+
extern const struct xfs_defer_op_type xfs_rtextent_free_defer_type;
7475
extern const struct xfs_defer_op_type xfs_attr_defer_type;
7576
extern const struct xfs_defer_op_type xfs_exchmaps_defer_type;
7677

0 commit comments

Comments
 (0)