Skip to content

Commit d7a5b69

Browse files
committed
Merge tag 'metadata-directory-tree-6.13_2024-11-05' of https://git.kernel.org/pub/scm/linux/kernel/git/djwong/xfs-linux into staging-merge
xfs: metadata inode directory trees [v5.5 03/10] This series delivers a new feature -- metadata inode directories. This is a separate directory tree (rooted in the superblock) that contains only inodes that contain filesystem metadata. Different metadata objects can be looked up with regular paths. Start by creating xfs_imeta{dir,file}* functions to mediate access to the metadata directory tree. By the end of this mega series, all existing metadata inodes (rt+quota) will use this directory tree instead of the superblock. Next, define the metadir on-disk format, which consists of marking inodes with a new iflag that says they're metadata. This prevents bulkstat and friends from ever getting their hands on fs metadata files. With a bit of luck, this should all go splendidly. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
2 parents 28cf0d1 + 0d2c636 commit d7a5b69

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

72 files changed

+2332
-205
lines changed

fs/xfs/Makefile

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ xfs-y += xfs_trace.o
1616
xfs-y += $(addprefix libxfs/, \
1717
xfs_group.o \
1818
xfs_ag.o \
19+
xfs_ag_resv.o \
1920
xfs_alloc.o \
2021
xfs_alloc_btree.o \
2122
xfs_attr.o \
@@ -43,7 +44,8 @@ xfs-y += $(addprefix libxfs/, \
4344
xfs_inode_buf.o \
4445
xfs_inode_util.o \
4546
xfs_log_rlimit.o \
46-
xfs_ag_resv.o \
47+
xfs_metadir.o \
48+
xfs_metafile.o \
4749
xfs_parent.o \
4850
xfs_rmap.o \
4951
xfs_rmap_btree.o \
@@ -172,6 +174,7 @@ xfs-y += $(addprefix scrub/, \
172174
inode.o \
173175
iscan.o \
174176
listxattr.o \
177+
metapath.o \
175178
nlinks.o \
176179
parent.o \
177180
readdir.o \

fs/xfs/libxfs/xfs_attr.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1004,7 +1004,10 @@ xfs_attr_add_fork(
10041004
unsigned int blks; /* space reservation */
10051005
int error; /* error return value */
10061006

1007-
ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1007+
if (xfs_is_metadir_inode(ip))
1008+
ASSERT(XFS_IS_DQDETACHED(ip));
1009+
else
1010+
ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
10081011

10091012
blks = XFS_ADDAFORK_SPACE_RES(mp);
10101013

fs/xfs/libxfs/xfs_bmap.c

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1042,7 +1042,10 @@ xfs_bmap_add_attrfork(
10421042
int error; /* error return value */
10431043

10441044
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
1045-
ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
1045+
if (xfs_is_metadir_inode(ip))
1046+
ASSERT(XFS_IS_DQDETACHED(ip));
1047+
else
1048+
ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
10461049
ASSERT(!xfs_inode_has_attr_fork(ip));
10471050

10481051
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

fs/xfs/libxfs/xfs_format.h

Lines changed: 95 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,8 @@ typedef struct xfs_sb {
174174
xfs_lsn_t sb_lsn; /* last write sequence */
175175
uuid_t sb_meta_uuid; /* metadata file system unique id */
176176

177+
xfs_ino_t sb_metadirino; /* metadata directory tree root */
178+
177179
/* must be padded to 64 bit alignment */
178180
} xfs_sb_t;
179181

@@ -259,6 +261,8 @@ struct xfs_dsb {
259261
__be64 sb_lsn; /* last write sequence */
260262
uuid_t sb_meta_uuid; /* metadata file system unique id */
261263

264+
__be64 sb_metadirino; /* metadata directory tree root */
265+
262266
/* must be padded to 64 bit alignment */
263267
};
264268

@@ -278,7 +282,7 @@ struct xfs_dsb {
278282

279283
#define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS)
280284

281-
static inline bool xfs_sb_is_v5(struct xfs_sb *sbp)
285+
static inline bool xfs_sb_is_v5(const struct xfs_sb *sbp)
282286
{
283287
return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
284288
}
@@ -287,12 +291,12 @@ static inline bool xfs_sb_is_v5(struct xfs_sb *sbp)
287291
* Detect a mismatched features2 field. Older kernels read/wrote
288292
* this into the wrong slot, so to be safe we keep them in sync.
289293
*/
290-
static inline bool xfs_sb_has_mismatched_features2(struct xfs_sb *sbp)
294+
static inline bool xfs_sb_has_mismatched_features2(const struct xfs_sb *sbp)
291295
{
292296
return sbp->sb_bad_features2 != sbp->sb_features2;
293297
}
294298

295-
static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp)
299+
static inline bool xfs_sb_version_hasmorebits(const struct xfs_sb *sbp)
296300
{
297301
return xfs_sb_is_v5(sbp) ||
298302
(sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT);
@@ -342,8 +346,8 @@ static inline void xfs_sb_version_addprojid32(struct xfs_sb *sbp)
342346
#define XFS_SB_FEAT_COMPAT_UNKNOWN ~XFS_SB_FEAT_COMPAT_ALL
343347
static inline bool
344348
xfs_sb_has_compat_feature(
345-
struct xfs_sb *sbp,
346-
uint32_t feature)
349+
const struct xfs_sb *sbp,
350+
uint32_t feature)
347351
{
348352
return (sbp->sb_features_compat & feature) != 0;
349353
}
@@ -360,8 +364,8 @@ xfs_sb_has_compat_feature(
360364
#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
361365
static inline bool
362366
xfs_sb_has_ro_compat_feature(
363-
struct xfs_sb *sbp,
364-
uint32_t feature)
367+
const struct xfs_sb *sbp,
368+
uint32_t feature)
365369
{
366370
return (sbp->sb_features_ro_compat & feature) != 0;
367371
}
@@ -374,6 +378,7 @@ xfs_sb_has_ro_compat_feature(
374378
#define XFS_SB_FEAT_INCOMPAT_NREXT64 (1 << 5) /* large extent counters */
375379
#define XFS_SB_FEAT_INCOMPAT_EXCHRANGE (1 << 6) /* exchangerange supported */
376380
#define XFS_SB_FEAT_INCOMPAT_PARENT (1 << 7) /* parent pointers */
381+
#define XFS_SB_FEAT_INCOMPAT_METADIR (1 << 8) /* metadata dir tree */
377382
#define XFS_SB_FEAT_INCOMPAT_ALL \
378383
(XFS_SB_FEAT_INCOMPAT_FTYPE | \
379384
XFS_SB_FEAT_INCOMPAT_SPINODES | \
@@ -387,8 +392,8 @@ xfs_sb_has_ro_compat_feature(
387392
#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL
388393
static inline bool
389394
xfs_sb_has_incompat_feature(
390-
struct xfs_sb *sbp,
391-
uint32_t feature)
395+
const struct xfs_sb *sbp,
396+
uint32_t feature)
392397
{
393398
return (sbp->sb_features_incompat & feature) != 0;
394399
}
@@ -399,8 +404,8 @@ xfs_sb_has_incompat_feature(
399404
#define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_LOG_ALL
400405
static inline bool
401406
xfs_sb_has_incompat_log_feature(
402-
struct xfs_sb *sbp,
403-
uint32_t feature)
407+
const struct xfs_sb *sbp,
408+
uint32_t feature)
404409
{
405410
return (sbp->sb_features_log_incompat & feature) != 0;
406411
}
@@ -420,7 +425,7 @@ xfs_sb_add_incompat_log_features(
420425
sbp->sb_features_log_incompat |= features;
421426
}
422427

423-
static inline bool xfs_sb_version_haslogxattrs(struct xfs_sb *sbp)
428+
static inline bool xfs_sb_version_haslogxattrs(const struct xfs_sb *sbp)
424429
{
425430
return xfs_sb_is_v5(sbp) && (sbp->sb_features_log_incompat &
426431
XFS_SB_FEAT_INCOMPAT_LOG_XATTRS);
@@ -790,6 +795,27 @@ static inline time64_t xfs_bigtime_to_unix(uint64_t ondisk_seconds)
790795
return (time64_t)ondisk_seconds - XFS_BIGTIME_EPOCH_OFFSET;
791796
}
792797

798+
enum xfs_metafile_type {
799+
XFS_METAFILE_UNKNOWN, /* unknown */
800+
XFS_METAFILE_DIR, /* metadir directory */
801+
XFS_METAFILE_USRQUOTA, /* user quota */
802+
XFS_METAFILE_GRPQUOTA, /* group quota */
803+
XFS_METAFILE_PRJQUOTA, /* project quota */
804+
XFS_METAFILE_RTBITMAP, /* rt bitmap */
805+
XFS_METAFILE_RTSUMMARY, /* rt summary */
806+
807+
XFS_METAFILE_MAX
808+
} __packed;
809+
810+
#define XFS_METAFILE_TYPE_STR \
811+
{ XFS_METAFILE_UNKNOWN, "unknown" }, \
812+
{ XFS_METAFILE_DIR, "dir" }, \
813+
{ XFS_METAFILE_USRQUOTA, "usrquota" }, \
814+
{ XFS_METAFILE_GRPQUOTA, "grpquota" }, \
815+
{ XFS_METAFILE_PRJQUOTA, "prjquota" }, \
816+
{ XFS_METAFILE_RTBITMAP, "rtbitmap" }, \
817+
{ XFS_METAFILE_RTSUMMARY, "rtsummary" }
818+
793819
/*
794820
* On-disk inode structure.
795821
*
@@ -812,7 +838,7 @@ struct xfs_dinode {
812838
__be16 di_mode; /* mode and type of file */
813839
__u8 di_version; /* inode version */
814840
__u8 di_format; /* format of di_c data */
815-
__be16 di_onlink; /* old number of links to file */
841+
__be16 di_metatype; /* XFS_METAFILE_*; was di_onlink */
816842
__be32 di_uid; /* owner's user id */
817843
__be32 di_gid; /* owner's group id */
818844
__be32 di_nlink; /* number of links to file */
@@ -1088,21 +1114,60 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
10881114
* Values for di_flags2. These start by being exposed to userspace in the upper
10891115
* 16 bits of the XFS_XFLAG_s range.
10901116
*/
1091-
#define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */
1092-
#define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */
1093-
#define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */
1094-
#define XFS_DIFLAG2_BIGTIME_BIT 3 /* big timestamps */
1095-
#define XFS_DIFLAG2_NREXT64_BIT 4 /* large extent counters */
1117+
/* use DAX for this inode */
1118+
#define XFS_DIFLAG2_DAX_BIT 0
1119+
1120+
/* file's blocks may be shared */
1121+
#define XFS_DIFLAG2_REFLINK_BIT 1
10961122

1097-
#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT)
1098-
#define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT)
1099-
#define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT)
1100-
#define XFS_DIFLAG2_BIGTIME (1 << XFS_DIFLAG2_BIGTIME_BIT)
1101-
#define XFS_DIFLAG2_NREXT64 (1 << XFS_DIFLAG2_NREXT64_BIT)
1123+
/* copy on write extent size hint */
1124+
#define XFS_DIFLAG2_COWEXTSIZE_BIT 2
1125+
1126+
/* big timestamps */
1127+
#define XFS_DIFLAG2_BIGTIME_BIT 3
1128+
1129+
/* large extent counters */
1130+
#define XFS_DIFLAG2_NREXT64_BIT 4
1131+
1132+
/*
1133+
* The inode contains filesystem metadata and can be found through the metadata
1134+
* directory tree. Metadata inodes must satisfy the following constraints:
1135+
*
1136+
* - V5 filesystem (and ftype) are enabled;
1137+
* - The only valid modes are regular files and directories;
1138+
* - The access bits must be zero;
1139+
* - DMAPI event and state masks are zero;
1140+
* - The user and group IDs must be zero;
1141+
* - The project ID can be used as a u32 annotation;
1142+
* - The immutable, sync, noatime, nodump, and nodefrag flags must be set;
1143+
* - The dax flag must not be set;
1144+
* - Directories must have nosymlinks set.
1145+
*
1146+
* These requirements are chosen defensively to minimize the ability of
1147+
* userspace to read or modify the contents, should a metadata file ever
1148+
* escape to userspace.
1149+
*
1150+
* There are further constraints on the directory tree itself:
1151+
*
1152+
* - Metadata inodes must never be resolvable through the root directory;
1153+
* - They must never be accessed by userspace;
1154+
* - Metadata directory entries must have correct ftype.
1155+
*
1156+
* Superblock-rooted metadata files must have the METADATA iflag set even
1157+
* though they do not have a parent directory.
1158+
*/
1159+
#define XFS_DIFLAG2_METADATA_BIT 5
1160+
1161+
#define XFS_DIFLAG2_DAX (1ULL << XFS_DIFLAG2_DAX_BIT)
1162+
#define XFS_DIFLAG2_REFLINK (1ULL << XFS_DIFLAG2_REFLINK_BIT)
1163+
#define XFS_DIFLAG2_COWEXTSIZE (1ULL << XFS_DIFLAG2_COWEXTSIZE_BIT)
1164+
#define XFS_DIFLAG2_BIGTIME (1ULL << XFS_DIFLAG2_BIGTIME_BIT)
1165+
#define XFS_DIFLAG2_NREXT64 (1ULL << XFS_DIFLAG2_NREXT64_BIT)
1166+
#define XFS_DIFLAG2_METADATA (1ULL << XFS_DIFLAG2_METADATA_BIT)
11021167

11031168
#define XFS_DIFLAG2_ANY \
11041169
(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
1105-
XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64)
1170+
XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_METADATA)
11061171

11071172
static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip)
11081173
{
@@ -1117,6 +1182,12 @@ static inline bool xfs_dinode_has_large_extent_counts(
11171182
(dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_NREXT64));
11181183
}
11191184

1185+
static inline bool xfs_dinode_is_metadir(const struct xfs_dinode *dip)
1186+
{
1187+
return dip->di_version >= 3 &&
1188+
(dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_METADATA));
1189+
}
1190+
11201191
/*
11211192
* Inode number format:
11221193
* low inopblog bits - offset in block

fs/xfs/libxfs/xfs_fs.h

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,6 +198,8 @@ struct xfs_fsop_geom {
198198
#define XFS_FSOP_GEOM_SICK_RT_SUMMARY (1 << 5) /* realtime summary */
199199
#define XFS_FSOP_GEOM_SICK_QUOTACHECK (1 << 6) /* quota counts */
200200
#define XFS_FSOP_GEOM_SICK_NLINKS (1 << 7) /* inode link counts */
201+
#define XFS_FSOP_GEOM_SICK_METADIR (1 << 8) /* metadata directory */
202+
#define XFS_FSOP_GEOM_SICK_METAPATH (1 << 9) /* metadir tree path */
201203

202204
/* Output for XFS_FS_COUNTS */
203205
typedef struct xfs_fsop_counts {
@@ -242,6 +244,7 @@ typedef struct xfs_fsop_resblks {
242244
#define XFS_FSOP_GEOM_FLAGS_NREXT64 (1 << 23) /* large extent counters */
243245
#define XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE (1 << 24) /* exchange range */
244246
#define XFS_FSOP_GEOM_FLAGS_PARENT (1 << 25) /* linux parent pointers */
247+
#define XFS_FSOP_GEOM_FLAGS_METADIR (1 << 26) /* metadata directories */
245248

246249
/*
247250
* Minimum and maximum sizes needed for growth checks.
@@ -489,9 +492,17 @@ struct xfs_bulk_ireq {
489492
*/
490493
#define XFS_BULK_IREQ_NREXT64 (1U << 2)
491494

495+
/*
496+
* Allow bulkstat to return information about metadata directories. This
497+
* enables xfs_scrub to find them for scanning, as they are otherwise ordinary
498+
* directories.
499+
*/
500+
#define XFS_BULK_IREQ_METADIR (1U << 3)
501+
492502
#define XFS_BULK_IREQ_FLAGS_ALL (XFS_BULK_IREQ_AGNO | \
493503
XFS_BULK_IREQ_SPECIAL | \
494-
XFS_BULK_IREQ_NREXT64)
504+
XFS_BULK_IREQ_NREXT64 | \
505+
XFS_BULK_IREQ_METADIR)
495506

496507
/* Operate on the root directory inode. */
497508
#define XFS_BULK_IREQ_SPECIAL_ROOT (1)
@@ -722,9 +733,10 @@ struct xfs_scrub_metadata {
722733
#define XFS_SCRUB_TYPE_NLINKS 26 /* inode link counts */
723734
#define XFS_SCRUB_TYPE_HEALTHY 27 /* everything checked out ok */
724735
#define XFS_SCRUB_TYPE_DIRTREE 28 /* directory tree structure */
736+
#define XFS_SCRUB_TYPE_METAPATH 29 /* metadata directory tree paths */
725737

726738
/* Number of scrub subcommands. */
727-
#define XFS_SCRUB_TYPE_NR 29
739+
#define XFS_SCRUB_TYPE_NR 30
728740

729741
/*
730742
* This special type code only applies to the vectored scrub implementation.
@@ -802,6 +814,15 @@ struct xfs_scrub_vec_head {
802814

803815
#define XFS_SCRUB_VEC_FLAGS_ALL (0)
804816

817+
/*
818+
* i: sm_ino values for XFS_SCRUB_TYPE_METAPATH to select a metadata file for
819+
* path checking.
820+
*/
821+
#define XFS_SCRUB_METAPATH_PROBE (0) /* do we have a metapath scrubber? */
822+
823+
/* Number of metapath sm_ino values */
824+
#define XFS_SCRUB_METAPATH_NR (1)
825+
805826
/*
806827
* ioctl limits
807828
*/

fs/xfs/libxfs/xfs_health.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ struct xfs_da_args;
6262
#define XFS_SICK_FS_PQUOTA (1 << 3) /* project quota */
6363
#define XFS_SICK_FS_QUOTACHECK (1 << 4) /* quota counts */
6464
#define XFS_SICK_FS_NLINKS (1 << 5) /* inode link counts */
65+
#define XFS_SICK_FS_METADIR (1 << 6) /* metadata directory tree */
66+
#define XFS_SICK_FS_METAPATH (1 << 7) /* metadata directory tree path */
6567

6668
/* Observable health issues for realtime volume metadata. */
6769
#define XFS_SICK_RT_BITMAP (1 << 0) /* realtime bitmap */
@@ -105,7 +107,9 @@ struct xfs_da_args;
105107
XFS_SICK_FS_GQUOTA | \
106108
XFS_SICK_FS_PQUOTA | \
107109
XFS_SICK_FS_QUOTACHECK | \
108-
XFS_SICK_FS_NLINKS)
110+
XFS_SICK_FS_NLINKS | \
111+
XFS_SICK_FS_METADIR | \
112+
XFS_SICK_FS_METAPATH)
109113

110114
#define XFS_SICK_RT_PRIMARY (XFS_SICK_RT_BITMAP | \
111115
XFS_SICK_RT_SUMMARY)

0 commit comments

Comments
 (0)