Skip to content

Commit 096384d

Browse files
committed
Merge tag 'xfs-fixes-6.15-rc3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux
Pull XFS fixes from Carlos Maiolino: "This mostly includes fixes and documentation for the zoned allocator feature merged during previous merge window, but it also adds a sysfs tunable for the zone garbage collector. There is also a fix for a regression to the RT device that we'd like to fix ASAP now that we're getting more users on the RT zoned allocator" * tag 'xfs-fixes-6.15-rc3' of git://git.kernel.org/pub/scm/fs/xfs/xfs-linux: xfs: document zoned rt specifics in admin-guide xfs: fix fsmap for internal zoned devices xfs: Fix spelling mistake "drity" -> "dirty" xfs: compute buffer address correctly in xmbuf_map_backing_mem xfs: add tunable threshold parameter for triggering zone GC xfs: mark xfs_buf_free as might_sleep() xfs: remove the leftover xfs_{set,clear}_li_failed infrastructure
2 parents 0cb9ce0 + c7b67dd commit 096384d

File tree

13 files changed

+143
-61
lines changed

13 files changed

+143
-61
lines changed

Documentation/admin-guide/xfs.rst

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,14 @@ When mounting an XFS filesystem, the following options are accepted.
124124
controls the size of each buffer and so is also relevant to
125125
this case.
126126

127+
lifetime (default) or nolifetime
128+
Enable data placement based on write lifetime hints provided
129+
by the user. This turns on co-allocation of data of similar
130+
lifetimes when statistically favorable to reduce garbage
131+
collection cost.
132+
133+
These options are only available for zoned rt file systems.
134+
127135
logbsize=value
128136
Set the size of each in-memory log buffer. The size may be
129137
specified in bytes, or in kilobytes with a "k" suffix.
@@ -143,6 +151,14 @@ When mounting an XFS filesystem, the following options are accepted.
143151
optional, and the log section can be separate from the data
144152
section or contained within it.
145153

154+
max_open_zones=value
155+
Specify the max number of zones to keep open for writing on a
156+
zoned rt device. Having many open zones aids file data separation
157+
but may impact performance on HDDs.
158+
159+
If ``max_open_zones`` is not specified, the value is determined
160+
by the capabilities and the size of the zoned rt device.
161+
146162
noalign
147163
Data allocations will not be aligned at stripe unit
148164
boundaries. This is only relevant to filesystems created
@@ -542,3 +558,37 @@ The interesting knobs for XFS workqueues are as follows:
542558
nice Relative priority of scheduling the threads. These are the
543559
same nice levels that can be applied to userspace processes.
544560
============ ===========
561+
562+
Zoned Filesystems
563+
=================
564+
565+
For zoned file systems, the following attribute is exposed in:
566+
567+
/sys/fs/xfs/<dev>/zoned/
568+
569+
max_open_zones (Min: 1 Default: Varies Max: UINT_MAX)
570+
This read-only attribute exposes the maximum number of open zones
571+
available for data placement. The value is determined at mount time and
572+
is limited by the capabilities of the backing zoned device, file system
573+
size and the max_open_zones mount option.
574+
575+
Zoned Filesystems
576+
=================
577+
578+
For zoned file systems, the following attributes are exposed in:
579+
580+
/sys/fs/xfs/<dev>/zoned/
581+
582+
max_open_zones (Min: 1 Default: Varies Max: UINT_MAX)
583+
This read-only attribute exposes the maximum number of open zones
584+
available for data placement. The value is determined at mount time and
585+
is limited by the capabilities of the backing zoned device, file system
586+
size and the max_open_zones mount option.
587+
588+
zonegc_low_space (Min: 0 Default: 0 Max: 100)
589+
Define the percentage of the unused space that GC should keep
590+
available for writing. A high value will reclaim more of the space
591+
occupied by unused blocks, creating a larger buffer against write
592+
bursts at the cost of increased write amplification. Regardless
593+
of this value, garbage collection will always aim to free a minimum
594+
amount of blocks to keep max_open_zones open for data placement purposes.

fs/xfs/xfs_buf.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ xfs_buf_free(
105105
{
106106
unsigned int size = BBTOB(bp->b_length);
107107

108+
might_sleep();
108109
trace_xfs_buf_free(bp, _RET_IP_);
109110

110111
ASSERT(list_empty(&bp->b_lru));

fs/xfs/xfs_buf_mem.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ xmbuf_map_backing_mem(
165165
folio_set_dirty(folio);
166166
folio_unlock(folio);
167167

168-
bp->b_addr = folio_address(folio);
168+
bp->b_addr = folio_address(folio) + offset_in_folio(folio, pos);
169169
return 0;
170170
}
171171

fs/xfs/xfs_dquot.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1186,9 +1186,8 @@ xfs_qm_dqflush_done(
11861186
if (test_bit(XFS_LI_IN_AIL, &lip->li_flags) &&
11871187
(lip->li_lsn == qlip->qli_flush_lsn ||
11881188
test_bit(XFS_LI_FAILED, &lip->li_flags))) {
1189-
11901189
spin_lock(&ailp->ail_lock);
1191-
xfs_clear_li_failed(lip);
1190+
clear_bit(XFS_LI_FAILED, &lip->li_flags);
11921191
if (lip->li_lsn == qlip->qli_flush_lsn) {
11931192
/* xfs_ail_update_finish() drops the AIL lock */
11941193
tail_lsn = xfs_ail_delete_one(ailp, lip);

fs/xfs/xfs_fsmap.c

Lines changed: 33 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -876,6 +876,7 @@ xfs_getfsmap_rtdev_rmapbt(
876876
const struct xfs_fsmap *keys,
877877
struct xfs_getfsmap_info *info)
878878
{
879+
struct xfs_fsmap key0 = *keys; /* struct copy */
879880
struct xfs_mount *mp = tp->t_mountp;
880881
struct xfs_rtgroup *rtg = NULL;
881882
struct xfs_btree_cur *bt_cur = NULL;
@@ -887,45 +888,59 @@ xfs_getfsmap_rtdev_rmapbt(
887888
int error = 0;
888889

889890
eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart + mp->m_sb.sb_rblocks);
890-
if (keys[0].fmr_physical >= eofs)
891+
if (key0.fmr_physical >= eofs)
891892
return 0;
892893

894+
/*
895+
* On zoned filesystems with an internal rt volume, the volume comes
896+
* immediately after the end of the data volume. However, the
897+
* xfs_rtblock_t address space is relative to the start of the data
898+
* device, which means that the first @rtstart fsblocks do not actually
899+
* point anywhere. If a fsmap query comes in with the low key starting
900+
* below @rtstart, report it as "owned by filesystem".
901+
*/
893902
rtstart_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rtstart);
894-
if (keys[0].fmr_physical < rtstart_daddr) {
903+
if (xfs_has_zoned(mp) && key0.fmr_physical < rtstart_daddr) {
895904
struct xfs_fsmap_irec frec = {
896905
.owner = XFS_RMAP_OWN_FS,
897906
.len_daddr = rtstart_daddr,
898907
};
899908

900-
/* Adjust the low key if we are continuing from where we left off. */
901-
if (keys[0].fmr_length > 0) {
902-
info->low_daddr = keys[0].fmr_physical + keys[0].fmr_length;
903-
return 0;
909+
/*
910+
* Adjust the start of the query range if we're picking up from
911+
* a previous round, and only emit the record if we haven't
912+
* already gone past.
913+
*/
914+
key0.fmr_physical += key0.fmr_length;
915+
if (key0.fmr_physical < rtstart_daddr) {
916+
error = xfs_getfsmap_helper(tp, info, &frec);
917+
if (error)
918+
return error;
919+
920+
key0.fmr_physical = rtstart_daddr;
904921
}
905922

906-
/* Fabricate an rmap entry for space occupied by the data dev */
907-
error = xfs_getfsmap_helper(tp, info, &frec);
908-
if (error)
909-
return error;
923+
/* Zero the other fields to avoid further adjustments. */
924+
key0.fmr_owner = 0;
925+
key0.fmr_offset = 0;
926+
key0.fmr_length = 0;
910927
}
911928

912-
start_rtb = xfs_daddr_to_rtb(mp, rtstart_daddr + keys[0].fmr_physical);
913-
end_rtb = xfs_daddr_to_rtb(mp, rtstart_daddr +
914-
min(eofs - 1, keys[1].fmr_physical));
915-
929+
start_rtb = xfs_daddr_to_rtb(mp, key0.fmr_physical);
930+
end_rtb = xfs_daddr_to_rtb(mp, min(eofs - 1, keys[1].fmr_physical));
916931
info->missing_owner = XFS_FMR_OWN_FREE;
917932

918933
/*
919934
* Convert the fsmap low/high keys to rtgroup based keys. Initialize
920935
* low to the fsmap low key and max out the high key to the end
921936
* of the rtgroup.
922937
*/
923-
info->low.rm_offset = XFS_BB_TO_FSBT(mp, keys[0].fmr_offset);
924-
error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
938+
info->low.rm_offset = XFS_BB_TO_FSBT(mp, key0.fmr_offset);
939+
error = xfs_fsmap_owner_to_rmap(&info->low, &key0);
925940
if (error)
926941
return error;
927-
info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);
928-
xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
942+
info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, key0.fmr_length);
943+
xfs_getfsmap_set_irec_flags(&info->low, &key0);
929944

930945
/* Adjust the low key if we are continuing from where we left off. */
931946
if (info->low.rm_blockcount == 0) {

fs/xfs/xfs_inode_item.c

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1089,13 +1089,7 @@ xfs_iflush_abort(
10891089
* state. Whilst the inode is in the AIL, it should have a valid buffer
10901090
* pointer for push operations to access - it is only safe to remove the
10911091
* inode from the buffer once it has been removed from the AIL.
1092-
*
1093-
* We also clear the failed bit before removing the item from the AIL
1094-
* as xfs_trans_ail_delete()->xfs_clear_li_failed() will release buffer
1095-
* references the inode item owns and needs to hold until we've fully
1096-
* aborted the inode log item and detached it from the buffer.
10971092
*/
1098-
clear_bit(XFS_LI_FAILED, &iip->ili_item.li_flags);
10991093
xfs_trans_ail_delete(&iip->ili_item, 0);
11001094

11011095
/*

fs/xfs/xfs_log.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2888,7 +2888,7 @@ xlog_force_and_check_iclog(
28882888
*
28892889
* 1. the current iclog is active and has no data; the previous iclog
28902890
* is in the active or dirty state.
2891-
* 2. the current iclog is drity, and the previous iclog is in the
2891+
* 2. the current iclog is dirty, and the previous iclog is in the
28922892
* active or dirty state.
28932893
*
28942894
* We may sleep if:

fs/xfs/xfs_mount.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,7 @@ typedef struct xfs_mount {
229229
bool m_finobt_nores; /* no per-AG finobt resv. */
230230
bool m_update_sb; /* sb needs update in mount */
231231
unsigned int m_max_open_zones;
232+
unsigned int m_zonegc_low_space;
232233

233234
/*
234235
* Bitsets of per-fs metadata that have been checked and/or are sick.

fs/xfs/xfs_sysfs.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -718,8 +718,40 @@ max_open_zones_show(
718718
}
719719
XFS_SYSFS_ATTR_RO(max_open_zones);
720720

721+
/*
 * sysfs store handler for the zonegc_low_space attribute.
 *
 * Parses @buf as an unsigned integer, rejects values above 100 with
 * -EINVAL, and otherwise stores the value in m_zonegc_low_space of the
 * mount returned by zoned_to_mp(@kobj).
 *
 * Returns @count on success, or the negative error from kstrtouint() /
 * -EINVAL on failure.
 *
 * NOTE(review): base 0 is passed to kstrtouint(), which presumably lets it
 * auto-detect the base from the input prefix -- confirm against the kstrto*
 * documentation.  The field is written with a plain store; no locking is
 * visible in this function.
 */
static ssize_t
722+
zonegc_low_space_store(
723+
struct kobject *kobj,
724+
const char *buf,
725+
size_t count)
726+
{
727+
int ret;
728+
unsigned int val;
729+
730+
ret = kstrtouint(buf, 0, &val);
731+
if (ret)
732+
return ret;
733+
734+
if (val > 100)
735+
return -EINVAL;
736+
737+
zoned_to_mp(kobj)->m_zonegc_low_space = val;
738+
739+
return count;
740+
}
741+
742+
/*
 * sysfs show handler for the zonegc_low_space attribute.
 *
 * Formats m_zonegc_low_space of the mount returned by zoned_to_mp(@kobj)
 * into @buf as an unsigned decimal followed by a newline, and returns
 * whatever sysfs_emit() returns.
 */
static ssize_t
743+
zonegc_low_space_show(
744+
struct kobject *kobj,
745+
char *buf)
746+
{
747+
return sysfs_emit(buf, "%u\n",
748+
zoned_to_mp(kobj)->m_zonegc_low_space);
749+
}
750+
XFS_SYSFS_ATTR_RW(zonegc_low_space);
751+
721752
static struct attribute *xfs_zoned_attrs[] = {
722753
ATTR_LIST(max_open_zones),
754+
ATTR_LIST(zonegc_low_space),
723755
NULL,
724756
};
725757
ATTRIBUTE_GROUPS(xfs_zoned);

fs/xfs/xfs_trans_ail.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -909,10 +909,9 @@ xfs_trans_ail_delete(
909909
return;
910910
}
911911

912-
/* xfs_ail_update_finish() drops the AIL lock */
913-
xfs_clear_li_failed(lip);
912+
clear_bit(XFS_LI_FAILED, &lip->li_flags);
914913
tail_lsn = xfs_ail_delete_one(ailp, lip);
915-
xfs_ail_update_finish(ailp, tail_lsn);
914+
xfs_ail_update_finish(ailp, tail_lsn); /* drops the AIL lock */
916915
}
917916

918917
int

0 commit comments

Comments
 (0)