Skip to content

Commit 63ef7a3

Browse files
author
Darrick J. Wong
committed
xfs: fix interval filtering in multi-step fsmap queries
I noticed a bug in ranged GETFSMAP queries: # xfs_io -c 'fsmap -vvvv' /opt EXT: DEV BLOCK-RANGE OWNER FILE-OFFSET AG AG-OFFSET TOTAL 0: 8:80 [0..7]: static fs metadata 0 (0..7) 8 <snip> 9: 8:80 [192..223]: 137 0..31 0 (192..223) 32 # xfs_io -c 'fsmap -vvvv -d 208 208' /opt # That's not right -- we asked what block maps block 208, and we should've received a mapping for inode 137 offset 16. Instead, we get nothing. The root cause of this problem is a mis-interaction between the fsmap code and how btree ranged queries work. xfs_btree_query_range returns any btree record that overlaps with the query interval, even if the record starts before or ends after the interval. Similarly, GETFSMAP is supposed to return a recordset containing all records that overlap the range queried. However, it's possible that the recordset is larger than the buffer that the caller provided to convey mappings to userspace. In /that/ case, userspace is supposed to copy the last record returned to fmh_keys[0] and call GETFSMAP again. In this case, we do not want to return mappings that we have already supplied to the caller. The call to xfs_btree_query_range is the same, but now we ignore any records that start before fmh_keys[0]. Unfortunately, we didn't implement the filtering predicate correctly. The predicate should only be called when we're calling back for more records. Accomplish this by setting info->low.rm_blockcount to a nonzero value and ensuring that it is cleared as necessary. As a result, we no longer want to adjust dkeys[0] in the main setup function because that's confusing. This patch doesn't touch the logdev/rtbitmap backends because they have bigger problems that will be addressed by subsequent patches. Found via xfs/556 with parent pointers enabled. Fixes: e89c041 ("xfs: implement the GETFSMAP ioctl") Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Dave Chinner <dchinner@redhat.com>
1 parent 2bed0d8 commit 63ef7a3

File tree

1 file changed

+48
-19
lines changed

1 file changed

+48
-19
lines changed

fs/xfs/xfs_fsmap.c

Lines changed: 48 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,14 @@ struct xfs_getfsmap_info {
162162
xfs_daddr_t next_daddr; /* next daddr we expect */
163163
u64 missing_owner; /* owner of holes */
164164
u32 dev; /* device id */
165-
struct xfs_rmap_irec low; /* low rmap key */
165+
/*
166+
* Low rmap key for the query. If low.rm_blockcount is nonzero, this
167+
* is the second (or later) call to retrieve the recordset in pieces.
168+
* xfs_getfsmap_rec_before_start will compare all records retrieved
169+
* by the rmapbt query to filter out any records that start before
170+
* the last record.
171+
*/
172+
struct xfs_rmap_irec low;
166173
struct xfs_rmap_irec high; /* high rmap key */
167174
bool last; /* last extent? */
168175
};
@@ -237,6 +244,17 @@ xfs_getfsmap_format(
237244
xfs_fsmap_from_internal(rec, xfm);
238245
}
239246

247+
static inline bool
248+
xfs_getfsmap_rec_before_start(
249+
struct xfs_getfsmap_info *info,
250+
const struct xfs_rmap_irec *rec,
251+
xfs_daddr_t rec_daddr)
252+
{
253+
if (info->low.rm_blockcount)
254+
return xfs_rmap_compare(rec, &info->low) < 0;
255+
return false;
256+
}
257+
240258
/*
241259
* Format a reverse mapping for getfsmap, having translated rm_startblock
242260
* into the appropriate daddr units.
@@ -260,7 +278,7 @@ xfs_getfsmap_helper(
260278
* Filter out records that start before our startpoint, if the
261279
* caller requested that.
262280
*/
263-
if (xfs_rmap_compare(rec, &info->low) < 0) {
281+
if (xfs_getfsmap_rec_before_start(info, rec, rec_daddr)) {
264282
rec_daddr += XFS_FSB_TO_BB(mp, rec->rm_blockcount);
265283
if (info->next_daddr < rec_daddr)
266284
info->next_daddr = rec_daddr;
@@ -606,9 +624,27 @@ __xfs_getfsmap_datadev(
606624
error = xfs_fsmap_owner_to_rmap(&info->low, &keys[0]);
607625
if (error)
608626
return error;
609-
info->low.rm_blockcount = 0;
627+
info->low.rm_blockcount = XFS_BB_TO_FSBT(mp, keys[0].fmr_length);
610628
xfs_getfsmap_set_irec_flags(&info->low, &keys[0]);
611629

630+
/* Adjust the low key if we are continuing from where we left off. */
631+
if (info->low.rm_blockcount == 0) {
632+
/* empty */
633+
} else if (XFS_RMAP_NON_INODE_OWNER(info->low.rm_owner) ||
634+
(info->low.rm_flags & (XFS_RMAP_ATTR_FORK |
635+
XFS_RMAP_BMBT_BLOCK |
636+
XFS_RMAP_UNWRITTEN))) {
637+
info->low.rm_startblock += info->low.rm_blockcount;
638+
info->low.rm_owner = 0;
639+
info->low.rm_offset = 0;
640+
641+
start_fsb += info->low.rm_blockcount;
642+
if (XFS_FSB_TO_DADDR(mp, start_fsb) >= eofs)
643+
return 0;
644+
} else {
645+
info->low.rm_offset += info->low.rm_blockcount;
646+
}
647+
612648
info->high.rm_startblock = -1U;
613649
info->high.rm_owner = ULLONG_MAX;
614650
info->high.rm_offset = ULLONG_MAX;
@@ -659,12 +695,8 @@ __xfs_getfsmap_datadev(
659695
* Set the AG low key to the start of the AG prior to
660696
* moving on to the next AG.
661697
*/
662-
if (pag->pag_agno == start_ag) {
663-
info->low.rm_startblock = 0;
664-
info->low.rm_owner = 0;
665-
info->low.rm_offset = 0;
666-
info->low.rm_flags = 0;
667-
}
698+
if (pag->pag_agno == start_ag)
699+
memset(&info->low, 0, sizeof(info->low));
668700

669701
/*
670702
* If this is the last AG, report any gap at the end of it
@@ -901,21 +933,17 @@ xfs_getfsmap(
901933
* blocks could be mapped to several other files/offsets.
902934
* According to rmapbt record ordering, the minimal next
903935
* possible record for the block range is the next starting
904-
* offset in the same inode. Therefore, bump the file offset to
905-
* continue the search appropriately. For all other low key
906-
* mapping types (attr blocks, metadata), bump the physical
907-
* offset as there can be no other mapping for the same physical
908-
* block range.
936+
* offset in the same inode. Therefore, each fsmap backend bumps
937+
* the file offset to continue the search appropriately. For
938+
* all other low key mapping types (attr blocks, metadata), each
939+
* fsmap backend bumps the physical offset as there can be no
940+
* other mapping for the same physical block range.
909941
*/
910942
dkeys[0] = head->fmh_keys[0];
911943
if (dkeys[0].fmr_flags & (FMR_OF_SPECIAL_OWNER | FMR_OF_EXTENT_MAP)) {
912-
dkeys[0].fmr_physical += dkeys[0].fmr_length;
913-
dkeys[0].fmr_owner = 0;
914944
if (dkeys[0].fmr_offset)
915945
return -EINVAL;
916-
} else
917-
dkeys[0].fmr_offset += dkeys[0].fmr_length;
918-
dkeys[0].fmr_length = 0;
946+
}
919947
memset(&dkeys[1], 0xFF, sizeof(struct xfs_fsmap));
920948

921949
if (!xfs_getfsmap_check_keys(dkeys, &head->fmh_keys[1]))
@@ -960,6 +988,7 @@ xfs_getfsmap(
960988
info.dev = handlers[i].dev;
961989
info.last = false;
962990
info.pag = NULL;
991+
info.low.rm_blockcount = 0;
963992
error = handlers[i].fn(tp, dkeys, &info);
964993
if (error)
965994
break;

0 commit comments

Comments
 (0)