Skip to content

Commit 90515fe

Browse files
morbidrsa authored and kdave committed
btrfs: zoned: fix alloc_offset calculation for partly conventional block groups
btrfs: zoned: fix alloc_offset calculation for partly conventional block groups
When one of the two zones composing a DUP block group is a conventional zone, zone_info[i]->alloc_offset is set to WP_CONVENTIONAL. That will, of course, not match the write pointer of the other zone, and that block group fails. This commit solves the issue by properly recovering the emulated write pointer from the last allocated extent. The offsets for the SINGLE, DUP, and RAID1 profiles are straightforward: each is the same as the end of the last allocated extent. RAID0 and RAID10 are a bit trickier, as we need to do the math of striping. This is the kernel equivalent of Naohiro's user-space commit: "btrfs-progs: zoned: fix alloc_offset calculation for partly conventional block groups". Reviewed-by: Naohiro Aota <naohiro.aota@wdc.com> Signed-off-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Signed-off-by: David Sterba <dsterba@suse.com>
1 parent 5fc86db commit 90515fe

File tree

1 file changed

+72
-14
lines changed

1 file changed

+72
-14
lines changed

fs/btrfs/zoned.c

Lines changed: 72 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1403,7 +1403,8 @@ static int btrfs_load_block_group_single(struct btrfs_block_group *bg,
14031403
static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
14041404
struct btrfs_chunk_map *map,
14051405
struct zone_info *zone_info,
1406-
unsigned long *active)
1406+
unsigned long *active,
1407+
u64 last_alloc)
14071408
{
14081409
struct btrfs_fs_info *fs_info = bg->fs_info;
14091410

@@ -1426,6 +1427,13 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
14261427
zone_info[1].physical);
14271428
return -EIO;
14281429
}
1430+
1431+
if (zone_info[0].alloc_offset == WP_CONVENTIONAL)
1432+
zone_info[0].alloc_offset = last_alloc;
1433+
1434+
if (zone_info[1].alloc_offset == WP_CONVENTIONAL)
1435+
zone_info[1].alloc_offset = last_alloc;
1436+
14291437
if (zone_info[0].alloc_offset != zone_info[1].alloc_offset) {
14301438
btrfs_err(bg->fs_info,
14311439
"zoned: write pointer offset mismatch of zones in DUP profile");
@@ -1446,7 +1454,8 @@ static int btrfs_load_block_group_dup(struct btrfs_block_group *bg,
14461454
static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
14471455
struct btrfs_chunk_map *map,
14481456
struct zone_info *zone_info,
1449-
unsigned long *active)
1457+
unsigned long *active,
1458+
u64 last_alloc)
14501459
{
14511460
struct btrfs_fs_info *fs_info = bg->fs_info;
14521461
int i;
@@ -1461,10 +1470,12 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
14611470
bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity);
14621471

14631472
for (i = 0; i < map->num_stripes; i++) {
1464-
if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
1465-
zone_info[i].alloc_offset == WP_CONVENTIONAL)
1473+
if (zone_info[i].alloc_offset == WP_MISSING_DEV)
14661474
continue;
14671475

1476+
if (zone_info[i].alloc_offset == WP_CONVENTIONAL)
1477+
zone_info[i].alloc_offset = last_alloc;
1478+
14681479
if ((zone_info[0].alloc_offset != zone_info[i].alloc_offset) &&
14691480
!btrfs_test_opt(fs_info, DEGRADED)) {
14701481
btrfs_err(fs_info,
@@ -1494,7 +1505,8 @@ static int btrfs_load_block_group_raid1(struct btrfs_block_group *bg,
14941505
static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
14951506
struct btrfs_chunk_map *map,
14961507
struct zone_info *zone_info,
1497-
unsigned long *active)
1508+
unsigned long *active,
1509+
u64 last_alloc)
14981510
{
14991511
struct btrfs_fs_info *fs_info = bg->fs_info;
15001512

@@ -1505,10 +1517,29 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
15051517
}
15061518

15071519
for (int i = 0; i < map->num_stripes; i++) {
1508-
if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
1509-
zone_info[i].alloc_offset == WP_CONVENTIONAL)
1520+
if (zone_info[i].alloc_offset == WP_MISSING_DEV)
15101521
continue;
15111522

1523+
if (zone_info[i].alloc_offset == WP_CONVENTIONAL) {
1524+
u64 stripe_nr, full_stripe_nr;
1525+
u64 stripe_offset;
1526+
int stripe_index;
1527+
1528+
stripe_nr = div64_u64(last_alloc, map->stripe_size);
1529+
stripe_offset = stripe_nr * map->stripe_size;
1530+
full_stripe_nr = div_u64(stripe_nr, map->num_stripes);
1531+
div_u64_rem(stripe_nr, map->num_stripes, &stripe_index);
1532+
1533+
zone_info[i].alloc_offset =
1534+
full_stripe_nr * map->stripe_size;
1535+
1536+
if (stripe_index > i)
1537+
zone_info[i].alloc_offset += map->stripe_size;
1538+
else if (stripe_index == i)
1539+
zone_info[i].alloc_offset +=
1540+
(last_alloc - stripe_offset);
1541+
}
1542+
15121543
if (test_bit(0, active) != test_bit(i, active)) {
15131544
if (!btrfs_zone_activate(bg))
15141545
return -EIO;
@@ -1526,7 +1557,8 @@ static int btrfs_load_block_group_raid0(struct btrfs_block_group *bg,
15261557
static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
15271558
struct btrfs_chunk_map *map,
15281559
struct zone_info *zone_info,
1529-
unsigned long *active)
1560+
unsigned long *active,
1561+
u64 last_alloc)
15301562
{
15311563
struct btrfs_fs_info *fs_info = bg->fs_info;
15321564

@@ -1537,8 +1569,7 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
15371569
}
15381570

15391571
for (int i = 0; i < map->num_stripes; i++) {
1540-
if (zone_info[i].alloc_offset == WP_MISSING_DEV ||
1541-
zone_info[i].alloc_offset == WP_CONVENTIONAL)
1572+
if (zone_info[i].alloc_offset == WP_MISSING_DEV)
15421573
continue;
15431574

15441575
if (test_bit(0, active) != test_bit(i, active)) {
@@ -1549,6 +1580,29 @@ static int btrfs_load_block_group_raid10(struct btrfs_block_group *bg,
15491580
set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags);
15501581
}
15511582

1583+
if (zone_info[i].alloc_offset == WP_CONVENTIONAL) {
1584+
u64 stripe_nr, full_stripe_nr;
1585+
u64 stripe_offset;
1586+
int stripe_index;
1587+
1588+
stripe_nr = div64_u64(last_alloc, map->stripe_size);
1589+
stripe_offset = stripe_nr * map->stripe_size;
1590+
full_stripe_nr = div_u64(stripe_nr,
1591+
map->num_stripes / map->sub_stripes);
1592+
div_u64_rem(stripe_nr,
1593+
(map->num_stripes / map->sub_stripes),
1594+
&stripe_index);
1595+
1596+
zone_info[i].alloc_offset =
1597+
full_stripe_nr * map->stripe_size;
1598+
1599+
if (stripe_index > (i / map->sub_stripes))
1600+
zone_info[i].alloc_offset += map->stripe_size;
1601+
else if (stripe_index == (i / map->sub_stripes))
1602+
zone_info[i].alloc_offset +=
1603+
(last_alloc - stripe_offset);
1604+
}
1605+
15521606
if ((i % map->sub_stripes) == 0) {
15531607
bg->zone_capacity += zone_info[i].capacity;
15541608
bg->alloc_offset += zone_info[i].alloc_offset;
@@ -1637,18 +1691,22 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
16371691
ret = btrfs_load_block_group_single(cache, &zone_info[0], active);
16381692
break;
16391693
case BTRFS_BLOCK_GROUP_DUP:
1640-
ret = btrfs_load_block_group_dup(cache, map, zone_info, active);
1694+
ret = btrfs_load_block_group_dup(cache, map, zone_info, active,
1695+
last_alloc);
16411696
break;
16421697
case BTRFS_BLOCK_GROUP_RAID1:
16431698
case BTRFS_BLOCK_GROUP_RAID1C3:
16441699
case BTRFS_BLOCK_GROUP_RAID1C4:
1645-
ret = btrfs_load_block_group_raid1(cache, map, zone_info, active);
1700+
ret = btrfs_load_block_group_raid1(cache, map, zone_info,
1701+
active, last_alloc);
16461702
break;
16471703
case BTRFS_BLOCK_GROUP_RAID0:
1648-
ret = btrfs_load_block_group_raid0(cache, map, zone_info, active);
1704+
ret = btrfs_load_block_group_raid0(cache, map, zone_info,
1705+
active, last_alloc);
16491706
break;
16501707
case BTRFS_BLOCK_GROUP_RAID10:
1651-
ret = btrfs_load_block_group_raid10(cache, map, zone_info, active);
1708+
ret = btrfs_load_block_group_raid10(cache, map, zone_info,
1709+
active, last_alloc);
16521710
break;
16531711
case BTRFS_BLOCK_GROUP_RAID5:
16541712
case BTRFS_BLOCK_GROUP_RAID6:

0 commit comments

Comments (0)