Skip to content

Commit 71ef07c

Browse files
committed
btrfs: open code RCU for device name
The RCU protected string is only used for a device name, and RCU is used so we can print the name and eventually synchronize against the rare device rename in device_list_add(). We don't need the whole API just for that. Open code all the helpers and access to the string itself. Notable change is in device_list_add() when the device name is changed, which is the only place that can actually happen at the same time as message prints using the device name under RCU read lock. Previously there was kfree_rcu() which used the embedded rcu_head to delay freeing the object depending on the RCU mechanism. Now there's kfree_rcu_mightsleep() which does not need the rcu_head and waits for the grace period. Sleeping is safe in this context and as this is a rare event it won't interfere with the rest as it's holding the device_list_mutex. Straightforward changes: - rcu_string_strdup -> kstrdup - rcu_str_deref -> rcu_dereference - drop ->str from safe contexts Historical notes: Introduced in 606686e ("Btrfs: use rcu to protect device->name") with a vague reference of the potential problem described in https://lore.kernel.org/all/20120531155304.GF11775@ZenIV.linux.org.uk/ . The RCU protection looks like the easiest and most lightweight way of protecting the rare event of device rename racing device_list_add() with a random printk() that uses the device name. Alternatives: a spin lock would require to protect the printk anyway, a fixed buffer for the name would be eventually wrong in case the new name is overwritten when being printed, an array switching pointers and cleaning them up eventually resembles RCU too much. The cleanups up to this patch should hide special case of RCU to the minimum that only the name needs rcu_dereference(), which can be further cleaned up to use btrfs_dev_name(). Reviewed-by: Daniel Vacek <neelx@suse.com> Signed-off-by: David Sterba <dsterba@suse.com>
1 parent f9299f8 commit 71ef07c

File tree

3 files changed

+36
-26
lines changed

3 files changed

+36
-26
lines changed

fs/btrfs/volumes.c

Lines changed: 22 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -401,8 +401,11 @@ static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
401401
static void btrfs_free_device(struct btrfs_device *device)
402402
{
403403
WARN_ON(!list_empty(&device->post_commit_list));
404-
/* No need to call kfree_rcu(), nothing is reading the device name. */
405-
kfree(device->name);
404+
/*
405+
* No need to call kfree_rcu() nor do RCU lock/unlock, nothing is
406+
* reading the device name.
407+
*/
408+
kfree(rcu_dereference_raw(device->name));
406409
btrfs_extent_io_tree_release(&device->alloc_state);
407410
btrfs_destroy_dev_zone_info(device);
408411
kfree(device);
@@ -657,7 +660,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
657660
if (!device->name)
658661
return -EINVAL;
659662

660-
ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
663+
ret = btrfs_get_bdev_and_sb(rcu_dereference_raw(device->name), flags, holder, 1,
661664
&bdev_file, &disk_super);
662665
if (ret)
663666
return ret;
@@ -701,7 +704,7 @@ static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
701704
if (device->devt != device->bdev->bd_dev) {
702705
btrfs_warn(NULL,
703706
"device %s maj:min changed from %d:%d to %d:%d",
704-
device->name->str, MAJOR(device->devt),
707+
rcu_dereference_raw(device->name), MAJOR(device->devt),
705708
MINOR(device->devt), MAJOR(device->bdev->bd_dev),
706709
MINOR(device->bdev->bd_dev));
707710

@@ -749,7 +752,7 @@ static bool is_same_device(struct btrfs_device *device, const char *new_path)
749752
goto out;
750753

751754
rcu_read_lock();
752-
ret = strscpy(old_path, rcu_str_deref(device->name), PATH_MAX);
755+
ret = strscpy(old_path, rcu_dereference(device->name), PATH_MAX);
753756
rcu_read_unlock();
754757
if (ret < 0)
755758
goto out;
@@ -782,7 +785,7 @@ static noinline struct btrfs_device *device_list_add(const char *path,
782785
{
783786
struct btrfs_device *device;
784787
struct btrfs_fs_devices *fs_devices = NULL;
785-
struct rcu_string *name;
788+
const char *name;
786789
u64 found_transid = btrfs_super_generation(disk_super);
787790
u64 devid = btrfs_stack_device_id(&disk_super->dev_item);
788791
dev_t path_devt;
@@ -890,6 +893,8 @@ static noinline struct btrfs_device *device_list_add(const char *path,
890893
current->comm, task_pid_nr(current));
891894

892895
} else if (!device->name || !is_same_device(device, path)) {
896+
const char *old_name;
897+
893898
/*
894899
* When FS is already mounted.
895900
* 1. If you are here and if the device->name is NULL that
@@ -957,13 +962,17 @@ static noinline struct btrfs_device *device_list_add(const char *path,
957962
task_pid_nr(current));
958963
}
959964

960-
name = rcu_string_strdup(path, GFP_NOFS);
965+
name = kstrdup(path, GFP_NOFS);
961966
if (!name) {
962967
mutex_unlock(&fs_devices->device_list_mutex);
963968
return ERR_PTR(-ENOMEM);
964969
}
965-
kfree_rcu(device->name, rcu);
970+
rcu_read_lock();
971+
old_name = rcu_dereference(device->name);
972+
rcu_read_unlock();
966973
rcu_assign_pointer(device->name, name);
974+
kfree_rcu_mightsleep(old_name);
975+
967976
if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
968977
fs_devices->missing_devices--;
969978
clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
@@ -1012,7 +1021,7 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
10121021
* uuid mutex so nothing we touch in here is going to disappear.
10131022
*/
10141023
if (orig_dev->name)
1015-
dev_path = orig_dev->name->str;
1024+
dev_path = rcu_dereference_raw(orig_dev->name);
10161025

10171026
device = btrfs_alloc_device(NULL, &orig_dev->devid,
10181027
orig_dev->uuid, dev_path);
@@ -1414,7 +1423,7 @@ static bool btrfs_skip_registration(struct btrfs_super_block *disk_super,
14141423

14151424
list_for_each_entry(device, &fs_devices->devices, dev_list) {
14161425
if (device->bdev && (device->bdev->bd_dev == devt) &&
1417-
strcmp(device->name->str, path) != 0) {
1426+
strcmp(rcu_dereference_raw(device->name), path) != 0) {
14181427
mutex_unlock(&fs_devices->device_list_mutex);
14191428

14201429
/* Do not skip registration. */
@@ -2164,7 +2173,7 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, struct btrfs_devic
21642173
btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
21652174

21662175
/* Update ctime/mtime for device path for libblkid */
2167-
update_dev_time(device->name->str);
2176+
update_dev_time(rcu_dereference_raw(device->name));
21682177
}
21692178

21702179
int btrfs_rm_device(struct btrfs_fs_info *fs_info,
@@ -6923,9 +6932,9 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
69236932
generate_random_uuid(dev->uuid);
69246933

69256934
if (path) {
6926-
struct rcu_string *name;
6935+
const char *name;
69276936

6928-
name = rcu_string_strdup(path, GFP_KERNEL);
6937+
name = kstrdup(path, GFP_KERNEL);
69296938
if (!name) {
69306939
btrfs_free_device(dev);
69316940
return ERR_PTR(-ENOMEM);

fs/btrfs/volumes.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,8 @@ struct btrfs_device {
114114
struct btrfs_fs_devices *fs_devices;
115115
struct btrfs_fs_info *fs_info;
116116

117-
struct rcu_string __rcu *name;
117+
/* Device path or NULL if missing. */
118+
const char __rcu *name;
118119

119120
u64 generation;
120121

@@ -857,7 +858,7 @@ static inline const char *btrfs_dev_name(const struct btrfs_device *device)
857858
if (!device || test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
858859
return "<missing disk>";
859860
else
860-
return rcu_str_deref(device->name);
861+
return rcu_dereference(device->name);
861862
}
862863

863864
static inline void btrfs_warn_unknown_chunk_allocation(enum btrfs_chunk_allocation_policy pol)

fs/btrfs/zoned.c

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,7 @@ static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
266266
if (ret < 0) {
267267
btrfs_err(device->fs_info,
268268
"zoned: failed to read zone %llu on %s (devid %llu)",
269-
pos, rcu_str_deref(device->name),
269+
pos, rcu_dereference(device->name),
270270
device->devid);
271271
return ret;
272272
}
@@ -398,14 +398,14 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
398398
if (zone_info->zone_size > BTRFS_MAX_ZONE_SIZE) {
399399
btrfs_err(fs_info,
400400
"zoned: %s: zone size %llu larger than supported maximum %llu",
401-
rcu_str_deref(device->name),
401+
rcu_dereference(device->name),
402402
zone_info->zone_size, BTRFS_MAX_ZONE_SIZE);
403403
ret = -EINVAL;
404404
goto out;
405405
} else if (zone_info->zone_size < BTRFS_MIN_ZONE_SIZE) {
406406
btrfs_err(fs_info,
407407
"zoned: %s: zone size %llu smaller than supported minimum %u",
408-
rcu_str_deref(device->name),
408+
rcu_dereference(device->name),
409409
zone_info->zone_size, BTRFS_MIN_ZONE_SIZE);
410410
ret = -EINVAL;
411411
goto out;
@@ -421,7 +421,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
421421
if (max_active_zones && max_active_zones < BTRFS_MIN_ACTIVE_ZONES) {
422422
btrfs_err(fs_info,
423423
"zoned: %s: max active zones %u is too small, need at least %u active zones",
424-
rcu_str_deref(device->name), max_active_zones,
424+
rcu_dereference(device->name), max_active_zones,
425425
BTRFS_MIN_ACTIVE_ZONES);
426426
ret = -EINVAL;
427427
goto out;
@@ -463,7 +463,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
463463
if (!zone_info->zone_cache) {
464464
btrfs_err(device->fs_info,
465465
"zoned: failed to allocate zone cache for %s",
466-
rcu_str_deref(device->name));
466+
rcu_dereference(device->name));
467467
ret = -ENOMEM;
468468
goto out;
469469
}
@@ -500,7 +500,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
500500
if (nreported != zone_info->nr_zones) {
501501
btrfs_err(device->fs_info,
502502
"inconsistent number of zones on %s (%u/%u)",
503-
rcu_str_deref(device->name), nreported,
503+
rcu_dereference(device->name), nreported,
504504
zone_info->nr_zones);
505505
ret = -EIO;
506506
goto out;
@@ -510,7 +510,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
510510
if (nactive > max_active_zones) {
511511
btrfs_err(device->fs_info,
512512
"zoned: %u active zones on %s exceeds max_active_zones %u",
513-
nactive, rcu_str_deref(device->name),
513+
nactive, rcu_dereference(device->name),
514514
max_active_zones);
515515
ret = -EIO;
516516
goto out;
@@ -578,7 +578,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
578578

579579
btrfs_info(fs_info,
580580
"%s block device %s, %u %szones of %llu bytes",
581-
model, rcu_str_deref(device->name), zone_info->nr_zones,
581+
model, rcu_dereference(device->name), zone_info->nr_zones,
582582
emulated, zone_info->zone_size);
583583

584584
return 0;
@@ -1186,7 +1186,7 @@ int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
11861186
btrfs_warn(
11871187
device->fs_info,
11881188
"zoned: resetting device %s (devid %llu) zone %llu for allocation",
1189-
rcu_str_deref(device->name), device->devid, pos >> shift);
1189+
rcu_dereference(device->name), device->devid, pos >> shift);
11901190
WARN_ON_ONCE(1);
11911191

11921192
ret = btrfs_reset_device_zone(device, pos, zinfo->zone_size,
@@ -1348,7 +1348,7 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx,
13481348
if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
13491349
btrfs_err(fs_info,
13501350
"zoned: unexpected conventional zone %llu on device %s (devid %llu)",
1351-
zone.start << SECTOR_SHIFT, rcu_str_deref(device->name),
1351+
zone.start << SECTOR_SHIFT, rcu_dereference(device->name),
13521352
device->devid);
13531353
up_read(&dev_replace->rwsem);
13541354
return -EIO;
@@ -1362,7 +1362,7 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx,
13621362
btrfs_err(fs_info,
13631363
"zoned: offline/readonly zone %llu on device %s (devid %llu)",
13641364
(info->physical >> device->zone_info->zone_size_shift),
1365-
rcu_str_deref(device->name), device->devid);
1365+
rcu_dereference(device->name), device->devid);
13661366
info->alloc_offset = WP_MISSING_DEV;
13671367
break;
13681368
case BLK_ZONE_COND_EMPTY:

0 commit comments

Comments
 (0)