Skip to content

Commit 7734fb4

Browse files
kevmw authored and
Mikulas Patocka committed
dm mpath: Interface for explicit probing of active paths
Multipath cannot directly provide failover for ioctls in the kernel because it doesn't know what each ioctl means and which result could indicate a path error. Userspace generally knows what the ioctl it issued means and if it might be a path error, but neither does it know which path the ioctl took nor does it necessarily have the privileges to fail a path using the control device. In order to allow userspace to address this situation, implement a DM_MPATH_PROBE_PATHS ioctl that prompts the dm-mpath driver to probe all active paths in the current path group to see whether they still work, and fail them if not. If this returns success, userspace can retry the ioctl and expect that the previously hit bad path is now failed (or working again). The immediate motivation for this is the use of SG_IO in QEMU for SCSI passthrough. Following a failed SG_IO ioctl, QEMU will trigger probing to ensure that all active paths are actually alive, so that retrying SG_IO at least has a lower chance of failing due to a path error. However, the problem is broader than just SG_IO (it affects any ioctl), and if applications need failover support for other ioctls, the same probing can be used. This is not implemented on the DM control device, but on the DM mpath block devices, to allow all users who have access to such a block device to make use of this interface, specifically to implement failover for ioctls. For the same reason, it is also unprivileged. Its implementation is effectively just a bunch of reads, which could already be issued by userspace, just without any guarantee that all the right paths are selected. The probing implemented here is done fully synchronously path by path; probing all paths concurrently is left as an improvement for the future. 
Co-developed-by: Hanna Czenczek <hreitz@redhat.com> Signed-off-by: Hanna Czenczek <hreitz@redhat.com> Signed-off-by: Kevin Wolf <kwolf@redhat.com> Reviewed-by: Benjamin Marzinski <bmarzins@redhat.com> Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com> Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
1 parent 4862c88 commit 7734fb4

File tree

3 files changed

+107
-3
lines changed

3 files changed

+107
-3
lines changed

drivers/md/dm-ioctl.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1885,6 +1885,7 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags)
18851885
{DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry},
18861886
{DM_DEV_ARM_POLL_CMD, IOCTL_FLAGS_NO_PARAMS, dev_arm_poll},
18871887
{DM_GET_TARGET_VERSION_CMD, 0, get_target_version},
1888+
{DM_MPATH_PROBE_PATHS_CMD, 0, NULL}, /* block device ioctl */
18881889
};
18891890

18901891
if (unlikely(cmd >= ARRAY_SIZE(_ioctls)))

drivers/md/dm-mpath.c

Lines changed: 99 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2021,6 +2021,94 @@ static int multipath_message(struct dm_target *ti, unsigned int argc, char **arg
20212021
return r;
20222022
}
20232023

2024+
/*
2025+
* Perform a minimal read from the given path to find out whether the
2026+
* path still works. If a path error occurs, fail it.
2027+
*/
2028+
static int probe_path(struct pgpath *pgpath)
2029+
{
2030+
struct block_device *bdev = pgpath->path.dev->bdev;
2031+
unsigned int read_size = bdev_logical_block_size(bdev);
2032+
struct page *page;
2033+
struct bio *bio;
2034+
blk_status_t status;
2035+
int r = 0;
2036+
2037+
if (WARN_ON_ONCE(read_size > PAGE_SIZE))
2038+
return -EINVAL;
2039+
2040+
page = alloc_page(GFP_KERNEL);
2041+
if (!page)
2042+
return -ENOMEM;
2043+
2044+
/* Perform a minimal read: Sector 0, length read_size */
2045+
bio = bio_alloc(bdev, 1, REQ_OP_READ, GFP_KERNEL);
2046+
if (!bio) {
2047+
r = -ENOMEM;
2048+
goto out;
2049+
}
2050+
2051+
bio->bi_iter.bi_sector = 0;
2052+
__bio_add_page(bio, page, read_size, 0);
2053+
submit_bio_wait(bio);
2054+
status = bio->bi_status;
2055+
bio_put(bio);
2056+
2057+
if (status && blk_path_error(status))
2058+
fail_path(pgpath);
2059+
2060+
out:
2061+
__free_page(page);
2062+
return r;
2063+
}
2064+
2065+
/*
2066+
* Probe all active paths in current_pg to find out whether they still work.
2067+
* Fail all paths that do not work.
2068+
*
2069+
* Return -ENOTCONN if no valid path is left (even outside of current_pg). We
2070+
* cannot probe paths in other pgs without switching current_pg, so if valid
2071+
* paths are only in different pgs, they may or may not work. Additionally
2072+
* we should not probe paths in a pathgroup that is in the process of
2073+
* Initializing. Userspace can submit a request and we'll switch and wait
2074+
* for the pathgroup to be initialized. If the request fails, it may need to
2075+
* probe again.
2076+
*/
2077+
static int probe_active_paths(struct multipath *m)
2078+
{
2079+
struct pgpath *pgpath;
2080+
struct priority_group *pg;
2081+
unsigned long flags;
2082+
int r = 0;
2083+
2084+
mutex_lock(&m->work_mutex);
2085+
2086+
spin_lock_irqsave(&m->lock, flags);
2087+
if (test_bit(MPATHF_QUEUE_IO, &m->flags))
2088+
pg = NULL;
2089+
else
2090+
pg = m->current_pg;
2091+
spin_unlock_irqrestore(&m->lock, flags);
2092+
2093+
if (pg) {
2094+
list_for_each_entry(pgpath, &pg->pgpaths, list) {
2095+
if (!pgpath->is_active)
2096+
continue;
2097+
2098+
r = probe_path(pgpath);
2099+
if (r < 0)
2100+
goto out;
2101+
}
2102+
}
2103+
2104+
if (!atomic_read(&m->nr_valid_paths))
2105+
r = -ENOTCONN;
2106+
2107+
out:
2108+
mutex_unlock(&m->work_mutex);
2109+
return r;
2110+
}
2111+
20242112
static int multipath_prepare_ioctl(struct dm_target *ti,
20252113
struct block_device **bdev,
20262114
unsigned int cmd, unsigned long arg,
@@ -2031,6 +2119,16 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
20312119
unsigned long flags;
20322120
int r;
20332121

2122+
if (_IOC_TYPE(cmd) == DM_IOCTL) {
2123+
*forward = false;
2124+
switch (cmd) {
2125+
case DM_MPATH_PROBE_PATHS:
2126+
return probe_active_paths(m);
2127+
default:
2128+
return -ENOTTY;
2129+
}
2130+
}
2131+
20342132
pgpath = READ_ONCE(m->current_pgpath);
20352133
if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m))
20362134
pgpath = choose_pgpath(m, 0);
@@ -2182,7 +2280,7 @@ static int multipath_busy(struct dm_target *ti)
21822280
*/
21832281
static struct target_type multipath_target = {
21842282
.name = "multipath",
2185-
.version = {1, 14, 0},
2283+
.version = {1, 15, 0},
21862284
.features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE |
21872285
DM_TARGET_PASSES_INTEGRITY,
21882286
.module = THIS_MODULE,

include/uapi/linux/dm-ioctl.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,10 +258,12 @@ enum {
258258
DM_DEV_SET_GEOMETRY_CMD,
259259
DM_DEV_ARM_POLL_CMD,
260260
DM_GET_TARGET_VERSION_CMD,
261+
DM_MPATH_PROBE_PATHS_CMD,
261262
};
262263

263264
#define DM_IOCTL 0xfd
264265

266+
/* Control device ioctls */
265267
#define DM_VERSION _IOWR(DM_IOCTL, DM_VERSION_CMD, struct dm_ioctl)
266268
#define DM_REMOVE_ALL _IOWR(DM_IOCTL, DM_REMOVE_ALL_CMD, struct dm_ioctl)
267269
#define DM_LIST_DEVICES _IOWR(DM_IOCTL, DM_LIST_DEVICES_CMD, struct dm_ioctl)
@@ -285,10 +287,13 @@ enum {
285287
#define DM_TARGET_MSG _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl)
286288
#define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
287289

290+
/* Block device ioctls */
291+
#define DM_MPATH_PROBE_PATHS _IO(DM_IOCTL, DM_MPATH_PROBE_PATHS_CMD)
292+
288293
#define DM_VERSION_MAJOR 4
289-
#define DM_VERSION_MINOR 49
294+
#define DM_VERSION_MINOR 50
290295
#define DM_VERSION_PATCHLEVEL 0
291-
#define DM_VERSION_EXTRA "-ioctl (2025-01-17)"
296+
#define DM_VERSION_EXTRA "-ioctl (2025-04-28)"
292297

293298
/* Status bits */
294299
#define DM_READONLY_FLAG (1 << 0) /* In/Out */

0 commit comments

Comments
 (0)