Skip to content

Commit 5881590

Browse files
committed
rbd: retrieve and check lock owner twice before blocklisting
An attempt to acquire exclusive lock can race with the current lock owner closing the image:

1. lock is held by client123, rbd_lock() returns -EBUSY
2. get_lock_owner_info() returns client123 instance details
3. client123 closes the image, lock is released
4. find_watcher() returns 0 as there is no matching watcher anymore
5. client123 instance gets erroneously blocklisted

Particularly impacted is the mirror snapshot scheduler in snapshot-based mirroring, since it happens to open and close images a lot (images are opened only for as long as it takes to take the next mirror snapshot; the same client instance is used for all images).

To reduce the potential for erroneous blocklisting, retrieve the lock owner again after find_watcher() returns 0. If it's still there, make sure it matches the previously detected lock owner.

Cc: stable@vger.kernel.org # f38cb9d: rbd: make get_lock_owner_info() return a single locker or NULL
Cc: stable@vger.kernel.org # 8ff2c64: rbd: harden get_lock_owner_info() a bit
Cc: stable@vger.kernel.org
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
Reviewed-by: Dongsheng Yang <dongsheng.yang@easystack.cn>
1 parent 8ff2c64 commit 5881590

File tree

1 file changed

+23
-2
lines changed

1 file changed

+23
-2
lines changed

drivers/block/rbd.c

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3849,6 +3849,15 @@ static void wake_lock_waiters(struct rbd_device *rbd_dev, int result)
38493849
list_splice_tail_init(&rbd_dev->acquiring_list, &rbd_dev->running_list);
38503850
}
38513851

3852+
static bool locker_equal(const struct ceph_locker *lhs,
3853+
const struct ceph_locker *rhs)
3854+
{
3855+
return lhs->id.name.type == rhs->id.name.type &&
3856+
lhs->id.name.num == rhs->id.name.num &&
3857+
!strcmp(lhs->id.cookie, rhs->id.cookie) &&
3858+
ceph_addr_equal_no_type(&lhs->info.addr, &rhs->info.addr);
3859+
}
3860+
38523861
static void free_locker(struct ceph_locker *locker)
38533862
{
38543863
if (locker)
@@ -3969,11 +3978,11 @@ static int find_watcher(struct rbd_device *rbd_dev,
39693978
static int rbd_try_lock(struct rbd_device *rbd_dev)
39703979
{
39713980
struct ceph_client *client = rbd_dev->rbd_client->client;
3972-
struct ceph_locker *locker;
3981+
struct ceph_locker *locker, *refreshed_locker;
39733982
int ret;
39743983

39753984
for (;;) {
3976-
locker = NULL;
3985+
locker = refreshed_locker = NULL;
39773986

39783987
ret = rbd_lock(rbd_dev);
39793988
if (ret != -EBUSY)
@@ -3993,6 +4002,16 @@ static int rbd_try_lock(struct rbd_device *rbd_dev)
39934002
if (ret)
39944003
goto out; /* request lock or error */
39954004

4005+
refreshed_locker = get_lock_owner_info(rbd_dev);
4006+
if (IS_ERR(refreshed_locker)) {
4007+
ret = PTR_ERR(refreshed_locker);
4008+
refreshed_locker = NULL;
4009+
goto out;
4010+
}
4011+
if (!refreshed_locker ||
4012+
!locker_equal(locker, refreshed_locker))
4013+
goto again;
4014+
39964015
rbd_warn(rbd_dev, "breaking header lock owned by %s%llu",
39974016
ENTITY_NAME(locker->id.name));
39984017

@@ -4014,10 +4033,12 @@ static int rbd_try_lock(struct rbd_device *rbd_dev)
40144033
}
40154034

40164035
again:
4036+
free_locker(refreshed_locker);
40174037
free_locker(locker);
40184038
}
40194039

40204040
out:
4041+
free_locker(refreshed_locker);
40214042
free_locker(locker);
40224043
return ret;
40234044
}

0 commit comments

Comments
 (0)