Skip to content

Commit 4593f3c

Browse files
committed
Merge tag 'ceph-for-6.5-rc5' of https://github.com/ceph/ceph-client
Pull ceph fixes from Ilya Dryomov:
 "Two patches to improve RBD exclusive lock interaction with
  osd_request_timeout option and another fix to reduce the potential
  for erroneous blocklisting -- this time in CephFS. All going to
  stable"

* tag 'ceph-for-6.5-rc5' of https://github.com/ceph/ceph-client:
  libceph: fix potential hang in ceph_osdc_notify()
  rbd: prevent busy loop when requesting exclusive lock
  ceph: defer stopping mdsc delayed_work
2 parents 7979642 + e6e2843 commit 4593f3c

File tree

5 files changed

+46
-21
lines changed

5 files changed

+46
-21
lines changed

drivers/block/rbd.c

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3675,7 +3675,7 @@ static int rbd_lock(struct rbd_device *rbd_dev)
36753675
ret = ceph_cls_lock(osdc, &rbd_dev->header_oid, &rbd_dev->header_oloc,
36763676
RBD_LOCK_NAME, CEPH_CLS_LOCK_EXCLUSIVE, cookie,
36773677
RBD_LOCK_TAG, "", 0);
3678-
if (ret)
3678+
if (ret && ret != -EEXIST)
36793679
return ret;
36803680

36813681
__rbd_lock(rbd_dev, cookie);
@@ -3878,7 +3878,7 @@ static struct ceph_locker *get_lock_owner_info(struct rbd_device *rbd_dev)
38783878
&rbd_dev->header_oloc, RBD_LOCK_NAME,
38793879
&lock_type, &lock_tag, &lockers, &num_lockers);
38803880
if (ret) {
3881-
rbd_warn(rbd_dev, "failed to retrieve lockers: %d", ret);
3881+
rbd_warn(rbd_dev, "failed to get header lockers: %d", ret);
38823882
return ERR_PTR(ret);
38833883
}
38843884

@@ -3940,8 +3940,10 @@ static int find_watcher(struct rbd_device *rbd_dev,
39403940
ret = ceph_osdc_list_watchers(osdc, &rbd_dev->header_oid,
39413941
&rbd_dev->header_oloc, &watchers,
39423942
&num_watchers);
3943-
if (ret)
3943+
if (ret) {
3944+
rbd_warn(rbd_dev, "failed to get watchers: %d", ret);
39443945
return ret;
3946+
}
39453947

39463948
sscanf(locker->id.cookie, RBD_LOCK_COOKIE_PREFIX " %llu", &cookie);
39473949
for (i = 0; i < num_watchers; i++) {
@@ -3985,8 +3987,12 @@ static int rbd_try_lock(struct rbd_device *rbd_dev)
39853987
locker = refreshed_locker = NULL;
39863988

39873989
ret = rbd_lock(rbd_dev);
3988-
if (ret != -EBUSY)
3990+
if (!ret)
3991+
goto out;
3992+
if (ret != -EBUSY) {
3993+
rbd_warn(rbd_dev, "failed to lock header: %d", ret);
39893994
goto out;
3995+
}
39903996

39913997
/* determine if the current lock holder is still alive */
39923998
locker = get_lock_owner_info(rbd_dev);
@@ -4089,11 +4095,8 @@ static int rbd_try_acquire_lock(struct rbd_device *rbd_dev)
40894095

40904096
ret = rbd_try_lock(rbd_dev);
40914097
if (ret < 0) {
4092-
rbd_warn(rbd_dev, "failed to lock header: %d", ret);
4093-
if (ret == -EBLOCKLISTED)
4094-
goto out;
4095-
4096-
ret = 1; /* request lock anyway */
4098+
rbd_warn(rbd_dev, "failed to acquire lock: %d", ret);
4099+
goto out;
40974100
}
40984101
if (ret > 0) {
40994102
up_write(&rbd_dev->lock_rwsem);
@@ -6627,12 +6630,11 @@ static int rbd_add_acquire_lock(struct rbd_device *rbd_dev)
66276630
cancel_delayed_work_sync(&rbd_dev->lock_dwork);
66286631
if (!ret)
66296632
ret = -ETIMEDOUT;
6630-
}
66316633

6632-
if (ret) {
6633-
rbd_warn(rbd_dev, "failed to acquire exclusive lock: %ld", ret);
6634-
return ret;
6634+
rbd_warn(rbd_dev, "failed to acquire lock: %ld", ret);
66356635
}
6636+
if (ret)
6637+
return ret;
66366638

66376639
/*
66386640
* The lock may have been released by now, unless automatic lock

fs/ceph/mds_client.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4764,7 +4764,7 @@ static void delayed_work(struct work_struct *work)
47644764

47654765
dout("mdsc delayed_work\n");
47664766

4767-
if (mdsc->stopping)
4767+
if (mdsc->stopping >= CEPH_MDSC_STOPPING_FLUSHED)
47684768
return;
47694769

47704770
mutex_lock(&mdsc->mutex);
@@ -4943,7 +4943,7 @@ void send_flush_mdlog(struct ceph_mds_session *s)
49434943
void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
49444944
{
49454945
dout("pre_umount\n");
4946-
mdsc->stopping = 1;
4946+
mdsc->stopping = CEPH_MDSC_STOPPING_BEGIN;
49474947

49484948
ceph_mdsc_iterate_sessions(mdsc, send_flush_mdlog, true);
49494949
ceph_mdsc_iterate_sessions(mdsc, lock_unlock_session, false);

fs/ceph/mds_client.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -380,6 +380,11 @@ struct cap_wait {
380380
int want;
381381
};
382382

383+
enum {
384+
CEPH_MDSC_STOPPING_BEGIN = 1,
385+
CEPH_MDSC_STOPPING_FLUSHED = 2,
386+
};
387+
383388
/*
384389
* mds client state
385390
*/

fs/ceph/super.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1374,6 +1374,16 @@ static void ceph_kill_sb(struct super_block *s)
13741374
ceph_mdsc_pre_umount(fsc->mdsc);
13751375
flush_fs_workqueues(fsc);
13761376

1377+
/*
1378+
* Though the kill_anon_super() will finally trigger the
1379+
* sync_filesystem() anyway, we still need to do it here
1380+
* and then bump the stage of shutdown to stop the work
1381+
queue as early as possible.
1382+
*/
1383+
sync_filesystem(s);
1384+
1385+
fsc->mdsc->stopping = CEPH_MDSC_STOPPING_FLUSHED;
1386+
13771387
kill_anon_super(s);
13781388

13791389
fsc->client->extra_mon_dispatch = NULL;

net/ceph/osd_client.c

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3334,17 +3334,24 @@ static int linger_reg_commit_wait(struct ceph_osd_linger_request *lreq)
33343334
int ret;
33353335

33363336
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
3337-
ret = wait_for_completion_interruptible(&lreq->reg_commit_wait);
3337+
ret = wait_for_completion_killable(&lreq->reg_commit_wait);
33383338
return ret ?: lreq->reg_commit_error;
33393339
}
33403340

3341-
static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq)
3341+
static int linger_notify_finish_wait(struct ceph_osd_linger_request *lreq,
3342+
unsigned long timeout)
33423343
{
3343-
int ret;
3344+
long left;
33443345

33453346
dout("%s lreq %p linger_id %llu\n", __func__, lreq, lreq->linger_id);
3346-
ret = wait_for_completion_interruptible(&lreq->notify_finish_wait);
3347-
return ret ?: lreq->notify_finish_error;
3347+
left = wait_for_completion_killable_timeout(&lreq->notify_finish_wait,
3348+
ceph_timeout_jiffies(timeout));
3349+
if (left <= 0)
3350+
left = left ?: -ETIMEDOUT;
3351+
else
3352+
left = lreq->notify_finish_error; /* completed */
3353+
3354+
return left;
33483355
}
33493356

33503357
/*
@@ -4896,7 +4903,8 @@ int ceph_osdc_notify(struct ceph_osd_client *osdc,
48964903
linger_submit(lreq);
48974904
ret = linger_reg_commit_wait(lreq);
48984905
if (!ret)
4899-
ret = linger_notify_finish_wait(lreq);
4906+
ret = linger_notify_finish_wait(lreq,
4907+
msecs_to_jiffies(2 * timeout * MSEC_PER_SEC));
49004908
else
49014909
dout("lreq %p failed to initiate notify %d\n", lreq, ret);
49024910

0 commit comments

Comments
 (0)