Skip to content

Commit 3f9c1b3

Browse files
committed
Merge tag 'ceph-for-6.8-rc5' of https://github.com/ceph/ceph-client
Pull ceph fixes from Ilya Dryomov:
 "Additional cap handling fixes from Xiubo to avoid "client isn't responding to mclientcaps(revoke)" stalls on the MDS side"

* tag 'ceph-for-6.8-rc5' of https://github.com/ceph/ceph-client:
  ceph: add ceph_cap_unlink_work to fire check_caps() immediately
  ceph: always queue a writeback when revoking the Fb caps
2 parents 683b783 + dbc347e commit 3f9c1b3

File tree

3 files changed

+93
-25
lines changed

3 files changed

+93
-25
lines changed

fs/ceph/caps.c

Lines changed: 40 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -2156,6 +2156,30 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
21562156
ceph_cap_string(cap->implemented),
21572157
ceph_cap_string(revoking));
21582158

2159+
/* completed revocation? going down and there are no caps? */
2160+
if (revoking) {
2161+
if ((revoking & cap_used) == 0) {
2162+
doutc(cl, "completed revocation of %s\n",
2163+
ceph_cap_string(cap->implemented & ~cap->issued));
2164+
goto ack;
2165+
}
2166+
2167+
/*
2168+
* If the "i_wrbuffer_ref" was increased by mmap or generic
2169+
* cache write just before the ceph_check_caps() is called,
2170+
* the Fb capability revoking will fail this time. Then we
2171+
* must wait for the BDI's delayed work to flush the dirty
2172+
* pages and to release the "i_wrbuffer_ref", which will cost
2173+
* at most 5 seconds. That means the MDS needs to wait at
2174+
* most 5 seconds to finish the Fb capability's revocation.
2175+
*
2176+
* Let's queue a writeback for it.
2177+
*/
2178+
if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref &&
2179+
(revoking & CEPH_CAP_FILE_BUFFER))
2180+
queue_writeback = true;
2181+
}
2182+
21592183
if (cap == ci->i_auth_cap &&
21602184
(cap->issued & CEPH_CAP_FILE_WR)) {
21612185
/* request larger max_size from MDS? */
@@ -2183,30 +2207,6 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags)
21832207
}
21842208
}
21852209

2186-
/* completed revocation? going down and there are no caps? */
2187-
if (revoking) {
2188-
if ((revoking & cap_used) == 0) {
2189-
doutc(cl, "completed revocation of %s\n",
2190-
ceph_cap_string(cap->implemented & ~cap->issued));
2191-
goto ack;
2192-
}
2193-
2194-
/*
2195-
* If the "i_wrbuffer_ref" was increased by mmap or generic
2196-
* cache write just before the ceph_check_caps() is called,
2197-
* the Fb capability revoking will fail this time. Then we
2198-
* must wait for the BDI's delayed work to flush the dirty
2199-
* pages and to release the "i_wrbuffer_ref", which will cost
2200-
* at most 5 seconds. That means the MDS needs to wait at
2201-
* most 5 seconds to finish the Fb capability's revocation.
2202-
*
2203-
* Let's queue a writeback for it.
2204-
*/
2205-
if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref &&
2206-
(revoking & CEPH_CAP_FILE_BUFFER))
2207-
queue_writeback = true;
2208-
}
2209-
22102210
/* want more caps from mds? */
22112211
if (want & ~cap->mds_wanted) {
22122212
if (want & ~(cap->mds_wanted | cap->issued))
@@ -4772,7 +4772,22 @@ int ceph_drop_caps_for_unlink(struct inode *inode)
47724772
if (__ceph_caps_dirty(ci)) {
47734773
struct ceph_mds_client *mdsc =
47744774
ceph_inode_to_fs_client(inode)->mdsc;
4775-
__cap_delay_requeue_front(mdsc, ci);
4775+
4776+
doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode,
4777+
ceph_vinop(inode));
4778+
spin_lock(&mdsc->cap_unlink_delay_lock);
4779+
ci->i_ceph_flags |= CEPH_I_FLUSH;
4780+
if (!list_empty(&ci->i_cap_delay_list))
4781+
list_del_init(&ci->i_cap_delay_list);
4782+
list_add_tail(&ci->i_cap_delay_list,
4783+
&mdsc->cap_unlink_delay_list);
4784+
spin_unlock(&mdsc->cap_unlink_delay_lock);
4785+
4786+
/*
4787+
* Fire the work immediately, because the MDS may be
4788+
* waiting for caps release.
4789+
*/
4790+
ceph_queue_cap_unlink_work(mdsc);
47764791
}
47774792
}
47784793
spin_unlock(&ci->i_ceph_lock);

fs/ceph/mds_client.c

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2484,6 +2484,50 @@ void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr)
24842484
}
24852485
}
24862486

2487+
void ceph_queue_cap_unlink_work(struct ceph_mds_client *mdsc)
2488+
{
2489+
struct ceph_client *cl = mdsc->fsc->client;
2490+
if (mdsc->stopping)
2491+
return;
2492+
2493+
if (queue_work(mdsc->fsc->cap_wq, &mdsc->cap_unlink_work)) {
2494+
doutc(cl, "caps unlink work queued\n");
2495+
} else {
2496+
doutc(cl, "failed to queue caps unlink work\n");
2497+
}
2498+
}
2499+
2500+
static void ceph_cap_unlink_work(struct work_struct *work)
2501+
{
2502+
struct ceph_mds_client *mdsc =
2503+
container_of(work, struct ceph_mds_client, cap_unlink_work);
2504+
struct ceph_client *cl = mdsc->fsc->client;
2505+
2506+
doutc(cl, "begin\n");
2507+
spin_lock(&mdsc->cap_unlink_delay_lock);
2508+
while (!list_empty(&mdsc->cap_unlink_delay_list)) {
2509+
struct ceph_inode_info *ci;
2510+
struct inode *inode;
2511+
2512+
ci = list_first_entry(&mdsc->cap_unlink_delay_list,
2513+
struct ceph_inode_info,
2514+
i_cap_delay_list);
2515+
list_del_init(&ci->i_cap_delay_list);
2516+
2517+
inode = igrab(&ci->netfs.inode);
2518+
if (inode) {
2519+
spin_unlock(&mdsc->cap_unlink_delay_lock);
2520+
doutc(cl, "on %p %llx.%llx\n", inode,
2521+
ceph_vinop(inode));
2522+
ceph_check_caps(ci, CHECK_CAPS_FLUSH);
2523+
iput(inode);
2524+
spin_lock(&mdsc->cap_unlink_delay_lock);
2525+
}
2526+
}
2527+
spin_unlock(&mdsc->cap_unlink_delay_lock);
2528+
doutc(cl, "done\n");
2529+
}
2530+
24872531
/*
24882532
* requests
24892533
*/
@@ -5359,6 +5403,8 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
53595403
INIT_LIST_HEAD(&mdsc->cap_delay_list);
53605404
INIT_LIST_HEAD(&mdsc->cap_wait_list);
53615405
spin_lock_init(&mdsc->cap_delay_lock);
5406+
INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list);
5407+
spin_lock_init(&mdsc->cap_unlink_delay_lock);
53625408
INIT_LIST_HEAD(&mdsc->snap_flush_list);
53635409
spin_lock_init(&mdsc->snap_flush_lock);
53645410
mdsc->last_cap_flush_tid = 1;
@@ -5367,6 +5413,7 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc)
53675413
spin_lock_init(&mdsc->cap_dirty_lock);
53685414
init_waitqueue_head(&mdsc->cap_flushing_wq);
53695415
INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
5416+
INIT_WORK(&mdsc->cap_unlink_work, ceph_cap_unlink_work);
53705417
err = ceph_metric_init(&mdsc->metric);
53715418
if (err)
53725419
goto err_mdsmap;
@@ -5640,6 +5687,7 @@ void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
56405687
ceph_cleanup_global_and_empty_realms(mdsc);
56415688

56425689
cancel_work_sync(&mdsc->cap_reclaim_work);
5690+
cancel_work_sync(&mdsc->cap_unlink_work);
56435691
cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
56445692

56455693
doutc(cl, "done\n");

fs/ceph/mds_client.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -462,6 +462,8 @@ struct ceph_mds_client {
462462
unsigned long last_renew_caps; /* last time we renewed our caps */
463463
struct list_head cap_delay_list; /* caps with delayed release */
464464
spinlock_t cap_delay_lock; /* protects cap_delay_list */
465+
struct list_head cap_unlink_delay_list; /* caps with delayed release for unlink */
466+
spinlock_t cap_unlink_delay_lock; /* protects cap_unlink_delay_list */
465467
struct list_head snap_flush_list; /* cap_snaps ready to flush */
466468
spinlock_t snap_flush_lock;
467469

@@ -475,6 +477,8 @@ struct ceph_mds_client {
475477
struct work_struct cap_reclaim_work;
476478
atomic_t cap_reclaim_pending;
477479

480+
struct work_struct cap_unlink_work;
481+
478482
/*
479483
* Cap reservations
480484
*
@@ -574,6 +578,7 @@ extern void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
574578
struct ceph_mds_session *session);
575579
extern void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc);
576580
extern void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr);
581+
extern void ceph_queue_cap_unlink_work(struct ceph_mds_client *mdsc);
577582
extern int ceph_iterate_session_caps(struct ceph_mds_session *session,
578583
int (*cb)(struct inode *, int mds, void *),
579584
void *arg);

0 commit comments

Comments
 (0)