Skip to content

Commit 17d8e3d

Browse files
committed
Merge tag 'ceph-for-5.19-rc1' of https://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov: "A big pile of assorted fixes and improvements for the filesystem with nothing in particular standing out, except perhaps that the fact that the MDS never really maintained atime was made official and thus it's no longer updated on the client either. We also have a MAINTAINERS update: Jeff is transitioning his filesystem maintainership duties to Xiubo" * tag 'ceph-for-5.19-rc1' of https://github.com/ceph/ceph-client: (23 commits) MAINTAINERS: move myself from ceph "Maintainer" to "Reviewer" ceph: fix decoding of client session messages flags ceph: switch TASK_INTERRUPTIBLE to TASK_KILLABLE ceph: remove redundant variable ino ceph: try to queue a writeback if revoking fails ceph: fix statfs for subdir mounts ceph: fix possible deadlock when holding Fwb to get inline_data ceph: redirty the page for writepage on failure ceph: try to choose the auth MDS if possible for getattr ceph: disable updating the atime since cephfs won't maintain it ceph: flush the mdlog for filesystem sync ceph: rename unsafe_request_wait() libceph: use swap() macro instead of taking tmp variable ceph: fix statx AT_STATX_DONT_SYNC vs AT_STATX_FORCE_SYNC check ceph: no need to invalidate the fscache twice ceph: replace usage of found with dedicated list iterator variable ceph: use dedicated list iterator variable ceph: update the dlease for the hashed dentry when removing ceph: stop retrying the request when exceeding 256 times ceph: stop forwarding the request when exceeding 256 times ...
2 parents 7c9e960 + af7dc8e commit 17d8e3d

File tree

12 files changed

+253
-103
lines changed

12 files changed

+253
-103
lines changed

MAINTAINERS

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4566,8 +4566,8 @@ F: drivers/power/supply/cw2015_battery.c
45664566

45674567
CEPH COMMON CODE (LIBCEPH)
45684568
M: Ilya Dryomov <idryomov@gmail.com>
4569-
M: Jeff Layton <jlayton@kernel.org>
45704569
M: Xiubo Li <xiubli@redhat.com>
4570+
R: Jeff Layton <jlayton@kernel.org>
45714571
L: ceph-devel@vger.kernel.org
45724572
S: Supported
45734573
W: http://ceph.com/
@@ -4577,9 +4577,9 @@ F: include/linux/crush/
45774577
F: net/ceph/
45784578

45794579
CEPH DISTRIBUTED FILE SYSTEM CLIENT (CEPH)
4580-
M: Jeff Layton <jlayton@kernel.org>
45814580
M: Xiubo Li <xiubli@redhat.com>
45824581
M: Ilya Dryomov <idryomov@gmail.com>
4582+
R: Jeff Layton <jlayton@kernel.org>
45834583
L: ceph-devel@vger.kernel.org
45844584
S: Supported
45854585
W: http://ceph.com/

drivers/block/rbd.c

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -756,24 +756,23 @@ static struct rbd_client *__rbd_get_client(struct rbd_client *rbdc)
756756
*/
757757
static struct rbd_client *rbd_client_find(struct ceph_options *ceph_opts)
758758
{
759-
struct rbd_client *client_node;
760-
bool found = false;
759+
struct rbd_client *rbdc = NULL, *iter;
761760

762761
if (ceph_opts->flags & CEPH_OPT_NOSHARE)
763762
return NULL;
764763

765764
spin_lock(&rbd_client_list_lock);
766-
list_for_each_entry(client_node, &rbd_client_list, node) {
767-
if (!ceph_compare_options(ceph_opts, client_node->client)) {
768-
__rbd_get_client(client_node);
765+
list_for_each_entry(iter, &rbd_client_list, node) {
766+
if (!ceph_compare_options(ceph_opts, iter->client)) {
767+
__rbd_get_client(iter);
769768

770-
found = true;
769+
rbdc = iter;
771770
break;
772771
}
773772
}
774773
spin_unlock(&rbd_client_list_lock);
775774

776-
return found ? client_node : NULL;
775+
return rbdc;
777776
}
778777

779778
/*

fs/ceph/addr.c

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,7 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
256256
struct iov_iter iter;
257257
ssize_t err = 0;
258258
size_t len;
259+
int mode;
259260

260261
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
261262
__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
@@ -264,7 +265,8 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
264265
goto out;
265266

266267
/* We need to fetch the inline data. */
267-
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
268+
mode = ceph_try_to_choose_auth_mds(inode, CEPH_STAT_CAP_INLINE_DATA);
269+
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, mode);
268270
if (IS_ERR(req)) {
269271
err = PTR_ERR(req);
270272
goto out;
@@ -604,8 +606,10 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
604606
CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc,
605607
ceph_wbc.truncate_seq, ceph_wbc.truncate_size,
606608
true);
607-
if (IS_ERR(req))
609+
if (IS_ERR(req)) {
610+
redirty_page_for_writepage(wbc, page);
608611
return PTR_ERR(req);
612+
}
609613

610614
set_page_writeback(page);
611615
if (caching)
@@ -1644,18 +1648,31 @@ int ceph_uninline_data(struct file *file)
16441648
struct inode *inode = file_inode(file);
16451649
struct ceph_inode_info *ci = ceph_inode(inode);
16461650
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
1647-
struct ceph_osd_request *req;
1651+
struct ceph_osd_request *req = NULL;
16481652
struct ceph_cap_flush *prealloc_cf;
16491653
struct folio *folio = NULL;
16501654
u64 inline_version = CEPH_INLINE_NONE;
16511655
struct page *pages[1];
16521656
int err = 0;
16531657
u64 len;
16541658

1659+
spin_lock(&ci->i_ceph_lock);
1660+
inline_version = ci->i_inline_version;
1661+
spin_unlock(&ci->i_ceph_lock);
1662+
1663+
dout("uninline_data %p %llx.%llx inline_version %llu\n",
1664+
inode, ceph_vinop(inode), inline_version);
1665+
1666+
if (inline_version == CEPH_INLINE_NONE)
1667+
return 0;
1668+
16551669
prealloc_cf = ceph_alloc_cap_flush();
16561670
if (!prealloc_cf)
16571671
return -ENOMEM;
16581672

1673+
if (inline_version == 1) /* initial version, no data */
1674+
goto out_uninline;
1675+
16591676
folio = read_mapping_folio(inode->i_mapping, 0, file);
16601677
if (IS_ERR(folio)) {
16611678
err = PTR_ERR(folio);
@@ -1664,17 +1681,6 @@ int ceph_uninline_data(struct file *file)
16641681

16651682
folio_lock(folio);
16661683

1667-
spin_lock(&ci->i_ceph_lock);
1668-
inline_version = ci->i_inline_version;
1669-
spin_unlock(&ci->i_ceph_lock);
1670-
1671-
dout("uninline_data %p %llx.%llx inline_version %llu\n",
1672-
inode, ceph_vinop(inode), inline_version);
1673-
1674-
if (inline_version == 1 || /* initial version, no data */
1675-
inline_version == CEPH_INLINE_NONE)
1676-
goto out_unlock;
1677-
16781684
len = i_size_read(inode);
16791685
if (len > folio_size(folio))
16801686
len = folio_size(folio);
@@ -1739,6 +1745,7 @@ int ceph_uninline_data(struct file *file)
17391745
ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
17401746
req->r_end_latency, len, err);
17411747

1748+
out_uninline:
17421749
if (!err) {
17431750
int dirty;
17441751

@@ -1757,8 +1764,10 @@ int ceph_uninline_data(struct file *file)
17571764
if (err == -ECANCELED)
17581765
err = 0;
17591766
out_unlock:
1760-
folio_unlock(folio);
1761-
folio_put(folio);
1767+
if (folio) {
1768+
folio_unlock(folio);
1769+
folio_put(folio);
1770+
}
17621771
out:
17631772
ceph_free_cap_flush(prealloc_cf);
17641773
dout("uninline_data %p %llx.%llx inline_version %llu = %d\n",
@@ -1777,7 +1786,6 @@ int ceph_mmap(struct file *file, struct vm_area_struct *vma)
17771786

17781787
if (!mapping->a_ops->read_folio)
17791788
return -ENOEXEC;
1780-
file_accessed(file);
17811789
vma->vm_ops = &ceph_vmops;
17821790
return 0;
17831791
}

fs/ceph/caps.c

Lines changed: 47 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1577,7 +1577,7 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci,
15771577

15781578
while (first_tid <= last_tid) {
15791579
struct ceph_cap *cap = ci->i_auth_cap;
1580-
struct ceph_cap_flush *cf;
1580+
struct ceph_cap_flush *cf = NULL, *iter;
15811581
int ret;
15821582

15831583
if (!(cap && cap->session == session)) {
@@ -1587,8 +1587,9 @@ static void __ceph_flush_snaps(struct ceph_inode_info *ci,
15871587
}
15881588

15891589
ret = -ENOENT;
1590-
list_for_each_entry(cf, &ci->i_cap_flush_list, i_list) {
1591-
if (cf->tid >= first_tid) {
1590+
list_for_each_entry(iter, &ci->i_cap_flush_list, i_list) {
1591+
if (iter->tid >= first_tid) {
1592+
cf = iter;
15921593
ret = 0;
15931594
break;
15941595
}
@@ -1910,6 +1911,7 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
19101911
struct rb_node *p;
19111912
bool queue_invalidate = false;
19121913
bool tried_invalidate = false;
1914+
bool queue_writeback = false;
19131915

19141916
if (session)
19151917
ceph_get_mds_session(session);
@@ -2062,10 +2064,27 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
20622064
}
20632065

20642066
/* completed revocation? going down and there are no caps? */
2065-
if (revoking && (revoking & cap_used) == 0) {
2066-
dout("completed revocation of %s\n",
2067-
ceph_cap_string(cap->implemented & ~cap->issued));
2068-
goto ack;
2067+
if (revoking) {
2068+
if ((revoking & cap_used) == 0) {
2069+
dout("completed revocation of %s\n",
2070+
ceph_cap_string(cap->implemented & ~cap->issued));
2071+
goto ack;
2072+
}
2073+
2074+
/*
2075+
* If the "i_wrbuffer_ref" was increased by mmap or generic
2076+
* cache write just before the ceph_check_caps() is called,
2077+
* the Fb capability revoking will fail this time. Then we
2078+
* must wait for the BDI's delayed work to flush the dirty
2079+
* pages and to release the "i_wrbuffer_ref", which will cost
2080+
* at most 5 seconds. That means the MDS needs to wait at
2081+
most 5 seconds to finish the Fb capability's revocation.
2082+
*
2083+
* Let's queue a writeback for it.
2084+
*/
2085+
if (S_ISREG(inode->i_mode) && ci->i_wrbuffer_ref &&
2086+
(revoking & CEPH_CAP_FILE_BUFFER))
2087+
queue_writeback = true;
20692088
}
20702089

20712090
/* want more caps from mds? */
@@ -2135,6 +2154,8 @@ void ceph_check_caps(struct ceph_inode_info *ci, int flags,
21352154
spin_unlock(&ci->i_ceph_lock);
21362155

21372156
ceph_put_mds_session(session);
2157+
if (queue_writeback)
2158+
ceph_queue_writeback(inode);
21382159
if (queue_invalidate)
21392160
ceph_queue_invalidate(inode);
21402161
}
@@ -2218,9 +2239,9 @@ static int caps_are_flushed(struct inode *inode, u64 flush_tid)
22182239
}
22192240

22202241
/*
2221-
* wait for any unsafe requests to complete.
2242+
* flush the mdlog and wait for any unsafe requests to complete.
22222243
*/
2223-
static int unsafe_request_wait(struct inode *inode)
2244+
static int flush_mdlog_and_wait_inode_unsafe_requests(struct inode *inode)
22242245
{
22252246
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
22262247
struct ceph_inode_info *ci = ceph_inode(inode);
@@ -2336,7 +2357,7 @@ static int unsafe_request_wait(struct inode *inode)
23362357
kfree(sessions);
23372358
}
23382359

2339-
dout("unsafe_request_wait %p wait on tid %llu %llu\n",
2360+
dout("%s %p wait on tid %llu %llu\n", __func__,
23402361
inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL);
23412362
if (req1) {
23422363
ret = !wait_for_completion_timeout(&req1->r_safe_completion,
@@ -2380,7 +2401,7 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
23802401
dirty = try_flush_caps(inode, &flush_tid);
23812402
dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
23822403

2383-
err = unsafe_request_wait(inode);
2404+
err = flush_mdlog_and_wait_inode_unsafe_requests(inode);
23842405

23852406
/*
23862407
* only wait on non-file metadata writeback (the mds
@@ -3182,10 +3203,9 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
31823203
struct ceph_snap_context *snapc)
31833204
{
31843205
struct inode *inode = &ci->vfs_inode;
3185-
struct ceph_cap_snap *capsnap = NULL;
3206+
struct ceph_cap_snap *capsnap = NULL, *iter;
31863207
int put = 0;
31873208
bool last = false;
3188-
bool found = false;
31893209
bool flush_snaps = false;
31903210
bool complete_capsnap = false;
31913211

@@ -3212,14 +3232,14 @@ void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
32123232
ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
32133233
last ? " LAST" : "");
32143234
} else {
3215-
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
3216-
if (capsnap->context == snapc) {
3217-
found = true;
3235+
list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) {
3236+
if (iter->context == snapc) {
3237+
capsnap = iter;
32183238
break;
32193239
}
32203240
}
32213241

3222-
if (!found) {
3242+
if (!capsnap) {
32233243
/*
32243244
* The capsnap should already be removed when removing
32253245
* auth cap in the case of a forced unmount.
@@ -3769,35 +3789,34 @@ static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
37693789
struct ceph_inode_info *ci = ceph_inode(inode);
37703790
struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
37713791
u64 follows = le64_to_cpu(m->snap_follows);
3772-
struct ceph_cap_snap *capsnap;
3773-
bool flushed = false;
3792+
struct ceph_cap_snap *capsnap = NULL, *iter;
37743793
bool wake_ci = false;
37753794
bool wake_mdsc = false;
37763795

37773796
dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n",
37783797
inode, ci, session->s_mds, follows);
37793798

37803799
spin_lock(&ci->i_ceph_lock);
3781-
list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
3782-
if (capsnap->follows == follows) {
3783-
if (capsnap->cap_flush.tid != flush_tid) {
3800+
list_for_each_entry(iter, &ci->i_cap_snaps, ci_item) {
3801+
if (iter->follows == follows) {
3802+
if (iter->cap_flush.tid != flush_tid) {
37843803
dout(" cap_snap %p follows %lld tid %lld !="
3785-
" %lld\n", capsnap, follows,
3786-
flush_tid, capsnap->cap_flush.tid);
3804+
" %lld\n", iter, follows,
3805+
flush_tid, iter->cap_flush.tid);
37873806
break;
37883807
}
3789-
flushed = true;
3808+
capsnap = iter;
37903809
break;
37913810
} else {
37923811
dout(" skipping cap_snap %p follows %lld\n",
3793-
capsnap, capsnap->follows);
3812+
iter, iter->follows);
37943813
}
37953814
}
3796-
if (flushed)
3815+
if (capsnap)
37973816
ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc);
37983817
spin_unlock(&ci->i_ceph_lock);
37993818

3800-
if (flushed) {
3819+
if (capsnap) {
38013820
ceph_put_snap_context(capsnap->context);
38023821
ceph_put_cap_snap(capsnap);
38033822
if (wake_ci)

0 commit comments

Comments
 (0)