Skip to content

Commit 0083340

Browse files
committed
Merge tag '9p-for-6.1' of https://github.com/martinetd/linux
Pull 9p updates from Dominique Martinet:

"Smaller buffers for small messages and fixes.

The highlight of this is Christian's patch to allocate smaller buffers for most metadata requests: 9p with a big msize would try to allocate large buffers when just 4 or 8k would be more than enough; this brings in nice performance improvements.

There's also a few fixes for problems reported by syzkaller (thanks to Schspa Shi, Tetsuo Handa for tests and feedback/patches) as well as some minor cleanup"

* tag '9p-for-6.1' of https://github.com/martinetd/linux:
  - net/9p: clarify trans_fd parse_opt failure handling
  - net/9p: add __init/__exit annotations to module init/exit funcs
  - net/9p: use a dedicated spinlock for trans_fd
  - 9p/trans_fd: always use O_NONBLOCK read/write
  - net/9p: allocate appropriate reduced message buffers
  - net/9p: add 'pooled_rbuffers' flag to struct p9_trans_module
  - net/9p: add p9_msg_buf_size()
  - 9p: add P9_ERRMAX for 9p2000 and 9p2000.u
  - net/9p: split message size argument into 't_size' and 'r_size' pair
  - 9p: trans_fd/p9_conn_cancel: drop client lock earlier
2 parents 288fc86 + a8e633c commit 0083340

File tree

9 files changed

+254
-28
lines changed

9 files changed

+254
-28
lines changed

include/net/9p/9p.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,9 @@ enum p9_qid_t {
331331
/* size of header for zero copy read/write */
332332
#define P9_ZC_HDR_SZ 4096
333333

334+
/* maximum length of an error string */
335+
#define P9_ERRMAX 128
336+
334337
/**
335338
* struct p9_qid - file system entity information
336339
* @type: 8-bit type &p9_qid_t

include/net/9p/transport.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
* @list: used to maintain a list of currently available transports
2020
* @name: the human-readable name of the transport
2121
* @maxsize: transport provided maximum packet size
22+
* @pooled_rbuffers: currently only set for RDMA transport which pulls the
23+
* response buffers from a shared pool, and accordingly
24+
* we're less flexible when choosing the response message
25+
* size in this case
2226
* @def: set if this transport should be considered the default
2327
* @create: member function to create a new connection on this transport
2428
* @close: member function to discard a connection on this transport
@@ -38,6 +42,7 @@ struct p9_trans_module {
3842
struct list_head list;
3943
char *name; /* name of transport */
4044
int maxsize; /* max message size of transport */
45+
bool pooled_rbuffers;
4146
int def; /* this transport should be default */
4247
struct module *owner;
4348
int (*create)(struct p9_client *client,

net/9p/client.c

Lines changed: 39 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -255,24 +255,42 @@ static struct kmem_cache *p9_req_cache;
255255
* p9_tag_alloc - Allocate a new request.
256256
* @c: Client session.
257257
* @type: Transaction type.
258-
* @max_size: Maximum packet size for this request.
258+
* @t_size: Buffer size for holding this request
259+
* (automatic calculation by format template if 0).
260+
* @r_size: Buffer size for holding server's reply on this request
261+
* (automatic calculation by format template if 0).
262+
* @fmt: Format template for assembling 9p request message
263+
* (see p9pdu_vwritef).
264+
* @ap: Variable arguments to be fed to passed format template
265+
* (see p9pdu_vwritef).
259266
*
260267
* Context: Process context.
261268
* Return: Pointer to new request.
262269
*/
263270
static struct p9_req_t *
264-
p9_tag_alloc(struct p9_client *c, int8_t type, unsigned int max_size)
271+
p9_tag_alloc(struct p9_client *c, int8_t type, uint t_size, uint r_size,
272+
const char *fmt, va_list ap)
265273
{
266274
struct p9_req_t *req = kmem_cache_alloc(p9_req_cache, GFP_NOFS);
267-
int alloc_msize = min(c->msize, max_size);
275+
int alloc_tsize;
276+
int alloc_rsize;
268277
int tag;
278+
va_list apc;
279+
280+
va_copy(apc, ap);
281+
alloc_tsize = min_t(size_t, c->msize,
282+
t_size ?: p9_msg_buf_size(c, type, fmt, apc));
283+
va_end(apc);
284+
285+
alloc_rsize = min_t(size_t, c->msize,
286+
r_size ?: p9_msg_buf_size(c, type + 1, fmt, ap));
269287

270288
if (!req)
271289
return ERR_PTR(-ENOMEM);
272290

273-
if (p9_fcall_init(c, &req->tc, alloc_msize))
291+
if (p9_fcall_init(c, &req->tc, alloc_tsize))
274292
goto free_req;
275-
if (p9_fcall_init(c, &req->rc, alloc_msize))
293+
if (p9_fcall_init(c, &req->rc, alloc_rsize))
276294
goto free;
277295

278296
p9pdu_reset(&req->tc);
@@ -592,11 +610,12 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
592610
}
593611

594612
static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
595-
int8_t type, int req_size,
613+
int8_t type, uint t_size, uint r_size,
596614
const char *fmt, va_list ap)
597615
{
598616
int err;
599617
struct p9_req_t *req;
618+
va_list apc;
600619

601620
p9_debug(P9_DEBUG_MUX, "client %p op %d\n", c, type);
602621

@@ -608,7 +627,9 @@ static struct p9_req_t *p9_client_prepare_req(struct p9_client *c,
608627
if (c->status == BeginDisconnect && type != P9_TCLUNK)
609628
return ERR_PTR(-EIO);
610629

611-
req = p9_tag_alloc(c, type, req_size);
630+
va_copy(apc, ap);
631+
req = p9_tag_alloc(c, type, t_size, r_size, fmt, apc);
632+
va_end(apc);
612633
if (IS_ERR(req))
613634
return req;
614635

@@ -643,9 +664,18 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
643664
int sigpending, err;
644665
unsigned long flags;
645666
struct p9_req_t *req;
667+
/* Passing zero for tsize/rsize to p9_client_prepare_req() tells it to
668+
* auto determine an appropriate (small) request/response size
669+
* according to actual message data being sent. Currently RDMA
670+
* transport is excluded from this response message size optimization,
671+
* as it would not cope with it, due to its pooled response buffers
672+
* (using an optimized request size for RDMA as well though).
673+
*/
674+
const uint tsize = 0;
675+
const uint rsize = c->trans_mod->pooled_rbuffers ? c->msize : 0;
646676

647677
va_start(ap, fmt);
648-
req = p9_client_prepare_req(c, type, c->msize, fmt, ap);
678+
req = p9_client_prepare_req(c, type, tsize, rsize, fmt, ap);
649679
va_end(ap);
650680
if (IS_ERR(req))
651681
return req;
@@ -743,7 +773,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
743773
/* We allocate a inline protocol data of only 4k bytes.
744774
* The actual content is passed in zero-copy fashion.
745775
*/
746-
req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap);
776+
req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, P9_ZC_HDR_SZ, fmt, ap);
747777
va_end(ap);
748778
if (IS_ERR(req))
749779
return req;

net/9p/protocol.c

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,173 @@
2323

2424
#include <trace/events/9p.h>
2525

26+
/* len[2] text[len] */
27+
#define P9_STRLEN(s) \
28+
(2 + min_t(size_t, s ? strlen(s) : 0, USHRT_MAX))
29+
30+
/**
31+
* p9_msg_buf_size - Returns a buffer size sufficiently large to hold the
32+
* intended 9p message.
33+
* @c: client
34+
* @type: message type
35+
* @fmt: format template for assembling request message
36+
* (see p9pdu_vwritef)
37+
* @ap: variable arguments to be fed to passed format template
38+
* (see p9pdu_vwritef)
39+
*
40+
* Note: Even for response types (P9_R*) the format template and variable
41+
* arguments must always be for the originating request type (P9_T*).
42+
*/
43+
size_t p9_msg_buf_size(struct p9_client *c, enum p9_msg_t type,
44+
const char *fmt, va_list ap)
45+
{
46+
/* size[4] type[1] tag[2] */
47+
const int hdr = 4 + 1 + 2;
48+
/* ename[s] errno[4] */
49+
const int rerror_size = hdr + P9_ERRMAX + 4;
50+
/* ecode[4] */
51+
const int rlerror_size = hdr + 4;
52+
const int err_size =
53+
c->proto_version == p9_proto_2000L ? rlerror_size : rerror_size;
54+
55+
static_assert(NAME_MAX <= 4*1024, "p9_msg_buf_size() currently assumes "
56+
"a max. allowed directory entry name length of 4k");
57+
58+
switch (type) {
59+
60+
/* message types not used at all */
61+
case P9_TERROR:
62+
case P9_TLERROR:
63+
case P9_TAUTH:
64+
case P9_RAUTH:
65+
BUG();
66+
67+
/* variable length & potentially large message types */
68+
case P9_TATTACH:
69+
BUG_ON(strcmp("ddss?u", fmt));
70+
va_arg(ap, int32_t);
71+
va_arg(ap, int32_t);
72+
{
73+
const char *uname = va_arg(ap, const char *);
74+
const char *aname = va_arg(ap, const char *);
75+
/* fid[4] afid[4] uname[s] aname[s] n_uname[4] */
76+
return hdr + 4 + 4 + P9_STRLEN(uname) + P9_STRLEN(aname) + 4;
77+
}
78+
case P9_TWALK:
79+
BUG_ON(strcmp("ddT", fmt));
80+
va_arg(ap, int32_t);
81+
va_arg(ap, int32_t);
82+
{
83+
uint i, nwname = va_arg(ap, int);
84+
size_t wname_all;
85+
const char **wnames = va_arg(ap, const char **);
86+
for (i = 0, wname_all = 0; i < nwname; ++i) {
87+
wname_all += P9_STRLEN(wnames[i]);
88+
}
89+
/* fid[4] newfid[4] nwname[2] nwname*(wname[s]) */
90+
return hdr + 4 + 4 + 2 + wname_all;
91+
}
92+
case P9_RWALK:
93+
BUG_ON(strcmp("ddT", fmt));
94+
va_arg(ap, int32_t);
95+
va_arg(ap, int32_t);
96+
{
97+
uint nwname = va_arg(ap, int);
98+
/* nwqid[2] nwqid*(wqid[13]) */
99+
return max_t(size_t, hdr + 2 + nwname * 13, err_size);
100+
}
101+
case P9_TCREATE:
102+
BUG_ON(strcmp("dsdb?s", fmt));
103+
va_arg(ap, int32_t);
104+
{
105+
const char *name = va_arg(ap, const char *);
106+
if (c->proto_version == p9_proto_legacy) {
107+
/* fid[4] name[s] perm[4] mode[1] */
108+
return hdr + 4 + P9_STRLEN(name) + 4 + 1;
109+
} else {
110+
va_arg(ap, int32_t);
111+
va_arg(ap, int);
112+
{
113+
const char *ext = va_arg(ap, const char *);
114+
/* fid[4] name[s] perm[4] mode[1] extension[s] */
115+
return hdr + 4 + P9_STRLEN(name) + 4 + 1 + P9_STRLEN(ext);
116+
}
117+
}
118+
}
119+
case P9_TLCREATE:
120+
BUG_ON(strcmp("dsddg", fmt));
121+
va_arg(ap, int32_t);
122+
{
123+
const char *name = va_arg(ap, const char *);
124+
/* fid[4] name[s] flags[4] mode[4] gid[4] */
125+
return hdr + 4 + P9_STRLEN(name) + 4 + 4 + 4;
126+
}
127+
case P9_RREAD:
128+
case P9_RREADDIR:
129+
BUG_ON(strcmp("dqd", fmt));
130+
va_arg(ap, int32_t);
131+
va_arg(ap, int64_t);
132+
{
133+
const int32_t count = va_arg(ap, int32_t);
134+
/* count[4] data[count] */
135+
return max_t(size_t, hdr + 4 + count, err_size);
136+
}
137+
case P9_TWRITE:
138+
BUG_ON(strcmp("dqV", fmt));
139+
va_arg(ap, int32_t);
140+
va_arg(ap, int64_t);
141+
{
142+
const int32_t count = va_arg(ap, int32_t);
143+
/* fid[4] offset[8] count[4] data[count] */
144+
return hdr + 4 + 8 + 4 + count;
145+
}
146+
case P9_TRENAMEAT:
147+
BUG_ON(strcmp("dsds", fmt));
148+
va_arg(ap, int32_t);
149+
{
150+
const char *oldname, *newname;
151+
oldname = va_arg(ap, const char *);
152+
va_arg(ap, int32_t);
153+
newname = va_arg(ap, const char *);
154+
/* olddirfid[4] oldname[s] newdirfid[4] newname[s] */
155+
return hdr + 4 + P9_STRLEN(oldname) + 4 + P9_STRLEN(newname);
156+
}
157+
case P9_TSYMLINK:
158+
BUG_ON(strcmp("dssg", fmt));
159+
va_arg(ap, int32_t);
160+
{
161+
const char *name = va_arg(ap, const char *);
162+
const char *symtgt = va_arg(ap, const char *);
163+
/* fid[4] name[s] symtgt[s] gid[4] */
164+
return hdr + 4 + P9_STRLEN(name) + P9_STRLEN(symtgt) + 4;
165+
}
166+
167+
case P9_RERROR:
168+
return rerror_size;
169+
case P9_RLERROR:
170+
return rlerror_size;
171+
172+
/* small message types */
173+
case P9_TWSTAT:
174+
case P9_RSTAT:
175+
case P9_RREADLINK:
176+
case P9_TXATTRWALK:
177+
case P9_TXATTRCREATE:
178+
case P9_TLINK:
179+
case P9_TMKDIR:
180+
case P9_TMKNOD:
181+
case P9_TRENAME:
182+
case P9_TUNLINKAT:
183+
case P9_TLOCK:
184+
return 8 * 1024;
185+
186+
/* tiny message types */
187+
default:
188+
return 4 * 1024;
189+
190+
}
191+
}
192+
26193
static int
27194
p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);
28195

net/9p/protocol.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
* Copyright (C) 2008 by IBM, Corp.
99
*/
1010

11+
size_t p9_msg_buf_size(struct p9_client *c, enum p9_msg_t type,
12+
const char *fmt, va_list ap);
1113
int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
1214
va_list ap);
1315
int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);

0 commit comments

Comments
 (0)