Skip to content

Commit 48a8f78

Browse files
puranjaymohan authored and palmer-dabbelt committed
bpf, riscv: use prog pack allocator in the BPF JIT
Use bpf_jit_binary_pack_alloc() for memory management of JIT binaries in RISCV BPF JIT. The bpf_jit_binary_pack_alloc creates a pair of RW and RX buffers. The JIT writes the program into the RW buffer. When the JIT is done, the program is copied to the final RX buffer with bpf_jit_binary_pack_finalize.

Implement bpf_arch_text_copy() and bpf_arch_text_invalidate() for RISCV JIT as these functions are required by bpf_jit_binary_pack allocator.

Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
Reviewed-by: Song Liu <song@kernel.org>
Reviewed-by: Pu Lehui <pulehui@huawei.com>
Acked-by: Björn Töpel <bjorn@kernel.org>
Tested-by: Björn Töpel <bjorn@rivosinc.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/r/20230831131229.497941-5-puranjay12@gmail.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
1 parent cad539b commit 48a8f78

File tree

3 files changed

+141
-28
lines changed

3 files changed

+141
-28
lines changed

arch/riscv/net/bpf_jit.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ static inline bool is_creg(u8 reg)
6868
struct rv_jit_context {
6969
struct bpf_prog *prog;
7070
u16 *insns; /* RV insns */
71+
u16 *ro_insns;
7172
int ninsns;
7273
int prologue_len;
7374
int epilogue_offset;
@@ -85,7 +86,9 @@ static inline int ninsns_rvoff(int ninsns)
8586

8687
/*
 * JIT state attached to a program (bpf_jit_free() reads it back from
 * prog->aux->jit_data).
 *
 * header / image:       the writable (RW) buffer that the JIT emits
 *                       instructions into.
 * ro_header / ro_image: the executable (RX) buffer the program finally runs
 *                       from; the RW contents are copied here by
 *                       bpf_jit_binary_pack_finalize().
 * ctx:                  the RISC-V JIT context (instruction buffers, counts,
 *                       offsets) used while emitting code.
 */
struct rv_jit_data {
8788
struct bpf_binary_header *header;
89+
struct bpf_binary_header *ro_header;
8890
u8 *image;
91+
u8 *ro_image;
8992
struct rv_jit_context ctx;
9093
};
9194

arch/riscv/net/bpf_jit_comp64.c

Lines changed: 48 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,11 @@ static bool in_auipc_jalr_range(s64 val)
144144
/* Emit fixed-length instructions for address */
145145
static int emit_addr(u8 rd, u64 addr, bool extra_pass, struct rv_jit_context *ctx)
146146
{
147-
u64 ip = (u64)(ctx->insns + ctx->ninsns);
147+
/*
148+
* Use the ro_insns(RX) to calculate the offset as the BPF program will
149+
* finally run from this memory region.
150+
*/
151+
u64 ip = (u64)(ctx->ro_insns + ctx->ninsns);
148152
s64 off = addr - ip;
149153
s64 upper = (off + (1 << 11)) >> 12;
150154
s64 lower = off & 0xfff;
@@ -464,8 +468,12 @@ static int emit_call(u64 addr, bool fixed_addr, struct rv_jit_context *ctx)
464468
s64 off = 0;
465469
u64 ip;
466470

467-
if (addr && ctx->insns) {
468-
ip = (u64)(long)(ctx->insns + ctx->ninsns);
471+
if (addr && ctx->insns && ctx->ro_insns) {
472+
/*
473+
* Use the ro_insns(RX) to calculate the offset as the BPF
474+
* program will finally run from this memory region.
475+
*/
476+
ip = (u64)(long)(ctx->ro_insns + ctx->ninsns);
469477
off = addr - ip;
470478
}
471479

@@ -578,9 +586,10 @@ static int add_exception_handler(const struct bpf_insn *insn,
578586
{
579587
struct exception_table_entry *ex;
580588
unsigned long pc;
581-
off_t offset;
589+
off_t ins_offset;
590+
off_t fixup_offset;
582591

583-
if (!ctx->insns || !ctx->prog->aux->extable ||
592+
if (!ctx->insns || !ctx->ro_insns || !ctx->prog->aux->extable ||
584593
(BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX))
585594
return 0;
586595

@@ -594,12 +603,17 @@ static int add_exception_handler(const struct bpf_insn *insn,
594603
return -EINVAL;
595604

596605
ex = &ctx->prog->aux->extable[ctx->nexentries];
597-
pc = (unsigned long)&ctx->insns[ctx->ninsns - insn_len];
606+
pc = (unsigned long)&ctx->ro_insns[ctx->ninsns - insn_len];
598607

599-
offset = pc - (long)&ex->insn;
600-
if (WARN_ON_ONCE(offset >= 0 || offset < INT_MIN))
608+
/*
609+
* This is the relative offset of the instruction that may fault from
610+
* the exception table itself. This will be written to the exception
611+
* table and if this instruction faults, the destination register will
612+
* be set to '0' and the execution will jump to the next instruction.
613+
*/
614+
ins_offset = pc - (long)&ex->insn;
615+
if (WARN_ON_ONCE(ins_offset >= 0 || ins_offset < INT_MIN))
601616
return -ERANGE;
602-
ex->insn = offset;
603617

604618
/*
605619
* Since the extable follows the program, the fixup offset is always
@@ -608,12 +622,25 @@ static int add_exception_handler(const struct bpf_insn *insn,
608622
* bits. We don't need to worry about buildtime or runtime sort
609623
* modifying the upper bits because the table is already sorted, and
610624
* isn't part of the main exception table.
625+
*
626+
* The fixup_offset is set to the next instruction from the instruction
627+
* that may fault. The execution will jump to this after handling the
628+
* fault.
611629
*/
612-
offset = (long)&ex->fixup - (pc + insn_len * sizeof(u16));
613-
if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, offset))
630+
fixup_offset = (long)&ex->fixup - (pc + insn_len * sizeof(u16));
631+
if (!FIELD_FIT(BPF_FIXUP_OFFSET_MASK, fixup_offset))
614632
return -ERANGE;
615633

616-
ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, offset) |
634+
/*
635+
* The offsets above have been calculated using the RO buffer but we
636+
* need to use the R/W buffer for writes.
637+
* Switch ex to the R/W buffer for writing.
638+
*/
639+
ex = (void *)ctx->insns + ((void *)ex - (void *)ctx->ro_insns);
640+
641+
ex->insn = ins_offset;
642+
643+
ex->fixup = FIELD_PREP(BPF_FIXUP_OFFSET_MASK, fixup_offset) |
617644
FIELD_PREP(BPF_FIXUP_REG_MASK, dst_reg);
618645
ex->type = EX_TYPE_BPF;
619646

@@ -1007,6 +1034,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
10071034

10081035
ctx.ninsns = 0;
10091036
ctx.insns = NULL;
1037+
ctx.ro_insns = NULL;
10101038
ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
10111039
if (ret < 0)
10121040
return ret;
@@ -1015,7 +1043,15 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
10151043
return -EFBIG;
10161044

10171045
ctx.ninsns = 0;
1046+
/*
1047+
* The bpf_int_jit_compile() uses a RW buffer (ctx.insns) to write the
1048+
* JITed instructions and later copies it to a RX region (ctx.ro_insns).
1049+
* It also uses ctx.ro_insns to calculate offsets for jumps etc. As the
1050+
* trampoline image uses the same memory area for writing and execution,
1051+
* both ctx.insns and ctx.ro_insns can be set to image.
1052+
*/
10181053
ctx.insns = image;
1054+
ctx.ro_insns = image;
10191055
ret = __arch_prepare_bpf_trampoline(im, m, tlinks, func_addr, flags, &ctx);
10201056
if (ret < 0)
10211057
return ret;

arch/riscv/net/bpf_jit_core.c

Lines changed: 90 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
#include <linux/bpf.h>
1010
#include <linux/filter.h>
11+
#include <linux/memory.h>
12+
#include <asm/patch.h>
1113
#include "bpf_jit.h"
1214

1315
/* Number of iterations to try until offsets converge. */
@@ -117,16 +119,24 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
117119
sizeof(struct exception_table_entry);
118120
prog_size = sizeof(*ctx->insns) * ctx->ninsns;
119121

120-
jit_data->header =
121-
bpf_jit_binary_alloc(prog_size + extable_size,
122-
&jit_data->image,
123-
sizeof(u32),
124-
bpf_fill_ill_insns);
125-
if (!jit_data->header) {
122+
jit_data->ro_header =
123+
bpf_jit_binary_pack_alloc(prog_size + extable_size,
124+
&jit_data->ro_image, sizeof(u32),
125+
&jit_data->header, &jit_data->image,
126+
bpf_fill_ill_insns);
127+
if (!jit_data->ro_header) {
126128
prog = orig_prog;
127129
goto out_offset;
128130
}
129131

132+
/*
133+
* Use the image(RW) for writing the JITed instructions. But also save
134+
* the ro_image(RX) for calculating the offsets in the image. The RW
135+
* image will be later copied to the RX image from where the program
136+
* will run. The bpf_jit_binary_pack_finalize() will do this copy in the
137+
* final step.
138+
*/
139+
ctx->ro_insns = (u16 *)jit_data->ro_image;
130140
ctx->insns = (u16 *)jit_data->image;
131141
/*
132142
* Now, when the image is allocated, the image can
@@ -138,14 +148,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
138148

139149
if (i == NR_JIT_ITERATIONS) {
140150
pr_err("bpf-jit: image did not converge in <%d passes!\n", i);
141-
if (jit_data->header)
142-
bpf_jit_binary_free(jit_data->header);
143151
prog = orig_prog;
144-
goto out_offset;
152+
goto out_free_hdr;
145153
}
146154

147155
if (extable_size)
148-
prog->aux->extable = (void *)ctx->insns + prog_size;
156+
prog->aux->extable = (void *)ctx->ro_insns + prog_size;
149157

150158
skip_init_ctx:
151159
pass++;
@@ -154,23 +162,33 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
154162

155163
bpf_jit_build_prologue(ctx);
156164
if (build_body(ctx, extra_pass, NULL)) {
157-
bpf_jit_binary_free(jit_data->header);
158165
prog = orig_prog;
159-
goto out_offset;
166+
goto out_free_hdr;
160167
}
161168
bpf_jit_build_epilogue(ctx);
162169

163170
if (bpf_jit_enable > 1)
164171
bpf_jit_dump(prog->len, prog_size, pass, ctx->insns);
165172

166-
prog->bpf_func = (void *)ctx->insns;
173+
prog->bpf_func = (void *)ctx->ro_insns;
167174
prog->jited = 1;
168175
prog->jited_len = prog_size;
169176

170-
bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns);
171-
172177
if (!prog->is_func || extra_pass) {
173-
bpf_jit_binary_lock_ro(jit_data->header);
178+
if (WARN_ON(bpf_jit_binary_pack_finalize(prog, jit_data->ro_header,
179+
jit_data->header))) {
180+
/* ro_header has been freed */
181+
jit_data->ro_header = NULL;
182+
prog = orig_prog;
183+
goto out_offset;
184+
}
185+
/*
186+
* The instructions have now been copied to the ROX region from
187+
* where they will execute.
188+
* Write any modified data cache blocks out to memory and
189+
* invalidate the corresponding blocks in the instruction cache.
190+
*/
191+
bpf_flush_icache(jit_data->ro_header, ctx->ro_insns + ctx->ninsns);
174192
for (i = 0; i < prog->len; i++)
175193
ctx->offset[i] = ninsns_rvoff(ctx->offset[i]);
176194
bpf_prog_fill_jited_linfo(prog, ctx->offset);
@@ -185,6 +203,14 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
185203
bpf_jit_prog_release_other(prog, prog == orig_prog ?
186204
tmp : orig_prog);
187205
return prog;
206+
207+
out_free_hdr:
208+
if (jit_data->header) {
209+
bpf_arch_text_copy(&jit_data->ro_header->size, &jit_data->header->size,
210+
sizeof(jit_data->header->size));
211+
bpf_jit_binary_pack_free(jit_data->ro_header, jit_data->header);
212+
}
213+
goto out_offset;
188214
}
189215

190216
u64 bpf_jit_alloc_exec_limit(void)
@@ -204,3 +230,51 @@ void bpf_jit_free_exec(void *addr)
204230
{
205231
return vfree(addr);
206232
}
233+
234+
/*
 * Copy @len bytes from @src to the text location @dst.
 *
 * The copy is done with patch_text_nosync() while holding text_mutex.
 *
 * Return: @dst on success, ERR_PTR(-EINVAL) if patch_text_nosync() reports
 * any failure (the underlying error code is not propagated).
 */
void *bpf_arch_text_copy(void *dst, void *src, size_t len)
{
	int err;

	mutex_lock(&text_mutex);
	err = patch_text_nosync(dst, src, len);
	mutex_unlock(&text_mutex);

	return err ? ERR_PTR(-EINVAL) : dst;
}
247+
248+
/*
 * Invalidate @len bytes of text starting at @dst.
 *
 * Calls patch_text_set_nosync(dst, 0, len) with text_mutex held.
 * NOTE(review): presumably this fills the range with zero bytes - confirm
 * against patch_text_set_nosync().
 *
 * Return: whatever patch_text_set_nosync() returns.
 */
int bpf_arch_text_invalidate(void *dst, size_t len)
{
	int err;

	mutex_lock(&text_mutex);
	err = patch_text_set_nosync(dst, 0, len);
	mutex_unlock(&text_mutex);

	return err;
}
258+
259+
void bpf_jit_free(struct bpf_prog *prog)
260+
{
261+
if (prog->jited) {
262+
struct rv_jit_data *jit_data = prog->aux->jit_data;
263+
struct bpf_binary_header *hdr;
264+
265+
/*
266+
* If we fail the final pass of JIT (from jit_subprogs),
267+
* the program may not be finalized yet. Call finalize here
268+
* before freeing it.
269+
*/
270+
if (jit_data) {
271+
bpf_jit_binary_pack_finalize(prog, jit_data->ro_header, jit_data->header);
272+
kfree(jit_data);
273+
}
274+
hdr = bpf_jit_binary_pack_hdr(prog);
275+
bpf_jit_binary_pack_free(hdr, NULL);
276+
WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog));
277+
}
278+
279+
bpf_prog_unlock_free(prog);
280+
}

0 commit comments

Comments
 (0)