Skip to content

Commit 7301fc8

Browse files
author
Yonghong Song
committed
[RFC][BPF] Support Jump Table
NOTE: We probably need cpu v5 or other flags to enable this feature. We can add it later when necessary. This patch adds jump table support. A new insn 'gotox <reg>' is added to allow goto through a register. The register holds the jump target address within the current section. The following is a concrete example using the bpf selftest progs/user_ringbuf_success.c. Compilation command line to generate .s file: ============================================= clang -g -Wall -Werror -D__TARGET_ARCH_x86 -mlittle-endian \ -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf/tools/include \ -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf \ -I/home/yhs/work/bpf-next/tools/include/uapi \ -I/home/yhs/work/bpf-next/tools/testing/selftests/usr/include -std=gnu11 \ -fno-strict-aliasing -Wno-compare-distinct-pointer-types \ -idirafter /home/yhs/work/llvm-project/llvm/build.21/Release/lib/clang/21/include \ -idirafter /usr/local/include -idirafter /usr/include \ -DENABLE_ATOMICS_TESTS -O2 -S progs/user_ringbuf_success.c \ -o /home/yhs/work/bpf-next/tools/testing/selftests/bpf/user_ringbuf_success.bpf.o.s \ --target=bpf -mcpu=v3 The related assembly: read_protocol_msg: ... r3 <<= 3 r1 = .LJTI1_0 ll r1 += r3 r1 = *(u64 *)(r1 + 0) gotox r1 LBB1_4: r1 = *(u64 *)(r0 + 8) goto LBB1_5 LBB1_7: r1 = *(u64 *)(r0 + 8) goto LBB1_8 LBB1_9: w1 = *(u32 *)(r0 + 8) r1 <<= 32 r1 s>>= 32 r2 = kern_mutated ll r3 = *(u64 *)(r2 + 0) r3 *= r1 *(u64 *)(r2 + 0) = r3 goto LBB1_11 LBB1_6: w1 = *(u32 *)(r0 + 8) r1 <<= 32 r1 s>>= 32 LBB1_5: ... .section .rodata,"a",@progbits .p2align 3, 0x0 .LJTI1_0: .quad LBB1_4 .quad LBB1_6 .quad LBB1_7 .quad LBB1_9 ... publish_next_kern_msg: ... r6 <<= 3 r1 = .LJTI6_0 ll r1 += r6 r1 = *(u64 *)(r1 + 0) gotox r1 LBB6_3: ... LBB6_5: ... LBB6_6: ... LBB6_4: ... 
.section .rodata,"a",@progbits .p2align 3, 0x0 .LJTI6_0: .quad LBB6_3 .quad LBB6_4 .quad LBB6_5 .quad LBB6_6 Now let us look at the .o file ========================== clang -g -Wall -Werror -D__TARGET_ARCH_x86 -mlittle-endian \ -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf/tools/include \ -I/home/yhs/work/bpf-next/tools/testing/selftests/bpf \ -I/home/yhs/work/bpf-next/tools/include/uapi \ -I/home/yhs/work/bpf-next/tools/testing/selftests/usr/include \ -std=gnu11 -fno-strict-aliasing -Wno-compare-distinct-pointer-types \ -idirafter /home/yhs/work/llvm-project/llvm/build.21/Release/lib/clang/21/include \ -idirafter /usr/local/include -idirafter /usr/include -DENABLE_ATOMICS_TESTS \ -O2 -c progs/user_ringbuf_success.c \ -o /home/yhs/work/bpf-next/tools/testing/selftests/bpf/user_ringbuf_success.bpf.o \ --target=bpf -mcpu=v3 In the object file, all .rodata sections are merged together. So we have $ llvm-readelf -x '.rodata' user_ringbuf_success.bpf.o Hex dump of section '.rodata': 0x00000000 a8020000 00000000 10030000 00000000 ................ 0x00000010 b8020000 00000000 c8020000 00000000 ................ 0x00000020 40040000 00000000 18050000 00000000 @............... 0x00000030 88040000 00000000 d0040000 00000000 ................ 0x00000040 44726169 6e207265 7475726e 65643a20 Drain returned: 0x00000050 256c640a 00556e65 78706563 7465646c %ld..Unexpectedl 0x00000060 79206661 696c6564 20746f20 67657420 y failed to get 0x00000070 6d73670a 00556e72 65636f67 6e697a65 msg..Unrecognize 0x00000080 64206f70 2025640a 00256c75 20213d20 d op %d..%lu != 0x00000090 256c750a 00627066 5f64796e 7074725f %lu..bpf_dynptr_ 0x000000a0 72656164 28292066 61696c65 643a2025 read() failed: % 0x000000b0 640a0055 6e657870 65637465 646c7920 d..Unexpectedly 0x000000c0 6661696c 65642074 6f206765 74207361 failed to get sa 0x000000d0 6d706c65 0a00 mple.. Let us look at the insns. Some annotations explain the details. $ llvm-objdump -Sr user_ringbuf_success.bpf.o .... 
Disassembly of section .text: 0000000000000000 <read_protocol_msg>: ; msg = bpf_dynptr_data(dynptr, 0, sizeof(*msg)); 0: b4 02 00 00 00 00 00 00 w2 = 0x0 1: b4 03 00 00 10 00 00 00 w3 = 0x10 2: 85 00 00 00 cb 00 00 00 call 0xcb ... 0000000000000268 <handle_sample_msg>: ; switch (msg->msg_op) { 77: 61 13 00 00 00 00 00 00 w3 = *(u32 *)(r1 + 0x0) 78: 26 03 1c 00 03 00 00 00 if w3 > 0x3 goto +0x1c <handle_sample_msg+0xf0> 79: 67 03 00 00 03 00 00 00 r3 <<= 0x3 80: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll 0000000000000280: R_BPF_64_64 .rodata <=== r2 will be the address of .rodata with offset 0. <=== look at the first 32 bytes of .rodata: 0x00000000 a8020000 00000000 10030000 00000000 ................ 0x00000010 b8020000 00000000 c8020000 00000000 ................ The four actual addresses are 0x2a8: insn idx 0x2a8/8 = 85 0x310: insn idx 0x310/8 = 98 0x2b8: insn idx 0x2b8/8 = 87 0x2c8: insn idx 0x2c8/8 = 89 82: 0f 32 00 00 00 00 00 00 r2 += r3 83: 79 22 00 00 00 00 00 00 r2 = *(u64 *)(r2 + 0x0) 84: 0d 02 00 00 00 00 00 00 gotox r2 <=== So eventually gotox will go to the insn idx in this section. ; kern_mutated += msg->operand_64; 85: 79 11 08 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x8) 86: 05 00 0e 00 00 00 00 00 goto +0xe <handle_sample_msg+0xc0> ; kern_mutated *= msg->operand_64; 87: 79 11 08 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x8) 88: 05 00 03 00 00 00 00 00 goto +0x3 <handle_sample_msg+0x78> ; kern_mutated *= msg->operand_32; 89: 61 11 08 00 00 00 00 00 w1 = *(u32 *)(r1 + 0x8) 90: 67 01 00 00 20 00 00 00 r1 <<= 0x20 91: c7 01 00 00 20 00 00 00 r1 s>>= 0x20 92: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0x0 ll ... 
00000000000003a0 <publish_next_kern_msg>: ; { 116: bc 16 00 00 00 00 00 00 w6 = w1 ; msg = bpf_ringbuf_reserve(&kernel_ringbuf, sizeof(*msg), 0); 117: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll 00000000000003a8: R_BPF_64_64 kernel_ringbuf 119: b7 02 00 00 10 00 00 00 r2 = 0x10 120: b7 03 00 00 00 00 00 00 r3 = 0x0 121: 85 00 00 00 83 00 00 00 call 0x83 ; if (!msg) { 122: 55 00 06 00 00 00 00 00 if r0 != 0x0 goto +0x6 <publish_next_kern_msg+0x68> ; err = 4; 123: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll 00000000000003d8: R_BPF_64_64 err 125: b4 02 00 00 04 00 00 00 w2 = 0x4 126: 63 21 00 00 00 00 00 00 *(u32 *)(r1 + 0x0) = w2 127: b4 00 00 00 01 00 00 00 w0 = 0x1 ; return 1; 128: 05 00 31 00 00 00 00 00 goto +0x31 <publish_next_kern_msg+0x1f0> ; switch (index % TEST_MSG_OP_NUM_OPS) { 129: 54 06 00 00 03 00 00 00 w6 &= 0x3 130: 67 06 00 00 03 00 00 00 r6 <<= 0x3 131: 18 01 00 00 20 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x20 ll 0000000000000418: R_BPF_64_64 .rodata <=== r1 will be the address of .rodata with offset 0x20. <=== look at the 32 bytes of .rodata starting at offset 0x20: 0x00000020 40040000 00000000 18050000 00000000 @............... 0x00000030 88040000 00000000 d0040000 00000000 ................ The four actual addresses are 0x440: insn idx 0x440/8 = 136 0x518: insn idx 0x518/8 = 163 0x488: insn idx 0x488/8 = 145 0x4d0: insn idx 0x4d0/8 = 154 133: 0f 61 00 00 00 00 00 00 r1 += r6 134: 79 11 00 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x0) 135: 0d 01 00 00 00 00 00 00 gotox r1 <=== So eventually gotox will go to the insn idx in this section. 
136: b4 01 00 00 00 00 00 00 w1 = 0x0 ; msg->msg_op = TEST_MSG_OP_INC64; 137: 63 10 00 00 00 00 00 00 *(u32 *)(r0 + 0x0) = w1 138: b7 01 00 00 04 00 00 00 r1 = 0x4 ; msg->operand_64 = operand_64; 139: 7b 10 08 00 00 00 00 00 *(u64 *)(r0 + 0x8) = r1 ; expected_user_mutated += operand_64; 140: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0x0 ll 0000000000000460: R_BPF_64_64 expected_user_mutated 142: 79 11 00 00 00 00 00 00 r1 = *(u64 *)(r1 + 0x0) 143: 07 01 00 00 04 00 00 00 r1 += 0x4 ; break; 144: 05 00 1a 00 00 00 00 00 goto +0x1a <publish_next_kern_msg+0x1b8> 145: b4 01 00 00 02 00 00 00 w1 = 0x2 ; msg->msg_op = TEST_MSG_OP_MUL64; ... There are a few things worth discussing. First, in the above, it is hard to find the jump table size for a particular relocation ('R_BPF_64_64 .rodata + <offset>'). One approach is to scan through the whole ELF file to find all '.rodata + <offset>' relocations. For example, here we have .rodata + 0 .rodata + 0x20 .rodata + 0x40 .rodata + 0x55 .rodata + 0x75 .rodata + 0x89 .rodata + 0x95 .rodata + 0xb3 With the above information, the size of each sub-rodata region can be found easily as the difference between consecutive offsets (e.g. 0x20 - 0x0 = 32 bytes, i.e. four 8-byte jump table entries). An option -bpf-min-jump-table-entries is implemented to control the minimum number of entries to use a jump table on BPF. The default value is 4, but it can be changed with the following clang option clang ... -mllvm -bpf-min-jump-table-entries=6 where the number of jump table cases needs to be >= 6 in order to use a jump table.
1 parent d4002b4 commit 7301fc8

File tree

4 files changed

+66
-2
lines changed

4 files changed

+66
-2
lines changed

llvm/lib/Target/BPF/BPFISelLowering.cpp

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order",
3636
cl::Hidden, cl::init(false),
3737
cl::desc("Expand memcpy into load/store pairs in order"));
3838

39+
static cl::opt<unsigned> BPFMinimumJumpTableEntries(
40+
"bpf-min-jump-table-entries", cl::init(4), cl::Hidden,
41+
cl::desc("Set minimum number of entries to use a jump table on BPF"));
42+
3943
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg,
4044
SDValue Val = {}) {
4145
std::string Str;
@@ -65,10 +69,11 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
6569

6670
setOperationAction(ISD::BR_CC, MVT::i64, Custom);
6771
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
68-
setOperationAction(ISD::BRIND, MVT::Other, Expand);
6972
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
7073

71-
setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, MVT::i64, Custom);
74+
setOperationAction({ISD::GlobalAddress, ISD::ConstantPool, ISD::JumpTable,
75+
ISD::BlockAddress},
76+
MVT::i64, Custom);
7277

7378
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
7479
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
@@ -155,6 +160,7 @@ BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
155160

156161
setBooleanContents(ZeroOrOneBooleanContent);
157162
setMaxAtomicSizeInBitsSupported(64);
163+
setMinimumJumpTableEntries(BPFMinimumJumpTableEntries);
158164

159165
// Function alignments
160166
setMinFunctionAlignment(Align(8));
@@ -312,10 +318,14 @@ SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
312318
report_fatal_error("unimplemented opcode: " + Twine(Op.getOpcode()));
313319
case ISD::BR_CC:
314320
return LowerBR_CC(Op, DAG);
321+
case ISD::JumpTable:
322+
return LowerJumpTable(Op, DAG);
315323
case ISD::GlobalAddress:
316324
return LowerGlobalAddress(Op, DAG);
317325
case ISD::ConstantPool:
318326
return LowerConstantPool(Op, DAG);
327+
case ISD::BlockAddress:
328+
return LowerBlockAddress(Op, DAG);
319329
case ISD::SELECT_CC:
320330
return LowerSELECT_CC(Op, DAG);
321331
case ISD::SDIV:
@@ -726,6 +736,11 @@ SDValue BPFTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
726736
return Op;
727737
}
728738

739+
SDValue BPFTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
740+
JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
741+
return getAddr(N, DAG);
742+
}
743+
729744
const char *BPFTargetLowering::getTargetNodeName(unsigned Opcode) const {
730745
switch ((BPFISD::NodeType)Opcode) {
731746
case BPFISD::FIRST_NUMBER:
@@ -757,6 +772,17 @@ static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
757772
N->getOffset(), Flags);
758773
}
759774

775+
static SDValue getTargetNode(BlockAddressSDNode *N, const SDLoc &DL, EVT Ty,
776+
SelectionDAG &DAG, unsigned Flags) {
777+
return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
778+
Flags);
779+
}
780+
781+
static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
782+
SelectionDAG &DAG, unsigned Flags) {
783+
return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
784+
}
785+
760786
template <class NodeTy>
761787
SDValue BPFTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
762788
unsigned Flags) const {
@@ -783,6 +809,12 @@ SDValue BPFTargetLowering::LowerConstantPool(SDValue Op,
783809
return getAddr(N, DAG);
784810
}
785811

812+
SDValue BPFTargetLowering::LowerBlockAddress(SDValue Op,
813+
SelectionDAG &DAG) const {
814+
BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);
815+
return getAddr(N, DAG);
816+
}
817+
786818
unsigned
787819
BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
788820
unsigned Reg, bool isSigned) const {

llvm/lib/Target/BPF/BPFISelLowering.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ class BPFTargetLowering : public TargetLowering {
8080
SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const;
8181
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
8282
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
83+
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
84+
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
8385

8486
template <class NodeTy>
8587
SDValue getAddr(NodeTy *N, SelectionDAG &DAG, unsigned Flags = 0) const;

llvm/lib/Target/BPF/BPFInstrInfo.td

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,15 @@ class TYPE_LD_ST<bits<3> mode, bits<2> size,
183183
let Inst{60-59} = size;
184184
}
185185

186+
// For indirect jump
187+
class TYPE_IND_JMP<bits<4> op, bits<1> srctype,
188+
dag outs, dag ins, string asmstr, list<dag> pattern>
189+
: InstBPF<outs, ins, asmstr, pattern> {
190+
191+
let Inst{63-60} = op;
192+
let Inst{59} = srctype;
193+
}
194+
186195
// jump instructions
187196
class JMP_RR<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
188197
: TYPE_ALU_JMP<Opc.Value, BPF_X.Value,
@@ -216,6 +225,18 @@ class JMP_RI<BPFJumpOp Opc, string OpcodeStr, PatLeaf Cond>
216225
let BPFClass = BPF_JMP;
217226
}
218227

228+
class JMP_IND<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
229+
: TYPE_ALU_JMP<Opc.Value, BPF_X.Value,
230+
(outs),
231+
(ins GPR:$dst),
232+
!strconcat(OpcodeStr, " $dst"),
233+
Pattern> {
234+
bits<4> dst;
235+
236+
let Inst{51-48} = dst;
237+
let BPFClass = BPF_JMP;
238+
}
239+
219240
class JMP_JCOND<BPFJumpOp Opc, string OpcodeStr, list<dag> Pattern>
220241
: TYPE_ALU_JMP<Opc.Value, BPF_K.Value,
221242
(outs),
@@ -281,6 +302,10 @@ defm JSLT : J<BPF_JSLT, "s<", BPF_CC_LT, BPF_CC_LT_32>;
281302
defm JSLE : J<BPF_JSLE, "s<=", BPF_CC_LE, BPF_CC_LE_32>;
282303
defm JSET : J<BPF_JSET, "&", NoCond, NoCond>;
283304
def JCOND : JMP_JCOND<BPF_JCOND, "may_goto", []>;
305+
306+
let isIndirectBranch = 1 in {
307+
def JX : JMP_IND<BPF_JA, "gotox", [(brind i64:$dst)]>;
308+
}
284309
}
285310

286311
// ALU instructions
@@ -851,6 +876,8 @@ let usesCustomInserter = 1, isCodeGenOnly = 1 in {
851876
// load 64-bit global addr into register
852877
def : Pat<(BPFWrapper tglobaladdr:$in), (LD_imm64 tglobaladdr:$in)>;
853878
def : Pat<(BPFWrapper tconstpool:$in), (LD_imm64 tconstpool:$in)>;
879+
def : Pat<(BPFWrapper tblockaddress:$in), (LD_imm64 tblockaddress:$in)>;
880+
def : Pat<(BPFWrapper tjumptable:$in), (LD_imm64 tjumptable:$in)>;
854881

855882
// 0xffffFFFF doesn't fit into simm32, optimize common case
856883
def : Pat<(i64 (and (i64 GPR:$src), 0xffffFFFF)),

llvm/lib/Target/BPF/BPFMCInstLower.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ void BPFMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
7777
case MachineOperand::MO_ConstantPoolIndex:
7878
MCOp = LowerSymbolOperand(MO, Printer.GetCPISymbol(MO.getIndex()));
7979
break;
80+
case MachineOperand::MO_JumpTableIndex:
81+
MCOp = LowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex()));
82+
break;
8083
}
8184

8285
OutMI.addOperand(MCOp);

0 commit comments

Comments
 (0)