Skip to content

Commit 7fdc056

Browse files
authored
vm : test generator for vm_interp (#104)
1 parent bbffef5 commit 7fdc056

File tree

2 files changed

+347
-0
lines changed

2 files changed

+347
-0
lines changed

generators/vm_interp.py

Lines changed: 323 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,323 @@
1+
import hashlib
2+
import test_suite.vm_pb2 as vm_pb
3+
import struct
4+
5+
# Base directory under which the script entry point writes the generated
# .bin test vectors (one sub-directory per test name and sbpf version).
OUTPUT_DIR = "./test-vectors/vm_interp/tests/"
6+
# NOTE(review): these look like VM memory-region base addresses, but neither
# is referenced in the visible part of this file -- confirm before relying on them.
HEAP_START = 0x300000000
7+
STACK_START = 0x200000000
8+
9+
# NOTE(review): compute-unit cost constants; none of them is referenced in the
# visible part of this file (exact_cu_cost hard-codes 100) -- confirm intended use.
CU_BASE_LOG = 100
10+
CU_PER_BYTE = 1 # this is actually every 2 bytes...
11+
CU_MEM_OP = 10
12+
13+
# fmt: off
14+
# Opcodes the generator treats as known-invalid: the generation loop emits
# only a single generic test vector for each of these instead of the full
# register/immediate matrix. One row per high nibble of the opcode.
INVALID_IXS = [
15+
0x00, 0x01, 0x02, 0x03, 0x06, 0x08, 0x09, 0x0a, 0x0b, 0x0d, 0x0e,
16+
0x10, 0x11, 0x12, 0x13, 0x16, 0x18, 0x19, 0x1a, 0x1b, 0x1e,
17+
0x20, 0x21, 0x22, 0x23, 0x26, 0x28, 0x29, 0x2a, 0x2b, 0x2e,
18+
0x30, 0x31, 0x32, 0x33, 0x38, 0x39, 0x3a, 0x3b,
19+
0x40, 0x41, 0x42, 0x43, 0x48, 0x49, 0x4a, 0x4b,
20+
0x50, 0x51, 0x52, 0x53, 0x58, 0x59, 0x5a, 0x5b,
21+
0x60, 0x68,
22+
0x70, 0x78,
23+
0x80, 0x81, 0x82, 0x83, 0x88, 0x89, 0x8a, 0x8b,
24+
0x90, 0x91, 0x92, 0x93, 0x98, 0x99, 0x9a, 0x9b,
25+
0xa0, 0xa1, 0xa2, 0xa3, 0xa6, 0xa8, 0xa9, 0xaa, 0xab, 0xae,
26+
0xb0, 0xb1, 0xb2, 0xb3, 0xb8, 0xb9, 0xba, 0xbb,
27+
0xc0, 0xc1, 0xc2, 0xc3, 0xc8, 0xc9, 0xca, 0xcb,
28+
0xd0, 0xd1, 0xd2, 0xd3, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdf,
29+
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xef,
30+
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xff,
31+
]
32+
33+
# Opcodes that additionally receive the load/store test matrix (a range of
# base-register values crossed with a range of 16-bit offsets) in the
# generation loop. The "v2" opcodes are listed but currently disabled.
LOAD_STORE_IXS = [
34+
# v0
35+
0x61, 0x62, 0x63, 0x69, 0x6a, 0x6b,
36+
0x71, 0x72, 0x73, 0x79, 0x7a, 0x7b,
37+
# v2
38+
# 0x27, 0x2c, 0x2f,
39+
# 0x37, 0x3c, 0x3f,
40+
# 0x87, 0x8c, 0x8f,
41+
# 0x97, 0x9c, 0x9f,
42+
]
43+
# fmt: on
44+
45+
46+
def heap_vec(data_vec, start):
    """Lay out `data_vec` as a descriptor table followed by the raw payloads.

    The result begins with one 16-byte descriptor per entry: a little-endian
    u64 address followed by a little-endian u64 length. Addresses are assigned
    consecutively, beginning right after the descriptor table, which itself is
    assumed to sit at `start`. The payloads follow in order; `str` entries are
    ASCII-encoded, anything else goes through `bytes(...)`.

    Returns a list of ints (one per byte), not a `bytes` object.
    """
    out = []

    # First pass: descriptor table.
    cursor = start + 16 * len(data_vec)
    for item in data_vec:
        size = len(item)
        out.extend(struct.pack("<QQ", cursor, size))
        cursor += size

    # Second pass: payload bytes, in the same order as the descriptors.
    for item in data_vec:
        out.extend(item.encode("ascii") if isinstance(item, str) else bytes(item))

    return out
59+
60+
61+
def exact_cu_cost(data_vec):
    """Return 100, plus 100 per entry of `data_vec`, plus the summed entry lengths."""
    per_entry = 100 * len(data_vec)
    total_length = sum(map(len, data_vec))
    return 100 + per_entry + total_length
63+
64+
65+
# fmt: off
# 8-byte slot with opcode 0x95 that is appended after the instruction under
# test (presumably the `exit` instruction, so that the program terminates).
_EXIT_IX = bytes([0x95, 0, 0, 0, 0, 0, 0, 0])


def _regs(sreg, dreg):
    """Pack `sreg` (high nibble) and `dreg` (low nibble) into one register byte.

    The result is masked to 8 bits so it is always a valid byte value.
    """
    return ((sreg << 4) + dreg) & 0xFF


def _ix(opcode, regs, imm, offset=0):
    """Encode one 8-byte slot: opcode, register byte, u16 offset, u32 imm (little-endian)."""
    return bytes([opcode, regs]) + offset.to_bytes(2, "little") + imm.to_bytes(4, "little")


def _vector(op, rodata, **fields):
    """Build one test-vector dict for opcode `op` with 100 CUs available.

    `fields` holds the optional extra entries (registers, call whitelist,
    memory prefixes) that the script entry point reads with `dict.get`.
    """
    return {"op": f"{op:02x}", "cu_avail": 100, **fields, "rodata": rodata}


def _wide_regs():
    """Register presets with 32, 48 and 64 low bits set, shared by most vectors."""
    return {"r2": 0xffffffff, "r6": 0xffffffffffff, "r9": 0xffffffffffffffff}


def _generic_vectors(op):
    """Vectors running `op` over a matrix of source/destination registers and immediates.

    For opcodes listed in INVALID_IXS only the first combination is emitted.
    """
    vectors = []
    for sreg in [0, 2, 6, 9, 10, 11]:
        for dreg in [0, 9, 10, 11]:
            for imm in [0x0, 0x2, 0xA, 0x10, 0x20, 0x39, 0x40, 0x41, 0x12345678, 0x7fffffff, 0x80000000, 0xffffffff]:
                vectors.append(_vector(op, _ix(op, _regs(sreg, dreg), imm) + _EXIT_IX, **_wide_regs()))
                # for ix that we know are invalid, we only emit 1 test case
                if op in INVALID_IXS:
                    return vectors
    return vectors


def _odd_length_vectors(op):
    """Programs whose length is (mostly) not a multiple of 8, alone and followed by a full slot."""
    vectors = []
    for pad in range(8):
        program = bytes([0x95] + [0] * pad)
        vectors.append(_vector(op, program))
        vectors.append(_vector(op, program + _EXIT_IX))
    return vectors


def _lddw_vectors(op):
    """Vectors for the two-slot `lddw` (v0, v1): slot `op` followed by a 0x00 slot.

    Covers a lone first slot with nothing after it, and, for each register /
    immediate combination, a second slot whose register byte either matches
    the first slot, has sreg + 1, or has dreg - 1.
    """
    vectors = [_vector(op, _ix(op, _regs(0, 0), 0x12345678), **_wide_regs())]
    for sreg in [2, 10]:
        for dreg in [0, 9, 10]:
            for imm2 in [0x0, 0x7fffffff, 0x80000001, 0xfffffffe]:
                for imm in [0x0, 0x2, 0x12345678, 0x7fffffff, 0x80000000, 0xffffffff]:
                    first_slot = _ix(op, _regs(sreg, dreg), imm)
                    for second_regs in (
                        _regs(sreg, dreg),
                        _regs(sreg + 1, dreg),
                        _regs(sreg, dreg - 1),
                    ):
                        rodata = first_slot + _ix(0x00, second_regs, imm2) + _EXIT_IX
                        vectors.append(_vector(op, rodata, **_wide_regs()))
    return vectors


def _call_vectors(op):
    """Vectors for `call`: each immediate is emitted once with and once without a call whitelist."""
    vectors = []
    for imm in [
        0x53075d44,  # pchash(1) - success
        0x63852afc,  # pchash(0) - SIGSTACK
        0xc61fa2f4,  # pchash(2) - fail
        0xa33b57b3,  # pchash(3) - illegal ix (not in call_whitelist)
        0xd0220d26,  # pchash(4) - illegal ix (not in call_whitelist)
        0x71e3cf81,  # magic - always SIGSTACK (ignore call_whitelist)
        0x12345678,  # invalid
        0x0b00c380,  # inverse of magic - just invalid
    ]:
        rodata = _ix(op, _regs(0, 0), imm) + _EXIT_IX + _EXIT_IX
        # hashmap containing valid pc: 0, 1, 2 (higher are trimmed)
        vectors.append(_vector(op, rodata, call_whitelist=[0xff]))
        # no hashmap
        vectors.append(_vector(op, rodata))
    return vectors


def _callx_vectors(op):
    """Vectors for `callx` through r3, covering valid, misaligned and out-of-range targets."""
    vectors = []
    for r3 in [
        0x100000000,  # SIGSTACK
        0x100000008,  # working
        0x100000010,  # target_pc=2 > 1
        0x200000008,  # region=2 != 1
        0x100000009,  # !aligned
        0x200000009,  # region=2 != 1 && !aligned
        0x100000011,  # target_pc=2 > 1 && !aligned
        0x200000010,  # target_pc=2 > 1 && region=2 != 1
        0x200000011,  # target_pc=2 > 1 && region=2 != 1 && !aligned
        0xfffffffffffffff8,  # overflow
    ]:
        vectors.append(_vector(op, _ix(op, _regs(3, 0), 3) + _EXIT_IX, r3=r3))
    return vectors


def _load_store_vectors(op):
    """Vectors for load/store ops: base-register values crossed with 16-bit offsets.

    r2 and r3 both hold the base value, its low 32 bits are used as the
    immediate, and stack, heap and input region are pre-filled with a
    repeating 1..8 byte pattern.
    """
    vectors = []
    for reg in [
        0x0FFFFFFFF,
        0x100000000,
        0x1FFFFFFFF,
        0x200000000,
        0x200000008,
        0x2FFFFFFFF,
        0x300000000,
        0x3FFFFFFFF,
        0x400000000,
        0x4FFFFFFFF,
        0xffffffffffffffff,
    ]:
        for offset in [0x0000, 0x0001, 0x0008, 0x00FF, 0x01FF, 0xFFF8, 0xFFFF]:
            vectors.append(_vector(
                op,
                _ix(op, _regs(2, 3), reg & 0xffffffff, offset) + _EXIT_IX,
                r2=reg,
                r3=reg,
                stack_prefix=[1, 2, 3, 4, 5, 6, 7, 8]*4,
                heap_prefix=[1, 2, 3, 4, 5, 6, 7, 8]*4,
                input_data_region=[1, 2, 3, 4, 5, 6, 7, 8]*4,
            ))
    return vectors


test_vectors_all_ix = []
# Cover every one-byte opcode, 0x00 through 0xff inclusive.
for op in range(0x100):
    # generate most tests
    test_vectors_all_ix += _generic_vectors(op)

    # generate programs with length that's not a multiple of 8
    if op == 0x00:
        test_vectors_all_ix += _odd_length_vectors(op)

    # 0x18 = lddw (v0, v1) - ix 0x18 followed by 0x00
    if op == 0x18:
        test_vectors_all_ix += _lddw_vectors(op)

    # call
    if op == 0x85:
        test_vectors_all_ix += _call_vectors(op)

    # callx
    if op == 0x8d:
        test_vectors_all_ix += _callx_vectors(op)

    # load/store ops
    if op in LOAD_STORE_IXS:
        test_vectors_all_ix += _load_store_vectors(op)
# fmt: on
252+
253+
254+
def _into_key_data(key_prefix, test_vectors):
    """Pair every test vector with a key made of `key_prefix` plus its index.

    Returns a list of `(key, vector)` tuples in the original order.
    """
    keyed = []
    for index, vector in enumerate(test_vectors):
        keyed.append((key_prefix + str(index), vector))
    return keyed
256+
257+
258+
# Attach a stable key ("v0", "v1", ...) to every generated vector.
test_vectors_all_ix = _into_key_data("v", test_vectors_all_ix)

if __name__ == "__main__":
    # Local import: only the script entry point touches the filesystem.
    import os

    print("Generating tests for all SBF instructions...")

    testname = "validate"
    sbpf_versions = (0, 1, 2)

    # Make sure the per-version output directories exist before writing, so
    # a fresh checkout does not fail with FileNotFoundError.
    for version in sbpf_versions:
        os.makedirs(f"{OUTPUT_DIR}/{testname}/v{version}", exist_ok=True)

    for _key, test in test_vectors_all_ix:
        syscall_ctx = vm_pb.SyscallContext()

        syscall_ctx.instr_ctx.cu_avail = test.get("cu_avail", 0)
        # solfuzz-agave expects a program_id
        syscall_ctx.instr_ctx.program_id = bytes([0] * 32)

        # Registers default to 0 when the vector does not set them.
        # r1, r10 and r11 are not populated here.
        for reg_name in ("r0", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9"):
            setattr(syscall_ctx.vm_ctx, reg_name, test.get(reg_name, 0))

        syscall_ctx.vm_ctx.rodata = test.get("rodata")
        # Each whitelist entry is serialized as a little-endian u64.
        syscall_ctx.vm_ctx.call_whitelist = b"".join(
            entry.to_bytes(8, "little") for entry in test.get("call_whitelist", [])
        )
        syscall_ctx.syscall_invocation.heap_prefix = bytes(test.get("heap_prefix", []))
        # The heap is sized to exactly fit the provided prefix.
        syscall_ctx.vm_ctx.heap_max = len(syscall_ctx.syscall_invocation.heap_prefix)
        syscall_ctx.syscall_invocation.stack_prefix = bytes(
            test.get("stack_prefix", [])
        )
        input_data_region = bytes(test.get("input_data_region", []))
        if input_data_region:
            region = vm_pb.InputDataRegion()
            region.offset = 0
            region.content = input_data_region
            region.is_writable = True
            syscall_ctx.vm_ctx.input_data_regions.append(region)

        # The same context is written once per sbpf version. The file name is
        # derived from the version-0 serialization and reused for the other
        # versions so the files line up across directories.
        filename = None
        for version in sbpf_versions:
            syscall_ctx.vm_ctx.sbpf_version = version
            serialized_instr = syscall_ctx.SerializeToString(deterministic=True)
            if filename is None:
                filename = (
                    "ix_"
                    + test.get("op")
                    + "_"
                    + hashlib.sha3_256(serialized_instr).hexdigest()[:16]
                )
            with open(f"{OUTPUT_DIR}/{testname}/v{version}/{filename}.bin", "wb") as f:
                f.write(serialized_instr)

    print("done!")

src/test_suite/syscall/codec_utils.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,30 @@ def encode_input(input: vm_pb.SyscallContext):
1111
instr_ctx.CopyFrom(input.instr_ctx)
1212
instr_encode_input(instr_ctx)
1313
input.instr_ctx.CopyFrom(instr_ctx)
14+
if input.vm_ctx:
15+
if input.vm_ctx.rodata:
16+
input.vm_ctx.rodata = encode_hex_compact(input.vm_ctx.rodata)
17+
for i in range(len(input.vm_ctx.input_data_regions)):
18+
input.vm_ctx.input_data_regions[i].content = encode_hex_compact(
19+
input.vm_ctx.input_data_regions[i].content
20+
)
21+
if input.vm_ctx.call_whitelist:
22+
input.vm_ctx.call_whitelist = encode_hex_compact(
23+
input.vm_ctx.call_whitelist
24+
)
25+
if input.syscall_invocation:
26+
if input.syscall_invocation.function_name:
27+
input.syscall_invocation.function_name = encode_hex_compact(
28+
input.syscall_invocation.function_name
29+
)
30+
if input.syscall_invocation.stack_prefix:
31+
input.syscall_invocation.stack_prefix = encode_hex_compact(
32+
input.syscall_invocation.stack_prefix
33+
)
34+
if input.syscall_invocation.heap_prefix:
35+
input.syscall_invocation.heap_prefix = encode_hex_compact(
36+
input.syscall_invocation.heap_prefix
37+
)
1438

1539

1640
def encode_output(effects: vm_pb.SyscallEffects):

0 commit comments

Comments
 (0)