Skip to content

Commit f2e576d

Browse files
niaow authored and deadprogram committed
interp: do not unroll loops
This change triggers a revert whenever a basic block runs instructions at runtime twice. As a result, a loop body with runtime-only instructions will no longer be unrolled. This should help some extreme cases where loops can be expanded into hundreds or thousands of instructions.
1 parent 2d61972 commit f2e576d

File tree

4 files changed

+106
-1
lines changed

4 files changed

+106
-1
lines changed

interp/errors.go

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ var (
1818
errUnsupportedInst = errors.New("interp: unsupported instruction")
1919
errUnsupportedRuntimeInst = errors.New("interp: unsupported instruction (to be emitted at runtime)")
2020
errMapAlreadyCreated = errors.New("interp: map already created")
21+
errLoopUnrolled = errors.New("interp: loop unrolled")
2122
)
2223

2324
// This is one of the errors that can be returned from toLLVMValue when the
@@ -26,7 +27,9 @@ var (
2627
var errInvalidPtrToIntSize = errors.New("interp: ptrtoint integer size does not equal pointer size")
2728

2829
func isRecoverableError(err error) bool {
29-
return err == errIntegerAsPointer || err == errUnsupportedInst || err == errUnsupportedRuntimeInst || err == errMapAlreadyCreated
30+
return err == errIntegerAsPointer || err == errUnsupportedInst ||
31+
err == errUnsupportedRuntimeInst || err == errMapAlreadyCreated ||
32+
err == errLoopUnrolled
3033
}
3134

3235
// ErrorLine is one line in a traceback. The position may be missing.

interp/interpreter.go

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,15 +24,39 @@ func (r *runner) run(fn *function, params []value, parentMem *memoryView, indent
2424
locals[i] = param
2525
}
2626

27+
// Track what blocks have run instructions at runtime.
28+
// This is used to prevent unrolling.
29+
var runtimeBlocks map[int]struct{}
30+
2731
// Start with the first basic block and the first instruction.
2832
// Branch instructions may modify both bb and instIndex when branching.
2933
bb := fn.blocks[0]
3034
currentBB := 0
3135
lastBB := -1 // last basic block is undefined, only defined after a branch
3236
var operands []value
37+
startRTInsts := len(mem.instructions)
3338
for instIndex := 0; instIndex < len(bb.instructions); instIndex++ {
3439
if instIndex == 0 {
3540
// This is the start of a new basic block.
41+
if len(mem.instructions) != startRTInsts {
42+
if _, ok := runtimeBlocks[lastBB]; ok {
43+
// This loop has been unrolled.
44+
// Avoid doing this, as it can result in a large amount of extra machine code.
45+
// This currently uses the branch from the last block, as there is no available information to give a better location.
46+
lastBBInsts := fn.blocks[lastBB].instructions
47+
return nil, mem, r.errorAt(lastBBInsts[len(lastBBInsts)-1], errLoopUnrolled)
48+
}
49+
50+
// Flag the last block as having run stuff at runtime.
51+
if runtimeBlocks == nil {
52+
runtimeBlocks = make(map[int]struct{})
53+
}
54+
runtimeBlocks[lastBB] = struct{}{}
55+
56+
// Reset the block-start runtime instructions counter.
57+
startRTInsts = len(mem.instructions)
58+
}
59+
3660
// There may be PHI nodes that need to be resolved. Resolve all PHI
3761
// nodes before continuing with regular instructions.
3862
// PHI nodes need to be treated specially because they can have a

interp/testdata/revert.ll

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,13 +3,17 @@ target triple = "x86_64--linux"
33

44
declare void @externalCall(i64)
55

6+
declare i64 @ptrHash(i8* nocapture)
7+
68
@foo.knownAtRuntime = global i64 0
79
@bar.knownAtRuntime = global i64 0
810
@baz.someGlobal = external global [3 x {i64, i32}]
911
@baz.someInt = global i32 0
1012
@x.atomicNum = global i32 0
1113
@x.volatileNum = global i32 0
1214
@y.ready = global i32 0
15+
@z.bloom = global i64 0
16+
@z.arr = global [32 x i8] zeroinitializer
1317

1418
define void @runtime.initAll() unnamed_addr {
1519
entry:
@@ -19,6 +23,7 @@ entry:
1923
call void @main.init(i8* undef)
2024
call void @x.init(i8* undef)
2125
call void @y.init(i8* undef)
26+
call void @z.init(i8* undef)
2227
ret void
2328
}
2429

@@ -72,3 +77,43 @@ loop:
7277
end:
7378
ret void
7479
}
80+
81+
define internal void @z.init(i8* %context) unnamed_addr {
82+
%bloom = bitcast i64* @z.bloom to i8*
83+
84+
; This can be safely expanded.
85+
call void @z.setArr(i8* %bloom, i64 1, i8* %bloom)
86+
87+
; This call should be reverted to prevent unrolling.
88+
call void @z.setArr(i8* bitcast ([32 x i8]* @z.arr to i8*), i64 32, i8* %bloom)
89+
90+
ret void
91+
}
92+
93+
define internal void @z.setArr(i8* %arr, i64 %n, i8* %context) unnamed_addr {
94+
entry:
95+
br label %loop
96+
97+
loop:
98+
%prev = phi i64 [ %n, %entry ], [ %idx, %loop ]
99+
%idx = sub i64 %prev, 1
100+
%elem = getelementptr i8, i8* %arr, i64 %idx
101+
call void @z.set(i8* %elem, i8* %context)
102+
%done = icmp eq i64 %idx, 0
103+
br i1 %done, label %end, label %loop
104+
105+
end:
106+
ret void
107+
}
108+
109+
define internal void @z.set(i8* %ptr, i8* %context) unnamed_addr {
110+
; Insert the pointer into the Bloom filter.
111+
%hash = call i64 @ptrHash(i8* %ptr)
112+
%index = lshr i64 %hash, 58
113+
%bit = shl i64 1, %index
114+
%bloom = bitcast i8* %context to i64*
115+
%old = load i64, i64* %bloom
116+
%new = or i64 %old, %bit
117+
store i64 %new, i64* %bloom
118+
ret void
119+
}

interp/testdata/revert.out.ll

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,13 @@ target triple = "x86_64--linux"
88
@x.atomicNum = local_unnamed_addr global i32 0
99
@x.volatileNum = global i32 0
1010
@y.ready = local_unnamed_addr global i32 0
11+
@z.bloom = global i64 0
12+
@z.arr = global [32 x i8] zeroinitializer
1113

1214
declare void @externalCall(i64) local_unnamed_addr
1315

16+
declare i64 @ptrHash(i8* nocapture) local_unnamed_addr
17+
1418
define void @runtime.initAll() unnamed_addr {
1519
entry:
1620
call fastcc void @baz.init(i8* undef)
@@ -24,6 +28,8 @@ entry:
2428
%y = load volatile i32, i32* @x.volatileNum, align 4
2529
store volatile i32 %y, i32* @x.volatileNum, align 4
2630
call fastcc void @y.init(i8* undef)
31+
call fastcc void @z.set(i8* bitcast (i64* @z.bloom to i8*), i8* bitcast (i64* @z.bloom to i8*))
32+
call fastcc void @z.setArr(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @z.arr, i32 0, i32 0), i64 32, i8* bitcast (i64* @z.bloom to i8*))
2733
ret void
2834
}
2935

@@ -48,3 +54,30 @@ loop: ; preds = %loop, %entry
4854
end: ; preds = %loop
4955
ret void
5056
}
57+
58+
define internal fastcc void @z.setArr(i8* %arr, i64 %n, i8* %context) unnamed_addr {
59+
entry:
60+
br label %loop
61+
62+
loop: ; preds = %loop, %entry
63+
%prev = phi i64 [ %n, %entry ], [ %idx, %loop ]
64+
%idx = sub i64 %prev, 1
65+
%elem = getelementptr i8, i8* %arr, i64 %idx
66+
call fastcc void @z.set(i8* %elem, i8* %context)
67+
%done = icmp eq i64 %idx, 0
68+
br i1 %done, label %end, label %loop
69+
70+
end: ; preds = %loop
71+
ret void
72+
}
73+
74+
define internal fastcc void @z.set(i8* %ptr, i8* %context) unnamed_addr {
75+
%hash = call i64 @ptrHash(i8* %ptr)
76+
%index = lshr i64 %hash, 58
77+
%bit = shl i64 1, %index
78+
%bloom = bitcast i8* %context to i64*
79+
%old = load i64, i64* %bloom, align 8
80+
%new = or i64 %old, %bit
81+
store i64 %new, i64* %bloom, align 8
82+
ret void
83+
}

0 commit comments

Comments
 (0)