Skip to content

Commit 8606972

Browse files
committed
runtime: optimize findHead
This is similar to #3899, but smaller and hopefully just as efficient. Thanks to @HattoriHanzo031 for starting this work, benchmarking, and for improving the performance of the code even further.
1 parent 91563cf commit 8606972

File tree

2 files changed

+40
-6
lines changed

2 files changed

+40
-6
lines changed

builder/sizes_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@ func TestBinarySize(t *testing.T) {
4141
// This is a small number of very diverse targets that we want to test.
4242
tests := []sizeTest{
4343
// microcontrollers
44-
{"hifive1b", "examples/echo", 4580, 280, 0, 2268},
45-
{"microbit", "examples/serial", 2888, 388, 8, 2272},
46-
{"wioterminal", "examples/pininterrupt", 6124, 1484, 116, 6832},
44+
{"hifive1b", "examples/echo", 4600, 280, 0, 2268},
45+
{"microbit", "examples/serial", 2908, 388, 8, 2272},
46+
{"wioterminal", "examples/pininterrupt", 6140, 1484, 116, 6832},
4747

4848
// TODO: also check wasm. Right now this is difficult, because
4949
// wasm binaries are run through wasm-opt and therefore the

src/runtime/gc_blocks.go

Lines changed: 37 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,13 @@ const (
7676
blockStateMask blockState = 3 // 11
7777
)
7878

79+
// The value of a metadata state byte in which every block state packed into
// that byte is 'tail'. findHead compares a state byte against this value to
// detect a whole byte of tail blocks at once.
80+
const blockStateByteAllTails = 0 |
81+
uint8(blockStateTail<<(stateBits*3)) |
82+
uint8(blockStateTail<<(stateBits*2)) |
83+
uint8(blockStateTail<<(stateBits*1)) |
84+
uint8(blockStateTail<<(stateBits*0))
85+
7986
// String returns a human-readable version of the block state, for debugging.
8087
func (s blockState) String() string {
8188
switch s {
@@ -123,7 +130,25 @@ func (b gcBlock) address() uintptr {
123130
// points to an allocated object. It returns the same block if this block
124131
// already points to the head.
125132
func (b gcBlock) findHead() gcBlock {
126-
for b.state() == blockStateTail {
133+
for {
134+
// Optimization: check whether the current block state byte (which
135+
// contains the state of multiple blocks) is composed entirely of tail
136+
// blocks. If so, we can skip back to the last block in the previous
137+
// state byte.
138+
// This optimization speeds up findHead for pointers that point into a
139+
// large allocation.
140+
stateByte := b.stateByte()
141+
if stateByte == blockStateByteAllTails {
142+
b -= (b % blocksPerStateByte) + 1
143+
continue
144+
}
145+
146+
// Check whether we've found a non-tail block, which means we found the
147+
// head.
148+
state := b.stateFromByte(stateByte)
149+
if state != blockStateTail {
150+
break
151+
}
127152
b--
128153
}
129154
if gcAsserts {
@@ -146,10 +171,19 @@ func (b gcBlock) findNext() gcBlock {
146171
return b
147172
}
148173

174+
// stateByte returns the metadata byte that stores the state of block b. The
// byte is shared: it holds the states of blocksPerStateByte consecutive
// blocks, so use stateFromByte to extract the state of b itself.
func (b gcBlock) stateByte() byte {
175+
return *(*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte))
176+
}
177+
178+
// stateFromByte returns the state of block b, extracted from the given state
179+
// byte. The state byte must have been obtained using b.stateByte(), otherwise
// the result is incorrect.
180+
func (b gcBlock) stateFromByte(stateByte byte) blockState {
181+
// Shift the bits belonging to b down to the lowest position, then mask off
// the states of the other blocks that share this byte.
return blockState(stateByte>>((b%blocksPerStateByte)*stateBits)) & blockStateMask
182+
}
183+
149184
// state returns the current state of block b, decoded from the metadata state
// byte that contains it.
150185
func (b gcBlock) state() blockState {
151-
stateBytePtr := (*uint8)(unsafe.Add(metadataStart, b/blocksPerStateByte))
152-
return blockState(*stateBytePtr>>((b%blocksPerStateByte)*stateBits)) & blockStateMask
186+
return b.stateFromByte(b.stateByte())
153187
}
154188

155189
// setState sets the current block to the given state, which must contain more

0 commit comments

Comments
 (0)