@@ -98,7 +98,17 @@ func (d *indexBlockDesc) decode(blob []byte) {
98
98
// Each chunk begins with the full value of the first integer, followed by
99
99
// subsequent integers representing the differences between the current value
100
100
// and the preceding one. Integers are encoded with variable-size for best
101
- // storage efficiency.
101
+ // storage efficiency. Each chunk can be illustrated as below.
102
+ //
103
+ // Restart ---> +----------------+
104
+ // | Full integer |
105
+ // +----------------+
106
+ // | Diff with prev |
107
+ // +----------------+
108
+ // | ... |
109
+ // +----------------+
110
+ // | Diff with prev |
111
+ // +----------------+
102
112
//
103
113
// Empty index block is regarded as invalid.
104
114
func parseIndexBlock (blob []byte ) ([]uint32 , []byte , error ) {
@@ -118,6 +128,8 @@ func parseIndexBlock(blob []byte) ([]uint32, []byte, error) {
118
128
restart := binary .BigEndian .Uint32 (blob [len (blob )- int (i + 1 )* 4 :])
119
129
restarts = append (restarts , restart )
120
130
}
131
+ // Validate that restart points are strictly ordered and within the valid
132
+ // data range.
121
133
var prev uint32
122
134
for i := 0 ; i < len (restarts ); i ++ {
123
135
if i != 0 {
@@ -198,18 +210,21 @@ func (br *blockReader) readGreaterThan(id uint64) (uint64, error) {
198
210
}
199
211
pos += n
200
212
}
213
+ // No element greater than the specified id was found.
201
214
if index == len (br .restarts ) {
202
215
return math .MaxUint64 , nil
203
216
}
217
+ // The first element greater than the specified id is exactly the one
218
+ // located at the restart point.
204
219
item , _ := binary .Uvarint (br .data [br .restarts [index ]:])
205
220
return item , nil
206
221
}
207
222
208
223
type blockWriter struct {
209
- desc * indexBlockDesc
210
- restarts []uint32
211
- scratch []byte
212
- data []byte
224
+ desc * indexBlockDesc // Descriptor of the block
225
+ restarts []uint32 // Offsets into the data slice, marking the start of each section
226
+ scratch []byte // Buffer used for encoding full integers or value differences
227
+ data []byte // Aggregated encoded data slice
213
228
}
214
229
215
230
func newBlockWriter (blob []byte , desc * indexBlockDesc ) (* blockWriter , error ) {
@@ -234,16 +249,18 @@ func newBlockWriter(blob []byte, desc *indexBlockDesc) (*blockWriter, error) {
234
249
}
235
250
236
251
// append adds a new element to the block. The new element must be greater than
237
- // the previously one.
252
+ // the previous one. The provided ID must be greater than 0; a zero ID is rejected.
238
253
func (b * blockWriter ) append (id uint64 ) error {
239
254
if id == 0 {
240
255
return errors .New ("invalid zero id" )
241
256
}
242
257
if id <= b .desc .max {
243
258
return fmt .Errorf ("append element out of order, last: %d, this: %d" , b .desc .max , id )
244
259
}
260
+ // Rotate the current restart section if it's full
245
261
if b .desc .entries % indexBlockRestartLen == 0 {
246
- // Rotate the current restart range if it's full
262
+ // Save the offset within the data slice as the restart point
263
+ // for the next section.
247
264
b .restarts = append (b .restarts , uint32 (len (b .data )))
248
265
249
266
// The restart point item can either be encoded in variable
@@ -256,8 +273,10 @@ func (b *blockWriter) append(id uint64) error {
256
273
n := binary .PutUvarint (b .scratch [0 :], id )
257
274
b .data = append (b .data , b .scratch [:n ]... )
258
275
} else {
259
- // Non-head elements within a restart range are encoded using
260
- // their difference from the preceding element.
276
+ // The current section is not full, append the element.
277
+ // The element which is not the first one in the section
278
+ // is encoded using the value difference from the preceding
279
+ // element.
261
280
n := binary .PutUvarint (b .scratch [0 :], id - b .desc .max )
262
281
b .data = append (b .data , b .scratch [:n ]... )
263
282
}
@@ -310,16 +329,17 @@ func (b *blockWriter) sectionLast(section int) uint64 {
310
329
311
330
// sectionSearch looks up the specified value in the given section.
312
331
// The position and the preceding value will be returned if found.
313
- func (b * blockWriter ) sectionSearch (section int , n uint64 ) (prev uint64 , pos int ) {
332
+ func (b * blockWriter ) sectionSearch (section int , n uint64 ) (found bool , prev uint64 , pos int ) {
314
333
b .scanSection (section , func (v uint64 , p int ) bool {
315
334
if n == v {
316
335
pos = p
317
- return true
336
+ found = true
337
+ return true // terminate iteration
318
338
}
319
339
prev = v
320
- return false
340
+ return false // continue iteration
321
341
})
322
- return prev , pos
342
+ return found , prev , pos
323
343
}
324
344
325
345
// pop removes the last element from the block. The assumption is held that block
@@ -351,7 +371,10 @@ func (b *blockWriter) pop(id uint64) error {
351
371
}
352
372
// Look up the element preceding the one to be popped, in order to update
353
373
// the maximum element in the block.
354
- prev , pos := b .sectionSearch (len (b .restarts )- 1 , id )
374
+ found , prev , pos := b .sectionSearch (len (b .restarts )- 1 , id )
375
+ if ! found {
376
+ return fmt .Errorf ("pop element is not found, last: %d, this: %d" , b .desc .max , id )
377
+ }
355
378
b .desc .max = prev
356
379
b .data = b .data [:pos ]
357
380
b .desc .entries -= 1
0 commit comments