1
1
use std:: io:: { self , Write } ;
2
2
3
- use common:: BinarySerializable ;
4
3
use ownedbytes:: OwnedBytes ;
5
4
use tantivy_bitpacker:: { compute_num_bits, BitPacker , BitUnpacker } ;
6
5
6
+ use crate :: serialize:: NormalizedHeader ;
7
7
use crate :: { Column , FastFieldCodec , FastFieldCodecType } ;
8
8
9
9
/// Depending on the field type, a different
@@ -12,80 +12,26 @@ use crate::{Column, FastFieldCodec, FastFieldCodecType};
12
12
pub struct BitpackedReader {
13
13
data : OwnedBytes ,
14
14
bit_unpacker : BitUnpacker ,
15
- min_value_u64 : u64 ,
16
- max_value_u64 : u64 ,
17
- num_vals : u64 ,
15
+ normalized_header : NormalizedHeader ,
18
16
}
19
17
20
18
impl Column for BitpackedReader {
21
19
#[ inline]
22
20
fn get_val ( & self , doc : u64 ) -> u64 {
23
- self . min_value_u64 + self . bit_unpacker . get ( doc, & self . data )
21
+ self . bit_unpacker . get ( doc, & self . data )
24
22
}
25
23
#[ inline]
26
24
fn min_value ( & self ) -> u64 {
27
- self . min_value_u64
25
+ // The BitpackedReader assumes a normalized vector.
26
+ 0
28
27
}
29
28
#[ inline]
30
29
fn max_value ( & self ) -> u64 {
31
- self . max_value_u64
30
+ self . normalized_header . max_value
32
31
}
33
32
#[ inline]
34
33
fn num_vals ( & self ) -> u64 {
35
- self . num_vals
36
- }
37
- }
38
- pub struct BitpackedSerializerLegacy < ' a , W : ' a + Write > {
39
- bit_packer : BitPacker ,
40
- write : & ' a mut W ,
41
- min_value : u64 ,
42
- num_vals : u64 ,
43
- amplitude : u64 ,
44
- num_bits : u8 ,
45
- }
46
-
47
- impl < ' a , W : Write > BitpackedSerializerLegacy < ' a , W > {
48
- /// Creates a new fast field serializer.
49
- ///
50
- /// The serializer in fact encode the values by bitpacking
51
- /// `(val - min_value)`.
52
- ///
53
- /// It requires a `min_value` and a `max_value` to compute
54
- /// compute the minimum number of bits required to encode
55
- /// values.
56
- pub fn open (
57
- write : & ' a mut W ,
58
- min_value : u64 ,
59
- max_value : u64 ,
60
- ) -> io:: Result < BitpackedSerializerLegacy < ' a , W > > {
61
- assert ! ( min_value <= max_value) ;
62
- let amplitude = max_value - min_value;
63
- let num_bits = compute_num_bits ( amplitude) ;
64
- let bit_packer = BitPacker :: new ( ) ;
65
- Ok ( BitpackedSerializerLegacy {
66
- bit_packer,
67
- write,
68
- min_value,
69
- num_vals : 0 ,
70
- amplitude,
71
- num_bits,
72
- } )
73
- }
74
- /// Pushes a new value to the currently open u64 fast field.
75
- #[ inline]
76
- pub fn add_val ( & mut self , val : u64 ) -> io:: Result < ( ) > {
77
- let val_to_write: u64 = val - self . min_value ;
78
- self . bit_packer
79
- . write ( val_to_write, self . num_bits , & mut self . write ) ?;
80
- self . num_vals += 1 ;
81
- Ok ( ( ) )
82
- }
83
- pub fn close_field ( mut self ) -> io:: Result < ( ) > {
84
- self . bit_packer . close ( & mut self . write ) ?;
85
- self . min_value . serialize ( & mut self . write ) ?;
86
- self . amplitude . serialize ( & mut self . write ) ?;
87
- self . num_vals . serialize ( & mut self . write ) ?;
88
- Ok ( ( ) )
34
+ self . normalized_header . num_vals
89
35
}
90
36
}
91
37
@@ -98,50 +44,39 @@ impl FastFieldCodec for BitpackedCodec {
98
44
type Reader = BitpackedReader ;
99
45
100
46
/// Opens a fast field given a file.
101
- fn open_from_bytes ( bytes : OwnedBytes ) -> io:: Result < Self :: Reader > {
102
- let footer_offset = bytes. len ( ) - 24 ;
103
- let ( data, mut footer) = bytes. split ( footer_offset) ;
104
- let min_value = u64:: deserialize ( & mut footer) ?;
105
- let amplitude = u64:: deserialize ( & mut footer) ?;
106
- let num_vals = u64:: deserialize ( & mut footer) ?;
107
- let max_value = min_value + amplitude;
108
- let num_bits = compute_num_bits ( amplitude) ;
47
+ fn open_from_bytes (
48
+ data : OwnedBytes ,
49
+ normalized_header : NormalizedHeader ,
50
+ ) -> io:: Result < Self :: Reader > {
51
+ let num_bits = compute_num_bits ( normalized_header. max_value ) ;
109
52
let bit_unpacker = BitUnpacker :: new ( num_bits) ;
110
53
Ok ( BitpackedReader {
111
54
data,
112
55
bit_unpacker,
113
- min_value_u64 : min_value,
114
- max_value_u64 : max_value,
115
- num_vals,
56
+ normalized_header,
116
57
} )
117
58
}
118
59
119
60
/// Serializes data with the BitpackedFastFieldSerializer.
120
61
///
121
- /// The serializer in fact encode the values by bitpacking
122
- /// `(val - min_value)`.
62
+ /// The bitpacker assumes that the column has been normalized.
63
+ /// i.e. It has already been shifted by its minimum value, so that its
64
+ /// current minimum value is 0.
123
65
///
124
- /// It requires a `min_value` and a `max_value` to compute
125
- /// compute the minimum number of bits required to encode
126
- /// values.
127
- fn serialize ( write : & mut impl Write , fastfield_accessor : & dyn Column ) -> io:: Result < ( ) > {
128
- let mut serializer = BitpackedSerializerLegacy :: open (
129
- write,
130
- fastfield_accessor. min_value ( ) ,
131
- fastfield_accessor. max_value ( ) ,
132
- ) ?;
133
-
134
- for val in fastfield_accessor. iter ( ) {
135
- serializer. add_val ( val) ?;
66
+ /// Ideally, we made a shift upstream on the column so that `col.min_value() == 0`.
67
+ fn serialize ( column : & dyn Column , write : & mut impl Write ) -> io:: Result < ( ) > {
68
+ assert_eq ! ( column. min_value( ) , 0u64 ) ;
69
+ let num_bits = compute_num_bits ( column. max_value ( ) ) ;
70
+ let mut bit_packer = BitPacker :: new ( ) ;
71
+ for val in column. iter ( ) {
72
+ bit_packer. write ( val, num_bits, write) ?;
136
73
}
137
- serializer. close_field ( ) ?;
138
-
74
+ bit_packer. close ( write) ?;
139
75
Ok ( ( ) )
140
76
}
141
77
142
- fn estimate ( fastfield_accessor : & impl Column ) -> Option < f32 > {
143
- let amplitude = fastfield_accessor. max_value ( ) - fastfield_accessor. min_value ( ) ;
144
- let num_bits = compute_num_bits ( amplitude) ;
78
+ fn estimate ( column : & impl Column ) -> Option < f32 > {
79
+ let num_bits = compute_num_bits ( column. max_value ( ) ) ;
145
80
let num_bits_uncompressed = 64 ;
146
81
Some ( num_bits as f32 / num_bits_uncompressed as f32 )
147
82
}
0 commit comments