Skip to content

Commit 14cb817

Browse files
authored
Merge pull request #1509 from quickwit-oss/refact-fast-field
refactor, fix api
2 parents 9497794 + edd9155 commit 14cb817

File tree

9 files changed

+151
-182
lines changed

9 files changed

+151
-182
lines changed

fastfield_codecs/benches/bench.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,6 @@ extern crate test;
66
mod tests {
77
use std::sync::Arc;
88

9-
use fastfield_codecs::bitpacked::BitpackedCodec;
10-
use fastfield_codecs::blockwise_linear::BlockwiseLinearCodec;
11-
use fastfield_codecs::linear::LinearCodec;
129
use fastfield_codecs::*;
1310

1411
fn get_data() -> Vec<u64> {

fastfield_codecs/src/column.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,7 @@ where
176176
T: Copy + Ord + Default,
177177
{
178178
fn min_max(&self) -> (T, T) {
179-
if let Some((min, max)) = self.min_max_cache.lock().unwrap().clone() {
179+
if let Some((min, max)) = *self.min_max_cache.lock().unwrap() {
180180
return (min, max);
181181
}
182182
let (min, max) =

fastfield_codecs/src/lib.rs

Lines changed: 32 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,28 @@ extern crate test;
99

1010
use std::io;
1111
use std::io::Write;
12+
use std::sync::Arc;
1213

1314
use common::BinarySerializable;
1415
use ownedbytes::OwnedBytes;
16+
use serialize::Header;
1517

1618
mod bitpacked;
1719
mod blockwise_linear;
1820
pub(crate) mod line;
1921
mod linear;
22+
mod monotonic_mapping;
2023

2124
mod column;
2225
mod gcd;
2326
mod serialize;
2427

28+
pub use self::bitpacked::BitpackedCodec;
29+
pub use self::blockwise_linear::BlockwiseLinearCodec;
2530
pub use self::column::{monotonic_map_column, Column, VecColumn};
26-
pub use self::serialize::{estimate, open, serialize, serialize_and_load, NormalizedHeader};
31+
pub use self::linear::LinearCodec;
32+
pub use self::monotonic_mapping::MonotonicallyMappableToU64;
33+
pub use self::serialize::{estimate, serialize, serialize_and_load, NormalizedHeader};
2734

2835
#[derive(PartialEq, Eq, PartialOrd, Ord, Debug, Clone, Copy)]
2936
#[repr(u8)]
@@ -61,70 +68,39 @@ impl FastFieldCodecType {
6168
}
6269
}
6370

64-
pub trait MonotonicallyMappableToU64: 'static + PartialOrd + Copy {
65-
/// Converts a value to u64.
66-
///
67-
/// Internally all fast field values are encoded as u64.
68-
fn to_u64(self) -> u64;
69-
70-
/// Converts a value from u64
71-
///
72-
/// Internally all fast field values are encoded as u64.
73-
/// **Note: To be used for converting encoded Term, Posting values.**
74-
fn from_u64(val: u64) -> Self;
75-
}
76-
77-
impl MonotonicallyMappableToU64 for u64 {
78-
fn to_u64(self) -> u64 {
79-
self
80-
}
81-
82-
fn from_u64(val: u64) -> Self {
83-
val
84-
}
85-
}
86-
87-
impl MonotonicallyMappableToU64 for i64 {
88-
#[inline(always)]
89-
fn to_u64(self) -> u64 {
90-
common::i64_to_u64(self)
91-
}
92-
93-
#[inline(always)]
94-
fn from_u64(val: u64) -> Self {
95-
common::u64_to_i64(val)
96-
}
97-
}
98-
99-
impl MonotonicallyMappableToU64 for bool {
100-
#[inline(always)]
101-
fn to_u64(self) -> u64 {
102-
if self {
103-
1
104-
} else {
105-
0
71+
/// Returns the correct codec reader wrapped in the `Arc` for the data.
72+
pub fn open<T: MonotonicallyMappableToU64>(
73+
mut bytes: OwnedBytes,
74+
) -> io::Result<Arc<dyn Column<T>>> {
75+
let header = Header::deserialize(&mut bytes)?;
76+
match header.codec_type {
77+
FastFieldCodecType::Bitpacked => open_specific_codec::<BitpackedCodec, _>(bytes, &header),
78+
FastFieldCodecType::Linear => open_specific_codec::<LinearCodec, _>(bytes, &header),
79+
FastFieldCodecType::BlockwiseLinear => {
80+
open_specific_codec::<BlockwiseLinearCodec, _>(bytes, &header)
10681
}
10782
}
108-
109-
#[inline(always)]
110-
fn from_u64(val: u64) -> Self {
111-
val > 0
112-
}
11383
}
11484

115-
impl MonotonicallyMappableToU64 for f64 {
116-
fn to_u64(self) -> u64 {
117-
common::f64_to_u64(self)
118-
}
119-
120-
fn from_u64(val: u64) -> Self {
121-
common::u64_to_f64(val)
85+
fn open_specific_codec<C: FastFieldCodec, Item: MonotonicallyMappableToU64>(
86+
bytes: OwnedBytes,
87+
header: &Header,
88+
) -> io::Result<Arc<dyn Column<Item>>> {
89+
let normalized_header = header.normalized();
90+
let reader = C::open_from_bytes(bytes, normalized_header)?;
91+
let min_value = header.min_value;
92+
if let Some(gcd) = header.gcd {
93+
let monotonic_mapping = move |val: u64| Item::from_u64(min_value + val * gcd.get());
94+
Ok(Arc::new(monotonic_map_column(reader, monotonic_mapping)))
95+
} else {
96+
let monotonic_mapping = move |val: u64| Item::from_u64(min_value + val);
97+
Ok(Arc::new(monotonic_map_column(reader, monotonic_mapping)))
12298
}
12399
}
124100

125101
/// The FastFieldCodec trait is required on all variants
126102
/// of fast field compressions, to decide which one to choose.
127-
trait FastFieldCodec: 'static {
103+
pub trait FastFieldCodec: 'static {
128104
/// A codec needs to provide a unique name and id, which is
129105
/// used for debugging and de/serialization.
130106
const CODEC_TYPE: FastFieldCodecType;

fastfield_codecs/src/monotonic_mapping.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
pub trait MonotonicallyMappableToU64: 'static + PartialOrd + Copy {
2+
/// Converts a value to u64.
3+
///
4+
/// Internally all fast field values are encoded as u64.
5+
fn to_u64(self) -> u64;
6+
7+
/// Converts a value from u64
8+
///
9+
/// Internally all fast field values are encoded as u64.
10+
/// **Note: To be used for converting encoded Term, Posting values.**
11+
fn from_u64(val: u64) -> Self;
12+
}
13+
14+
impl MonotonicallyMappableToU64 for u64 {
15+
fn to_u64(self) -> u64 {
16+
self
17+
}
18+
19+
fn from_u64(val: u64) -> Self {
20+
val
21+
}
22+
}
23+
24+
impl MonotonicallyMappableToU64 for i64 {
25+
#[inline(always)]
26+
fn to_u64(self) -> u64 {
27+
common::i64_to_u64(self)
28+
}
29+
30+
#[inline(always)]
31+
fn from_u64(val: u64) -> Self {
32+
common::u64_to_i64(val)
33+
}
34+
}
35+
36+
impl MonotonicallyMappableToU64 for bool {
37+
#[inline(always)]
38+
fn to_u64(self) -> u64 {
39+
if self {
40+
1
41+
} else {
42+
0
43+
}
44+
}
45+
46+
#[inline(always)]
47+
fn from_u64(val: u64) -> Self {
48+
val > 0
49+
}
50+
}
51+
52+
impl MonotonicallyMappableToU64 for f64 {
53+
fn to_u64(self) -> u64 {
54+
common::f64_to_u64(self)
55+
}
56+
57+
fn from_u64(val: u64) -> Self {
58+
common::u64_to_f64(val)
59+
}
60+
}

fastfield_codecs/src/serialize.rs

Lines changed: 6 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -47,11 +47,11 @@ pub struct NormalizedHeader {
4747

4848
#[derive(Debug, Copy, Clone)]
4949
pub(crate) struct Header {
50-
num_vals: u64,
51-
min_value: u64,
52-
max_value: u64,
53-
gcd: Option<NonZeroU64>,
54-
codec_type: FastFieldCodecType,
50+
pub num_vals: u64,
51+
pub min_value: u64,
52+
pub max_value: u64,
53+
pub gcd: Option<NonZeroU64>,
54+
pub codec_type: FastFieldCodecType,
5555
}
5656

5757
impl Header {
@@ -124,36 +124,6 @@ impl BinarySerializable for Header {
124124
}
125125
}
126126

127-
/// Returns correct the reader wrapped in the `DynamicFastFieldReader` enum for the data.
128-
pub fn open<T: MonotonicallyMappableToU64>(
129-
mut bytes: OwnedBytes,
130-
) -> io::Result<Arc<dyn Column<T>>> {
131-
let header = Header::deserialize(&mut bytes)?;
132-
match header.codec_type {
133-
FastFieldCodecType::Bitpacked => open_specific_codec::<BitpackedCodec, _>(bytes, &header),
134-
FastFieldCodecType::Linear => open_specific_codec::<LinearCodec, _>(bytes, &header),
135-
FastFieldCodecType::BlockwiseLinear => {
136-
open_specific_codec::<BlockwiseLinearCodec, _>(bytes, &header)
137-
}
138-
}
139-
}
140-
141-
fn open_specific_codec<C: FastFieldCodec, Item: MonotonicallyMappableToU64>(
142-
bytes: OwnedBytes,
143-
header: &Header,
144-
) -> io::Result<Arc<dyn Column<Item>>> {
145-
let normalized_header = header.normalized();
146-
let reader = C::open_from_bytes(bytes, normalized_header)?;
147-
let min_value = header.min_value;
148-
if let Some(gcd) = header.gcd {
149-
let monotonic_mapping = move |val: u64| Item::from_u64(min_value + val * gcd.get());
150-
Ok(Arc::new(monotonic_map_column(reader, monotonic_mapping)))
151-
} else {
152-
let monotonic_mapping = move |val: u64| Item::from_u64(min_value + val);
153-
Ok(Arc::new(monotonic_map_column(reader, monotonic_mapping)))
154-
}
155-
}
156-
157127
pub fn estimate<T: MonotonicallyMappableToU64>(
158128
typed_column: impl Column<T>,
159129
codec_type: FastFieldCodecType,
@@ -217,8 +187,7 @@ fn detect_codec(
217187
// removing nan values for codecs with broken calculations, and max values which disables
218188
// codecs
219189
estimations.retain(|estimation| !estimation.0.is_nan() && estimation.0 != f32::MAX);
220-
estimations
221-
.sort_by(|(score_left, _), (score_right, _)| score_left.partial_cmp(&score_right).unwrap());
190+
estimations.sort_by(|(score_left, _), (score_right, _)| score_left.total_cmp(score_right));
222191
Some(estimations.first()?.1)
223192
}
224193

src/fastfield/bytes/writer.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::io;
1+
use std::io::{self, Write};
22

33
use fastfield_codecs::VecColumn;
44

@@ -112,7 +112,6 @@ impl BytesFastFieldWriter {
112112
doc_id_map: Option<&DocIdMapping>,
113113
) -> io::Result<()> {
114114
// writing the offset index
115-
// TODO FIXME No need to double the memory.
116115
{
117116
self.doc_index.push(self.vals.len() as u64);
118117
let col = VecColumn::from(&self.doc_index[..]);
@@ -128,7 +127,7 @@ impl BytesFastFieldWriter {
128127
}
129128
}
130129
// writing the values themselves
131-
let mut value_serializer = serializer.new_bytes_fast_field_with_idx(self.field, 1);
130+
let mut value_serializer = serializer.new_bytes_fast_field(self.field);
132131
// the else could be removed, but this is faster (difference not benchmarked)
133132
if let Some(doc_id_map) = doc_id_map {
134133
for vals in self.get_ordered_values(Some(doc_id_map)) {

0 commit comments

Comments
 (0)