Skip to content

Commit b9569bc

Browse files
committed
Speedup fixed row encoding
1 parent b6240b3 commit b9569bc

File tree

3 files changed

+61
-14
lines changed

3 files changed

+61
-14
lines changed

arrow-buffer/src/bigint/mod.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ use num::cast::AsPrimitive;
2121
use num::{BigInt, FromPrimitive, ToPrimitive};
2222
use std::cmp::Ordering;
2323
use std::num::ParseIntError;
24-
use std::ops::{BitAnd, BitOr, BitXor, Neg, Shl, Shr};
24+
use std::ops::{BitAnd, BitOr, BitXor, Neg, Not, Shl, Shr};
2525
use std::str::FromStr;
2626

2727
mod div;
@@ -126,6 +126,15 @@ impl From<i64> for i256 {
126126
}
127127
}
128128

129+
impl Not for i256 {
130+
type Output = i256;
131+
132+
#[inline]
133+
fn not(self) -> Self::Output {
134+
Self::from_parts(!self.low, !self.high)
135+
}
136+
}
137+
129138
/// Parse `s` with any sign and leading 0s removed
130139
fn parse_impl(s: &str, negative: bool) -> Result<i256, ParseI256Error> {
131140
if s.len() <= 38 {

arrow-ord/src/cmp.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,9 @@ impl<'a, T: ByteViewType> ArrayOrd for &'a GenericByteViewArray<T> {
581581
if l_len != r_len {
582582
return false;
583583
}
584+
if l_len == 0 && r_len == 0 {
585+
return true;
586+
}
584587

585588
// # Safety
586589
// The index is within bounds as it is checked in value()

arrow-row/src/fixed.rs

Lines changed: 48 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,13 @@ pub trait FixedLengthEncoding: Copy {
5151

5252
fn encode(self) -> Self::Encoded;
5353

54+
/// Encodes `self` for descending sort order.
///
/// The default implementation takes the ascending encoding returned by
/// `encode` and inverts every byte, which reverses the byte-wise ordering
/// of the encoded values.
fn encode_desc(self) -> Self::Encoded {
    let mut flipped = self.encode();
    for byte in flipped.as_mut().iter_mut() {
        *byte = !*byte;
    }
    flipped
}
60+
5461
fn decode(encoded: Self::Encoded) -> Self;
5562
}
5663

@@ -61,6 +68,10 @@ impl FixedLengthEncoding for bool {
6168
[self as u8]
6269
}
6370

71+
fn encode_desc(self) -> [u8; 1] {
72+
[!self as u8]
73+
}
74+
6475
/// Decodes a boolean from its single encoded byte; any non-zero byte is `true`.
fn decode(encoded: Self::Encoded) -> Self {
    let [byte] = encoded;
    byte != 0
}
@@ -71,13 +82,24 @@ macro_rules! encode_signed {
7182
impl FixedLengthEncoding for $t {
7283
type Encoded = [u8; $n];
7384

85+
#[inline]
7486
fn encode(self) -> [u8; $n] {
7587
let mut b = self.to_be_bytes();
7688
// Toggle top "sign" bit to ensure consistent sort order
7789
b[0] ^= 0x80;
7890
b
7991
}
8092

93+
#[inline]
94+
fn encode_desc(self) -> Self::Encoded {
95+
// fast path for descending order
96+
let b = !self;
97+
let mut b = b.to_be_bytes();
98+
// Toggle top "sign" bit to ensure consistent sort order
99+
b[0] ^= 0x80;
100+
b
101+
}
102+
81103
fn decode(mut encoded: Self::Encoded) -> Self {
82104
// Toggle top "sign" bit
83105
encoded[0] ^= 0x80;
@@ -99,10 +121,16 @@ macro_rules! encode_unsigned {
99121
impl FixedLengthEncoding for $t {
100122
type Encoded = [u8; $n];
101123

124+
// #[inline]
102125
fn encode(self) -> [u8; $n] {
103126
self.to_be_bytes()
104127
}
105128

129+
// #[inline]
130+
fn encode_desc(self) -> [u8; $n] {
131+
(!self).to_be_bytes()
132+
}
133+
106134
/// Rebuilds the unsigned integer from its big-endian encoded bytes.
fn decode(encoded: Self::Encoded) -> Self {
    let bytes = encoded;
    Self::from_be_bytes(bytes)
}
@@ -251,20 +279,27 @@ pub fn encode_not_null<T: FixedLengthEncoding>(
251279
values: &[T],
252280
opts: SortOptions,
253281
) {
254-
for (value_idx, val) in values.iter().enumerate() {
255-
let offset = &mut offsets[value_idx + 1];
256-
let end_offset = *offset + T::ENCODED_LEN;
257-
258-
let to_write = &mut data[*offset..end_offset];
259-
to_write[0] = 1;
260-
let mut encoded = val.encode();
261-
if opts.descending {
262-
// Flip bits to reverse order
263-
encoded.as_mut().iter_mut().for_each(|v| *v = !*v)
282+
#[inline]
283+
fn encode<const DESC: bool, T: FixedLengthEncoding>(
284+
data: &mut [u8],
285+
offsets: &mut [usize],
286+
values: &[T],
287+
) {
288+
for (val, offset) in values.iter().zip(offsets[1..].iter_mut()) {
289+
let to_write = &mut data[*offset..*offset + T::ENCODED_LEN];
290+
to_write[0] = 1;
291+
let encoded = if DESC {
292+
val.encode_desc()
293+
} else {
294+
val.encode()
295+
};
296+
to_write[1..].copy_from_slice(encoded.as_ref());
297+
*offset = *offset + T::ENCODED_LEN;
264298
}
265-
to_write[1..].copy_from_slice(encoded.as_ref());
266-
267-
*offset = end_offset;
299+
}
300+
match opts.descending {
301+
true => encode::<true, T>(data, offsets, values),
302+
false => encode::<false, T>(data, offsets, values),
268303
}
269304
}
270305

0 commit comments

Comments
 (0)