Skip to content

Commit 04df094

Browse files
authored
fix(query): join predict use cast_expr_to_non_null_boolean (#16937)
* fix(query): join predict use cast_expr_to_non_null_boolean * update * update
1 parent 682039f commit 04df094

File tree

5 files changed

+82
-58
lines changed

5 files changed

+82
-58
lines changed

src/common/column/src/buffer/immutable.rs

Lines changed: 31 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -63,14 +63,20 @@ pub struct Buffer<T> {
6363
/// the internal byte buffer.
6464
data: Arc<Bytes<T>>,
6565

66-
/// The offset into the buffer.
67-
offset: usize,
66+
/// Pointer into `data`, valid for reads of `length` elements
67+
///
68+
/// We store a pointer instead of an offset to avoid pointer arithmetic
69+
/// which causes LLVM to fail to vectorise code correctly
70+
ptr: *const T,
6871

6972
// the length of the buffer. Given a region `data` of N bytes, [offset..offset+length] is visible
7073
// to this buffer.
7174
length: usize,
7275
}
7376

77+
unsafe impl<T: Send> Send for Buffer<T> {}
78+
unsafe impl<T: Sync> Sync for Buffer<T> {}
79+
7480
impl<T: PartialEq> PartialEq for Buffer<T> {
7581
#[inline]
7682
fn eq(&self, other: &Self) -> bool {
@@ -101,9 +107,10 @@ impl<T> Buffer<T> {
101107
/// Auxiliary method to create a new Buffer
102108
pub(crate) fn from_bytes(bytes: Bytes<T>) -> Self {
103109
let length = bytes.len();
110+
let ptr = bytes.as_ptr();
104111
Buffer {
105112
data: Arc::new(bytes),
106-
offset: 0,
113+
ptr,
107114
length,
108115
}
109116
}
@@ -130,24 +137,7 @@ impl<T> Buffer<T> {
130137
/// Returns the byte slice stored in this buffer
131138
#[inline]
132139
pub fn as_slice(&self) -> &[T] {
133-
// Safety:
134-
// invariant of this struct `offset + length <= data.len()`
135-
debug_assert!(self.offset + self.length <= self.data.len());
136-
unsafe {
137-
self.data
138-
.get_unchecked(self.offset..self.offset + self.length)
139-
}
140-
}
141-
142-
/// Returns the byte slice stored in this buffer
143-
/// # Safety
144-
/// `index` must be smaller than `len`
145-
#[inline]
146-
pub(super) unsafe fn get_unchecked(&self, index: usize) -> &T {
147-
// Safety:
148-
// invariant of this function
149-
debug_assert!(index < self.length);
150-
unsafe { self.data.get_unchecked(self.offset + index) }
140+
self
151141
}
152142

153143
/// Returns a new [`Buffer`] that is a slice of this buffer starting at `offset`.
@@ -193,20 +183,20 @@ impl<T> Buffer<T> {
193183
/// The caller must ensure `offset + length <= self.len()`
194184
#[inline]
195185
pub unsafe fn slice_unchecked(&mut self, offset: usize, length: usize) {
196-
self.offset += offset;
186+
self.ptr = self.ptr.add(offset);
197187
self.length = length;
198188
}
199189

200190
/// Returns a pointer to the start of this buffer.
201191
#[inline]
202192
pub(crate) fn data_ptr(&self) -> *const T {
203-
self.data.deref().as_ptr()
193+
self.data.as_ptr()
204194
}
205195

206196
/// Returns the offset of this buffer.
207197
#[inline]
208198
pub fn offset(&self) -> usize {
209-
self.offset
199+
unsafe { self.ptr.offset_from(self.data_ptr()) as usize }
210200
}
211201

212202
/// # Safety
@@ -253,10 +243,11 @@ impl<T> Buffer<T> {
253243
/// * has not been imported from the c data interface (FFI)
254244
#[inline]
255245
pub fn get_mut_slice(&mut self) -> Option<&mut [T]> {
246+
let offset = self.offset();
256247
Arc::get_mut(&mut self.data)
257248
.and_then(|b| b.get_vec())
258249
// Safety: the invariant of this struct
259-
.map(|x| unsafe { x.get_unchecked_mut(self.offset..self.offset + self.length) })
250+
.map(|x| unsafe { x.get_unchecked_mut(offset..offset + self.length) })
260251
}
261252

262253
/// Get the strong count of underlying `Arc` data buffer.
@@ -269,28 +260,14 @@ impl<T> Buffer<T> {
269260
Arc::weak_count(&self.data)
270261
}
271262

272-
/// Returns its internal representation
273-
#[must_use]
274-
pub fn into_inner(self) -> (Arc<Bytes<T>>, usize, usize) {
275-
let Self {
276-
data,
277-
offset,
278-
length,
279-
} = self;
280-
(data, offset, length)
281-
}
282-
283263
/// Creates a [`Buffer`] from its internal representation.
285265
/// This is the inverse of `Buffer::into_inner`.
285265
///
286266
/// # Safety
287267
/// Callers must ensure all invariants of this struct are upheld.
288268
pub unsafe fn from_inner_unchecked(data: Arc<Bytes<T>>, offset: usize, length: usize) -> Self {
289-
Self {
290-
data,
291-
offset,
292-
length,
293-
}
269+
let ptr = data.as_ptr().add(offset);
270+
Self { data, ptr, length }
294271
}
295272
}
296273

@@ -313,8 +290,9 @@ impl<T> From<Vec<T>> for Buffer<T> {
313290
#[inline]
314291
fn from(p: Vec<T>) -> Self {
315292
let bytes: Bytes<T> = p.into();
293+
let ptr = bytes.as_ptr();
316294
Self {
317-
offset: 0,
295+
ptr,
318296
length: bytes.len(),
319297
data: Arc::new(bytes),
320298
}
@@ -326,7 +304,15 @@ impl<T> std::ops::Deref for Buffer<T> {
326304

327305
#[inline]
328306
fn deref(&self) -> &[T] {
329-
self.as_slice()
307+
debug_assert!(self.offset() + self.length <= self.data.len());
308+
unsafe { std::slice::from_raw_parts(self.ptr, self.length) }
309+
}
310+
}
311+
312+
impl<T> AsRef<[T]> for Buffer<T> {
313+
#[inline]
314+
fn as_ref(&self) -> &[T] {
315+
self
330316
}
331317
}
332318

@@ -375,8 +361,9 @@ impl<T: crate::types::NativeType> From<arrow_buffer::Buffer> for Buffer<T> {
375361

376362
impl<T: crate::types::NativeType> From<Buffer<T>> for arrow_buffer::Buffer {
377363
fn from(value: Buffer<T>) -> Self {
364+
let offset = value.offset();
378365
crate::buffer::to_buffer(value.data).slice_with_length(
379-
value.offset * std::mem::size_of::<T>(),
366+
offset * std::mem::size_of::<T>(),
380367
value.length * std::mem::size_of::<T>(),
381368
)
382369
}

src/common/column/tests/it/buffer/immutable.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,9 @@ fn from_slice() {
2727
let buffer = Buffer::<i32>::from(vec![0, 1, 2]);
2828
assert_eq!(buffer.len(), 3);
2929
assert_eq!(buffer.as_slice(), &[0, 1, 2]);
30+
31+
assert_eq!(unsafe { *buffer.get_unchecked(1) }, 1);
32+
assert_eq!(unsafe { *buffer.get_unchecked(2) }, 2);
3033
}
3134

3235
#[test]

src/query/expression/benches/bench.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#[macro_use]
1616
extern crate criterion;
1717

18+
use arrow_buffer::ScalarBuffer;
1819
use criterion::Criterion;
1920
use databend_common_column::buffer::Buffer;
2021
use databend_common_expression::arrow::deserialize_column;
@@ -135,6 +136,9 @@ fn bench(c: &mut Criterion) {
135136
for length in [10240, 102400] {
136137
let (left, right) = generate_random_int_data(&mut rng, length);
137138

139+
let left_scalar = ScalarBuffer::from_iter(left.iter().cloned());
140+
let right_scalar = ScalarBuffer::from_iter(right.iter().cloned());
141+
138142
group.bench_function(format!("function_iterator_iterator_v1/{length}"), |b| {
139143
b.iter(|| {
140144
let left = left.clone();
@@ -170,6 +174,19 @@ fn bench(c: &mut Criterion) {
170174
},
171175
);
172176

177+
group.bench_function(
178+
format!("function_buffer_scalar_index_unchecked_iterator/{length}"),
179+
|b| {
180+
b.iter(|| {
181+
let _c = (0..length)
182+
.map(|i| unsafe {
183+
left_scalar.get_unchecked(i) + right_scalar.get_unchecked(i)
184+
})
185+
.collect::<Vec<i32>>();
186+
})
187+
},
188+
);
189+
173190
group.bench_function(
174191
format!("function_slice_index_unchecked_iterator/{length}"),
175192
|b| {

src/query/service/src/pipelines/processors/transforms/hash_join/desc.rs

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use databend_common_expression::type_check::check_function;
1717
use databend_common_expression::Expr;
1818
use databend_common_expression::RemoteExpr;
1919
use databend_common_functions::BUILTIN_FUNCTIONS;
20+
use databend_common_sql::executor::cast_expr_to_non_null_boolean;
2021
use databend_common_sql::executor::physical_plans::HashJoin;
2122
use databend_common_sql::IndexType;
2223
use parking_lot::RwLock;
@@ -96,11 +97,21 @@ impl HashJoinDesc {
9697
}
9798

9899
fn join_predicate(non_equi_conditions: &[RemoteExpr]) -> Result<Option<Expr>> {
99-
non_equi_conditions
100+
let expr = non_equi_conditions
100101
.iter()
101102
.map(|expr| expr.as_expr(&BUILTIN_FUNCTIONS))
102103
.try_reduce(|lhs, rhs| {
103104
check_function(None, "and_filters", &[], &[lhs, rhs], &BUILTIN_FUNCTIONS)
104-
})
105+
});
106+
// For RIGHT MARK join, we can't use is_true to cast filter into non_null boolean
107+
match expr {
108+
Ok(Some(expr)) => match expr {
109+
Expr::Constant { ref scalar, .. } if !scalar.is_null() => {
110+
Ok(Some(cast_expr_to_non_null_boolean(expr)?))
111+
}
112+
_ => Ok(Some(expr)),
113+
},
114+
other => other,
115+
}
105116
}
106117
}

tests/sqllogictests/suites/query/join/join.test

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ statement ok
22
drop table if exists t1;
33

44
statement ok
5-
create table t1 (a int);
5+
create or replace table t1 (a int);
66

77
# right join with empty build side
88
query II
@@ -82,7 +82,7 @@ statement ok
8282
drop table if exists t1;
8383

8484
statement ok
85-
create table t1(a int, b int)
85+
create or replace table t1(a int, b int)
8686

8787
statement ok
8888
insert into t1 values(1, 2), (2, 4), (3, 6), (4, 8), (5, 10)
@@ -91,7 +91,7 @@ statement ok
9191
drop table if exists t2
9292

9393
statement ok
94-
create table t2(a int, b int)
94+
create or replace table t2(a int, b int)
9595

9696
statement ok
9797
insert into t2 values(1, 2), (1, 4), (1, 6), (1, 8), (1, 10);
@@ -124,10 +124,10 @@ statement ok
124124
drop table if exists t2;
125125

126126
statement ok
127-
create table t1 (id int, val bigint unsigned default 0);
127+
create or replace table t1 (id int, val bigint unsigned default 0);
128128

129129
statement ok
130-
create table t2 (id int, val bigint unsigned default 0);
130+
create or replace table t2 (id int, val bigint unsigned default 0);
131131

132132
statement ok
133133
insert into t1 values(1, 1696549154011), (2, 1696549154013);
@@ -153,13 +153,13 @@ statement ok
153153
drop table t2;
154154

155155
statement ok
156-
create table t(id int);
156+
create or replace table t(id int);
157157

158158
statement ok
159159
insert into t values(1), (2);
160160

161161
statement ok
162-
create table t1(id int, col1 varchar);
162+
create or replace table t1(id int, col1 varchar);
163163

164164
statement ok
165165
insert into t1 values(1, 'c'), (3, 'd');
@@ -203,13 +203,13 @@ statement ok
203203
drop table if exists t2;
204204

205205
statement ok
206-
create table t1(a int, b int);
206+
create or replace table t1(a int, b int);
207207

208208
statement ok
209209
insert into t1 values(1, 1),(2, 2),(3, 3);
210210

211211
statement ok
212-
create table t2(a int, b int);
212+
create or replace table t2(a int, b int);
213213

214214
statement ok
215215
insert into t2 values(1, 1),(2, 2);
@@ -237,13 +237,13 @@ statement ok
237237
drop table if exists t2;
238238

239239
statement ok
240-
create table t1(a int, b int, c int, d int);
240+
create or replace table t1(a int, b int, c int, d int);
241241

242242
statement ok
243243
insert into t1 values(1, 2, 3, 4);
244244

245245
statement ok
246-
create table t2(a int, b int, c int, d int);
246+
create or replace table t2(a int, b int, c int, d int);
247247

248248
statement ok
249249
insert into t2 values(1, 2, 3, 4);
@@ -255,7 +255,7 @@ statement ok
255255
drop table if exists t;
256256

257257
statement ok
258-
create table t(a int);
258+
create or replace table t(a int);
259259

260260
statement ok
261261
insert into t values(1),(2),(3);
@@ -274,5 +274,11 @@ select * from (select number from numbers(5)) t2 full outer join (select a, 'A'
274274
2 2 A
275275
3 3 A
276276

277+
statement ok
278+
select * from (select number from numbers(5)) t2 full outer join (select a, 'A' as name from t) t1 on t1.a = t2.number and 123;
279+
280+
statement error
281+
select * from (select number from numbers(5)) t2 full outer join (select a, 'A' as name from t) t1 on t1.a = t2.number and 11981933213501947393::DATE;
282+
277283
statement ok
278284
drop table if exists t;

0 commit comments

Comments
 (0)