Skip to content

Commit f02aa84

Browse files
authored
Merge pull request #7645 from sundy-li/in-improve
refactor(query): improve in function
2 parents 5691187 + 21e80b1 commit f02aa84

File tree

14 files changed, with 245 additions and 111 deletions (+245 -111 lines changed).

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/common/hashtable/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -15,6 +15,7 @@ test = false
1515
common-base = { path = "../base" }
1616

1717
# Crates.io dependencies
18+
ahash = "0.7.6"
1819
ordered-float = { git = "https://github.com/andylokandy/rust-ordered-float.git", branch = "as", features = ["serde"] }
1920
primitive-types = "0.11.1"
2021

src/common/hashtable/src/hash_set.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,4 +30,8 @@ impl<Key: HashTableKeyable, Grower: HashTableGrower, Allocator: AllocatorTrait +
3030
self.insert_key(value.get_key(), &mut inserted);
3131
}
3232
}
33+
34+
pub fn contains(&self, key: &Key) -> bool {
35+
self.find_key(key).is_some()
36+
}
3337
}

src/common/hashtable/src/hash_table_key.rs

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -126,7 +126,7 @@ impl HashTableKeyable for OrderedFloat<f32> {
126126

127127
#[inline(always)]
128128
fn is_zero(&self) -> bool {
129-
self.is_nan()
129+
self.0 == 0.0
130130
}
131131

132132
#[inline(always)]
@@ -144,7 +144,7 @@ impl HashTableKeyable for OrderedFloat<f64> {
144144

145145
#[inline(always)]
146146
fn is_zero(&self) -> bool {
147-
self.is_nan()
147+
self.0 == 0.0
148148
}
149149

150150
#[inline(always)]

src/common/hashtable/src/keys_ref.rs

Lines changed: 79 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,79 @@
1+
// Copyright 2021 Datafuse Labs.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use std::hash::Hasher;
16+
17+
use ahash::AHasher;
18+
19+
use super::HashTableKeyable;
20+
21+
#[derive(Clone, Copy)]
22+
pub struct KeysRef {
23+
pub length: usize,
24+
pub address: usize,
25+
}
26+
27+
impl KeysRef {
28+
pub fn create(address: usize, length: usize) -> KeysRef {
29+
KeysRef { length, address }
30+
}
31+
}
32+
33+
impl Eq for KeysRef {}
34+
35+
impl PartialEq for KeysRef {
36+
fn eq(&self, other: &Self) -> bool {
37+
if self.length != other.length {
38+
return false;
39+
}
40+
41+
unsafe {
42+
let self_value = std::slice::from_raw_parts(self.address as *const u8, self.length);
43+
let other_value = std::slice::from_raw_parts(other.address as *const u8, other.length);
44+
self_value == other_value
45+
}
46+
}
47+
}
48+
49+
impl HashTableKeyable for KeysRef {
50+
const BEFORE_EQ_HASH: bool = true;
51+
52+
fn is_zero(&self) -> bool {
53+
self.length == 0
54+
}
55+
56+
fn fast_hash(&self) -> u64 {
57+
unsafe {
58+
// TODO(Winter): We need a more efficient hash algorithm.
59+
let value = std::slice::from_raw_parts(self.address as *const u8, self.length);
60+
61+
let mut hasher = AHasher::default();
62+
hasher.write(value);
63+
hasher.finish()
64+
}
65+
}
66+
67+
fn set_key(&mut self, new_value: &Self) {
68+
self.length = new_value.length;
69+
self.address = new_value.address;
70+
}
71+
}
72+
73+
impl std::hash::Hash for KeysRef {
74+
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
75+
let self_value =
76+
unsafe { std::slice::from_raw_parts(self.address as *const u8, self.length) };
77+
self_value.hash(state);
78+
}
79+
}

src/common/hashtable/src/lib.rs

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,8 +36,11 @@ mod hash_table_entity;
3636
mod hash_table_grower;
3737
mod hash_table_iter;
3838
mod hash_table_key;
39+
mod keys_ref;
3940
mod two_level_hash_table;
4041

42+
pub use keys_ref::KeysRef;
43+
4144
#[cfg(not(target_os = "linux"))]
4245
type HashTableAllocator = common_base::mem_allocator::JEAllocator;
4346
#[cfg(target_os = "linux")]

src/query/datavalues/src/columns/builder.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@ where T: Scalar
5151
}
5252
}
5353

54-
fn build_nonull(&mut self, length: usize) -> ColumnRef {
54+
pub fn build_nonull(&mut self, length: usize) -> ColumnRef {
5555
let size = self.len();
5656
let col = self.builder.to_column();
5757
if length != size && size == 1 {

src/query/functions-v2/src/aggregates/aggregate_combinator_distinct.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,14 +26,14 @@ use common_expression::with_number_mapped_type;
2626
use common_expression::Column;
2727
use common_expression::ColumnBuilder;
2828
use common_expression::Scalar;
29+
use common_hashtable::KeysRef;
2930
use common_io::prelude::*;
3031

3132
use super::aggregate_distinct_state::AggregateDistinctNumberState;
3233
use super::aggregate_distinct_state::AggregateDistinctState;
3334
use super::aggregate_distinct_state::AggregateDistinctStringState;
3435
use super::aggregate_distinct_state::DataGroupValue;
3536
use super::aggregate_distinct_state::DistinctStateFunc;
36-
use super::aggregate_distinct_state::KeysRef;
3737
use super::aggregate_function::AggregateFunction;
3838
use super::aggregate_function_factory::AggregateFunctionCreator;
3939
use super::aggregate_function_factory::AggregateFunctionDescription;

src/query/functions-v2/src/aggregates/aggregate_distinct_state.rs

Lines changed: 1 addition & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,6 @@
1313
// limitations under the License.
1414
use std::collections::hash_map::RandomState;
1515
use std::collections::HashSet;
16-
use std::hash::Hash;
1716
use std::marker::Send;
1817
use std::marker::Sync;
1918

@@ -34,6 +33,7 @@ use common_expression::Scalar;
3433
use common_hashtable::HashSetWithStackMemory;
3534
use common_hashtable::HashTableEntity;
3635
use common_hashtable::HashTableKeyable;
36+
use common_hashtable::KeysRef;
3737
use common_io::prelude::*;
3838
use serde::de::DeserializeOwned;
3939
use serde::Serialize;
@@ -327,39 +327,3 @@ where T: Number + Serialize + DeserializeOwned + HashTableKeyable
327327
Ok(vec![NumberType::<T>::upcast_column(values)])
328328
}
329329
}
330-
331-
#[derive(Clone, Copy)]
332-
pub struct KeysRef {
333-
pub length: usize,
334-
pub address: usize,
335-
}
336-
337-
impl KeysRef {
338-
pub fn create(address: usize, length: usize) -> KeysRef {
339-
KeysRef { length, address }
340-
}
341-
}
342-
343-
impl Eq for KeysRef {}
344-
345-
impl PartialEq for KeysRef {
346-
fn eq(&self, other: &Self) -> bool {
347-
if self.length != other.length {
348-
return false;
349-
}
350-
351-
unsafe {
352-
let self_value = std::slice::from_raw_parts(self.address as *const u8, self.length);
353-
let other_value = std::slice::from_raw_parts(other.address as *const u8, other.length);
354-
self_value == other_value
355-
}
356-
}
357-
}
358-
359-
impl Hash for KeysRef {
360-
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
361-
let self_value =
362-
unsafe { std::slice::from_raw_parts(self.address as *const u8, self.length) };
363-
self_value.hash(state);
364-
}
365-
}

src/query/functions/src/aggregates/aggregate_combinator_distinct.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ use std::sync::Arc;
2020
use common_arrow::arrow::bitmap::Bitmap;
2121
use common_datavalues::prelude::*;
2222
use common_exception::Result;
23+
use common_hashtable::KeysRef;
2324
use common_io::prelude::*;
2425
use ordered_float::OrderedFloat;
2526

@@ -28,7 +29,6 @@ use super::aggregate_distinct_state::AggregateDistinctState;
2829
use super::aggregate_distinct_state::AggregateDistinctStringState;
2930
use super::aggregate_distinct_state::DataGroupValues;
3031
use super::aggregate_distinct_state::DistinctStateFunc;
31-
use super::aggregate_distinct_state::KeysRef;
3232
use super::aggregate_function::AggregateFunction;
3333
use super::aggregate_function_factory::AggregateFunctionCreator;
3434
use super::aggregate_function_factory::AggregateFunctionDescription;

0 commit comments

Comments
 (0)