@@ -24,56 +24,135 @@ use databend_common_expression::types::Decimal;
24
24
use databend_common_expression:: types:: Decimal128Type ;
25
25
use databend_common_expression:: types:: Decimal256Type ;
26
26
use databend_common_expression:: types:: Decimal64Type ;
27
+ use databend_common_expression:: types:: Float32Type ;
28
+ use databend_common_expression:: types:: Float64Type ;
29
+ use databend_common_expression:: types:: Int16Type ;
30
+ use databend_common_expression:: types:: Int32Type ;
31
+ use databend_common_expression:: types:: Int64Type ;
32
+ use databend_common_expression:: types:: Int8Type ;
27
33
use databend_common_expression:: types:: NumberDataType ;
28
- use databend_common_expression:: types:: NumberType ;
29
34
use databend_common_expression:: types:: StringType ;
30
35
use databend_common_expression:: types:: TimestampType ;
36
+ use databend_common_expression:: types:: UInt16Type ;
37
+ use databend_common_expression:: types:: UInt32Type ;
38
+ use databend_common_expression:: types:: UInt64Type ;
39
+ use databend_common_expression:: types:: UInt8Type ;
31
40
use databend_common_expression:: types:: ValueType ;
32
- use databend_common_expression:: with_number_mapped_type;
33
41
use databend_common_expression:: Column ;
34
42
use databend_common_expression:: Scalar ;
35
43
use databend_common_expression:: ScalarRef ;
36
44
use databend_common_expression:: SELECTIVITY_THRESHOLD ;
37
45
use databend_storages_common_table_meta:: meta:: ColumnStatistics ;
46
+ use enum_dispatch:: enum_dispatch;
38
47
39
48
use crate :: statistics:: Trim ;
40
49
41
- pub trait ColumnStatisticsBuilder : Send + Sync {
42
- fn update_column ( & mut self , column : & Column ) ;
50
// Alias for a GenericColumnStatisticsBuilder over value type `T` that uses CommonAdapter.
+ pub type CommonBuilder < T > = GenericColumnStatisticsBuilder < T , CommonAdapter > ;
51
// Alias for a GenericColumnStatisticsBuilder over value type `T` that uses DecimalAdapter.
+ pub type DecimalBuilder < T > = GenericColumnStatisticsBuilder < T , DecimalAdapter > ;
52
+
53
// Closed set of column statistics builders, one variant per supported column type.
// The `enum_dispatch ( ColumnStatsOps )` attribute ties this enum to the ColumnStatsOps trait.
// NOTE(review): presumably enum_dispatch generates the ColumnStatsOps impl that forwards
// each call to the wrapped builder — confirm against the enum_dispatch documentation.
+ #[ enum_dispatch( ColumnStatsOps ) ]
54
+ pub enum ColumnStatisticsBuilder {
55
// Signed integers, unsigned integers and floats all wrap a CommonBuilder.
+ Int8 ( CommonBuilder < Int8Type > ) ,
56
+ Int16 ( CommonBuilder < Int16Type > ) ,
57
+ Int32 ( CommonBuilder < Int32Type > ) ,
58
+ Int64 ( CommonBuilder < Int64Type > ) ,
59
+ UInt8 ( CommonBuilder < UInt8Type > ) ,
60
+ UInt16 ( CommonBuilder < UInt16Type > ) ,
61
+ UInt32 ( CommonBuilder < UInt32Type > ) ,
62
+ UInt64 ( CommonBuilder < UInt64Type > ) ,
63
+ Float32 ( CommonBuilder < Float32Type > ) ,
64
+ Float64 ( CommonBuilder < Float64Type > ) ,
65
// String, Date and Timestamp also wrap a CommonBuilder.
+ String ( CommonBuilder < StringType > ) ,
66
+ Date ( CommonBuilder < DateType > ) ,
67
+ Timestamp ( CommonBuilder < TimestampType > ) ,
68
// The three decimal widths wrap a DecimalBuilder instead.
+ Decimal64 ( DecimalBuilder < Decimal64Type > ) ,
69
+ Decimal128 ( DecimalBuilder < Decimal128Type > ) ,
70
+ Decimal256 ( DecimalBuilder < Decimal256Type > ) ,
71
+ }
43
72
73
// Operations exposed by a column statistics builder. The trait is marked with
// `enum_dispatch` and named in the attribute on the ColumnStatisticsBuilder enum.
+ #[ enum_dispatch]
74
+ pub trait ColumnStatsOps {
75
// Takes the builder mutably together with a borrowed Column; returns nothing.
+ fn update_column ( & mut self , column : & Column ) ;
44
76
// Takes a single borrowed scalar plus a row count and the data type.
// NOTE(review): presumably `num_rows` is how many rows share `scalar` — confirm against the implementation.
fn update_scalar ( & mut self , scalar : & ScalarRef , num_rows : usize , data_type : & DataType ) ;
77
// Consumes the builder by value and yields the ColumnStatistics, or an error.
+ fn finalize ( self ) -> Result < ColumnStatistics > ;
78
+ }
79
+
80
// Implements ColumnStatsOps for every GenericColumnStatisticsBuilder whose value type and
// adapter satisfy the bounds below. Each method only forwards to the function of the same
// name reached through the `GenericColumnStatisticsBuilder ::` path.
// NOTE(review): this relies on path resolution selecting the inherent method rather than
// this trait method (otherwise the calls would recurse) — confirm the inherent impl
// defines update_column, update_scalar and finalize.
+ impl < T , A > ColumnStatsOps for GenericColumnStatisticsBuilder < T , A >
81
+ where
82
+ T : ValueType + Send + Sync ,
83
+ T :: Scalar : Send + Sync ,
84
+ A : ColumnStatisticsAdapter < T > + ' static ,
85
// Scalar references of any two lifetimes must be comparable with each other.
+ for < ' a , ' b > T :: ScalarRef < ' a > : PartialOrd < T :: ScalarRef < ' b > > ,
86
+ {
87
+ fn update_column ( & mut self , column : & Column ) {
88
+ GenericColumnStatisticsBuilder :: update_column ( self , column) ;
89
+ }
90
+
91
+ fn update_scalar ( & mut self , scalar : & ScalarRef , num_rows : usize , data_type : & DataType ) {
92
+ GenericColumnStatisticsBuilder :: update_scalar ( self , scalar, num_rows, data_type) ;
93
+ }
94
+
95
+ fn finalize ( self ) -> Result < ColumnStatistics > {
96
+ GenericColumnStatisticsBuilder :: finalize ( self )
97
+ }
98
+ }
45
99
46
- fn finalize ( self : Box < Self > ) -> Result < ColumnStatistics > ;
100
// Expands to a ColumnStatisticsBuilder variant wrapping a freshly created builder.
//   $data_type - expression passed to `create`
//   $variant   - enum variant to construct
//   $type      - value type parameter of the builder
+ macro_rules! create_builder_for_type {
101
// Three-argument form: wraps a CommonBuilder.
+ ( $data_type: expr, $variant: ident, $type: ty) => {
102
+ ColumnStatisticsBuilder :: $variant( CommonBuilder :: <$type>:: create( $data_type) )
103
+ } ;
104
// Form with a trailing literal `decimal` token: wraps a DecimalBuilder.
+ ( $data_type: expr, $variant: ident, $type: ty, decimal) => {
105
+ ColumnStatisticsBuilder :: $variant( DecimalBuilder :: <$type>:: create( $data_type) )
106
+ } ;
47
107
}
48
108
49
// Picks the statistics builder for `data_type`.
// The nullable wrapper is removed first (`remove_nullable`), then the inner type is matched:
// each number type, String, Date and Timestamp get a CommonBuilder variant, and Decimal
// gets a DecimalBuilder variant chosen by size.
// Any other data type hits `unreachable !` and panics.
// In this diff the removed (`-`) lines returned `Box < dyn ColumnStatisticsBuilder >` and
// matched numbers through `with_number_mapped_type !`; the added (`+`) lines return the
// ColumnStatisticsBuilder enum and list every number type explicitly.
- pub fn create_column_stats_builder ( data_type : & DataType ) -> Box < dyn ColumnStatisticsBuilder > {
109
+ pub fn create_column_stats_builder ( data_type : & DataType ) -> ColumnStatisticsBuilder {
50
110
let inner_type = data_type. remove_nullable ( ) ;
51
- with_number_mapped_type ! ( |NUM_TYPE | match inner_type {
52
- DataType :: Number ( NumberDataType :: NUM_TYPE ) => {
53
- GenericColumnStatisticsBuilder :: <NumberType <NUM_TYPE >, CommonAdapter >:: create(
54
- inner_type,
55
- )
111
+ match inner_type {
112
+ DataType :: Number ( NumberDataType :: Int8 ) => {
113
+ create_builder_for_type ! ( inner_type, Int8 , Int8Type )
114
+ }
115
+ DataType :: Number ( NumberDataType :: Int16 ) => {
116
+ create_builder_for_type ! ( inner_type, Int16 , Int16Type )
117
+ }
118
+ DataType :: Number ( NumberDataType :: Int32 ) => {
119
+ create_builder_for_type ! ( inner_type, Int32 , Int32Type )
120
+ }
121
+ DataType :: Number ( NumberDataType :: Int64 ) => {
122
+ create_builder_for_type ! ( inner_type, Int64 , Int64Type )
56
123
}
57
- DataType :: String => {
58
- GenericColumnStatisticsBuilder :: < StringType , CommonAdapter > :: create ( inner_type)
124
+ DataType :: Number ( NumberDataType :: UInt8 ) => {
125
+ create_builder_for_type ! ( inner_type, UInt8 , UInt8Type )
59
126
}
60
- DataType :: Date => {
61
- GenericColumnStatisticsBuilder :: < DateType , CommonAdapter > :: create ( inner_type)
127
+ DataType :: Number ( NumberDataType :: UInt16 ) => {
128
+ create_builder_for_type ! ( inner_type, UInt16 , UInt16Type )
62
129
}
63
- DataType :: Timestamp => {
64
- GenericColumnStatisticsBuilder :: < TimestampType , CommonAdapter > :: create ( inner_type)
130
+ DataType :: Number ( NumberDataType :: UInt32 ) => {
131
+ create_builder_for_type ! ( inner_type, UInt32 , UInt32Type )
65
132
}
133
+ DataType :: Number ( NumberDataType :: UInt64 ) => {
134
+ create_builder_for_type ! ( inner_type, UInt64 , UInt64Type )
135
+ }
136
+ DataType :: Number ( NumberDataType :: Float32 ) => {
137
+ create_builder_for_type ! ( inner_type, Float32 , Float32Type )
138
+ }
139
+ DataType :: Number ( NumberDataType :: Float64 ) => {
140
+ create_builder_for_type ! ( inner_type, Float64 , Float64Type )
141
+ }
142
+ DataType :: String => create_builder_for_type ! ( inner_type, String , StringType ) ,
143
+ DataType :: Date => create_builder_for_type ! ( inner_type, Date , DateType ) ,
144
+ DataType :: Timestamp => create_builder_for_type ! ( inner_type, Timestamp , TimestampType ) ,
66
145
// Decimal: checks the 64-bit carrier first, then the 128-bit one, otherwise falls back to 256-bit.
DataType :: Decimal ( size) => {
67
146
if size. can_carried_by_64 ( ) {
68
- GenericColumnStatisticsBuilder :: < Decimal64Type , DecimalAdapter > :: create ( inner_type )
147
+ create_builder_for_type ! ( inner_type , Decimal64 , Decimal64Type , decimal )
69
148
} else if size. can_carried_by_128 ( ) {
70
- GenericColumnStatisticsBuilder :: < Decimal128Type , DecimalAdapter > :: create ( inner_type )
149
+ create_builder_for_type ! ( inner_type , Decimal128 , Decimal128Type , decimal )
71
150
} else {
72
- GenericColumnStatisticsBuilder :: < Decimal256Type , DecimalAdapter > :: create ( inner_type )
151
+ create_builder_for_type ! ( inner_type , Decimal256 , Decimal256Type , decimal )
73
152
}
74
153
}
75
154
// Every remaining data type is treated as a caller bug and panics here.
_ => unreachable ! ( "Unsupported data type: {:?}" , data_type) ,
76
- } )
155
+ }
77
156
}
78
157
79
158
pub trait ColumnStatisticsAdapter < T : ValueType > : Send + Sync {
@@ -86,7 +165,7 @@ pub trait ColumnStatisticsAdapter<T: ValueType>: Send + Sync {
86
165
fn update_value ( value : & mut Self :: Value , scalar : T :: ScalarRef < ' _ > , ordering : Ordering ) ;
87
166
}
88
167
89
// Field-less adapter type; a ColumnStatisticsAdapter impl for it follows in this file.
// This diff changes it from private to `pub`.
- struct CommonAdapter ;
168
+ pub struct CommonAdapter ;
90
169
91
170
impl < T > ColumnStatisticsAdapter < T > for CommonAdapter
92
171
where
@@ -111,7 +190,7 @@ where
111
190
}
112
191
}
113
192
114
// Field-less adapter type; a ColumnStatisticsAdapter impl for it follows in this file.
// This diff changes it from private to `pub`.
- struct DecimalAdapter ;
193
+ pub struct DecimalAdapter ;
115
194
116
195
impl < T > ColumnStatisticsAdapter < T > for DecimalAdapter
117
196
where
@@ -137,7 +216,7 @@ where
137
216
}
138
217
}
139
218
140
- struct GenericColumnStatisticsBuilder < T , A >
219
+ pub struct GenericColumnStatisticsBuilder < T , A >
141
220
where
142
221
T : ValueType ,
143
222
A : ColumnStatisticsAdapter < T > ,
@@ -158,15 +237,15 @@ where
158
237
A : ColumnStatisticsAdapter < T > + ' static ,
159
238
for < ' a , ' b > T :: ScalarRef < ' a > : PartialOrd < T :: ScalarRef < ' b > > ,
160
239
{
161
// Builds an empty statistics builder for `data_type`: no min or max recorded yet,
// zero null count and zero in-memory size.
// In this diff the removed (`-`) lines returned the builder boxed as
// `Box < dyn ColumnStatisticsBuilder >`; the added (`+`) lines return `Self` by value.
- fn create ( data_type : DataType ) -> Box < dyn ColumnStatisticsBuilder > {
162
- Box :: new ( Self {
240
+ fn create ( data_type : DataType ) -> Self {
241
+ Self {
163
242
min : None ,
164
243
max : None ,
165
244
null_count : 0 ,
166
245
in_memory_size : 0 ,
167
246
data_type,
168
247
_phantom : PhantomData ,
169
- } )
248
+ }
170
249
}
171
250
172
251
fn add_batch < ' a , I > ( & mut self , mut iter : I )
@@ -201,15 +280,7 @@ where
201
280
self . max = Some ( A :: scalar_to_value ( max) ) ;
202
281
}
203
282
}
204
- }
205
283
206
- impl < T , A > ColumnStatisticsBuilder for GenericColumnStatisticsBuilder < T , A >
207
- where
208
- T : ValueType + Send + Sync ,
209
- T :: Scalar : Send + Sync ,
210
- A : ColumnStatisticsAdapter < T > + ' static ,
211
- for < ' a , ' b > T :: ScalarRef < ' a > : PartialOrd < T :: ScalarRef < ' b > > ,
212
- {
213
284
fn update_column ( & mut self , column : & Column ) {
214
285
self . in_memory_size += column. memory_size ( ) ;
215
286
if column. len ( ) == 0 {
@@ -265,7 +336,7 @@ where
265
336
self . add ( val. clone ( ) , val) ;
266
337
}
267
338
268
- fn finalize ( self : Box < Self > ) -> Result < ColumnStatistics > {
339
+ fn finalize ( self ) -> Result < ColumnStatistics > {
269
340
let min = if let Some ( v) = self . min {
270
341
let v = A :: value_to_scalar ( v) ;
271
342
// safe unwrap.
0 commit comments