Skip to content

Commit 645ed97

Browse files
authored
Merge branch 'main' into fix-agg2
2 parents c9bf3a4 + 009bd0f commit 645ed97

File tree

5 files changed

+49
-17
lines changed

5 files changed

+49
-17
lines changed

src/query/storages/hive/hive/src/converters.rs

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ pub fn try_into_table_info(
8080
} else {
8181
None
8282
};
83-
let schema = Arc::new(try_into_schema(fields, partition_keys.clone())?);
83+
let schema = Arc::new(try_into_schema(fields)?);
8484

8585
let location = if let Some(storage) = &hms_table.sd {
8686
storage
@@ -125,19 +125,12 @@ pub fn try_into_table_info(
125125
Ok(table_info)
126126
}
127127

128-
fn try_into_schema(
129-
hive_fields: Vec<hms::FieldSchema>,
130-
partition_keys: Option<Vec<String>>,
131-
) -> Result<DataSchema> {
128+
fn try_into_schema(hive_fields: Vec<hms::FieldSchema>) -> Result<DataSchema> {
132129
let mut fields = Vec::new();
133-
let partition_keys = partition_keys.unwrap_or_default();
134130
for field in hive_fields {
135131
let name = field.name.unwrap_or_default();
136132
let type_name = field.type_.unwrap_or_default();
137-
let data_type = match partition_keys.contains(&name) {
138-
true => try_from_filed_type_name(type_name)?,
139-
false => NullableType::new_impl(try_from_filed_type_name(type_name)?),
140-
};
133+
let data_type = NullableType::new_impl(try_from_filed_type_name(type_name)?);
141134
let field = DataField::new(&name, data_type);
142135
fields.push(field);
143136
}

src/query/storages/hive/hive/src/hive_block_filter.rs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ use common_storages_table_meta::meta::ColumnStatistics;
2929
use common_storages_table_meta::meta::StatisticsOfColumns;
3030

3131
use crate::hive_parquet_block_reader::HiveParquetBlockReader;
32+
use crate::hive_table::HIVE_DEFAULT_PARTITION;
3233

3334
#[derive(Clone)]
3435
pub struct HiveBlockFilter {
@@ -89,11 +90,18 @@ impl HiveBlockFilter {
8990

9091
for (p_key, p_value) in part_columns {
9192
if let Ok(idx) = self.data_schema.index_of(&p_key) {
92-
let v = DataValue::String(p_value.as_bytes().to_vec());
93+
let mut null_count = 0;
94+
let v = if p_value == HIVE_DEFAULT_PARTITION {
95+
null_count = row_group.num_rows();
96+
DataValue::Null
97+
} else {
98+
DataValue::String(p_value.as_bytes().to_vec())
99+
};
100+
93101
let col_stats = ColumnStatistics {
94102
min: v.clone(),
95103
max: v,
96-
null_count: 0,
104+
null_count: null_count as u64,
97105
in_memory_size: 0,
98106
distinct_of_values: None,
99107
};

src/query/storages/hive/hive/src/hive_partition_filler.rs

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,20 @@
1414

1515
use std::sync::Arc;
1616

17+
use common_arrow::arrow::bitmap::Bitmap;
1718
use common_datablocks::DataBlock;
1819
use common_datavalues::ColumnRef;
1920
use common_datavalues::ConstColumn;
2021
use common_datavalues::DataField;
2122
use common_datavalues::DataTypeImpl;
23+
use common_datavalues::NullableColumn;
2224
use common_datavalues::Series;
2325
use common_datavalues::SeriesFrom;
2426
use common_exception::ErrorCode;
2527
use common_exception::Result;
2628

2729
use crate::hive_partition::HivePartInfo;
30+
use crate::hive_table::HIVE_DEFAULT_PARTITION;
2831

2932
#[derive(Debug, Clone)]
3033
pub struct HivePartitionFiller {
@@ -39,8 +42,17 @@ macro_rules! generate_primitive_column {
3942
}
4043

4144
fn generate_string_column(num_rows: usize, value: String) -> Result<ColumnRef> {
45+
let validity = if value == HIVE_DEFAULT_PARTITION {
46+
Some(Bitmap::from(vec![false]))
47+
} else {
48+
None
49+
};
4250
let column = Series::from_data(vec![value]);
43-
Ok(Arc::new(ConstColumn::new(column, num_rows)))
51+
52+
Ok(Arc::new(ConstColumn::new(
53+
NullableColumn::wrap_inner(column, validity),
54+
num_rows,
55+
)))
4456
}
4557

4658
impl HivePartitionFiller {
@@ -54,7 +66,11 @@ impl HivePartitionFiller {
5466
value: String,
5567
field: &DataField,
5668
) -> Result<ColumnRef> {
57-
match field.data_type().clone() {
69+
let t = match field.data_type() {
70+
DataTypeImpl::Nullable(v) => v.inner_type(),
71+
_ => field.data_type(),
72+
};
73+
match t {
5874
DataTypeImpl::String(_) => generate_string_column(num_rows, value),
5975
DataTypeImpl::Int8(_) => generate_primitive_column!(i8, num_rows, value),
6076
DataTypeImpl::Int16(_) => generate_primitive_column!(i16, num_rows, value),

src/query/storages/hive/hive/src/hive_partition_pruner.rs

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ use common_storages_index::range_filter::RangeFilter;
2727
use common_storages_table_meta::meta::ColumnStatistics;
2828
use common_storages_table_meta::meta::StatisticsOfColumns;
2929

30+
use crate::hive_table::HIVE_DEFAULT_PARTITION;
31+
3032
pub struct HivePartitionPruner {
3133
pub ctx: Arc<dyn TableContext>,
3234
pub filters: Vec<Expression>,
@@ -54,9 +56,21 @@ impl HivePartitionPruner {
5456
for (index, singe_value) in partition.split('/').enumerate() {
5557
let kv = singe_value.split('=').collect::<Vec<&str>>();
5658
let field = self.partition_schema.fields()[index].clone();
59+
let t = match field.data_type() {
60+
DataTypeImpl::Nullable(v) => v.inner_type(),
61+
_ => field.data_type(),
62+
};
5763

58-
let v = match field.data_type() {
59-
DataTypeImpl::String(_) => DataValue::String(kv[1].as_bytes().to_vec()),
64+
let mut null_count = 0;
65+
let v = match t {
66+
DataTypeImpl::String(_) => {
67+
if kv[1] == HIVE_DEFAULT_PARTITION {
68+
null_count = 1;
69+
DataValue::Null
70+
} else {
71+
DataValue::String(kv[1].as_bytes().to_vec())
72+
}
73+
}
6074
DataTypeImpl::Int8(_)
6175
| DataTypeImpl::Int16(_)
6276
| DataTypeImpl::Int32(_)
@@ -79,7 +93,7 @@ impl HivePartitionPruner {
7993
let column_stats = ColumnStatistics {
8094
min: v.clone(),
8195
max: v,
82-
null_count: 0,
96+
null_count,
8397
in_memory_size: 0,
8498
distinct_of_values: None,
8599
};

src/query/storages/hive/hive/src/hive_table.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ use crate::HiveBlockFilter;
6262
use crate::HiveFileSplitter;
6363

6464
pub const HIVE_TABLE_ENGIE: &str = "hive";
65+
pub const HIVE_DEFAULT_PARTITION: &str = "__HIVE_DEFAULT_PARTITION__";
6566

6667
pub struct HiveTable {
6768
table_info: TableInfo,

0 commit comments

Comments
 (0)