Skip to content

Commit fbb7592

Browse files
committed
test dict only
1 parent 9a0182a commit fbb7592

File tree

1 file changed

+16
-33
lines changed

1 file changed

+16
-33
lines changed

src/query/storages/common/blocks/src/parquet_rs.rs

Lines changed: 16 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ fn choose_compression_scheme(
9292
table_schema: &TableSchema,
9393
stat: &StatisticsOfColumns,
9494
) -> Result<WriterPropertiesBuilder> {
95-
for ((parquet_field, table_field), col) in parquet_fields
95+
for ((parquet_field, table_field), _col) in parquet_fields
9696
.iter()
9797
.zip(table_schema.fields.iter())
9898
.zip(block.columns())
@@ -104,19 +104,21 @@ fn choose_compression_scheme(
104104
type_length: _,
105105
scale: _,
106106
precision: _,
107-
} => {
108-
let distinct_of_values = stat
109-
.get(&table_field.column_id)
110-
.and_then(|stat| stat.distinct_of_values);
111-
let num_rows = block.num_rows();
112-
if can_apply_dict_encoding(physical_type, distinct_of_values, num_rows, col)? {
113-
let col_path = ColumnPath::new(vec![table_field.name().clone()]);
114-
props = props.set_column_dictionary_enabled(col_path, true);
115-
} else if can_apply_delta_binary_pack(physical_type, col, num_rows)? {
116-
let col_path = ColumnPath::new(vec![table_field.name().clone()]);
117-
props = props.set_column_encoding(col_path, Encoding::DELTA_BINARY_PACKED);
107+
} => match physical_type {
108+
PhysicalType::BYTE_ARRAY | PhysicalType::FIXED_LEN_BYTE_ARRAY => {
109+
let ndv = stat
110+
.get(&table_field.column_id)
111+
.and_then(|stat| stat.distinct_of_values);
112+
let num_rows = block.num_rows();
113+
if let Some(ndv) = ndv {
114+
if num_rows as f64 / ndv as f64 > 10.0 {
115+
let col_path = ColumnPath::new(vec![table_field.name().clone()]);
116+
props = props.set_column_dictionary_enabled(col_path, true);
117+
}
118+
}
118119
}
119-
}
120+
_ => {}
121+
},
120122
Type::GroupType {
121123
basic_info: _,
122124
fields: _,
@@ -126,26 +128,7 @@ fn choose_compression_scheme(
126128
Ok(props)
127129
}
128130

129-
fn can_apply_dict_encoding(
130-
physical_type: &PhysicalType,
131-
distinct_of_values: Option<u64>,
132-
num_rows: usize,
133-
col: &BlockEntry,
134-
) -> Result<bool> {
135-
const LOW_CARDINALITY_THRESHOLD: f64 = 10.0;
136-
const AVG_BYTES_PER_VALUE: f64 = 10.0;
137-
if !matches!(physical_type, PhysicalType::BYTE_ARRAY) {
138-
return Ok(false);
139-
}
140-
let is_low_cardinality = distinct_of_values
141-
.is_some_and(|ndv| num_rows as f64 / ndv as f64 > LOW_CARDINALITY_THRESHOLD);
142-
let column = col.value.convert_to_full_column(&col.data_type, num_rows);
143-
let memory_size = column.memory_size();
144-
let total_bytes = memory_size - num_rows * 8;
145-
let avg_bytes_per_value = total_bytes as f64 / num_rows as f64;
146-
Ok(is_low_cardinality && avg_bytes_per_value < AVG_BYTES_PER_VALUE)
147-
}
148-
131+
#[allow(dead_code)]
149132
fn can_apply_delta_binary_pack(
150133
physical_type: &PhysicalType,
151134
col: &BlockEntry,

0 commit comments

Comments
 (0)