Skip to content

Commit 85c68b9

Browse files
authored
refactor: iceberg::spec::values::Struct to remove bitvec (#1203)
1 parent be65bdc commit 85c68b9

File tree

9 files changed

+38
-100
lines changed

9 files changed

+38
-100
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ jobs:
155155
cargo generate-lockfile -Z direct-minimal-versions -Z minimal-versions
156156
# Some dependencies don't correctly specify a minimal version for their dependencies and will fail to build.
157157
# So we update these transitive dependencies here.
158-
cargo update tap faststr metainfo linkedbytes
158+
cargo update faststr metainfo linkedbytes
159159
# munge 0.4.2 will cause fail to use 1.7.1 so specify the correct version here.
160160
cargo update -p munge --precise 0.4.1
161161
- name: Setup MSRV Rust toolchain

Cargo.lock

Lines changed: 1 addition & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,6 @@ async-trait = "0.1.86"
5555
aws-config = "1"
5656
aws-sdk-glue = "1.39"
5757
bimap = "0.6"
58-
bitvec = "1.0.1"
5958
bytes = "1.6"
6059
chrono = "0.4.38"
6160
ctor = "0.2.8"

crates/iceberg/Cargo.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ storage-all = ["storage-memory", "storage-fs", "storage-s3", "storage-gcs"]
3535
storage-fs = ["opendal/services-fs"]
3636
storage-gcs = ["opendal/services-gcs"]
3737
storage-memory = ["opendal/services-memory"]
38-
storage-s3 = ["opendal/services-s3"]
3938
storage-oss = ["opendal/services-oss"]
39+
storage-s3 = ["opendal/services-s3"]
4040

4141
async-std = ["dep:async-std"]
4242
tokio = ["tokio/rt-multi-thread"]
@@ -56,7 +56,6 @@ arrow-string = { workspace = true }
5656
async-std = { workspace = true, optional = true, features = ["attributes"] }
5757
async-trait = { workspace = true }
5858
bimap = { workspace = true }
59-
bitvec = { workspace = true }
6059
bytes = { workspace = true }
6160
chrono = { workspace = true }
6261
derive_builder = { workspace = true }

crates/iceberg/src/arrow/schema.rs

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,6 @@ use arrow_array::{
2828
Int64Array, PrimitiveArray, Scalar, StringArray, TimestampMicrosecondArray,
2929
};
3030
use arrow_schema::{DataType, Field, Fields, Schema as ArrowSchema, TimeUnit};
31-
use bitvec::macros::internal::funty::Fundamental;
3231
use num_bigint::BigInt;
3332
use parquet::arrow::PARQUET_FIELD_ID_META_KEY;
3433
use parquet::file::statistics::Statistics;
@@ -662,10 +661,10 @@ pub(crate) fn get_arrow_datum(datum: &Datum) -> Result<Box<dyn ArrowDatum + Send
662661
Ok(Box::new(Int64Array::new_scalar(*value)))
663662
}
664663
(PrimitiveType::Float, PrimitiveLiteral::Float(value)) => {
665-
Ok(Box::new(Float32Array::new_scalar(value.as_f32())))
664+
Ok(Box::new(Float32Array::new_scalar(value.to_f32().unwrap())))
666665
}
667666
(PrimitiveType::Double, PrimitiveLiteral::Double(value)) => {
668-
Ok(Box::new(Float64Array::new_scalar(value.as_f64())))
667+
Ok(Box::new(Float64Array::new_scalar(value.to_f64().unwrap())))
669668
}
670669
(PrimitiveType::String, PrimitiveLiteral::String(value)) => {
671670
Ok(Box::new(StringArray::new_scalar(value.as_str())))

crates/iceberg/src/expr/accessor.rs

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -58,20 +58,18 @@ impl StructAccessor {
5858

5959
pub(crate) fn get<'a>(&'a self, container: &'a Struct) -> Result<Option<Datum>> {
6060
match &self.inner {
61-
None => {
62-
if container.is_null_at_index(self.position) {
63-
Ok(None)
64-
} else if let Literal::Primitive(literal) = &container[self.position] {
61+
None => match &container[self.position] {
62+
None => Ok(None),
63+
Some(Literal::Primitive(literal)) => {
6564
Ok(Some(Datum::new(self.r#type().clone(), literal.clone())))
66-
} else {
67-
Err(Error::new(
68-
ErrorKind::Unexpected,
69-
"Expected Literal to be Primitive",
70-
))
7165
}
72-
}
66+
Some(_) => Err(Error::new(
67+
ErrorKind::Unexpected,
68+
"Expected Literal to be Primitive",
69+
)),
70+
},
7371
Some(inner) => {
74-
if let Literal::Struct(wrapped) = &container[self.position] {
72+
if let Some(Literal::Struct(wrapped)) = &container[self.position] {
7573
inner.get(wrapped)
7674
} else {
7775
Err(Error::new(

crates/iceberg/src/spec/partition.rs

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -163,11 +163,7 @@ impl PartitionSpec {
163163
.iter()
164164
.enumerate()
165165
.map(|(i, field)| {
166-
let value = if data.is_null_at_index(i) {
167-
None
168-
} else {
169-
Some(&data.fields()[i])
170-
};
166+
let value = data[i].as_ref();
171167
format!(
172168
"{}={}",
173169
field.name,

crates/iceberg/src/spec/values.rs

Lines changed: 10 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ use std::ops::Index;
2727
use std::str::FromStr;
2828

2929
pub use _serde::RawLiteral;
30-
use bitvec::vec::BitVec;
3130
use chrono::{DateTime, NaiveDate, NaiveDateTime, NaiveTime, TimeZone, Utc};
3231
use num_bigint::BigInt;
3332
use ordered_float::OrderedFloat;
@@ -1726,102 +1725,53 @@ impl Literal {
17261725
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
17271726
pub struct Struct {
17281727
/// Vector to store the field values
1729-
fields: Vec<Literal>,
1730-
/// Null bitmap
1731-
null_bitmap: BitVec,
1728+
fields: Vec<Option<Literal>>,
17321729
}
17331730

17341731
impl Struct {
17351732
/// Create a empty struct.
17361733
pub fn empty() -> Self {
1737-
Self {
1738-
fields: Vec::new(),
1739-
null_bitmap: BitVec::new(),
1740-
}
1734+
Self { fields: Vec::new() }
17411735
}
17421736

17431737
/// Create a iterator to read the field in order of field_value.
17441738
pub fn iter(&self) -> impl ExactSizeIterator<Item = Option<&Literal>> {
1745-
self.null_bitmap.iter().zip(self.fields.iter()).map(
1746-
|(null, value)| {
1747-
if *null {
1748-
None
1749-
} else {
1750-
Some(value)
1751-
}
1752-
},
1753-
)
1739+
self.fields.iter().map(|field| field.as_ref())
17541740
}
17551741

17561742
/// returns true if the field at position `index` is null
17571743
pub fn is_null_at_index(&self, index: usize) -> bool {
1758-
self.null_bitmap[index]
1744+
self.fields[index].is_none()
17591745
}
17601746

17611747
/// Return fields in the struct.
1762-
pub fn fields(&self) -> &[Literal] {
1748+
pub fn fields(&self) -> &[Option<Literal>] {
17631749
&self.fields
17641750
}
17651751
}
17661752

17671753
impl Index<usize> for Struct {
1768-
type Output = Literal;
1754+
type Output = Option<Literal>;
17691755

17701756
fn index(&self, idx: usize) -> &Self::Output {
17711757
&self.fields[idx]
17721758
}
17731759
}
17741760

1775-
/// An iterator that moves out of a struct.
1776-
pub struct StructValueIntoIter {
1777-
null_bitmap: bitvec::boxed::IntoIter,
1778-
fields: std::vec::IntoIter<Literal>,
1779-
}
1780-
1781-
impl Iterator for StructValueIntoIter {
1782-
type Item = Option<Literal>;
1783-
1784-
fn next(&mut self) -> Option<Self::Item> {
1785-
match (self.null_bitmap.next(), self.fields.next()) {
1786-
(Some(null), Some(value)) => Some(if null { None } else { Some(value) }),
1787-
_ => None,
1788-
}
1789-
}
1790-
}
1791-
17921761
impl IntoIterator for Struct {
17931762
type Item = Option<Literal>;
17941763

1795-
type IntoIter = StructValueIntoIter;
1764+
type IntoIter = std::vec::IntoIter<Option<Literal>>;
17961765

17971766
fn into_iter(self) -> Self::IntoIter {
1798-
StructValueIntoIter {
1799-
null_bitmap: self.null_bitmap.into_iter(),
1800-
fields: self.fields.into_iter(),
1801-
}
1767+
self.fields.into_iter()
18021768
}
18031769
}
18041770

18051771
impl FromIterator<Option<Literal>> for Struct {
18061772
fn from_iter<I: IntoIterator<Item = Option<Literal>>>(iter: I) -> Self {
1807-
let mut fields = Vec::new();
1808-
let mut null_bitmap = BitVec::new();
1809-
1810-
for value in iter.into_iter() {
1811-
match value {
1812-
Some(value) => {
1813-
fields.push(value);
1814-
null_bitmap.push(false)
1815-
}
1816-
None => {
1817-
fields.push(Literal::Primitive(PrimitiveLiteral::Boolean(false)));
1818-
null_bitmap.push(true)
1819-
}
1820-
}
1821-
}
18221773
Struct {
1823-
fields,
1824-
null_bitmap,
1774+
fields: iter.into_iter().collect(),
18251775
}
18261776
}
18271777
}
@@ -3402,7 +3352,7 @@ mod tests {
34023352
match (&desered_literal, &struct_literal) {
34033353
(Literal::Struct(desered), Literal::Struct(expected)) => {
34043354
match (&desered.fields[0], &expected.fields[0]) {
3405-
(Literal::Map(desered), Literal::Map(expected)) => {
3355+
(Some(Literal::Map(desered)), Some(Literal::Map(expected))) => {
34063356
assert!(desered.has_same_content(expected))
34073357
}
34083358
_ => {

crates/iceberg/src/transaction/snapshot.rs

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -105,21 +105,19 @@ impl<'a> SnapshotProduceAction<'a> {
105105
}
106106

107107
for (value, field) in partition_value.fields().iter().zip(partition_type.fields()) {
108-
if !field
109-
.field_type
110-
.as_primitive_type()
111-
.ok_or_else(|| {
112-
Error::new(
113-
ErrorKind::Unexpected,
114-
"Partition field should only be primitive type.",
115-
)
116-
})?
117-
.compatible(&value.as_primitive_literal().unwrap())
118-
{
119-
return Err(Error::new(
120-
ErrorKind::DataInvalid,
121-
"Partition value is not compatible partition type",
122-
));
108+
let field = field.field_type.as_primitive_type().ok_or_else(|| {
109+
Error::new(
110+
ErrorKind::Unexpected,
111+
"Partition field should only be primitive type.",
112+
)
113+
})?;
114+
if let Some(value) = value {
115+
if !field.compatible(&value.as_primitive_literal().unwrap()) {
116+
return Err(Error::new(
117+
ErrorKind::DataInvalid,
118+
"Partition value is not compatible partition type",
119+
));
120+
}
123121
}
124122
}
125123
Ok(())

0 commit comments

Comments
 (0)