Skip to content

Commit 16bb978

Browse files
chenzl25 authored and xxchan committed
feat: expose data file serialized (#26)
* expose data file serialized
* fix
1 parent fa6caa9 commit 16bb978

File tree

3 files changed

+28
-19
lines changed

3 files changed

+28
-19
lines changed

crates/iceberg/src/spec/manifest/_serde.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,8 @@ impl ManifestEntryV1 {
9898

9999
#[serde_as]
100100
#[derive(Serialize, Deserialize)]
101-
pub(super) struct DataFileSerde {
101+
/// Data file
102+
pub struct DataFileSerde {
102103
#[serde(default)]
103104
content: i32,
104105
file_path: String,
@@ -122,6 +123,7 @@ pub(super) struct DataFileSerde {
122123
}
123124

124125
impl DataFileSerde {
126+
/// Try to convert a `super::DataFile` to a `DataFileSerde`.
125127
pub fn try_from(
126128
value: super::DataFile,
127129
partition_type: &StructType,
@@ -152,6 +154,7 @@ impl DataFileSerde {
152154
})
153155
}
154156

157+
/// Try to convert a `DataFileSerde` to a `super::DataFile`.
155158
pub fn try_into(
156159
self,
157160
partition_spec_id: i32,
@@ -220,7 +223,7 @@ impl DataFileSerde {
220223
}
221224

222225
#[serde_as]
223-
#[derive(Serialize, Deserialize)]
226+
#[derive(Serialize, Deserialize, Clone)]
224227
#[cfg_attr(test, derive(Debug, PartialEq, Eq))]
225228
struct BytesEntry {
226229
key: i32,
@@ -261,7 +264,7 @@ fn to_bytes_entry(v: impl IntoIterator<Item = (i32, Datum)>) -> Result<Vec<Bytes
261264
Ok(bs)
262265
}
263266

264-
#[derive(Serialize, Deserialize)]
267+
#[derive(Serialize, Deserialize, Clone)]
265268
#[cfg_attr(test, derive(Debug, PartialEq, Eq))]
266269
struct I64Entry {
267270
key: i32,

crates/iceberg/src/spec/manifest/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
// under the License.
1717

1818
mod _serde;
19+
/// Data file
20+
pub use _serde::DataFileSerde as SerializedDataFile;
1921

2022
mod data_file;
2123
pub use data_file::*;

crates/iceberg/src/spec/values.rs

Lines changed: 20 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2231,6 +2231,8 @@ mod timestamptz {
22312231
}
22322232

22332233
mod _serde {
2234+
use std::collections::HashMap;
2235+
22342236
use serde::de::Visitor;
22352237
use serde::ser::{SerializeMap, SerializeSeq, SerializeStruct};
22362238
use serde::{Deserialize, Serialize};
@@ -2241,7 +2243,7 @@ mod _serde {
22412243
use crate::spec::{PrimitiveType, Type, MAP_KEY_FIELD_NAME, MAP_VALUE_FIELD_NAME};
22422244
use crate::{Error, ErrorKind};
22432245

2244-
#[derive(SerializeDerive, DeserializeDerive, Debug)]
2246+
#[derive(SerializeDerive, DeserializeDerive, Debug, Clone)]
22452247
#[serde(transparent)]
22462248
/// Raw literal representation used for serde. The serialize way is used for Avro serializer.
22472249
pub struct RawLiteral(RawLiteralEnum);
@@ -2826,22 +2828,24 @@ mod _serde {
28262828
optional: _,
28272829
}) => match ty {
28282830
Type::Struct(struct_ty) => {
2829-
let iters: Vec<Option<Literal>> = required
2830-
.into_iter()
2831-
.map(|(field_name, value)| {
2832-
let field = struct_ty
2833-
.field_by_name(field_name.as_str())
2834-
.ok_or_else(|| {
2835-
invalid_err_with_reason(
2836-
"record",
2837-
&format!("field {} is not exist", &field_name),
2838-
)
2839-
})?;
2840-
let value = value.try_into(&field.field_type)?;
2841-
Ok(value)
2831+
let mut value_map: HashMap<String, RawLiteralEnum> =
2832+
required.into_iter().collect();
2833+
let values = struct_ty
2834+
.fields()
2835+
.iter()
2836+
.map(|f| {
2837+
if let Some(raw_value) = value_map.remove(&f.name) {
2838+
let value = raw_value.try_into(&f.field_type)?;
2839+
Ok(value)
2840+
} else {
2841+
Err(invalid_err_with_reason(
2842+
"record",
2843+
&format!("field {} is not exist", &f.name),
2844+
))
2845+
}
28422846
})
2843-
.collect::<Result<_, Error>>()?;
2844-
Ok(Some(Literal::Struct(super::Struct::from_iter(iters))))
2847+
.collect::<Result<Vec<_>, Error>>()?;
2848+
Ok(Some(Literal::Struct(super::Struct::from_iter(values))))
28452849
}
28462850
Type::Map(map_ty) => {
28472851
if *map_ty.key_field.field_type != Type::Primitive(PrimitiveType::String) {

0 commit comments

Comments
 (0)