Skip to content

Commit cff3d2b

Browse files
committed
Expose Avro parsers in Python
1 parent 0260aa4 commit cff3d2b

File tree

4 files changed

+28
-23
lines changed

4 files changed

+28
-23
lines changed

bindings/python/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@ classifiers = [
3333
name = "pyiceberg-core"
3434
readme = "project-description.md"
3535
requires-python = "~=3.9"
36-
version = "0.22123123.0"
36+
version = "0.4.0"
3737

3838
[tool.maturin]
3939
features = ["pyo3/extension-module"]

bindings/python/src/manifest.rs

Lines changed: 17 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -15,13 +15,13 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use iceberg::spec::{DataContentType, DataFile, DataFileFormat, FieldSummary, FormatVersion, Literal, Manifest, ManifestContentType, ManifestEntry, ManifestFile, ManifestList, ManifestStatus, PrimitiveLiteral, StructType};
18+
use iceberg::spec::{DataContentType, DataFile, DataFileFormat, FieldSummary, FormatVersion, Literal, Manifest, ManifestContentType, ManifestEntry, ManifestFile, ManifestList, ManifestStatus, PrimitiveLiteral, StructType, Type};
1919
use pyo3::prelude::*;
2020
use std::collections::HashMap;
2121
use std::sync::Arc;
2222
use pyo3::types::PyAny;
23-
use iceberg::Error;
24-
use iceberg::ErrorKind;
23+
use iceberg::{Error, ErrorKind};
24+
2525
#[pyclass]
2626
pub struct PyLiteral {
2727
inner: Literal,
@@ -66,6 +66,7 @@ impl PyDataFile {
6666
DataFileFormat::Avro => "avro",
6767
DataFileFormat::Orc => "orc",
6868
DataFileFormat::Parquet => "parquet",
69+
DataFileFormat::Puffin => "puffin",
6970
}
7071
}
7172

@@ -383,12 +384,21 @@ pub fn read_manifest_list(bs: &[u8], cb: &PartitionSpecProviderCallbackHolder) -
383384
let provider = move |_id| {
384385
let bound = cb.do_the_callback(_id).unwrap();
385386
let json = bound.as_str();
386-
serde_json::from_str::<StructType>(json).map_err(|_|{
387-
Error::new(
387+
388+
// I don't fully understand the deserializer here:
389+
// it works for a `Type`, but not for a `StructType`.
390+
// As a workaround, parse the JSON as a `Type` and unwrap its `Struct` variant.
391+
let res: Result<Type, _> = serde_json::from_str(json);
392+
393+
let result: Result<Option<StructType>, Error> = match res {
394+
Ok(Type::Struct(s)) => Ok(Some(s)),
395+
_ => Err(Error::new(
388396
ErrorKind::DataInvalid,
389397
format!("Invalid JSON: {}", json),
390-
)
391-
}).map(|v|Some(v))
398+
))
399+
};
400+
401+
result
392402
};
393403

394404
PyManifestList {

crates/iceberg/src/spec/datatypes.rs

Lines changed: 0 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -942,21 +942,6 @@ mod tests {
942942
)
943943
}
944944

945-
#[test]
946-
fn struct_type_empty() {
947-
let record = "{\"type\":\"struct\",\"fields\":[]}";
948-
949-
check_type_serde(
950-
record,
951-
Type::Struct(StructType {
952-
fields: vec![],
953-
id_lookup: OnceLock::default(),
954-
name_lookup: OnceLock::default(),
955-
}),
956-
)
957-
}
958-
959-
960945
#[test]
961946
fn struct_type() {
962947
let record = r#"

crates/iceberg/src/spec/manifest_list.rs

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -883,6 +883,16 @@ pub(super) mod _serde {
883883
if let Some(partitions) = partitions {
884884
if let Some(partition_type) = partition_type {
885885
let partition_types = partition_type.fields();
886+
if partitions.len() != partition_types.len() {
887+
return Err(Error::new(
888+
crate::ErrorKind::DataInvalid,
889+
format!(
890+
"Invalid partition spec. Expected {} fields, got {}",
891+
partition_types.len(),
892+
partitions.len()
893+
),
894+
));
895+
}
886896
partitions
887897
.into_iter()
888898
.zip(partition_types)

0 commit comments

Comments
 (0)