Skip to content

Commit df07537

Browse files
authored
fix: incorrect storage descriptor when creating tables in GlueCatalog (#1429)
## Which issue does this PR close? Fixes two bugs in the `GlueCatalogs` table creation that caused inconsistency with Iceberg core and PyIceberg implementations: * `StorageDescriptor.Location` incorrectly set to metadata file path instead of table base location * `iceberg.field.optional` parameter logic inverted - was setting it to field.required instead of !field.required Iceberg ref: * location: https://github.com/apache/iceberg/blob/main/aws/src/main/java/org/apache/iceberg/aws/glue/IcebergToGlueConverter.java#L280 * field.optional: https://github.com/apache/iceberg/blob/main/aws/src/main/java/org/apache/iceberg/aws/glue/IcebergToGlueConverter.java#L377 ## What changes are included in this PR? ### Location #### Before: ``` { "StorageDescriptor": { "Location": "s3://bucket/table/metadata/00000-uuid.metadata.json" }, "Parameters": { "metadata_location": "s3://bucket/table/metadata/00000-uuid.metadata.json" } } ``` #### After: ``` { "StorageDescriptor": { "Location": "s3://bucket/table" }, "Parameters": { "metadata_location": "s3://bucket/table/metadata/00000-uuid.metadata.json" } } ``` ### Schema (with required field) #### Rust Schema ``` NestedField::required(1, "foo", Type::Primitive(PrimitiveType::String)).into(), ``` #### Before: ``` { "Name": "foo", "Type": "string", "Parameters": { "iceberg.field.current": "true", "iceberg.field.id": "2", "iceberg.field.optional": "true" <--- } } ``` #### After: ``` { "Name": "foo", "Type": "string", "Parameters": { "iceberg.field.current": "true", "iceberg.field.id": "2", "iceberg.field.optional": "false" <--- } } ``` ## Are these changes tested? * Repaired the existing test * Tested manually with GlueCatalog.
1 parent 59948fc commit df07537

File tree

2 files changed

+62
-19
lines changed

2 files changed

+62
-19
lines changed

crates/catalog/glue/src/schema.rs

Lines changed: 59 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ impl SchemaVisitor for GlueSchemaBuilder {
118118
(ICEBERG_FIELD_ID.to_string(), format!("{}", field.id)),
119119
(
120120
ICEBERG_FIELD_OPTIONAL.to_string(),
121-
format!("{}", field.required).to_lowercase(),
121+
format!("{}", !field.required).to_lowercase(),
122122
),
123123
(
124124
ICEBERG_FIELD_CURRENT.to_string(),
@@ -209,10 +209,11 @@ mod tests {
209209
name: impl Into<String>,
210210
r#type: impl Into<String>,
211211
id: impl Into<String>,
212+
optional: bool,
212213
) -> Result<Column> {
213214
let parameters = HashMap::from([
214215
(ICEBERG_FIELD_ID.to_string(), id.into()),
215-
(ICEBERG_FIELD_OPTIONAL.to_string(), "true".to_string()),
216+
(ICEBERG_FIELD_OPTIONAL.to_string(), optional.to_string()),
216217
(ICEBERG_FIELD_CURRENT.to_string(), "true".to_string()),
217218
]);
218219

@@ -318,19 +319,19 @@ mod tests {
318319
let result = GlueSchemaBuilder::from_iceberg(&metadata)?.build();
319320

320321
let expected = vec![
321-
create_column("c1", "boolean", "1")?,
322-
create_column("c2", "int", "2")?,
323-
create_column("c3", "bigint", "3")?,
324-
create_column("c4", "float", "4")?,
325-
create_column("c5", "double", "5")?,
326-
create_column("c6", "decimal(2,2)", "6")?,
327-
create_column("c7", "date", "7")?,
328-
create_column("c8", "string", "8")?,
329-
create_column("c9", "timestamp", "9")?,
330-
create_column("c10", "string", "10")?,
331-
create_column("c11", "string", "11")?,
332-
create_column("c12", "binary", "12")?,
333-
create_column("c13", "binary", "13")?,
322+
create_column("c1", "boolean", "1", false)?,
323+
create_column("c2", "int", "2", false)?,
324+
create_column("c3", "bigint", "3", false)?,
325+
create_column("c4", "float", "4", false)?,
326+
create_column("c5", "double", "5", false)?,
327+
create_column("c6", "decimal(2,2)", "6", false)?,
328+
create_column("c7", "date", "7", false)?,
329+
create_column("c8", "string", "8", false)?,
330+
create_column("c9", "timestamp", "9", false)?,
331+
create_column("c10", "string", "10", false)?,
332+
create_column("c11", "string", "11", false)?,
333+
create_column("c12", "binary", "12", false)?,
334+
create_column("c13", "binary", "13", false)?,
334335
];
335336

336337
assert_eq!(result, expected);
@@ -378,6 +379,7 @@ mod tests {
378379
"person",
379380
"struct<name:string, age:int>",
380381
"1",
382+
false,
381383
)?];
382384

383385
assert_eq!(result, expected);
@@ -432,6 +434,7 @@ mod tests {
432434
"location",
433435
"array<struct<latitude:float, longitude:float>>",
434436
"1",
437+
false,
435438
)?];
436439

437440
assert_eq!(result, expected);
@@ -475,10 +478,50 @@ mod tests {
475478

476479
let result = GlueSchemaBuilder::from_iceberg(&metadata)?.build();
477480

478-
let expected = vec![create_column("quux", "map<string,map<string,int>>", "1")?];
481+
let expected = vec![create_column(
482+
"quux",
483+
"map<string,map<string,int>>",
484+
"1",
485+
false,
486+
)?];
479487

480488
assert_eq!(result, expected);
481489

482490
Ok(())
483491
}
492+
493+
#[test]
494+
fn test_schema_with_optional_fields() -> Result<()> {
495+
let record = r#"{
496+
"type": "struct",
497+
"schema-id": 1,
498+
"fields": [
499+
{
500+
"id": 1,
501+
"name": "required_field",
502+
"required": true,
503+
"type": "string"
504+
},
505+
{
506+
"id": 2,
507+
"name": "optional_field",
508+
"required": false,
509+
"type": "int"
510+
}
511+
]
512+
}"#;
513+
514+
let schema = serde_json::from_str::<Schema>(record)?;
515+
let metadata = create_metadata(schema)?;
516+
517+
let result = GlueSchemaBuilder::from_iceberg(&metadata)?.build();
518+
519+
let expected = vec![
520+
create_column("required_field", "string", "1", false)?,
521+
create_column("optional_field", "int", "2", true)?,
522+
];
523+
524+
assert_eq!(result, expected);
525+
Ok(())
526+
}
484527
}

crates/catalog/glue/src/utils.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ pub(crate) fn convert_to_glue_table(
151151

152152
let storage_descriptor = StorageDescriptor::builder()
153153
.set_columns(Some(glue_schema))
154-
.location(&metadata_location)
154+
.location(metadata.location().to_string())
155155
.build();
156156

157157
let mut parameters = HashMap::from([
@@ -345,7 +345,7 @@ mod tests {
345345

346346
let parameters = HashMap::from([
347347
(ICEBERG_FIELD_ID.to_string(), "1".to_string()),
348-
(ICEBERG_FIELD_OPTIONAL.to_string(), "true".to_string()),
348+
(ICEBERG_FIELD_OPTIONAL.to_string(), "false".to_string()),
349349
(ICEBERG_FIELD_CURRENT.to_string(), "true".to_string()),
350350
]);
351351

@@ -359,7 +359,7 @@ mod tests {
359359

360360
let storage_descriptor = StorageDescriptor::builder()
361361
.set_columns(Some(vec![column]))
362-
.location(&metadata_location)
362+
.location(metadata.location())
363363
.build();
364364

365365
let result =

0 commit comments

Comments
 (0)