Skip to content

Commit e93dd04

Browse files
authored
fix: fix location generator (#1479)
## Which issue does this PR close? - It seems we enforce that the `write.data.path` must be a subdirectory of the table location, but it is unnecessary. I met this issue when integrating Databricks's managed iceberg table. - Closes #. ## What changes are included in this PR? - Use `write.data.path` directly if provided. ## Are these changes tested? <!-- Specify what test covers (unit test, integration test, etc.). If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? -->
1 parent ec92bc5 commit e93dd04

File tree

1 file changed

+15
-26
lines changed

1 file changed

+15
-26
lines changed

crates/iceberg/src/writer/file_writer/location_generator.rs

Lines changed: 15 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,8 @@
2020
use std::sync::Arc;
2121
use std::sync::atomic::AtomicU64;
2222

23+
use crate::Result;
2324
use crate::spec::{DataFileFormat, TableMetadata};
24-
use crate::{Error, ErrorKind, Result};
2525

2626
/// `LocationGenerator` used to generate the location of data file.
2727
pub trait LocationGenerator: Clone + Send + 'static {
@@ -46,29 +46,16 @@ impl DefaultLocationGenerator {
4646
/// Create a new `DefaultLocationGenerator`.
4747
pub fn new(table_metadata: TableMetadata) -> Result<Self> {
4848
let table_location = table_metadata.location();
49-
let rel_dir_path = {
50-
let prop = table_metadata.properties();
51-
let data_location = prop
52-
.get(WRITE_DATA_LOCATION)
53-
.or(prop.get(WRITE_FOLDER_STORAGE_LOCATION));
54-
if let Some(data_location) = data_location {
55-
data_location.strip_prefix(table_location).ok_or_else(|| {
56-
Error::new(
57-
ErrorKind::DataInvalid,
58-
format!(
59-
"data location {} is not a subpath of table location {}",
60-
data_location, table_location
61-
),
62-
)
63-
})?
64-
} else {
65-
DEFAULT_DATA_DIR
66-
}
49+
let prop = table_metadata.properties();
50+
let data_location = prop
51+
.get(WRITE_DATA_LOCATION)
52+
.or(prop.get(WRITE_FOLDER_STORAGE_LOCATION));
53+
let dir_path = if let Some(data_location) = data_location {
54+
data_location.clone()
55+
} else {
56+
format!("{}{}", table_location, DEFAULT_DATA_DIR)
6757
};
68-
69-
Ok(Self {
70-
dir_path: format!("{}{}", table_location, rel_dir_path),
71-
})
58+
Ok(Self { dir_path })
7259
}
7360
}
7461

@@ -222,13 +209,15 @@ pub(crate) mod test {
222209
"s3://data.db/table/data_2/part-00002-test.parquet"
223210
);
224211

225-
// test invalid data location
226212
table_metadata.properties.insert(
227213
WRITE_DATA_LOCATION.to_string(),
228214
// invalid table location
229215
"s3://data.db/data_3".to_string(),
230216
);
231-
let location_generator = super::DefaultLocationGenerator::new(table_metadata.clone());
232-
assert!(location_generator.is_err());
217+
let location_generator =
218+
super::DefaultLocationGenerator::new(table_metadata.clone()).unwrap();
219+
let location =
220+
location_generator.generate_location(&file_name_genertaor.generate_file_name());
221+
assert_eq!(location, "s3://data.db/data_3/part-00003-test.parquet");
233222
}
234223
}

0 commit comments

Comments
 (0)