Skip to content

Commit 35a0309

Browse files
authored
Fixed issue with table location on storage root (#3094)
closes #3091
1 parent afed563 commit 35a0309

File tree

2 files changed

+15
-2
lines changed

2 files changed

+15
-2
lines changed

src/databricks/labs/ucx/hive_metastore/locations.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,15 @@ def _external_locations(self) -> Iterable[ExternalLocation]:
178178
curr = queue.pop()
179179
num_children = len(curr.children) # 0 - take parent
180180
if curr.location and (num_children > 1 or num_children == 0):
181-
if curr.parent and num_children == 0 and not curr.is_jdbc(): # one table having the prefix
181+
# Check whether the parent location is a valid location for an external location.
182+
# If the table location is a leaf location (a folder more than 2 levels from the root)
183+
# the parent folder will be considered as the external location.
184+
# If the table location is a root location (a folder at the root level) it will be considered as
185+
# external location.
186+
187+
if (
188+
curr.parent and curr.parent.is_valid() and num_children == 0 and not curr.is_jdbc()
189+
): # one table having the prefix
182190
curr = curr.parent
183191
assert curr.location is not None
184192
external_location = ExternalLocation(curr.location, len(list(curr.all_tables())))

tests/unit/hive_metastore/test_locations.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
"s3://databricks-e2demofieldengwest/b169/b50",
2626
"s3a://databricks-datasets-oregon/delta-sharing/share/open-datasets.share",
2727
"s3n://bucket-name/path-to-file-in-bucket",
28+
"s3n://bucket-name/path-to-another-file",
29+
"s3n://bucket-name",
2830
"gcs://test_location2/test2/table2",
2931
"abfss://cont1@storagetest1.dfs.core.windows.net/test2/table3",
3032
],
@@ -164,8 +166,10 @@ def test_external_locations():
164166
tables_crawler.snapshot.return_value = [
165167
table_factory(["s3://us-east-1-dev-account-staging-uc-ext-loc-bucket-1/Location/Table", ""]),
166168
table_factory(["s3://us-east-1-dev-account-staging-uc-ext-loc-bucket-1/Location/Table2", ""]),
169+
table_factory(["s3://us-east-1-dev-account-staging-uc-ext-loc-bucket-1/Location2/Table", ""]),
167170
table_factory(["s3://us-east-1-dev-account-staging-uc-ext-loc-bucket-23/testloc/Table3", ""]),
168171
table_factory(["s3://us-east-1-dev-account-staging-uc-ext-loc-bucket-23/anotherloc/Table4", ""]),
172+
table_factory(["s3://root_location", ""]),
169173
table_factory(["gcs://test_location2/a/b/table2", ""]),
170174
table_factory(["dbfs:/mnt/ucx/database1/table1", ""]),
171175
table_factory(["/dbfs/mnt/ucx/database2/table2", ""]),
@@ -221,7 +225,8 @@ def test_external_locations():
221225
ExternalLocation('jdbc:mysql://somemysql.us-east-1.rds.amazonaws.com:3306/test_db', 1),
222226
ExternalLocation('jdbc:providerknown://somedb.us-east-1.rds.amazonaws.com:1234/test_db', 2),
223227
ExternalLocation('jdbc:providerunknown://somedb.us-east-1.rds.amazonaws.com:1234/test_db', 1),
224-
ExternalLocation('s3://us-east-1-dev-account-staging-uc-ext-loc-bucket-1/Location', 2),
228+
ExternalLocation('s3://root_location', 1),
229+
ExternalLocation('s3://us-east-1-dev-account-staging-uc-ext-loc-bucket-1', 3),
225230
ExternalLocation('s3://us-east-1-dev-account-staging-uc-ext-loc-bucket-23', 2),
226231
ExternalLocation('s3://us-east-1-ucx-container', 2),
227232
]

0 commit comments

Comments
 (0)