
Commit ba5872f

Exclude TACL migration in table migration integration tests (#3446)

## Changes

Exclude the TACL migration from the table migration integration tests: these migrations were never asserted, and dropping them speeds up the tests and reduces flakiness.

### Linked issues

Attempts to reduce the flakiness blocking CI in #3239. Similar to #3437, in the sense that both PRs scope integration tests down to a smaller set of resources.

### Tests

- [x] modified integration tests

1 parent 22b3651 commit ba5872f
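The change itself is mechanical: each affected test now passes a `config_transform` to `installation_ctx.replace(...)` that copies the workspace config with `skip_tacl_migration=True`. A minimal, self-contained sketch of that pattern — the `WorkspaceConfig` fields and `replace_config` helper below are illustrative stand-ins, not the real ucx API:

```python
import dataclasses


@dataclasses.dataclass(frozen=True)
class WorkspaceConfig:
    """Stand-in for ucx's workspace config; only the field relevant here."""

    inventory_database: str
    skip_tacl_migration: bool = False


def replace_config(config: WorkspaceConfig, config_transform) -> WorkspaceConfig:
    # Mimics what installation_ctx.replace(config_transform=...) does with the
    # config: apply the transform to get a fresh copy, leave the original alone.
    return config_transform(config)


base = WorkspaceConfig(inventory_database="ucx")
patched = replace_config(base, lambda wc: dataclasses.replace(wc, skip_tacl_migration=True))
assert patched.skip_tacl_migration and not base.skip_tacl_migration
```

Because `dataclasses.replace` returns a copy, each test opts out of TACL migration without mutating state shared with other tests.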

3 files changed: +125 −114 lines changed

tests/integration/conftest.py
Lines changed: 47 additions & 39 deletions

@@ -1,14 +1,15 @@
 import json
-from collections.abc import Callable, Generator
 import functools
 import collections
 import os
 import logging
+import shutil
+import subprocess
+from collections.abc import Callable, Generator
 from dataclasses import replace
 from datetime import timedelta
 from functools import cached_property
-import shutil
-import subprocess
+from typing import Literal
 
 import pytest  # pylint: disable=wrong-import-order
 from databricks.labs.blueprint.commands import CommandExecutor
@@ -1263,42 +1264,49 @@ def prepare_regular_tables(context, external_csv, schema) -> dict[str, TableInfo
 
 
 @pytest.fixture
-def prepare_tables_for_migration(
-    ws, installation_ctx, make_catalog, make_random, make_mounted_location, env_or_skip, make_storage_dir, request
-) -> tuple[dict[str, TableInfo], SchemaInfo]:
-    # Here we use pytest indirect parametrization, so the test function can pass arguments to this fixture and the
-    # arguments will be available in the request.param. If the argument is "hiveserde", we will prepare hiveserde
-    # tables, otherwise we will prepare regular tables.
-    # see documents here for details https://docs.pytest.org/en/8.1.x/example/parametrize.html#indirect-parametrization
-    scenario = request.param
-    is_hiveserde = scenario == "hiveserde"
-    random = make_random(5).lower()
-    # create external and managed tables to be migrated
-    if scenario == "hiveserde":
-        schema = installation_ctx.make_schema(catalog_name="hive_metastore", name=f"hiveserde_in_place_{random}")
-        table_base_dir = make_storage_dir(
-            path=f'dbfs:/mnt/{env_or_skip("TEST_MOUNT_NAME")}/a/hiveserde_in_place_{random}'
-        )
-        tables = prepare_hiveserde_tables(installation_ctx, random, schema, table_base_dir)
-    elif scenario == "managed":
-        schema_name = f"managed_{random}"
-        schema_location = f'dbfs:/mnt/{env_or_skip("TEST_MOUNT_NAME")}/a/managed_{random}'
-        schema = installation_ctx.make_schema(catalog_name="hive_metastore", name=schema_name, location=schema_location)
-        tables = prepare_regular_tables(installation_ctx, make_mounted_location, schema)
-    elif scenario == "regular":
-        schema = installation_ctx.make_schema(catalog_name="hive_metastore", name=f"migrate_{random}")
-        tables = prepare_regular_tables(installation_ctx, make_mounted_location, schema)
-
-    # create destination catalog and schema
-    dst_catalog = make_catalog()
-    dst_schema = installation_ctx.make_schema(catalog_name=dst_catalog.name, name=schema.name)
-    migrate_rules = [Rule.from_src_dst(table, dst_schema) for _, table in tables.items()]
-    installation_ctx.with_table_mapping_rules(migrate_rules)
-    installation_ctx.with_dummy_resource_permission()
-    installation_ctx.save_tables(is_hiveserde=is_hiveserde)
-    installation_ctx.save_mounts()
-    installation_ctx.with_dummy_grants_and_tacls()
-    return tables, dst_schema
+def make_table_migration_context(
+    env_or_skip,
+    make_random,
+    make_mounted_location,
+    make_storage_dir,
+) -> Callable[
+    [Literal["hiveserde", "managed", "regular"], MockInstallationContext], tuple[dict[str, TableInfo], SchemaInfo]
+]:
+
+    def prepare(
+        scenario: Literal["hiveserde", "managed", "regular"], ctx: MockInstallationContext
+    ) -> tuple[dict[str, TableInfo], SchemaInfo]:
+        random = make_random(5).lower()
+        # create external and managed tables to be migrated
+        if scenario == "hiveserde":
+            schema = ctx.make_schema(catalog_name="hive_metastore", name=f"hiveserde_in_place_{random}")
+            table_base_dir = make_storage_dir(
+                path=f'dbfs:/mnt/{env_or_skip("TEST_MOUNT_NAME")}/a/hiveserde_in_place_{random}'
+            )
+            tables = prepare_hiveserde_tables(ctx, random, schema, table_base_dir)
+        elif scenario == "managed":
+            schema_name = f"managed_{random}"
+            schema_location = f'dbfs:/mnt/{env_or_skip("TEST_MOUNT_NAME")}/a/managed_{random}'
+            schema = ctx.make_schema(catalog_name="hive_metastore", name=schema_name, location=schema_location)
+            tables = prepare_regular_tables(ctx, make_mounted_location, schema)
+        elif scenario == "regular":
+            schema = ctx.make_schema(catalog_name="hive_metastore", name=f"migrate_{random}")
+            tables = prepare_regular_tables(ctx, make_mounted_location, schema)
+        else:
+            raise ValueError(f"Unsupported scenario {scenario}")
+
+        # create destination catalog and schema
+        dst_catalog = ctx.make_catalog()
+        dst_schema = ctx.make_schema(catalog_name=dst_catalog.name, name=schema.name)
+        migrate_rules = [Rule.from_src_dst(table, dst_schema) for _, table in tables.items()]
+        ctx.with_table_mapping_rules(migrate_rules)
+        ctx.with_dummy_resource_permission()
+        ctx.save_tables(is_hiveserde=scenario == "hiveserde")
+        ctx.save_mounts()
+        ctx.with_dummy_grants_and_tacls()
+        return tables, dst_schema
+
+    return prepare
 
 
 @pytest.fixture
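The conftest change swaps pytest's indirect parametrization for a factory fixture: instead of reading the scenario from `request.param`, the fixture returns a `prepare` callable that each test invokes with the scenario and context it wants. A small sketch of the pattern, under illustrative names (`make_context` and its scenario handling are not the real ucx fixtures):

```python
import pytest


@pytest.fixture
def make_context():
    """Factory fixture: returns a callable instead of a prepared value."""

    def prepare(scenario: str) -> dict:
        if scenario not in {"regular", "managed", "hiveserde"}:
            raise ValueError(f"Unsupported scenario {scenario}")
        # ... set up schemas and tables for the scenario here ...
        return {"scenario": scenario}

    return prepare


@pytest.mark.parametrize("scenario", ["regular", "hiveserde"])
def test_migration(make_context, scenario) -> None:
    context = make_context(scenario)  # plain call; no indirect=... plumbing
    assert context["scenario"] == scenario
```

The data flow becomes explicit: parametrization stays ordinary, type checkers see the scenario argument, and a single test can prepare contexts for several scenarios if it needs to.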

tests/integration/hive_metastore/test_ext_hms.py
Lines changed: 12 additions & 9 deletions

@@ -23,13 +23,13 @@ def sql_backend(ws, env_or_skip) -> SqlBackend:
 
 
 @retried(on=[NotFound, InvalidParameterValue], timeout=timedelta(minutes=5))
-@pytest.mark.parametrize('prepare_tables_for_migration', ['regular'], indirect=True)
-def test_migration_job_ext_hms(ws, installation_ctx, prepare_tables_for_migration, env_or_skip):
+def test_migration_job_ext_hms(ws, installation_ctx, make_table_migration_context, env_or_skip) -> None:
+    tables, dst_schema = make_table_migration_context("regular", installation_ctx)
     ext_hms_cluster_id = env_or_skip("TEST_EXT_HMS_CLUSTER_ID")
-    tables, dst_schema = prepare_tables_for_migration
     ext_hms_ctx = installation_ctx.replace(
         config_transform=lambda wc: dataclasses.replace(
             wc,
+            skip_tacl_migration=True,
             override_clusters={
                 "main": ext_hms_cluster_id,
                 "user_isolation": ext_hms_cluster_id,
@@ -45,18 +45,21 @@ def test_migration_job_ext_hms(ws, installation_ctx, prepare_tables_for_migratio
             r"Choose a cluster policy": "0",
         },
     )
-
     ext_hms_ctx.workspace_installation.run()
-    ext_hms_ctx.deployed_workflows.run_workflow("migrate-tables")
+
+    ext_hms_ctx.deployed_workflows.run_workflow("migrate-tables", skip_job_wait=True)
+
     # assert the workflow is successful
     assert ext_hms_ctx.deployed_workflows.validate_step("migrate-tables")
 
     # assert the tables are migrated
+    missing_tables = set[str]()
     for table in tables.values():
-        try:
-            assert ws.tables.get(f"{dst_schema.catalog_name}.{dst_schema.name}.{table.name}").name
-        except NotFound:
-            assert False, f"{table.name} not found in {dst_schema.catalog_name}.{dst_schema.name}"
+        migrated_table_name = f"{dst_schema.catalog_name}.{dst_schema.name}.{table.name}"
+        if not ext_hms_ctx.workspace_client.tables.exists(migrated_table_name):
+            missing_tables.add(migrated_table_name)
+    assert not missing_tables, f"Missing migrated tables: {missing_tables}"
+
    # assert the cluster is configured correctly with ext hms
     install_state = ext_hms_ctx.installation.load(RawState)
     for job_cluster in ws.jobs.get(install_state.resources["jobs"]["migrate-tables"]).settings.job_clusters:
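The assertion style changes too: rather than failing on the first `NotFound` inside a try/except, the tests now collect every missing table into a set and assert once at the end, so a flaky run reports the full picture. A sketch of that collect-then-assert shape, with a generic `table_exists` predicate standing in for `ctx.workspace_client.tables.exists`:

```python
from collections.abc import Callable


def assert_all_migrated(table_names: list[str], table_exists: Callable[[str], bool]) -> None:
    missing_tables = set[str]()
    for name in table_names:
        if not table_exists(name):
            missing_tables.add(name)
    # One assertion that names every missing table, not just the first one.
    assert not missing_tables, f"Missing migrated tables: {missing_tables}"


inventory = {"main.db.t1", "main.db.t2"}  # pretend inventory of migrated tables
assert_all_migrated(["main.db.t1", "main.db.t2"], inventory.__contains__)
```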
Lines changed: 66 additions & 66 deletions

@@ -1,35 +1,43 @@
+import dataclasses
+from typing import Literal
+
 import pytest
-from databricks.sdk.errors import NotFound
+from databricks.labs.lsql.core import Row
 
 from databricks.labs.ucx.framework.utils import escape_sql_identifier
 from databricks.labs.ucx.hive_metastore.tables import Table
 
 
 @pytest.mark.parametrize(
-    "prepare_tables_for_migration,workflow",
+    "scenario, workflow",
     [
         ("regular", "migrate-tables"),
         ("hiveserde", "migrate-external-hiveserde-tables-in-place-experimental"),
         ("hiveserde", "migrate-external-tables-ctas"),
     ],
-    indirect=("prepare_tables_for_migration",),
 )
 def test_table_migration_job_refreshes_migration_status(
-    ws,
     installation_ctx,
-    prepare_tables_for_migration,
-    workflow,
-):
+    scenario: Literal["regular", "hiveserde"],
+    workflow: str,
+    make_table_migration_context,
+) -> None:
     """The migration status should be refreshed after the migration job."""
-    tables, _ = prepare_tables_for_migration
+    tables, _ = make_table_migration_context(scenario, installation_ctx)
     ctx = installation_ctx.replace(
+        config_transform=lambda wc: dataclasses.replace(
+            wc,
+            skip_tacl_migration=True,
+        ),
         extend_prompts={
             r".*Do you want to update the existing installation?.*": 'yes',
         },
     )
 
     ctx.workspace_installation.run()
-    ctx.deployed_workflows.run_workflow(workflow)
+    ctx.deployed_workflows.run_workflow(workflow, skip_job_wait=True)
+
+    assert ctx.deployed_workflows.validate_step(workflow)
 
     # Avoiding MigrationStatusRefresh as it will refresh the status before fetching
     migration_status_query = f"SELECT * FROM {ctx.config.inventory_database}.migration_status"
@@ -62,85 +70,76 @@ def test_table_migration_job_refreshes_migration_status(
     assert len(asserts) == 0, assert_message
 
 
-@pytest.mark.parametrize(
-    "prepare_tables_for_migration,workflow",
-    [
-        ("managed", "migrate-tables"),
-    ],
-    indirect=("prepare_tables_for_migration",),
-)
-def test_table_migration_for_managed_table(ws, installation_ctx, prepare_tables_for_migration, workflow, sql_backend):
-    # This test cases test the CONVERT_TO_EXTERNAL scenario.
-    tables, dst_schema = prepare_tables_for_migration
+def test_table_migration_convert_manged_to_external(installation_ctx, make_table_migration_context) -> None:
+    tables, dst_schema = make_table_migration_context("managed", installation_ctx)
     ctx = installation_ctx.replace(
+        config_transform=lambda wc: dataclasses.replace(
+            wc,
+            skip_tacl_migration=True,
+        ),
         extend_prompts={
             r"If hive_metastore contains managed table with external.*": "0",
             r".*Do you want to update the existing installation?.*": 'yes',
         },
     )
 
     ctx.workspace_installation.run()
-    ctx.deployed_workflows.run_workflow(workflow)
+    ctx.deployed_workflows.run_workflow("migrate-tables", skip_job_wait=True)
+
+    assert ctx.deployed_workflows.validate_step("migrate-tables")
 
+    missing_tables = set[str]()
     for table in tables.values():
-        try:
-            assert ws.tables.get(f"{dst_schema.catalog_name}.{dst_schema.name}.{table.name}").name
-        except NotFound:
-            assert False, f"{table.name} not found in {dst_schema.catalog_name}.{dst_schema.name}"
-    managed_table = tables["src_managed_table"]
+        migrated_table_name = f"{dst_schema.catalog_name}.{dst_schema.name}.{table.name}"
+        if not ctx.workspace_client.tables.exists(migrated_table_name):
+            missing_tables.add(migrated_table_name)
+    assert not missing_tables, f"Missing migrated tables: {missing_tables}"
 
-    for key, value, _ in sql_backend.fetch(f"DESCRIBE TABLE EXTENDED {escape_sql_identifier(managed_table.full_name)}"):
+    managed_table = tables["src_managed_table"]
+    for key, value, _ in ctx.sql_backend.fetch(
+        f"DESCRIBE TABLE EXTENDED {escape_sql_identifier(managed_table.full_name)}"
+    ):
         if key == "Type":
             assert value == "EXTERNAL"
             break
 
 
-@pytest.mark.parametrize('prepare_tables_for_migration', [('hiveserde')], indirect=True)
-def test_hiveserde_table_in_place_migration_job(ws, installation_ctx, prepare_tables_for_migration):
-    tables, dst_schema = prepare_tables_for_migration
+@pytest.mark.parametrize(
+    "workflow", ["migrate-external-hiveserde-tables-in-place-experimental", "migrate-external-tables-ctas"]
+)
+def test_hiveserde_table_in_place_migration_job(installation_ctx, make_table_migration_context, workflow) -> None:
+    tables, dst_schema = make_table_migration_context("hiveserde", installation_ctx)
     ctx = installation_ctx.replace(
+        config_transform=lambda wc: dataclasses.replace(
+            wc,
+            skip_tacl_migration=True,
+        ),
         extend_prompts={
             r".*Do you want to update the existing installation?.*": 'yes',
         },
     )
     ctx.workspace_installation.run()
-    ctx.deployed_workflows.run_workflow("migrate-external-hiveserde-tables-in-place-experimental")
-    # assert the workflow is successful
-    assert ctx.deployed_workflows.validate_step("migrate-external-hiveserde-tables-in-place-experimental")
-    # assert the tables are migrated
+
+    ctx.deployed_workflows.run_workflow(workflow, skip_job_wait=True)
+
+    assert installation_ctx.deployed_workflows.validate_step(workflow), f"Workflow failed: {workflow}"
+    missing_tables = set[str]()
     for table in tables.values():
-        try:
-            assert ws.tables.get(f"{dst_schema.catalog_name}.{dst_schema.name}.{table.name}").name
-        except NotFound:
-            assert False, f"{table.name} not found in {dst_schema.catalog_name}.{dst_schema.name}"
+        migrated_table_name = f"{dst_schema.catalog_name}.{dst_schema.name}.{table.name}"
+        if not ctx.workspace_client.tables.exists(migrated_table_name):
+            missing_tables.add(migrated_table_name)
+    assert not missing_tables, f"Missing migrated tables: {missing_tables}"
 
 
-@pytest.mark.parametrize('prepare_tables_for_migration', [('hiveserde')], indirect=True)
-def test_hiveserde_table_ctas_migration_job(ws, installation_ctx, prepare_tables_for_migration):
-    tables, dst_schema = prepare_tables_for_migration
+def test_table_migration_job_publishes_remaining_tables(installation_ctx, make_table_migration_context) -> None:
+    tables, dst_schema = make_table_migration_context("regular", installation_ctx)
     ctx = installation_ctx.replace(
-        extend_prompts={
-            r".*Do you want to update the existing installation?.*": 'yes',
-        },
+        config_transform=lambda wc: dataclasses.replace(
+            wc,
+            skip_tacl_migration=True,
+        ),
     )
     ctx.workspace_installation.run()
-    ctx.deployed_workflows.run_workflow("migrate-external-tables-ctas")
-    # assert the workflow is successful
-    assert ctx.deployed_workflows.validate_step("migrate-external-tables-ctas")
-    # assert the tables are migrated
-    for table in tables.values():
-        try:
-            assert ws.tables.get(f"{dst_schema.catalog_name}.{dst_schema.name}.{table.name}").name
-        except NotFound:
-            assert False, f"{table.name} not found in {dst_schema.catalog_name}.{dst_schema.name}"
-
-
-@pytest.mark.parametrize('prepare_tables_for_migration', ['regular'], indirect=True)
-def test_table_migration_job_publishes_remaining_tables(
-    ws, installation_ctx, sql_backend, prepare_tables_for_migration, caplog
-):
-    tables, dst_schema = prepare_tables_for_migration
-    installation_ctx.workspace_installation.run()
     second_table = list(tables.values())[1]
     table = Table(
         "hive_metastore",
@@ -149,19 +148,20 @@ def test_table_migration_job_publishes_remaining_tables(
         object_type="UNKNOWN",
         table_format="UNKNOWN",
     )
-    installation_ctx.table_mapping.skip_table_or_view(dst_schema.name, second_table.name, load_table=lambda *_: table)
-    installation_ctx.deployed_workflows.run_workflow("migrate-tables")
-    assert installation_ctx.deployed_workflows.validate_step("migrate-tables")
+    ctx.table_mapping.skip_table_or_view(dst_schema.name, second_table.name, load_table=lambda *_: table)
+
+    ctx.deployed_workflows.run_workflow("migrate-tables", skip_job_wait=True)
 
+    assert ctx.deployed_workflows.validate_step("migrate-tables")
     remaining_tables = list(
-        sql_backend.fetch(
+        ctx.sql_backend.fetch(
             f"""
             SELECT
              SUBSTRING(message, LENGTH('remained-hive-metastore-table: ') + 1)
              AS message
-            FROM {installation_ctx.inventory_database}.logs
+            FROM {ctx.inventory_database}.logs
            WHERE message LIKE 'remained-hive-metastore-table: %'
            """
        )
    )
-    assert remaining_tables[0].message == f'hive_metastore.{dst_schema.name}.{second_table.name}'
+    assert remaining_tables == [Row(message=f"hive_metastore.{dst_schema.name}.{second_table.name}")]
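The last assertion is also sturdier than the one it replaces: `remaining_tables[0].message` raises `IndexError` on an empty result and silently ignores extra rows, while comparing against `[Row(message=...)]` checks row count and content in one equality. A minimal illustration (requires `databricks-labs-lsql`; the fetched list is a stand-in for a real `ctx.sql_backend.fetch(...)` result):

```python
from databricks.labs.lsql.core import Row

fetched = [Row(message="hive_metastore.schema.table")]  # stand-in for a real fetch
expected = [Row(message="hive_metastore.schema.table")]
# Equality over whole lists fails loudly on zero rows or unexpected extras.
assert fetched == expected
```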
