-
Notifications
You must be signed in to change notification settings - Fork 96
Refactor refreshing of migration-status information for tables, eliminate another redundant refresh. #3270
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor refreshing of migration-status information for tables, eliminate another redundant refresh. #3270
Changes from 14 commits
3b496e0
c78d552
7b57606
a8d0b79
e7eadae
f2aa76e
0bfe3d6
e0e3e18
bdfa3c1
a15c382
c15cb67
f40c6bb
1940376
ba55951
27662b5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,7 @@ | |
|
||
import pytest | ||
from databricks.labs.ucx.hive_metastore import TablesCrawler | ||
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationStatusRefresher | ||
from databricks.labs.ucx.progress.history import ProgressEncoder | ||
from databricks.sdk import WorkspaceClient | ||
from databricks.sdk.service.catalog import CatalogInfo, MetastoreAssignment | ||
|
@@ -47,22 +48,36 @@ def test_migration_progress_runtime_refresh(run_workflow, task, crawler, history | |
def test_migration_progress_runtime_tables_refresh(run_workflow) -> None: | ||
"""Ensure that the split crawl and update-history-log tasks perform their part of the refresh process.""" | ||
mock_tables_crawler = create_autospec(TablesCrawler) | ||
mock_migration_status_refresher = create_autospec(TableMigrationStatusRefresher) | ||
mock_history_log = create_autospec(ProgressEncoder) | ||
context_replacements = { | ||
"tables_crawler": mock_tables_crawler, | ||
"migration_status_refresher": mock_migration_status_refresher, | ||
"tables_progress": mock_history_log, | ||
"named_parameters": {"parent_run_id": 53}, | ||
} | ||
|
||
# The first part of a 2-step update: the crawl without updating the history log. | ||
# The first part of a 3-step update: the table crawl without updating the history log. | ||
run_workflow(MigrationProgress.crawl_tables, **context_replacements) | ||
mock_tables_crawler.snapshot.assert_called_once_with(force_refresh=True) | ||
mock_tables_crawler.snapshot.reset_mock() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Did not know this, nice! Do you prefer a reset over separate tests? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, I would normally say we should never reset mocks. However in this test we are checking a sequence of operations that need to work together, and resetting allows the test to verify the interactions are happening at the correct point (rather than accumulating). This could be split into a sequence of tests (one for each task), but grouping them here in this way emphasises that the tasks are intended to run in a particular sequence. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just realised that the test did nothing to verify the task ordering, so I've added that in. With that in mind, what would you prefer: like this, or split into separate unit tests? I'm fine with either approach… There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Generally I prefer separate unit tests over big unit tests |
||
mock_history_log.append_inventory_snapshot.assert_not_called() | ||
|
||
mock_tables_crawler.snapshot.reset_mock() | ||
# The second part of the 2-step update: updating the history log (without a forced crawl). | ||
# The second part of a 3-step update: updating table migration status without updating the history log. | ||
task_dependencies = getattr(MigrationProgress.refresh_table_migration_status, "__task__").depends_on | ||
assert MigrationProgress.crawl_tables.__name__ in task_dependencies | ||
run_workflow(MigrationProgress.refresh_table_migration_status, **context_replacements) | ||
mock_migration_status_refresher.snapshot.assert_called_once_with(force_refresh=True) | ||
mock_migration_status_refresher.snapshot.reset_mock() | ||
mock_history_log.append_inventory_snapshot.assert_not_called() | ||
|
||
# The final part of the 3-step update: updating the history log (without a forced crawl). | ||
task_dependencies = getattr(MigrationProgress.update_tables_history_log, "__task__").depends_on | ||
assert MigrationProgress.crawl_tables.__name__ in task_dependencies | ||
assert MigrationProgress.refresh_table_migration_status.__name__ in task_dependencies | ||
run_workflow(MigrationProgress.update_tables_history_log, **context_replacements) | ||
mock_tables_crawler.snapshot.assert_called_once_with() | ||
# migration_status_refresher is not directly used within step 3, so interactions don't need to be checked. | ||
mock_history_log.append_inventory_snapshot.assert_called_once() | ||
|
||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.