Skip to content

Commit 8b25e78

Browse files
committed
Add integration test for initial testing
1 parent 49af2f5 commit 8b25e78

File tree

2 files changed

+51
-2
lines changed

2 files changed

+51
-2
lines changed

src/databricks/labs/ucx/source_code/linters/directfs.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ def __init__(self,
221221
super().__init__(session_state, prevent_spark_duplicates)
222222
self.directfs_crawler = directfs_crawler
223223
self.tables_crawler = tables_crawler
224-
self.direct_fs_table_list = [Any, [dict[str,str], Any]]
224+
self.direct_fs_table_list: list[tuple[Any, dict[str, str], Any]] = []  # NOTE(review): original annotation `[Any, [dict[str,str], Any]]` is not a valid type expression — confirm intended element type
225225

226226
def fix_tree(self, tree: Tree) -> Tree:
227227
for directfs_node in self.collect_dfsas_from_tree(tree):

tests/integration/source_code/test_directfs_access.py

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,12 @@
11
import pytest
22

33
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex
4-
from databricks.labs.ucx.source_code.base import DirectFsAccess, LineageAtom
4+
from databricks.labs.ucx.source_code.base import DirectFsAccess, LineageAtom, CurrentSessionState
55
from databricks.labs.ucx.source_code.jobs import WorkflowLinter
6+
from databricks.labs.ucx.source_code.linters.directfs import DirectFsAccessPyFixer
7+
from databricks.labs.ucx.source_code.python.python_ast import Tree
8+
from integration.conftest import runtime_ctx
9+
from unit.source_code.linters.test_spark_connect import session_state
610

711

812
def test_legacy_query_dfsa_ownership(runtime_ctx) -> None:
@@ -110,3 +114,48 @@ def test_path_dfsa_ownership(
110114
# Verify ownership can be made.
111115
owner = runtime_ctx.directfs_access_ownership.owner_of(path_record)
112116
assert owner == runtime_ctx.workspace_client.current_user.me().user_name
117+
118+
def test_path_dfsa_replacement(
119+
runtime_ctx,
120+
make_directory,
121+
make_mounted_location,
122+
inventory_schema,
123+
sql_backend,
124+
) -> None:
125+
"""Verify that the direct-fs access in python notebook is replaced with Unity catalog table"""
126+
127+
mounted_location = '/mnt/things/e/f/g'
128+
external_table = runtime_ctx.make_table(external_csv=mounted_location,
129+
)
130+
notebook_content = f"display(spark.read.csv('{mounted_location}'))"
131+
notebook = runtime_ctx.make_notebook(path=f"{make_directory()}/notebook.py",
132+
content=notebook_content.encode("ASCII"))
133+
job = runtime_ctx.make_job(notebook_path=notebook)
134+
135+
# # Produce a DFSA record for the job.
136+
linter = WorkflowLinter(
137+
runtime_ctx.workspace_client,
138+
runtime_ctx.dependency_resolver,
139+
runtime_ctx.path_lookup,
140+
TableMigrationIndex([]),
141+
runtime_ctx.directfs_access_crawler_for_paths,
142+
runtime_ctx.used_tables_crawler_for_paths,
143+
include_job_ids=[job.job_id],
144+
)
145+
linter.refresh_report(sql_backend, inventory_schema)
146+
147+
runtime_ctx.tables_crawler.snapshot()
148+
runtime_ctx.directfs_access_crawler_for_paths.snapshot()
149+
150+
session_state = CurrentSessionState()
151+
directfs_py_fixer = DirectFsAccessPyFixer(session_state,
152+
runtime_ctx.directfs_access_crawler_for_paths,
153+
runtime_ctx.tables_crawler)
154+
directfs_py_fixer.populate_directfs_table_list([runtime_ctx.directfs_access_crawler_for_paths],
155+
runtime_ctx.tables_crawler,
156+
"workspace_name",
157+
"catalog_name")
158+
159+
assert True
160+
directfs_py_fixer.fix_tree(Tree.maybe_normalized_parse(notebook_content).tree)
161+
assert True

0 commit comments

Comments
 (0)