Skip to content

Commit 8de37e1

Browse files
authored
[TECH DEBT] Document and test Folder and FolderLoader (#3651)
## Changes Document and test `Folder` and `FolderLoader` ### Linked issues Progresses #3645 ### Functionality - [x] modified `Folder` and `FolderLoader` ### Tests - [x] added unit tests
1 parent f52a0c9 commit 8de37e1

File tree

4 files changed

+101
-25
lines changed

4 files changed

+101
-25
lines changed

src/databricks/labs/ucx/source_code/folders.py

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,18 @@
1717

1818

1919
class Folder(SourceContainer):
20+
"""A source container that represents a folder."""
21+
22+
# The following paths names are ignore as they do not contain source code
23+
_IGNORE_PATH_NAMES = {
24+
"__pycache__",
25+
".mypy_cache",
26+
".git",
27+
".github",
28+
# Code from libraries are accessed through `imports`, not directly via the folder
29+
".venv",
30+
"site-packages",
31+
}
2032

2133
def __init__(
2234
self,
@@ -30,17 +42,18 @@ def __init__(
3042
self._file_loader = file_loader
3143
self._folder_loader = folder_loader
3244

33-
@property
34-
def path(self) -> Path:
35-
return self._path
36-
3745
def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]:
38-
# don't directly scan non-source directories, let it be done for relevant imports only
39-
if self._path.name in {"__pycache__", ".git", ".github", ".venv", ".mypy_cache", "site-packages"}:
46+
"""Build the dependency graph for the folder.
47+
48+
Here we skip certain directories, like:
49+
- the ones that are not source code.
50+
"""
51+
if self._path.name in self._IGNORE_PATH_NAMES:
4052
return []
4153
return list(self._build_dependency_graph(parent))
4254

4355
def _build_dependency_graph(self, parent: DependencyGraph) -> Iterable[DependencyProblem]:
56+
"""Build the dependency graph for the contents of the folder."""
4457
for child_path in self._path.iterdir():
4558
is_file = child_path.is_file()
4659
is_notebook = is_a_notebook(child_path)
@@ -60,6 +73,7 @@ def __init__(self, notebook_loader: NotebookLoader, file_loader: FileLoader):
6073
self._file_loader = file_loader
6174

6275
def load_dependency(self, path_lookup: PathLookup, dependency: Dependency) -> Folder | None:
76+
"""Load the folder as a dependency."""
6377
absolute_path = path_lookup.resolve(dependency.path)
6478
if not absolute_path:
6579
return None

tests/unit/source_code/linters/test_files.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from databricks.sdk.service.workspace import Language
1414

1515
from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex
16-
from databricks.labs.ucx.source_code.folders import Folder, FolderLoader
16+
from databricks.labs.ucx.source_code.folders import FolderLoader
1717
from databricks.labs.ucx.source_code.files import FileLoader, ImportFileResolver
1818
from databricks.labs.ucx.source_code.linters.folders import LocalCodeLinter, LocalFileMigrator
1919
from databricks.labs.ucx.source_code.linters.context import LinterContext
@@ -167,13 +167,6 @@ def test_single_dot_import() -> None:
167167
path_lookup.resolve.assert_called_once_with(Path('/some/path/to/folder/foo.py'))
168168

169169

170-
def test_folder_has_repr() -> None:
171-
notebook_loader = NotebookLoader()
172-
file_loader = FileLoader()
173-
folder = Folder(Path("test"), notebook_loader, file_loader, FolderLoader(notebook_loader, file_loader))
174-
assert len(repr(folder)) > 0
175-
176-
177170
site_packages = locate_site_packages()
178171

179172

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
from pathlib import Path
2+
from unittest.mock import create_autospec
3+
4+
import pytest
5+
6+
from databricks.labs.ucx.source_code.base import CurrentSessionState
7+
from databricks.labs.ucx.source_code.files import FileLoader
8+
from databricks.labs.ucx.source_code.folders import FolderLoader, Folder
9+
from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph
10+
from databricks.labs.ucx.source_code.notebooks.loaders import NotebookLoader
11+
from databricks.labs.ucx.source_code.path_lookup import PathLookup
12+
13+
14+
def test_folder_has_repr() -> None:
15+
notebook_loader = NotebookLoader()
16+
file_loader = FileLoader()
17+
folder = Folder(Path("test"), notebook_loader, file_loader, FolderLoader(notebook_loader, file_loader))
18+
assert len(repr(folder)) > 0
19+
20+
21+
@pytest.fixture
22+
def graph_parent_child_context(mock_path_lookup, simple_dependency_resolver) -> DependencyGraph:
23+
"""Dependency graph for the parent-child-context sample directory.
24+
25+
The directory contains three files `grand_parent.py`, `parent.py` and `child.py`. That import each other as such:
26+
grand parent imports parent import child.
27+
"""
28+
path = mock_path_lookup.resolve(Path("parent-child-context/"))
29+
dependency = Dependency(FolderLoader(NotebookLoader(), FileLoader()), path, False)
30+
graph = DependencyGraph(dependency, None, simple_dependency_resolver, mock_path_lookup, CurrentSessionState())
31+
return graph
32+
33+
34+
def test_folder_build_dependency_graph_without_problems(mock_path_lookup, graph_parent_child_context) -> None:
35+
"""No problems should arise form building the dependency graph for the sample folder"""
36+
folder = graph_parent_child_context.dependency.load(mock_path_lookup)
37+
assert folder is not None
38+
problems = folder.build_dependency_graph(graph_parent_child_context)
39+
assert not problems
40+
41+
42+
def test_folder_loads_content(mock_path_lookup, graph_parent_child_context) -> None:
43+
"""The files in the folder should be added to the dependency graph after building."""
44+
expected_dependencies = {graph_parent_child_context.dependency}
45+
for relative_path in "grand_parent.py", "parent.py", "child.py":
46+
path = mock_path_lookup.resolve(Path("parent-child-context") / relative_path)
47+
dependency = Dependency(FileLoader(), path)
48+
expected_dependencies.add(dependency)
49+
50+
folder = graph_parent_child_context.dependency.load(mock_path_lookup)
51+
assert folder is not None
52+
folder.build_dependency_graph(graph_parent_child_context)
53+
54+
assert graph_parent_child_context.all_dependencies == expected_dependencies
55+
56+
57+
def test_folder_cannot_load_unresolved_path(graph_parent_child_context) -> None:
58+
"""An unresolved path cannot be loaded."""
59+
path_lookup = create_autospec(PathLookup)
60+
path_lookup.resolve.return_value = None
61+
folder = graph_parent_child_context.dependency.load(path_lookup)
62+
assert folder is None
63+
path_lookup.resolve.assert_called_once_with(graph_parent_child_context.dependency.path)
64+
65+
66+
@pytest.mark.parametrize("subdirectory", [".venv"])
67+
def test_folder_build_dependency_graph_ignore_subdirectories(
68+
tmp_path, mock_path_lookup, simple_dependency_resolver, subdirectory: str
69+
) -> None:
70+
"""The folder loader should only include directories with source code"""
71+
path = tmp_path / subdirectory / "file.py"
72+
path.parent.mkdir(parents=True, exist_ok=True)
73+
path.touch()
74+
dependency = Dependency(FolderLoader(NotebookLoader(), FileLoader()), tmp_path, inherits_context=False)
75+
graph = DependencyGraph(dependency, None, simple_dependency_resolver, mock_path_lookup, CurrentSessionState())
76+
folder = graph.dependency.load(mock_path_lookup)
77+
assert folder is not None
78+
problems = folder.build_dependency_graph(graph)
79+
assert not problems
80+
assert path not in mock_path_lookup.successfully_resolved_paths, "Subdirectory should be ignored"

tests/unit/source_code/test_graph.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,17 +28,6 @@ def test_dependency_graph_registers_library_from_egg(mock_path_lookup, simple_de
2828
assert lookup_resolve.exists()
2929

3030

31-
def test_folder_loads_content(mock_path_lookup, simple_dependency_resolver) -> None:
32-
path = Path(__file__).parent / "samples" / "parent-child-context"
33-
dependency = Dependency(FolderLoader(NotebookLoader(), FileLoader()), path, False)
34-
graph = DependencyGraph(dependency, None, simple_dependency_resolver, mock_path_lookup, CurrentSessionState())
35-
container = dependency.load(mock_path_lookup)
36-
assert container is not None
37-
container.build_dependency_graph(graph)
38-
all_paths = [d.path for d in graph.all_dependencies]
39-
assert len(all_paths) == 4
40-
41-
4231
def test_root_dependencies_returns_only_files(mock_path_lookup, simple_dependency_resolver) -> None:
4332
path = Path(__file__).parent / "samples" / "parent-child-context"
4433
dependency = Dependency(FolderLoader(NotebookLoader(), FileLoader()), path, False)

0 commit comments

Comments
 (0)