Skip to content

Commit c88d3f5

Browse files
authored
Update migate-local-code to use latest linter functionality (#3700)
## Changes Update `migate-local-code` to use latest linter functionality - Merge `LocalFileMigrator` with `LocalCodeLinter` - Align `.fix` and `.apply` interface - Introduce `FixerWalker` to fix dependencies in dependency graph ### Linked issues Resolves #3514 Supersedes: #3520 Stacked on: - [x] #3535 ### Functionality - [x] modified existing command: `databricks labs ucx migrate-local-code` ### Tests - [ ] manually tested - [x] modified and added unit tests - [x] modified and added integration tests
1 parent 085d2c1 commit c88d3f5

File tree

16 files changed

+233
-196
lines changed

16 files changed

+233
-196
lines changed

labs.yml

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -263,15 +263,18 @@ commands:
263263
- name: revert-cluster-remap
264264
description: Reverting the Re-mapping of the cluster from UC
265265

266-
- name: migrate-local-code
267-
description: (Experimental) Migrate files in the current directory to be more compatible with Unity Catalog.
268-
269266
- name: lint-local-code
270267
description: (Experimental) Lint files in the current directory to highlight incompatibilities with Unity Catalog.
271268
flags:
272269
- name: path
273270
description: Path to the file or directory to lint
274271

272+
- name: migrate-local-code
273+
description: (Experimental) Migrate files in the current directory to be more compatible with Unity Catalog.
274+
flags:
275+
- name: path
276+
description: Path to the file or directory to lint
277+
275278
- name: show-all-metastores
276279
is_account_level: true
277280
description: Show all metastores available in the same region as the specified workspace

src/databricks/labs/ucx/cli.py

Lines changed: 33 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
from databricks.labs.ucx.contexts.workspace_cli import WorkspaceContext, LocalCheckoutContext
2020
from databricks.labs.ucx.hive_metastore.tables import What
2121
from databricks.labs.ucx.install import AccountInstaller
22-
from databricks.labs.ucx.source_code.linters.folders import LocalCodeLinter
2322
from databricks.labs.ucx.workspace_access.groups import AccountGroupLookup
2423

2524
ucx = App(__file__)
@@ -605,16 +604,6 @@ def revert_cluster_remap(w: WorkspaceClient, prompts: Prompts):
605604
ctx.cluster_access.revert_cluster_remap(cluster_list, cluster_ids)
606605

607606

608-
@ucx.command
609-
def migrate_local_code(w: WorkspaceClient, prompts: Prompts):
610-
"""Fix the code files based on their language."""
611-
ctx = LocalCheckoutContext(w)
612-
working_directory = Path.cwd()
613-
if not prompts.confirm("Do you want to apply UC migration to all files in the current directory?"):
614-
return
615-
ctx.local_file_migrator.apply(working_directory)
616-
617-
618607
@ucx.command(is_account=True)
619608
def show_all_metastores(a: AccountClient, workspace_id: str | None = None):
620609
"""Show all metastores in the account"""
@@ -873,13 +862,43 @@ def download(
873862

874863
@ucx.command
875864
def lint_local_code(
876-
w: WorkspaceClient, prompts: Prompts, path: str | None = None, ctx: LocalCheckoutContext | None = None
865+
w: WorkspaceClient,
866+
prompts: Prompts,
867+
path: Path | str | None = None,
868+
ctx: LocalCheckoutContext | None = None,
877869
):
878870
"""Lint local code files looking for problems."""
879871
if ctx is None:
880872
ctx = LocalCheckoutContext(w)
881-
linter: LocalCodeLinter = ctx.local_code_linter
882-
linter.lint(prompts, None if path is None else Path(path))
873+
if not path:
874+
response = prompts.question(
875+
"Which file or directory do you want to lint?",
876+
default=Path.cwd().as_posix(),
877+
validate=lambda p_: Path(p_).exists(),
878+
)
879+
assert response
880+
path = Path(response)
881+
for advice in ctx.local_code_linter.lint(Path(path)):
882+
print(advice)
883+
884+
885+
@ucx.command
886+
def migrate_local_code(
887+
w: WorkspaceClient, prompts: Prompts, path: Path | str | None = None, ctx: LocalCheckoutContext | None = None
888+
):
889+
"""Fix the code files based on their language."""
890+
if ctx is None:
891+
ctx = LocalCheckoutContext(w)
892+
if not path:
893+
response = prompts.question(
894+
"Which file or directory do you want to lint?",
895+
default=Path.cwd().as_posix(),
896+
validate=lambda p_: Path(p_).exists(),
897+
)
898+
assert response
899+
path = Path(response)
900+
for advice in ctx.local_code_linter.apply(Path(path)):
901+
print(advice)
883902

884903

885904
@ucx.command

src/databricks/labs/ucx/contexts/workspace_cli.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
from databricks.labs.ucx.progress.install import ProgressTrackingInstallation
2424
from databricks.labs.ucx.source_code.base import CurrentSessionState
2525
from databricks.labs.ucx.source_code.linters.context import LinterContext
26-
from databricks.labs.ucx.source_code.linters.folders import LocalCodeLinter, LocalFileMigrator
26+
from databricks.labs.ucx.source_code.linters.folders import LocalCodeLinter
2727
from databricks.labs.ucx.source_code.notebooks.loaders import NotebookLoader
2828
from databricks.labs.ucx.workspace_access.clusters import ClusterAccess
2929

@@ -223,10 +223,6 @@ def linter_context_factory(self, session_state: CurrentSessionState | None = Non
223223
session_state = CurrentSessionState()
224224
return LinterContext(index, session_state)
225225

226-
@cached_property
227-
def local_file_migrator(self) -> LocalFileMigrator:
228-
return LocalFileMigrator(lambda: self.linter_context_factory(CurrentSessionState()))
229-
230226
@cached_property
231227
def local_code_linter(self) -> LocalCodeLinter:
232228
session_state = CurrentSessionState()
@@ -235,7 +231,6 @@ def local_code_linter(self) -> LocalCodeLinter:
235231
self.file_loader,
236232
self.folder_loader,
237233
self.path_lookup,
238-
session_state,
239234
self.dependency_resolver,
240235
lambda: self.linter_context_factory(session_state),
241236
)

src/databricks/labs/ucx/source_code/graph.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,7 @@ def has_missing_path(self) -> bool:
547547

548548
def as_located_advice(self) -> LocatedAdvice:
549549
"""Converts the dependency problem in a located advice for linting."""
550-
# Advisory level is chosen to treat a dependency problem as a WARNING. It is the most server while not being
550+
# Advisory level is chosen to treat a dependency problem as a WARNING. It is the most severe while not being
551551
# CRITICAL (yet).
552552
advisory = Advisory(
553553
code=self.code,
Lines changed: 68 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,22 @@
11
from __future__ import annotations
22

33
import logging
4-
import sys
54
from collections.abc import Callable, Iterable
65
from pathlib import Path
7-
from typing import TextIO
86

9-
from databricks.labs.blueprint.tui import Prompts
10-
from databricks.sdk.service.workspace import Language
11-
12-
from databricks.labs.ucx.source_code.base import CurrentSessionState, LocatedAdvice, is_a_notebook
13-
from databricks.labs.ucx.source_code.graph import DependencyResolver, DependencyLoader, Dependency, DependencyGraph
14-
from databricks.labs.ucx.source_code.linters.context import LinterContext
15-
from databricks.labs.ucx.source_code.folders import FolderLoader
7+
from databricks.labs.ucx.source_code.base import LocatedAdvice, is_a_notebook
168
from databricks.labs.ucx.source_code.files import FileLoader
17-
from databricks.labs.ucx.source_code.linters.graph_walkers import LintingWalker
9+
from databricks.labs.ucx.source_code.folders import FolderLoader
10+
from databricks.labs.ucx.source_code.graph import (
11+
Dependency,
12+
DependencyGraph,
13+
DependencyLoader,
14+
DependencyProblem,
15+
DependencyResolver,
16+
MaybeGraph,
17+
)
18+
from databricks.labs.ucx.source_code.linters.context import LinterContext
19+
from databricks.labs.ucx.source_code.linters.graph_walkers import FixerWalker, LinterWalker
1820
from databricks.labs.ucx.source_code.notebooks.loaders import NotebookLoader
1921
from databricks.labs.ucx.source_code.path_lookup import PathLookup
2022

@@ -23,118 +25,85 @@
2325

2426

2527
class LocalCodeLinter:
28+
"""Lint local code to become Unity Catalog compatible."""
2629

2730
def __init__(
2831
self,
2932
notebook_loader: NotebookLoader,
3033
file_loader: FileLoader,
3134
folder_loader: FolderLoader,
3235
path_lookup: PathLookup,
33-
session_state: CurrentSessionState,
3436
dependency_resolver: DependencyResolver,
3537
context_factory: Callable[[], LinterContext],
3638
) -> None:
3739
self._notebook_loader = notebook_loader
3840
self._file_loader = file_loader
3941
self._folder_loader = folder_loader
4042
self._path_lookup = path_lookup
41-
self._session_state = session_state
4243
self._dependency_resolver = dependency_resolver
43-
self._extensions = {".py": Language.PYTHON, ".sql": Language.SQL}
4444
self._context_factory = context_factory
4545

46-
def lint(
47-
self,
48-
prompts: Prompts,
49-
path: Path | None,
50-
stdout: TextIO = sys.stdout,
51-
) -> list[LocatedAdvice]:
52-
"""Lint local code files looking for problems in notebooks and python files."""
53-
if path is None:
54-
response = prompts.question(
55-
"Which file or directory do you want to lint?",
56-
default=Path.cwd().as_posix(),
57-
validate=lambda p_: Path(p_).exists(),
58-
)
59-
path = Path(response)
60-
located_advices = list(self.lint_path(path))
61-
for located_advice in located_advices:
62-
stdout.write(f"{located_advice}\n")
63-
return located_advices
46+
def lint(self, path: Path) -> Iterable[LocatedAdvice]:
47+
"""Lint local code generating advices on becoming Unity Catalog compatible.
6448
65-
def lint_path(self, path: Path) -> Iterable[LocatedAdvice]:
66-
is_dir = path.is_dir()
67-
loader: DependencyLoader
68-
if is_a_notebook(path):
69-
loader = self._notebook_loader
70-
elif path.is_dir():
71-
loader = self._folder_loader
72-
else:
73-
loader = self._file_loader
74-
path_lookup = self._path_lookup.change_directory(path if is_dir else path.parent)
75-
root_dependency = Dependency(loader, path, not is_dir) # don't inherit context when traversing folders
76-
graph = DependencyGraph(root_dependency, None, self._dependency_resolver, path_lookup, self._session_state)
77-
container = root_dependency.load(path_lookup)
78-
assert container is not None # because we just created it
79-
problems = container.build_dependency_graph(graph)
80-
for problem in problems:
81-
yield problem.as_located_advice()
49+
Parameters :
50+
path (Path) : The path to the resource(s) to lint. If the path is a directory, then all files within the
51+
directory and subdirectories are linted.
52+
"""
53+
maybe_graph = self._build_dependency_graph_from_path(path)
54+
if maybe_graph.problems:
55+
for problem in maybe_graph.problems:
56+
yield problem.as_located_advice()
8257
return
83-
walker = LintingWalker(graph, self._path_lookup, self._context_factory)
58+
assert maybe_graph.graph
59+
walker = LinterWalker(maybe_graph.graph, self._path_lookup, self._context_factory)
8460
yield from walker
8561

62+
def apply(self, path: Path) -> Iterable[LocatedAdvice]:
63+
"""Apply local code fixes to become Unity Catalog compatible.
8664
87-
class LocalFileMigrator:
88-
"""The LocalFileMigrator class is responsible for fixing code files based on their language."""
65+
Parameters :
66+
path (Path) : The path to the resource(s) to lint. If the path is a directory, then all files within the
67+
directory and subdirectories are linted.
68+
"""
69+
maybe_graph = self._build_dependency_graph_from_path(path)
70+
if maybe_graph.problems:
71+
for problem in maybe_graph.problems:
72+
yield problem.as_located_advice()
73+
return
74+
assert maybe_graph.graph
75+
walker = FixerWalker(maybe_graph.graph, self._path_lookup, self._context_factory)
76+
list(walker) # Nothing to yield
8977

90-
def __init__(self, context_factory: Callable[[], LinterContext]):
91-
self._extensions = {".py": Language.PYTHON, ".sql": Language.SQL}
92-
self._context_factory = context_factory
78+
def _build_dependency_graph_from_path(self, path: Path) -> MaybeGraph:
79+
"""Build a dependency graph from the path.
9380
94-
def apply(self, path: Path) -> bool:
95-
if path.is_dir():
96-
for child_path in path.iterdir():
97-
self.apply(child_path)
98-
return True
99-
return self._apply_file_fix(path)
81+
It tries to load the path as a directory, file or notebook.
10082
101-
def _apply_file_fix(self, path: Path):
102-
"""
103-
The fix method reads a file, lints it, applies fixes, and writes the fixed code back to the file.
83+
Returns :
84+
MaybeGraph : If the loading fails, the returned maybe graph contains a problem. Otherwise, returned maybe
85+
graph contains the graph.
10486
"""
105-
# Check if the file extension is in the list of supported extensions
106-
if path.suffix not in self._extensions:
107-
return False
108-
# Get the language corresponding to the file extension
109-
language = self._extensions[path.suffix]
110-
# If the language is not supported, return
111-
if not language:
112-
return False
113-
logger.info(f"Analysing {path}")
114-
# Get the linter for the language
115-
context = self._context_factory()
116-
linter = context.linter(language)
117-
# Open the file and read the code
118-
with path.open("r") as f:
119-
try:
120-
code = f.read()
121-
except UnicodeDecodeError as e:
122-
logger.warning(f"Could not decode file {path}: {e}")
123-
return False
124-
applied = False
125-
# Lint the code and apply fixes
126-
for advice in linter.lint(code):
127-
logger.info(f"Found: {advice}")
128-
fixer = context.fixer(language, advice.code)
129-
if not fixer:
130-
continue
131-
logger.info(f"Applying fix for {advice}")
132-
code = fixer.apply(code)
133-
applied = True
134-
if not applied:
135-
return False
136-
# Write the fixed code back to the file
137-
with path.open("w") as f:
138-
logger.info(f"Overwriting {path}")
139-
f.write(code)
140-
return True
87+
resolved_path = self._path_lookup.resolve(path)
88+
if not resolved_path:
89+
problem = DependencyProblem("path-not-found", "Path not found", source_path=path)
90+
return MaybeGraph(None, [problem])
91+
is_dir = resolved_path.is_dir()
92+
loader: DependencyLoader
93+
if is_a_notebook(resolved_path):
94+
loader = self._notebook_loader
95+
elif is_dir:
96+
loader = self._folder_loader
97+
else:
98+
loader = self._file_loader
99+
root_dependency = Dependency(loader, resolved_path, not is_dir) # don't inherit context when traversing folders
100+
container = root_dependency.load(self._path_lookup)
101+
if container is None:
102+
problem = DependencyProblem("dependency-not-found", "Dependency not found", source_path=path)
103+
return MaybeGraph(None, [problem])
104+
session_state = self._context_factory().session_state
105+
graph = DependencyGraph(root_dependency, None, self._dependency_resolver, self._path_lookup, session_state)
106+
problems = list(container.build_dependency_graph(graph))
107+
if problems:
108+
return MaybeGraph(None, problems)
109+
return MaybeGraph(graph, [])

src/databricks/labs/ucx/source_code/linters/graph_walkers.py

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ def lineage(self) -> list[LineageAtom]:
104104
return list(itertools.chain(*lists))
105105

106106

107-
class LintingWalker(DependencyGraphWalker[LocatedAdvice]):
107+
class LinterWalker(DependencyGraphWalker[LocatedAdvice]):
108108
"""Lint the dependencies in the graph."""
109109

110110
def __init__(self, graph: DependencyGraph, path_lookup: PathLookup, context_factory: Callable[[], LinterContext]):
@@ -128,6 +128,30 @@ def _process_dependency(
128128
yield LocatedAdvice(advice, dependency.path)
129129

130130

131+
class FixerWalker(DependencyGraphWalker[None]):
132+
"""Fix the dependencies in the graph."""
133+
134+
def __init__(self, graph: DependencyGraph, path_lookup: PathLookup, context_factory: Callable[[], LinterContext]):
135+
super().__init__(graph, path_lookup)
136+
self._context_factory = context_factory
137+
138+
def _log_walk_one(self, dependency: Dependency) -> None:
139+
"""Log fixing a dependency"""
140+
logger.info(f"Fixing dependency: {dependency}")
141+
142+
def _process_dependency(
143+
self,
144+
dependency: Dependency,
145+
path_lookup: PathLookup,
146+
inherited_tree: Tree | None,
147+
) -> Iterable[None]:
148+
"""Fix the dependency."""
149+
# FileLinter determines which file/notebook linter to use
150+
linter = FileLinter(dependency, path_lookup, self._context_factory(), inherited_tree)
151+
linter.apply()
152+
yield from ()
153+
154+
131155
S = TypeVar("S", bound=SourceInfo)
132156

133157

src/databricks/labs/ucx/source_code/linters/jobs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
from databricks.labs.ucx.source_code.jobs import JobProblem, WorkflowTask, WorkflowTaskContainer
2626
from databricks.labs.ucx.source_code.linters.context import LinterContext
2727
from databricks.labs.ucx.source_code.linters.graph_walkers import (
28-
LintingWalker,
28+
LinterWalker,
2929
DfsaCollectorWalker,
3030
TablesCollectorWalker,
3131
)
@@ -173,7 +173,7 @@ def _build_task_dependency_graph(
173173
return graph, located_advices, session_state
174174

175175
def _lint_task(self, graph: DependencyGraph, session_state: CurrentSessionState) -> Iterable[LocatedAdvice]:
176-
walker = LintingWalker(graph, self._path_lookup, lambda: LinterContext(self._migration_index, session_state))
176+
walker = LinterWalker(graph, self._path_lookup, lambda: LinterContext(self._migration_index, session_state))
177177
yield from walker
178178

179179
def _collect_task_dfsas(

tests/integration/source_code/linters/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)