Skip to content

Commit 54d37bc

Browse files
authored
Support fixing LocalFile's with FileLinter (#3660)
## Changes Add fixing files to `LocalFileLinter`: - Add `apply` method to `LocalFileLinter` to fix files using the linters - Support writing back to a `LocalFile` container ### Linked issues Progresses #3514 ### Functionality - [x] modified existing command: `databricks labs ucx migrate-local-code` ### Tests - [ ] manually tested - [x] modified and added unit tests - [x] modified and added integration tests
1 parent 55e5282 commit 54d37bc

File tree

7 files changed

+710
-387
lines changed

7 files changed

+710
-387
lines changed

src/databricks/labs/ucx/source_code/base.py

Lines changed: 98 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import dataclasses
55
import io
66
import logging
7+
import os
8+
import shutil
79
import sys
810
from abc import abstractmethod, ABC
911
from collections.abc import Iterable
@@ -14,11 +16,10 @@
1416

1517
from astroid import NodeNG # type: ignore
1618

19+
from databricks.labs.blueprint.paths import WorkspacePath
1720
from databricks.sdk.service import compute
1821
from databricks.sdk.service.workspace import Language
1922

20-
from databricks.labs.blueprint.paths import WorkspacePath
21-
2223

2324
if sys.version_info >= (3, 11):
2425
from typing import Self
@@ -412,6 +413,40 @@ def safe_read_text(path: Path, size: int = -1) -> str | None:
412413
return None
413414

414415

416+
def write_text(path: Path, contents: str, *, encoding: str | None = None) -> int:
417+
"""Write content to a file as text, encode according to the BOM marker if that is present.
418+
419+
This differs to the normal `.read_text()` method on path which does not support BOM markers.
420+
421+
Arguments:
422+
path (Path): The file path to write text to.
423+
contents (str) : The content to write to the file.
424+
encoding (str) : Force encoding with a specific locale. If not present the file BOM and
425+
system locale are used.
426+
427+
Returns:
428+
int : The number of characters written to the file.
429+
"""
430+
if not encoding and path.exists():
431+
with path.open("rb") as binary_io:
432+
encoding = _detect_encoding_bom(binary_io, preserve_position=False)
433+
# If encoding=None, the system locale is used for encoding (as per open()).
434+
return path.write_text(contents, encoding=encoding)
435+
436+
437+
def safe_write_text(path: Path, contents: str, *, encoding: str | None = None) -> int | None:
438+
"""Safe write content to a file by handling writing exceptions, see :func:write_text.
439+
440+
Returns:
441+
int | None : The number of characters written to the file. If None, no content was written.
442+
"""
443+
try:
444+
return write_text(path, contents, encoding=encoding)
445+
except OSError as e:
446+
logger.warning(f"Cannot write to file: {path}", exc_info=e)
447+
return None
448+
449+
415450
# duplicated from CellLanguage to prevent cyclic import
416451
LANGUAGE_COMMENT_PREFIXES = {Language.PYTHON: '#', Language.SCALA: '//', Language.SQL: '--'}
417452
NOTEBOOK_HEADER = "Databricks notebook source"
@@ -430,3 +465,64 @@ def is_a_notebook(path: Path, content: str | None = None) -> bool:
430465
return content.startswith(magic_header)
431466
file_header = safe_read_text(path, size=len(magic_header))
432467
return file_header == magic_header
468+
469+
470+
def _add_backup_suffix(path: Path) -> Path:
471+
"""Add a backup suffix to a path.
472+
473+
The backed up path is the same as the original path with an additional
474+
`.bak` appended to the suffix.
475+
476+
Reuse this method so that the backup path is consistent in this module.
477+
"""
478+
# Not checking for the backup suffix to allow making backups of backups.
479+
return path.with_suffix(path.suffix + ".bak")
480+
481+
482+
def back_up_path(path: Path) -> Path | None:
483+
"""Back up a path.
484+
485+
The backed up path is the same as the original path with an additional
486+
`.bak` appended to the suffix.
487+
488+
Returns :
489+
path | None : The backed up path. If None, the backup failed.
490+
"""
491+
path_backed_up = _add_backup_suffix(path)
492+
try:
493+
shutil.copyfile(path, path_backed_up)
494+
except OSError as e:
495+
logger.warning(f"Cannot back up file: {path}", exc_info=e)
496+
return None
497+
return path_backed_up
498+
499+
500+
def revert_back_up_path(path: Path) -> bool | None:
501+
"""Revert a backed up path, see :func:back_up_path.
502+
503+
The backed up path is the same as the original path with an additional
504+
`.bak` appended to the suffix.
505+
506+
Args :
507+
path : The original path, NOT the backed up path.
508+
509+
Returns :
510+
bool : Flag if the revert was successful. If None, the backed up path
511+
does not exist, thus it cannot be reverted and the operation is not
512+
successful nor failed.
513+
"""
514+
path_backed_up = _add_backup_suffix(path)
515+
if not path_backed_up.exists():
516+
logger.warning(f"Backup is missing: {path_backed_up}")
517+
return None
518+
try:
519+
shutil.copyfile(path_backed_up, path)
520+
except OSError as e:
521+
logger.warning(f"Cannot revert backup: {path}", exc_info=e)
522+
return False
523+
try:
524+
os.unlink(path_backed_up)
525+
except OSError as e:
526+
# The backup revert is successful, but the backup file cannot be removed
527+
logger.warning(f"Cannot remove backup file: {path_backed_up}", exc_info=e)
528+
return True

src/databricks/labs/ucx/source_code/files.py

Lines changed: 49 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,13 @@
66

77
from databricks.sdk.service.workspace import Language
88

9-
from databricks.labs.ucx.source_code.base import infer_file_language_if_supported, safe_read_text
9+
from databricks.labs.ucx.source_code.base import (
10+
back_up_path,
11+
infer_file_language_if_supported,
12+
revert_back_up_path,
13+
safe_read_text,
14+
safe_write_text,
15+
)
1016
from databricks.labs.ucx.source_code.graph import (
1117
SourceContainer,
1218
DependencyGraph,
@@ -33,13 +39,53 @@ class LocalFile(SourceContainer):
3339
def __init__(self, path: Path, source: str, language: Language):
3440
self._path = path
3541
self._original_code = source
42+
self._migrated_code = source
3643
self.language = language
3744

3845
@property
39-
def content(self) -> str:
40-
"""The local file content"""
46+
def original_code(self) -> str:
47+
"""The source code when creating the container."""
4148
return self._original_code
4249

50+
@property
51+
def migrated_code(self) -> str:
52+
"""The source code after fixing with a linter."""
53+
return self._migrated_code
54+
55+
@migrated_code.setter
56+
def migrated_code(self, source: str) -> None:
57+
"""Set the source code after fixing with a linter."""
58+
self._migrated_code = source
59+
60+
def _safe_write_text(self, contents: str) -> int | None:
61+
"""Write content to the local file."""
62+
return safe_write_text(self._path, contents)
63+
64+
def _back_up_path(self) -> Path | None:
65+
"""Back up the original file."""
66+
return back_up_path(self._path)
67+
68+
def back_up_original_and_flush_migrated_code(self) -> int | None:
69+
"""Back up the original file and flush the migrated code to the file.
70+
71+
This is a single method to avoid overwriting the original file without a backup.
72+
73+
Returns :
74+
int : The number of characters written. If None, nothing is written to the file.
75+
"""
76+
if self._original_code == self._migrated_code:
77+
# Avoiding unnecessary back up and flush
78+
return len(self._migrated_code)
79+
backed_up_path = self._back_up_path()
80+
if not backed_up_path:
81+
# Failed to back up the original file, avoid overwriting existing file
82+
return None
83+
number_of_characters_written = self._safe_write_text(self._migrated_code)
84+
if number_of_characters_written is None:
85+
# Failed to overwrite original file, clean up by reverting backup
86+
revert_back_up_path(self._path)
87+
return number_of_characters_written
88+
4389
def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]:
4490
"""The dependency graph for the local file."""
4591
if self.language == Language.PYTHON:

src/databricks/labs/ucx/source_code/linters/files.py

Lines changed: 16 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -213,48 +213,6 @@ def _resolve_and_parse_notebook_path(self, path: Path | None) -> Notebook | None
213213

214214

215215
class FileLinter:
216-
_NOT_YET_SUPPORTED_SUFFIXES = {
217-
'.scala',
218-
'.sh',
219-
'.r',
220-
}
221-
_IGNORED_SUFFIXES = {
222-
'.json',
223-
'.md',
224-
'.txt',
225-
'.xml',
226-
'.yml',
227-
'.toml',
228-
'.cfg',
229-
'.bmp',
230-
'.gif',
231-
'.png',
232-
'.tif',
233-
'.tiff',
234-
'.svg',
235-
'.jpg',
236-
'.jpeg',
237-
'.pyc',
238-
'.whl',
239-
'.egg',
240-
'.class',
241-
'.iml',
242-
'.gz',
243-
}
244-
_IGNORED_NAMES = {
245-
'.ds_store',
246-
'.gitignore',
247-
'.coverage',
248-
'license',
249-
'codeowners',
250-
'makefile',
251-
'pkg-info',
252-
'metadata',
253-
'wheel',
254-
'record',
255-
'notice',
256-
'zip-safe',
257-
}
258216

259217
def __init__(
260218
self,
@@ -270,15 +228,10 @@ def __init__(
270228

271229
def lint(self) -> Iterable[Advice]:
272230
"""Lint the file."""
273-
if self._dependency.path.suffix.lower() in self._IGNORED_SUFFIXES:
274-
return
275-
if self._dependency.path.name.lower() in self._IGNORED_NAMES:
276-
return
277-
if self._dependency.path.suffix.lower() in self._NOT_YET_SUPPORTED_SUFFIXES:
278-
message = f"Unsupported language for suffix: {self._dependency.path.suffix}"
279-
yield Failure("unsupported-language", message, -1, -1, -1, -1)
280-
return
281231
source_container = self._dependency.load(self._path_lookup)
232+
if not source_container:
233+
# The linter only reports **linting** errors, not loading errors
234+
return
282235
if isinstance(source_container, Notebook):
283236
yield from self._lint_notebook(source_container)
284237
elif isinstance(source_container, LocalFile):
@@ -290,7 +243,7 @@ def _lint_file(self, local_file: LocalFile) -> Iterable[Advice]:
290243
"""Lint a local file."""
291244
try:
292245
linter = self._context.linter(local_file.language)
293-
yield from linter.lint(local_file.content)
246+
yield from linter.lint(local_file.original_code)
294247
except ValueError:
295248
# TODO: Remove when implementing: https://github.com/databrickslabs/ucx/issues/3544
296249
yield Failure("unsupported-language", f"Unsupported language: {local_file.language}", -1, -1, -1, -1)
@@ -300,6 +253,18 @@ def _lint_notebook(self, notebook: Notebook) -> Iterable[Advice]:
300253
notebook_linter = NotebookLinter(notebook, self._path_lookup, self._context, self._inherited_tree)
301254
yield from notebook_linter.lint()
302255

256+
def apply(self) -> None:
257+
"""Apply changes to the file."""
258+
source_container = self._dependency.load(self._path_lookup)
259+
if isinstance(source_container, LocalFile):
260+
self._apply_file(source_container)
261+
262+
def _apply_file(self, local_file: LocalFile) -> None:
263+
"""Apply changes to a local file."""
264+
fixed_code = self._context.apply_fixes(local_file.language, local_file.original_code)
265+
local_file.migrated_code = fixed_code
266+
local_file.back_up_original_and_flush_migrated_code()
267+
303268

304269
class NotebookMigrator:
305270
def __init__(self, languages: LinterContext):

0 commit comments

Comments
 (0)