|
3 | 3 | import logging
|
4 | 4 | import shutil
|
5 | 5 | import tempfile
|
6 |
| -from abc import ABC, abstractmethod |
7 |
| -from collections.abc import Generator, Iterable, Callable |
| 6 | +from collections.abc import Generator, Iterable |
8 | 7 | from contextlib import contextmanager
|
9 | 8 | from dataclasses import dataclass
|
10 | 9 | from datetime import datetime, timezone
|
11 | 10 | from importlib import metadata
|
12 | 11 | from pathlib import Path
|
13 |
| -from typing import TypeVar |
14 | 12 | from urllib import parse
|
15 | 13 |
|
16 | 14 | from databricks.labs.blueprint.parallel import Threads
|
|
21 | 19 | from databricks.sdk.service import compute, jobs
|
22 | 20 | from databricks.sdk.service.compute import DataSecurityMode
|
23 | 21 | from databricks.sdk.service.jobs import Source
|
24 |
| -from databricks.sdk.service.workspace import Language |
25 | 22 |
|
26 | 23 | from databricks.labs.ucx.assessment.crawlers import runtime_version_tuple
|
27 | 24 | from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex
|
28 | 25 | from databricks.labs.ucx.mixins.cached_workspace_path import WorkspaceCache, InvalidPath
|
29 | 26 | from databricks.labs.ucx.source_code.base import (
|
30 | 27 | CurrentSessionState,
|
31 | 28 | LocatedAdvice,
|
32 |
| - is_a_notebook, |
33 |
| - file_language, |
34 |
| - SourceInfo, |
35 | 29 | UsedTable,
|
36 | 30 | LineageAtom,
|
37 |
| - safe_read_text, |
38 | 31 | )
|
39 | 32 | from databricks.labs.ucx.source_code.directfs_access import (
|
40 | 33 | DirectFsAccessCrawler,
|
|
47 | 40 | DependencyResolver,
|
48 | 41 | SourceContainer,
|
49 | 42 | WrappingLoader,
|
50 |
| - DependencyGraphWalker, |
| 43 | +) |
| 44 | +from databricks.labs.ucx.source_code.linters.graph_walkers import ( |
| 45 | + LintingWalker, |
| 46 | + DfsaCollectorWalker, |
| 47 | + TablesCollectorWalker, |
51 | 48 | )
|
52 | 49 | from databricks.labs.ucx.source_code.linters.context import LinterContext
|
53 |
| -from databricks.labs.ucx.source_code.notebooks.cells import CellLanguage |
54 |
| -from databricks.labs.ucx.source_code.python.python_ast import MaybeTree, Tree, PythonSequentialLinter |
55 |
| -from databricks.labs.ucx.source_code.notebooks.sources import FileLinter, Notebook |
56 | 50 | from databricks.labs.ucx.source_code.path_lookup import PathLookup
|
57 | 51 | from databricks.labs.ucx.source_code.used_table import UsedTablesCrawler
|
58 | 52 |
|
@@ -532,7 +526,12 @@ def _lint_task(
|
532 | 526 | linted_paths: set[Path],
|
533 | 527 | ) -> Iterable[LocatedAdvice]:
|
534 | 528 | walker = LintingWalker(
|
535 |
| - graph, linted_paths, self._path_lookup, task.task_key, session_state, self._migration_index |
| 529 | + graph, |
| 530 | + linted_paths, |
| 531 | + self._path_lookup, |
| 532 | + task.task_key, |
| 533 | + session_state, |
| 534 | + lambda: LinterContext(self._migration_index, session_state), |
536 | 535 | )
|
537 | 536 | yield from walker
|
538 | 537 |
|
@@ -569,164 +568,3 @@ def _collect_task_tables(
|
569 | 568 | LineageAtom(object_type="TASK", object_id=f"{job_id}/{task.task_key}"),
|
570 | 569 | ]
|
571 | 570 | yield dataclasses.replace(used_table, source_lineage=atoms + used_table.source_lineage)
|
572 |
| - |
573 |
| - |
574 |
class LintingWalker(DependencyGraphWalker[LocatedAdvice]):
    """Dependency graph walker that lints every dependency it processes.

    Each advice produced by the linter is wrapped in a ``LocatedAdvice``
    together with the path of the dependency it was raised for.
    """

    def __init__(
        self,
        graph: DependencyGraph,
        walked_paths: set[Path],
        path_lookup: PathLookup,
        key: str,
        session_state: CurrentSessionState,
        migration_index: TableMigrationIndex,
    ):
        """Set up the walker.

        Args:
            graph: forwarded to the base walker.
            walked_paths: forwarded to the base walker.
            path_lookup: forwarded to the base walker.
            key: label included in the log line emitted for each dependency.
            session_state: session state handed to the linter context and to
                every ``FileLinter`` created by this walker.
            migration_index: table migration index used to build the linter context.
        """
        super().__init__(graph, walked_paths, path_lookup)
        self._session_state = session_state
        self._key = key
        # A single linter context is built once and reused for all dependencies.
        self._linter_context = LinterContext(migration_index, session_state)

    def _log_walk_one(self, dependency: Dependency) -> None:
        """Log, at info level, the dependency about to be linted."""
        message = f'Linting {self._key} dependency: {dependency}'
        logger.info(message)

    def _process_dependency(
        self,
        dependency: Dependency,
        path_lookup: PathLookup,
        inherited_tree: Tree | None,
    ) -> Iterable[LocatedAdvice]:
        """Lint one dependency and yield its advices tagged with the dependency path."""
        # Choosing between the file linter and the notebook linter is left to FileLinter.
        file_linter = FileLinter(
            self._linter_context,
            path_lookup,
            self._session_state,
            dependency.path,
            inherited_tree,
        )
        yield from (LocatedAdvice(advice, dependency.path) for advice in file_linter.lint())
603 |
| - |
604 |
| - |
605 |
T = TypeVar("T", bound=SourceInfo)


class _CollectorWalker(DependencyGraphWalker[T], ABC):
    """Base walker that collects ``SourceInfo``-bound records from dependencies.

    Python code is handled by the abstract ``_collect_from_python``. Every
    other cell language is dispatched by name to a ``_collect_from_<language>``
    method, where ``<language>`` is the lower-cased ``CellLanguage`` member name.
    The defaults defined here return nothing; subclasses override those they support.
    """

    def __init__(
        self,
        graph: DependencyGraph,
        walked_paths: set[Path],
        path_lookup: PathLookup,
        session_state: CurrentSessionState,
        migration_index: TableMigrationIndex,
    ):
        """Set up the walker.

        Args:
            graph: forwarded to the base walker.
            walked_paths: forwarded to the base walker.
            path_lookup: forwarded to the base walker.
            session_state: session state stored on the walker and given to the linter context.
            migration_index: table migration index used to build the linter context.
        """
        super().__init__(graph, walked_paths, path_lookup)
        self._session_state = session_state
        self._linter_context = LinterContext(migration_index, session_state)

    @staticmethod
    def _source_timestamp(path: Path) -> datetime:
        """Return the modification time of ``path`` as a UTC-aware datetime."""
        return datetime.fromtimestamp(path.stat().st_mtime, timezone.utc)

    def _process_dependency(
        self,
        dependency: Dependency,
        path_lookup: PathLookup,
        inherited_tree: Tree | None,
    ) -> Iterable[T]:
        """Collect records from a single dependency.

        Nothing is yielded when the language of the path is unknown (a warning
        is logged), when the source text is empty or unreadable, or when the
        path is neither a notebook nor a regular file.
        """
        language = file_language(dependency.path)
        if not language:
            logger.warning(f"Unknown language for {dependency.path}")
            return
        cell_language = CellLanguage.of_language(language)
        source = safe_read_text(dependency.path)
        if not source:
            return
        # Pick the collection routine first, then delegate with one shared call.
        if is_a_notebook(dependency.path):
            collect = self._collect_from_notebook
        elif dependency.path.is_file():
            collect = self._collect_from_source
        else:
            return
        yield from collect(source, cell_language, dependency.path, inherited_tree)

    def _collect_from_notebook(
        self,
        source: str,
        language: CellLanguage,
        path: Path,
        inherited_tree: Tree | None,
    ) -> Iterable[T]:
        """Collect records cell by cell from a notebook.

        After a Python cell has been collected from, its parsed tree is attached
        to ``inherited_tree`` (a fresh module tree is created when none was given).
        Python cells that fail to parse are logged as warnings and not attached.
        """
        notebook = Notebook.parse(path, source, language.language)
        timestamp = self._source_timestamp(path)
        identifier = str(path)
        for cell in notebook.cells:
            cell_records = self._collect_from_source(cell.original_code, cell.language, path, inherited_tree)
            for record in cell_records:
                yield record.replace_source(
                    source_id=identifier,
                    source_lineage=self.lineage,
                    source_timestamp=timestamp,
                )
            if cell.language is not CellLanguage.PYTHON:
                continue
            if inherited_tree is None:
                inherited_tree = Tree.new_module()
            parsed = MaybeTree.from_source_code(cell.original_code)
            if parsed.failure:
                logger.warning(parsed.failure.message)
                continue
            assert parsed.tree is not None
            inherited_tree.attach_child_tree(parsed.tree)

    def _collect_from_source(
        self,
        source: str,
        language: CellLanguage,
        path: Path,
        inherited_tree: Tree | None,
    ) -> Iterable[T]:
        """Collect records from one piece of source code in the given language.

        Each record is re-stamped with the path as source id, this walker's
        lineage and the modification time of ``path``.

        Raises:
            ValueError: when no ``_collect_from_<language>`` method exists for a
                non-Python language. The error surfaces on first iteration,
                because this method is a generator.
        """
        if language is CellLanguage.PYTHON:
            collected = self._collect_from_python(source, inherited_tree)
        else:
            hook_name = f"_collect_from_{language.name.lower()}"
            hook: Callable[[str], Iterable[T]] | None = getattr(self, hook_name, None)
            if not hook:
                raise ValueError(f"Language {language.name} not supported yet!")
            # the below is for disabling a false pylint positive
            # pylint: disable=not-callable
            collected = hook(source)
        timestamp = self._source_timestamp(path)
        identifier = str(path)
        for record in collected:
            yield record.replace_source(
                source_id=identifier,
                source_lineage=self.lineage,
                source_timestamp=timestamp,
            )

    @abstractmethod
    def _collect_from_python(self, source: str, inherited_tree: Tree | None) -> Iterable[T]:
        """Collect records from Python source; must be provided by subclasses."""
        ...

    def _collect_from_sql(self, _source: str) -> Iterable[T]:
        """Default for SQL: collect nothing."""
        return []

    def _collect_from_r(self, _source: str) -> Iterable[T]:
        """Default for R: log a warning and collect nothing."""
        logger.warning("Language R not supported yet!")
        return []

    def _collect_from_scala(self, _source: str) -> Iterable[T]:
        """Default for Scala: log a warning and collect nothing."""
        logger.warning("Language scala not supported yet!")
        return []

    def _collect_from_shell(self, _source: str) -> Iterable[T]:
        """Default for shell cells: collect nothing."""
        return []

    def _collect_from_markdown(self, _source: str) -> Iterable[T]:
        """Default for markdown cells: collect nothing."""
        return []

    def _collect_from_run(self, _source: str) -> Iterable[T]:
        """Default for run cells: collect nothing."""
        return []

    def _collect_from_pip(self, _source: str) -> Iterable[T]:
        """Default for pip cells: collect nothing."""
        return []
710 |
| - |
711 |
| - |
712 |
class DfsaCollectorWalker(_CollectorWalker[DirectFsAccess]):
    """Collector walker yielding ``DirectFsAccess`` records from Python and SQL code."""

    def _collect_from_python(self, source: str, inherited_tree: Tree | None) -> Iterable[DirectFsAccess]:
        """Yield the direct filesystem accesses found in Python source.

        ``inherited_tree`` is part of the signature but is not used here.
        """
        yield from self._linter_context.dfsa_collector(Language.PYTHON).collect_dfsas(source)

    def _collect_from_sql(self, source: str) -> Iterable[DirectFsAccess]:
        """Yield the direct filesystem accesses found in SQL source."""
        yield from self._linter_context.dfsa_collector(Language.SQL).collect_dfsas(source)
721 |
| - |
722 |
| - |
723 |
class TablesCollectorWalker(_CollectorWalker[UsedTable]):
    """Collector walker yielding ``UsedTable`` records from Python and SQL code."""

    def _collect_from_python(self, source: str, inherited_tree: Tree | None) -> Iterable[UsedTable]:
        """Yield the tables used by Python source.

        ``inherited_tree`` is part of the signature but is not used here.
        """
        python_collector = self._linter_context.tables_collector(Language.PYTHON)
        # The Python tables collector is expected to be the sequential linter.
        assert isinstance(python_collector, PythonSequentialLinter)
        yield from python_collector.collect_tables(source)

    def _collect_from_sql(self, source: str) -> Iterable[UsedTable]:
        """Yield the tables used by SQL source."""
        yield from self._linter_context.tables_collector(Language.SQL).collect_tables(source)
0 commit comments