Skip to content

Fix missing arguments to WorkflowLinter #2809

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/databricks/labs/ucx/contexts/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from databricks.labs.ucx.recon.schema_comparator import StandardSchemaComparator
from databricks.labs.ucx.source_code.directfs_access import DirectFsAccessCrawler
from databricks.labs.ucx.source_code.python_libraries import PythonLibraryResolver
from databricks.labs.ucx.source_code.used_table import UsedTablesCrawler
from databricks.sdk import AccountClient, WorkspaceClient, core
from databricks.sdk.service import sql

Expand Down Expand Up @@ -55,7 +56,6 @@
from databricks.labs.ucx.source_code.known import KnownList
from databricks.labs.ucx.source_code.queries import QueryLinter
from databricks.labs.ucx.source_code.redash import Redash
from databricks.labs.ucx.source_code.used_table import UsedTablesCrawler
from databricks.labs.ucx.workspace_access import generic, redash
from databricks.labs.ucx.workspace_access.groups import GroupManager
from databricks.labs.ucx.workspace_access.manager import PermissionManager
Expand Down Expand Up @@ -423,7 +423,7 @@ def dependency_resolver(self):
)

@cached_property
def workflow_linter(self):
def workflow_linter(self) -> WorkflowLinter:
return WorkflowLinter(
self.workspace_client,
self.dependency_resolver,
Expand Down
20 changes: 10 additions & 10 deletions src/databricks/labs/ucx/source_code/used_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,7 @@

class UsedTablesCrawler(CrawlerBase[UsedTable]):

@classmethod
def for_paths(cls, backend: SqlBackend, schema) -> UsedTablesCrawler:
return UsedTablesCrawler(backend, schema, "used_tables_in_paths")

@classmethod
def for_queries(cls, backend: SqlBackend, schema) -> UsedTablesCrawler:
return UsedTablesCrawler(backend, schema, "used_tables_in_queries")

def __init__(self, backend: SqlBackend, schema: str, table: str):
def __init__(self, backend: SqlBackend, schema: str, table: str) -> None:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's bad. For clarity class factory methods should always come before instance methods

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is bad nor good, it is a style issue, this is consistent with the code base

"""
Initializes a DFSACrawler instance.

Expand All @@ -33,7 +25,15 @@ def __init__(self, backend: SqlBackend, schema: str, table: str):
"""
super().__init__(backend=backend, catalog="hive_metastore", schema=schema, table=table, klass=UsedTable)

def dump_all(self, tables: Sequence[UsedTable]):
@classmethod
def for_paths(cls, backend: SqlBackend, schema: str) -> UsedTablesCrawler:
return UsedTablesCrawler(backend, schema, "used_tables_in_paths")

@classmethod
def for_queries(cls, backend: SqlBackend, schema: str) -> UsedTablesCrawler:
return UsedTablesCrawler(backend, schema, "used_tables_in_queries")

def dump_all(self, tables: Sequence[UsedTable]) -> None:
"""This crawler doesn't follow the pull model because the fetcher fetches data for 3 crawlers, not just one
It's not **bad** because all records are pushed at once.
Providing a multi-entity crawler is out-of-scope of this PR
Expand Down
19 changes: 15 additions & 4 deletions tests/unit/contexts/test_application.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from unittest.mock import create_autospec

import pytest
from databricks.labs.lsql.backends import MockBackend

from databricks.labs.ucx.contexts.application import GlobalContext
from databricks.labs.ucx.contexts.workspace_cli import LocalCheckoutContext
Expand All @@ -11,18 +12,28 @@


@pytest.mark.parametrize(
"attribute", ["dependency_resolver", "pip_resolver", "site_packages_path", "notebook_loader", "folder_loader"]
"attribute",
[
"dependency_resolver",
"pip_resolver",
"site_packages_path",
"notebook_loader",
"folder_loader",
"workflow_linter",
"used_tables_crawler_for_paths",
"used_tables_crawler_for_queries",
],
)
def test_global_context_attributes_not_none(attribute: str):
def test_global_context_attributes_not_none(attribute: str) -> None:
"""Attributes should be not None"""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

should not be None ?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We add -> None to tests to trigger the linter. Actually not necessarily in this case as attribute has a type hint, but more consistent with other tests

# Goal is to improve test coverage
ctx = GlobalContext()
ctx = GlobalContext().replace(workspace_client=mock_workspace_client(), sql_backend=MockBackend())
assert hasattr(ctx, attribute)
assert getattr(ctx, attribute) is not None


@pytest.mark.parametrize("attribute", ["local_file_migrator", "local_code_linter"])
def test_local_context_attributes_not_none(attribute: str):
def test_local_context_attributes_not_none(attribute: str) -> None:
"""Attributes should be not None"""
# Goal is to improve test coverage
client = mock_workspace_client()
Expand Down
Loading