Skip to content

Commit 1e9d334

Browse files
authored
Improve error handling for assess_workflows task (#3255)
This pull request includes several changes to improve error handling and logging in the `databricks/labs/ucx/source_code` module. The most important changes include importing an additional error type (`DatabricksError`), handling specific errors in temporary file operations, and lowering the log level of various error messages from `error` to `warning`.

### Error Handling:

* [`src/databricks/labs/ucx/source_code/jobs.py`](diffhunk://#diff-e9b8a0fc1055c1e7d799d63ee6c440b3db47fdbcb8dccd5880f1a72db5df5837L20-R20): Added `DatabricksError` to the list of imported errors and used it to handle exceptions in the `_temporary_copy` method. This change ensures that specific Databricks-related errors are caught and re-raised as `InvalidPath` errors. [[1]](diffhunk://#diff-e9b8a0fc1055c1e7d799d63ee6c440b3db47fdbcb8dccd5880f1a72db5df5837L20-R20) [[2]](diffhunk://#diff-e9b8a0fc1055c1e7d799d63ee6c440b3db47fdbcb8dccd5880f1a72db5df5837R168-R176)

### Logging Improvements:

* [`src/databricks/labs/ucx/source_code/known.py`](diffhunk://#diff-ed49b49a4bffc221bd19b77b68ce27482cda9417398b40cc1205e16d4d463022L169-R169): Changed the log level from `error` to `warning` for recursion errors in the `_analyze_dist_info` method.
* [`src/databricks/labs/ucx/source_code/linters/files.py`](diffhunk://#diff-5dae8c130e55d05b5dfeb64c78ee0f128c7609e530b7a91f359f0008c91bf3a9L231-R231): Changed the log level from `error` to `warning` for Unicode decode errors in the `_apply_file_fix` method.
* [`src/databricks/labs/ucx/source_code/linters/from_table.py`](diffhunk://#diff-3a3ae81870927af560ab2e91f35d8b4230d28bd5f45c099f51ac6c4a633d9301L100-R100): Changed the log level from `error` to `warning` for schema determination errors in the `apply` method.
* [`src/databricks/labs/ucx/source_code/redash.py`](diffhunk://#diff-d5de4bcdd34eb78885392169c22e798351cf010da05ae546a28cf536f9c648d5L52-R52): Changed the log level from `error` to `warning` for errors when listing dashboards in the `_list_dashboards` method.
* [`src/databricks/labs/ucx/source_code/sql/sql_parser.py`](diffhunk://#diff-dc63f875bba70c85a8e1c6f34089b0a53779b7ef68a983d497b0c923e3ce5bb3L55-R55): Changed the log level from `error` to `warning` for schema determination errors in the `_collect_table_info` method.
1 parent 75fe330 commit 1e9d334

File tree

6 files changed

+15
-11
lines changed

6 files changed

+15
-11
lines changed

src/databricks/labs/ucx/source_code/jobs.py

Lines changed: 10 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@
1717
from databricks.labs.blueprint.paths import DBFSPath
1818
from databricks.labs.lsql.backends import SqlBackend
1919
from databricks.sdk import WorkspaceClient
20-
from databricks.sdk.errors import NotFound, ResourceDoesNotExist, BadRequest, InvalidParameterValue
20+
from databricks.sdk.errors import NotFound, ResourceDoesNotExist, BadRequest, InvalidParameterValue, DatabricksError
2121
from databricks.sdk.service import compute, jobs
2222
from databricks.sdk.service.compute import DataSecurityMode
2323
from databricks.sdk.service.jobs import Source
@@ -165,11 +165,15 @@ def _as_path(self, path: str) -> Path:
165165
@classmethod
166166
@contextmanager
167167
def _temporary_copy(cls, path: Path) -> Generator[Path, None, None]:
168-
with tempfile.TemporaryDirectory() as directory:
169-
temporary_path = Path(directory) / path.name
170-
with path.open("rb") as src, temporary_path.open("wb") as dst:
171-
shutil.copyfileobj(src, dst)
172-
yield temporary_path
168+
try:
169+
with tempfile.TemporaryDirectory() as directory:
170+
temporary_path = Path(directory) / path.name
171+
with path.open("rb") as src, temporary_path.open("wb") as dst:
172+
shutil.copyfileobj(src, dst)
173+
yield temporary_path
174+
except DatabricksError as e:
175+
# Cover cases like `ResourceDoesNotExist: Path (/Volumes/...-py3-none-any.whl) doesn't exist.`
176+
raise InvalidPath(f"Cannot load file: {path}") from e
173177

174178
def _register_library(self, graph: DependencyGraph, library: compute.Library) -> Iterable[DependencyProblem]:
175179
if library.pypi:

src/databricks/labs/ucx/source_code/known.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -166,7 +166,7 @@ def _analyze_dist_info(cls, dist_info_folder, known_distributions, library_root)
166166
try:
167167
cls._analyze_file(known_distributions, library_root, dist_info, module_path)
168168
except RecursionError:
169-
logger.error(f"Recursion error in {module_path}")
169+
logger.warning(f"Recursion error in {module_path}")
170170
continue
171171

172172
@classmethod

src/databricks/labs/ucx/source_code/linters/files.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -228,7 +228,7 @@ def _apply_file_fix(self, path: Path):
228228
try:
229229
code = f.read()
230230
except UnicodeDecodeError as e:
231-
logger.error(f"Could not decode file {path}: {e}")
231+
logger.warning(f"Could not decode file {path}: {e}")
232232
return False
233233
applied = False
234234
# Lint the code and apply fixes

src/databricks/labs/ucx/source_code/linters/from_table.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -97,7 +97,7 @@ def apply(self, code: str) -> str:
9797
for old_table in self._dependent_tables(statement):
9898
src_schema = old_table.db if old_table.db else self._session_state.schema
9999
if not src_schema:
100-
logger.error(f"Could not determine schema for table {old_table.name}")
100+
logger.warning(f"Could not determine schema for table {old_table.name}")
101101
continue
102102
dst = self._index.get(src_schema, old_table.name)
103103
if not dst:

src/databricks/labs/ucx/source_code/redash.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,7 @@ def _list_dashboards(self, dashboard_id: str | None) -> list[Dashboard]:
4949
return list(self._ws.dashboards.list())
5050
return [self._ws.dashboards.get(dashboard_id)]
5151
except DatabricksError as e:
52-
logger.error(f"Cannot list dashboards: {e}")
52+
logger.warning(f"Cannot list dashboards: {e}")
5353
return []
5454

5555
def _fix_query(self, query: LegacyQuery) -> None:

src/databricks/labs/ucx/source_code/sql/sql_parser.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ def _collect_table_info(
5252
# Sqlglot uses db instead of schema, watch out for that
5353
src_schema = table.db if table.db else session_state.schema
5454
if not src_schema:
55-
logger.error(f"Could not determine schema for table {table.name}")
55+
logger.warning(f"Could not determine schema for table {table.name}")
5656
return None
5757
return UsedTable(
5858
catalog_name=catalog_name,

0 commit comments

Comments
 (0)