Skip to content

Commit 97b9996

Browse files
authored
Strip preliminary comments in pip cells (#2763)
## Changes
The current implementation fails when a pip command is preceded by non-MAGIC comments. This PR fixes the issue.
### Linked issues
None
### Functionality
None
### Tests
- [x] added unit tests

Co-authored-by: Eric Vergnaud <eric.vergnaud@databricks.com>
1 parent d14a2cf commit 97b9996

File tree

2 files changed

+35
-2
lines changed

2 files changed

+35
-2
lines changed

src/databricks/labs/ucx/source_code/notebooks/cells.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -484,9 +484,15 @@ def _split(cls, code: str) -> list[str]:
484484
Sources:
485485
https://docs.databricks.com/en/libraries/notebooks-python-libraries.html#manage-libraries-with-pip-commands
486486
"""
487+
# strip preliminary comments
488+
pip_idx = code.find("pip")
489+
if pip_idx > 0 and code[pip_idx - 1] in {'%', '!'}:
490+
pip_idx -= 1
491+
code = code[pip_idx:]
492+
# look for standalone '\n'
487493
match = cls._splitter.search(code)
488494
if match:
489495
code = code[: match.start()] # Remove code after non-escaped newline
496+
# make single line
490497
code = code.replace("\\\n", " ")
491-
lexer = shlex.split(code, posix=True)
492-
return list(lexer)
498+
return shlex.split(code, posix=True)
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from pathlib import Path
2+
3+
from databricks.sdk.service.workspace import Language
4+
5+
from databricks.labs.ucx.source_code.base import CurrentSessionState
6+
from databricks.labs.ucx.source_code.graph import Dependency, DependencyGraph
7+
from databricks.labs.ucx.source_code.linters.files import FileLoader
8+
from databricks.labs.ucx.source_code.notebooks.sources import Notebook
9+
10+
11+
def test_malformed_pip_cell_is_supported(simple_ctx):
    """Check that a ``%pip`` cell preceded by a non-MAGIC comment reports no problems.

    The second cell of the notebook below starts with a ``# DBTITLE`` line
    before its ``# MAGIC %pip install ...`` line; building the dependency
    graph for that notebook is expected to return no problems.
    """
    notebook_text = """# Databricks notebook source
# MAGIC %md This notebook sets up the companion cluster(s) to run the solution accelerator. It also creates the Workflow to illustrate the order of execution. Happy exploring!

# COMMAND ----------

# DBTITLE 0,Install util packages
# MAGIC %pip install git+https://github.com/databricks-academy/dbacademy@v1.0.13 git+https://github.com/databricks-industry-solutions/notebook-solution-companion@safe-print-html --quiet --disable-pip-version-check

"""
    parsed_notebook = Notebook.parse(
        Path(""),
        source=notebook_text,
        default_language=Language.PYTHON,
    )
    # Root graph with no parent (second argument is None), wired to the fixture's resolver and lookup.
    root_graph = DependencyGraph(
        Dependency(FileLoader(), Path("")),
        None,
        simple_ctx.dependency_resolver,
        simple_ctx.path_lookup,
        CurrentSessionState(),
    )
    assert not parsed_notebook.build_dependency_graph(root_graph)

0 commit comments

Comments
 (0)