Skip to content

Commit c15e230

Browse files
committed
be more defensive
1 parent b53986a commit c15e230

File tree

2 files changed: 19 additions & 6 deletions

src/databricks/labs/ucx/sequencing/sequencing.py

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
from collections.abc import Iterable
55
from dataclasses import dataclass, field
66

7+
from databricks.sdk import WorkspaceClient
78
from databricks.sdk.service import jobs
89

910
from databricks.labs.ucx.source_code.graph import DependencyGraph
@@ -66,7 +67,8 @@ def find(self, object_type: str, object_id: str) -> MigrationNode | None:
6667

6768
class MigrationSequencer:
6869

69-
def __init__(self):
70+
def __init__(self, ws: WorkspaceClient):
71+
self._ws = ws
7072
self._root = MigrationNode(
7173
node_id=0, object_type="ROOT", object_id="ROOT", object_name="ROOT", object_owner="NONE"
7274
)
@@ -83,7 +85,7 @@ def register_workflow_task(self, task: jobs.Task, job: jobs.Job, _graph: Depende
8385
object_type="TASK",
8486
object_id=task_id,
8587
object_name=task.task_key,
86-
object_owner=job_node.object_owner, # no task owner so use job one
88+
object_owner=job_node.object_owner, # no task owner so use job one
8789
)
8890
job_node.required_steps.append(task_node)
8991
if task.existing_cluster_id:
@@ -127,14 +129,17 @@ def register_cluster(self, cluster_key: str) -> MigrationNode:
127129
cluster_node = self._find_node(object_type="CLUSTER", object_id=cluster_key)
128130
if cluster_node:
129131
return cluster_node
132+
details = self._ws.clusters.get(cluster_key)
133+
object_name = details.cluster_name if details and details.cluster_name else cluster_key
134+
object_owner = details.creator_user_name if details and details.creator_user_name else "<UNKNOWN>"
130135
MigrationNode.last_node_id += 1
131136
cluster_node = MigrationNode(
132137
node_id=MigrationNode.last_node_id,
133138
object_type="CLUSTER",
134139
object_id=cluster_key,
135-
object_name=cluster_key,
136-
object_owner="NONE",
137-
) # TODO object_owner
140+
object_name=object_name,
141+
object_owner=object_owner,
142+
)
138143
# TODO register warehouses and policies
139144
self._root.required_steps.append(cluster_node)
140145
return cluster_node
@@ -155,6 +160,8 @@ def _deduplicate_steps(steps: Iterable[MigrationStep]) -> Iterable[MigrationStep
155160
for step in steps:
156161
existing = best_steps.get(step.step_id, None)
157162
# keep the step with the highest step number
163+
# TODO this possibly affects the step_number of steps that depend on this one
164+
# but it's probably OK to not be 100% accurate initially
158165
if existing and existing.step_number >= step.step_number:
159166
continue
160167
best_steps[step.step_id] = step
Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
from databricks.sdk.service import jobs
2+
from databricks.sdk.service.compute import ClusterDetails
23

34
from databricks.labs.ucx.sequencing.sequencing import MigrationSequencer
45
from databricks.labs.ucx.source_code.base import CurrentSessionState
@@ -7,16 +8,21 @@
78

89

910
def test_cluster_from_task_has_children(ws, simple_dependency_resolver, mock_path_lookup):
11+
ws.clusters.get.return_value = ClusterDetails(cluster_name="my-cluster", creator_user_name="John Doe")
1012
task = jobs.Task(task_key="test-task", existing_cluster_id="cluster-123")
1113
settings = jobs.JobSettings(name="test-job", tasks=[task])
1214
job = jobs.Job(job_id=1234, settings=settings)
1315
ws.jobs.get.return_value = job
1416
dependency = WorkflowTask(ws, task, job)
1517
graph = DependencyGraph(dependency, None, simple_dependency_resolver, mock_path_lookup, CurrentSessionState())
16-
sequencer = MigrationSequencer()
18+
sequencer = MigrationSequencer(ws)
1719
sequencer.register_workflow_task(task, job, graph)
1820
steps = list(sequencer.generate_steps())
1921
step = steps[-1]
22+
assert step.step_id
2023
assert step.object_type == "CLUSTER"
2124
assert step.object_id == "cluster-123"
25+
assert step.object_name == "my-cluster"
26+
assert step.object_owner == "John Doe"
2227
assert step.step_number == 3
28+
assert len(step.required_step_ids) == 2

0 commit comments

Comments
 (0)