Skip to content

Commit 7f095fb

Browse files
authored
feat: Make DescriptionMetadata inherit from GraphSerializable (#461)
* make descriptionmetadata inherit from GraphSerializable
  Signed-off-by: Junda Yang <youngyjd@gmail.com>
* fix mypy
  Signed-off-by: Junda Yang <youngyjd@gmail.com>
* address comment
  Signed-off-by: Junda Yang <youngyjd@gmail.com>
* bump up databuilder version
  Signed-off-by: Junda Yang <youngyjd@gmail.com>
1 parent 4bb4452 commit 7f095fb

File tree

3 files changed

+187
-11
lines changed

3 files changed

+187
-11
lines changed

databuilder/models/table_metadata.py

Lines changed: 62 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ def _create_record_iterator(self) -> Iterator[RDSModel]:
117117

118118

119119
# TODO: this should inherit from ProgrammaticDescription in amundsen-common
120-
class DescriptionMetadata:
120+
class DescriptionMetadata(GraphSerializable):
121121
DESCRIPTION_NODE_LABEL = DESCRIPTION_NODE_LABEL_VAL
122122
PROGRAMMATIC_DESCRIPTION_NODE_LABEL = 'Programmatic_Description'
123123
DESCRIPTION_KEY_FORMAT = '{description}'
@@ -132,7 +132,10 @@ class DescriptionMetadata:
132132

133133
def __init__(self,
134134
text: Optional[str],
135-
source: str = DEFAULT_SOURCE
135+
source: str = DEFAULT_SOURCE,
136+
description_key: Optional[str] = None,
137+
start_label: Optional[str] = None, # Table, Column, Schema
138+
start_key: Optional[str] = None,
136139
):
137140
"""
138141
:param source: The unique source of what is populating this description.
@@ -146,17 +149,28 @@ def __init__(self,
146149
else:
147150
self.label = self.PROGRAMMATIC_DESCRIPTION_NODE_LABEL
148151

152+
self.start_label = start_label
153+
self.start_key = start_key
154+
self.description_key = description_key or self.get_description_default_key(start_key)
155+
156+
self._node_iter = self._create_node_iterator()
157+
self._relation_iter = self._create_relation_iterator()
158+
149159
@staticmethod
def create_description_metadata(text: Union[None, str],
                                source: Optional[str] = DEFAULT_SOURCE,
                                description_key: Optional[str] = None,
                                start_label: Optional[str] = None,  # Table, Column, Schema
                                start_key: Optional[str] = None,
                                ) -> Optional['DescriptionMetadata']:
    """
    Build a DescriptionMetadata, or nothing when there is no text.

    :param text: The description text. None means "no description".
    :param source: The source populating this description. A falsy value
        (None or '') falls back to DescriptionMetadata.DEFAULT_SOURCE.
    :param description_key: Explicit key for the description node; passed
        through to the constructor unchanged.
    :param start_label: Label of the node being described; passed through
        to the constructor unchanged.
    :param start_key: Key of the node being described; passed through to
        the constructor unchanged.
    :return: A new DescriptionMetadata, or None when text is None.
    """
    # We do not want to create a node if there is no description text!
    if text is None:
        return None
    description_node = DescriptionMetadata(text=text,
                                           source=source or DescriptionMetadata.DEFAULT_SOURCE,
                                           description_key=description_key,
                                           start_label=start_label,
                                           start_key=start_key)
    return description_node
161175

162176
def get_description_id(self) -> str:
@@ -165,8 +179,8 @@ def get_description_id(self) -> str:
165179
else:
166180
return "_" + self.source + "_description"
167181

168-
def __repr__(self) -> str:
169-
return f'DescriptionMetadata({self.source!r}, {self.text!r})'
182+
def get_description_default_key(self, start_key: Optional[str]) -> Optional[str]:
    """
    Derive the default description node key from the key of the node
    being described.

    :param start_key: Key of the node the description is attached to.
    :return: '<start_key>/<description id>', where the description id comes
        from get_description_id(); None when start_key is None or empty.
    """
    if not start_key:
        return None
    return f'{start_key}/{self.get_description_id()}'
170184

171185
def get_node(self, node_key: str) -> GraphNode:
172186
node = GraphNode(
@@ -179,7 +193,11 @@ def get_node(self, node_key: str) -> GraphNode:
179193
)
180194
return node
181195

182-
def get_relation(self, start_node: str, start_key: Any, end_key: Any) -> GraphRelationship:
196+
def get_relation(self,
197+
start_node: str,
198+
start_key: str,
199+
end_key: str,
200+
) -> GraphRelationship:
183201
relationship = GraphRelationship(
184202
start_label=start_node,
185203
start_key=start_key,
@@ -191,6 +209,40 @@ def get_relation(self, start_node: str, start_key: Any, end_key: Any) -> GraphRe
191209
)
192210
return relationship
193211

212+
def create_next_node(self) -> Optional[GraphNode]:
    """
    Advance the node iterator by one step.

    :return: The next GraphNode, or None once the iterator is exhausted.
        Any exception other than StopIteration raised by the iterator
        propagates to the caller.
    """
    # next() with a default swallows only StopIteration, exactly like the
    # explicit try/except it replaces.
    return next(self._node_iter, None)
218+
219+
def create_next_relation(self) -> Optional[GraphRelationship]:
    """
    Advance the relation iterator by one step.

    :return: The next GraphRelationship, or None once the iterator is
        exhausted. Any exception other than StopIteration raised by the
        iterator propagates to the caller.
    """
    # next() with a default swallows only StopIteration, exactly like the
    # explicit try/except it replaces.
    return next(self._relation_iter, None)
224+
225+
def _create_node_iterator(self) -> Iterator[GraphNode]:
    """
    Yield the single description node, keyed by self.description_key.

    This is a generator, so the validation below runs on the first next()
    call rather than when the iterator is created.

    :raises Exception: if self.description_key is None or empty.
    """
    if not self.description_key:
        raise Exception('Required description node key cannot be None')
    yield self.get_node(self.description_key)
229+
230+
def _create_relation_iterator(self) -> Iterator[GraphRelationship]:
    """
    Yield the single relationship from the described node to this
    description node.

    This is a generator, so the validation below runs on the first next()
    call rather than when the iterator is created.

    :raises Exception: if self.start_label, self.start_key or
        self.description_key is None or empty (checked in that order).
    """
    # The guards are kept as separate `if` statements (not a loop) so each
    # Optional attribute is checked by name right before it is passed to
    # get_relation(), which is annotated to take plain str arguments.
    if not self.start_label:
        raise Exception('Required relation start node label cannot be None')
    if not self.start_key:
        raise Exception('Required relation start key cannot be None')
    if not self.description_key:
        raise Exception('Required relation end key cannot be None')
    yield self.get_relation(
        start_node=self.start_label,
        start_key=self.start_key,
        end_key=self.description_key
    )
242+
243+
def __repr__(self) -> str:
    """Return a debug representation showing the source and text."""
    return f'DescriptionMetadata({self.source!r}, {self.text!r})'
245+
194246

195247
class ColumnMetadata:
196248
COLUMN_NODE_LABEL = 'Column'

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
from setuptools import find_packages, setup
66

7-
__version__ = '4.2.0'
7+
__version__ = '4.2.1'
88

99

1010
requirements = [
Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
# Copyright Contributors to the Amundsen project.
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
import unittest
5+
6+
from databuilder.models.table_metadata import DescriptionMetadata
7+
from databuilder.serializers import neo4_serializer
8+
9+
10+
class TestDescriptionMetadata(unittest.TestCase):
    """
    Tests for DescriptionMetadata used as a stand-alone serializable:
    validation of the required keys/labels and neo4j serialization of the
    node and relationship it emits.
    """

    @staticmethod
    def _serialize_all_nodes(description_metadata: DescriptionMetadata) -> list:
        """Drain next_node() and return every node serialized for neo4j."""
        serialized = []
        node_row = description_metadata.next_node()
        while node_row:
            serialized.append(neo4_serializer.serialize_node(node_row))
            node_row = description_metadata.next_node()
        return serialized

    @staticmethod
    def _serialize_all_relations(description_metadata: DescriptionMetadata) -> list:
        """Drain next_relation() and return every relation serialized for neo4j."""
        serialized = []
        relation_row = description_metadata.next_relation()
        while relation_row:
            serialized.append(neo4_serializer.serialize_relationship(relation_row))
            relation_row = description_metadata.next_relation()
        return serialized

    def test_raise_exception_when_missing_data(self) -> None:
        # assert raise when missing description node key
        self.assertRaises(
            Exception,
            DescriptionMetadata(text='test_text').next_node
        )
        # An explicit description key, or a start key to derive one from,
        # is enough for the node: these calls must not raise.
        DescriptionMetadata(text='test_text', description_key='test_key').next_node()
        DescriptionMetadata(text='test_text', start_key='start_key').next_node()

        # assert raise when missing relation start label
        self.assertRaises(
            Exception,
            DescriptionMetadata(text='test_text', start_key='start_key').next_relation
        )
        # With both start key and start label present this must not raise.
        DescriptionMetadata(text='test_text', start_key='test_key', start_label='Table').next_relation()

        # assert raise when missing relation start key
        self.assertRaises(
            Exception,
            DescriptionMetadata(text='test_text', description_key='test_key', start_label='Table').next_relation
        )

    def test_serialize_table_description_metadata(self) -> None:
        # No description_key given: the key is derived from start_key.
        description_metadata = DescriptionMetadata(
            text='test text 1',
            start_label='Table',
            start_key='test_start_key'
        )

        actual = self._serialize_all_nodes(description_metadata)
        expected = [
            {'description': 'test text 1', 'KEY': 'test_start_key/_description',
             'LABEL': 'Description', 'description_source': 'description'},
        ]
        self.assertEqual(actual, expected)

        actual = self._serialize_all_relations(description_metadata)
        expected = [
            {'START_KEY': 'test_start_key', 'START_LABEL': 'Table', 'END_KEY': 'test_start_key/_description',
             'END_LABEL': 'Description', 'TYPE': 'DESCRIPTION', 'REVERSE_TYPE': 'DESCRIPTION_OF'}
        ]
        self.assertEqual(actual, expected)

    def test_serialize_column_description_metadata(self) -> None:
        # An explicit description_key overrides the key derived from start_key.
        description_metadata = DescriptionMetadata(
            text='test text 2',
            start_label='Column',
            start_key='test_start_key',
            description_key='customized_key'
        )

        actual = self._serialize_all_nodes(description_metadata)
        expected = [
            {'description': 'test text 2', 'KEY': 'customized_key',
             'LABEL': 'Description', 'description_source': 'description'},
        ]
        self.assertEqual(actual, expected)

        actual = self._serialize_all_relations(description_metadata)
        expected = [
            {'START_KEY': 'test_start_key', 'START_LABEL': 'Column', 'END_KEY': 'customized_key',
             'END_LABEL': 'Description', 'TYPE': 'DESCRIPTION', 'REVERSE_TYPE': 'DESCRIPTION_OF'}
        ]
        self.assertEqual(actual, expected)

    def test_serialize_column_with_source_description_metadata(self) -> None:
        # A non-default source switches the node label to Programmatic_Description.
        description_metadata = DescriptionMetadata(
            text='test text 3',
            start_label='Column',
            start_key='test_start_key',
            description_key='customized_key',
            source='external',
        )

        actual = self._serialize_all_nodes(description_metadata)
        expected = [
            {'description': 'test text 3', 'KEY': 'customized_key',
             'LABEL': 'Programmatic_Description', 'description_source': 'external'},
        ]
        self.assertEqual(actual, expected)

        actual = self._serialize_all_relations(description_metadata)
        expected = [
            {'START_KEY': 'test_start_key', 'START_LABEL': 'Column', 'END_KEY': 'customized_key',
             'END_LABEL': 'Programmatic_Description', 'TYPE': 'DESCRIPTION', 'REVERSE_TYPE': 'DESCRIPTION_OF'}
        ]
        self.assertEqual(actual, expected)

0 commit comments

Comments
 (0)