Skip to content

Commit 3b89a78

Browse files
authored
[DPTOOLS-1903] Remove stale data in ES index (#15)
1 parent 37c700d commit 3b89a78

File tree

5 files changed

+47
-4
lines changed

5 files changed

+47
-4
lines changed

databuilder/extractor/neo4j_extractor.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ def _execute_query(self, tx):
6363
"""
6464
Create an iterator to execute sql.
6565
"""
66+
LOGGER.info('Executing query {}'.format(self.cypher_query))
6667
result = tx.run(self.cypher_query)
6768
return result
6869

databuilder/extractor/neo4j_search_data_extractor.py

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from databuilder import Scoped
77
from databuilder.extractor.base_extractor import Extractor
88
from databuilder.extractor.neo4j_extractor import Neo4jExtractor
9+
from databuilder.publisher.neo4j_csv_publisher import JOB_PUBLISH_TAG
910

1011

1112
class Neo4jSearchDataExtractor(Extractor):
@@ -18,6 +19,7 @@ class Neo4jSearchDataExtractor(Extractor):
1819
DEFAULT_NEO4J_CYPHER_QUERY = textwrap.dedent(
1920
"""
2021
MATCH (db:Database)<-[:CLUSTER_OF]-(cluster:Cluster)<-[:SCHEMA_OF]-(schema:Schema)<-[:TABLE_OF]-(table:Table)
22+
{publish_tag_filter}
2123
OPTIONAL MATCH (table)-[:DESCRIPTION]->(table_description:Description)
2224
OPTIONAL MATCH (table)-[read:READ_BY]->(user:User)
2325
OPTIONAL MATCH (table)-[:COLUMN]->(cols:Column)
@@ -44,8 +46,11 @@ def init(self, conf):
4446
self.conf = conf
4547

4648
# extract cypher query from conf, if specified, else use default query
47-
self.cypher_query = conf.get_string(Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY,
48-
Neo4jSearchDataExtractor.DEFAULT_NEO4J_CYPHER_QUERY)
49+
if Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY in conf:
50+
self.cypher_query = conf.get_string(Neo4jSearchDataExtractor.CYPHER_QUERY_CONFIG_KEY)
51+
else:
52+
self.cypher_query = self._add_publish_tag_filter(conf.get_string(JOB_PUBLISH_TAG, ''),
53+
Neo4jSearchDataExtractor.DEFAULT_NEO4J_CYPHER_QUERY)
4954

5055
self.neo4j_extractor = Neo4jExtractor()
5156
# write the cypher query in configs in Neo4jExtractor scope
@@ -72,3 +77,18 @@ def extract(self):
7277
def get_scope(self):
7378
# type: () -> str
7479
return 'extractor.search_data'
80+
81+
def _add_publish_tag_filter(self, publish_tag, cypher_query):
82+
"""
83+
Adds publish tag filter into Cypher query
84+
:param publish_tag: value of publish tag.
85+
:param cypher_query:
86+
:return:
87+
"""
88+
# type: (str, str) -> str
89+
if not publish_tag:
90+
publish_tag_filter = ''
91+
else:
92+
publish_tag_filter = """WHERE table.published_tag = '{}'""".format(publish_tag)
93+
94+
return cypher_query.format(publish_tag_filter=publish_tag_filter)

databuilder/extractor/table_column_usage_aggregate_extractor.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ class TblColUsgAggExtractor(Extractor):
3535

3636
def init(self, conf):
3737
# type: (ConfigTree) -> None
38-
3938
self._extractor = conf.get(RAW_EXTRACTOR) # type: Extractor
4039
self._extractor.init(Scoped.get_scoped_conf(conf, self._extractor.get_scope()))
4140

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from setuptools import setup, find_packages
22

33

4-
__version__ = '1.0.5'
4+
__version__ = '1.0.6'
55

66

77
setup(
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
import unittest

from databuilder.extractor.neo4j_search_data_extractor import Neo4jSearchDataExtractor


class TestNeo4jExtractor(unittest.TestCase):
    """Unit tests for ``Neo4jSearchDataExtractor._add_publish_tag_filter``."""

    def test_adding_filter(self):
        # type: () -> None
        # A non-empty publish tag is rendered as a WHERE clause in place of
        # the ``{publish_tag_filter}`` placeholder.
        extractor = Neo4jSearchDataExtractor()
        actual = extractor._add_publish_tag_filter('foo', 'MATCH (table:Table) {publish_tag_filter} RETURN table')

        self.assertEqual(actual, """MATCH (table:Table) WHERE table.published_tag = 'foo' RETURN table""")

    def test_not_adding_filter(self):
        # type: () -> None
        # An empty publish tag replaces the placeholder with an empty string.
        extractor = Neo4jSearchDataExtractor()
        actual = extractor._add_publish_tag_filter('', 'MATCH (table:Table) {publish_tag_filter} RETURN table')

        # The spaces on both sides of the placeholder are kept, hence the
        # two consecutive spaces in the expected query.
        self.assertEqual(actual, """MATCH (table:Table)  RETURN table""")


if __name__ == '__main__':
    unittest.main()

0 commit comments

Comments
 (0)