Skip to content

Commit 5971043

Browse files
authored
Return the search terms as search highlights for SQLite instead of nothing (#17000)
Fixes #16999 and element-hq/element-android#8729 by returning the search terms as search highlights.
1 parent 9985aa6 commit 5971043

File tree

3 files changed

+31
-14
lines changed

3 files changed

+31
-14
lines changed

changelog.d/17000.bugfix

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fixed search feature of Element Android on homeservers using SQLite by returning search terms as search highlights.

synapse/storage/databases/main/search.py

Lines changed: 24 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,8 @@ async def search_msgs(
470470
count_args = args
471471
count_clauses = clauses
472472

473+
sqlite_highlights: List[str] = []
474+
473475
if isinstance(self.database_engine, PostgresEngine):
474476
search_query = search_term
475477
sql = """
@@ -486,7 +488,7 @@ async def search_msgs(
486488
"""
487489
count_args = [search_query] + count_args
488490
elif isinstance(self.database_engine, Sqlite3Engine):
489-
search_query = _parse_query_for_sqlite(search_term)
491+
search_query, sqlite_highlights = _parse_query_for_sqlite(search_term)
490492

491493
sql = """
492494
SELECT rank(matchinfo(event_search)) as rank, room_id, event_id
@@ -531,9 +533,11 @@ async def search_msgs(
531533

532534
event_map = {ev.event_id: ev for ev in events}
533535

534-
highlights = None
536+
highlights: Collection[str] = []
535537
if isinstance(self.database_engine, PostgresEngine):
536538
highlights = await self._find_highlights_in_postgres(search_query, events)
539+
else:
540+
highlights = sqlite_highlights
537541

538542
count_sql += " GROUP BY room_id"
539543

@@ -597,6 +601,8 @@ async def search_rooms(
597601
count_args = list(args)
598602
count_clauses = list(clauses)
599603

604+
sqlite_highlights: List[str] = []
605+
600606
if pagination_token:
601607
try:
602608
origin_server_ts_str, stream_str = pagination_token.split(",")
@@ -647,7 +653,7 @@ async def search_rooms(
647653
CROSS JOIN events USING (event_id)
648654
WHERE
649655
"""
650-
search_query = _parse_query_for_sqlite(search_term)
656+
search_query, sqlite_highlights = _parse_query_for_sqlite(search_term)
651657
args = [search_query] + args
652658

653659
count_sql = """
@@ -694,9 +700,11 @@ async def search_rooms(
694700

695701
event_map = {ev.event_id: ev for ev in events}
696702

697-
highlights = None
703+
highlights: Collection[str] = []
698704
if isinstance(self.database_engine, PostgresEngine):
699705
highlights = await self._find_highlights_in_postgres(search_query, events)
706+
else:
707+
highlights = sqlite_highlights
700708

701709
count_sql += " GROUP BY room_id"
702710

@@ -892,19 +900,25 @@ def _tokenize_query(query: str) -> TokenList:
892900
return tokens
893901

894902

895-
def _tokens_to_sqlite_match_query(tokens: TokenList) -> str:
903+
def _tokens_to_sqlite_match_query(tokens: TokenList) -> Tuple[str, List[str]]:
896904
"""
897905
Convert the list of tokens to a string suitable for passing to sqlite's MATCH.
898906
Assume sqlite was compiled with enhanced query syntax.
899907
908+
Returns the sqlite-formatted query string and the tokenized search terms
909+
that can be used as highlights.
910+
900911
Ref: https://www.sqlite.org/fts3.html#full_text_index_queries
901912
"""
902913
match_query = []
914+
highlights = []
903915
for token in tokens:
904916
if isinstance(token, str):
905917
match_query.append(token)
918+
highlights.append(token)
906919
elif isinstance(token, Phrase):
907920
match_query.append('"' + " ".join(token.phrase) + '"')
921+
highlights.append(" ".join(token.phrase))
908922
elif token == SearchToken.Not:
909923
# TODO: SQLite treats NOT as a *binary* operator. Hopefully a search
910924
# term has already been added before this.
@@ -916,11 +930,14 @@ def _tokens_to_sqlite_match_query(tokens: TokenList) -> str:
916930
else:
917931
raise ValueError(f"unknown token {token}")
918932

919-
return "".join(match_query)
933+
return "".join(match_query), highlights
920934

921935

922-
def _parse_query_for_sqlite(search_term: str) -> str:
936+
def _parse_query_for_sqlite(search_term: str) -> Tuple[str, List[str]]:
923937
"""Takes a plain unicode string from the user and converts it into a form
924938
that can be passed to sqlite's matchinfo().
939+
940+
Returns the converted query string and the tokenized search terms
941+
that can be used as highlights.
925942
"""
926943
return _tokens_to_sqlite_match_query(_tokenize_query(search_term))

tests/storage/test_room_search.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -71,17 +71,16 @@ def test_null_byte(self) -> None:
7171
store.search_msgs([room_id], "hi bob", ["content.body"])
7272
)
7373
self.assertEqual(result.get("count"), 1)
74-
if isinstance(store.database_engine, PostgresEngine):
75-
self.assertIn("hi", result.get("highlights"))
76-
self.assertIn("bob", result.get("highlights"))
74+
self.assertIn("hi", result.get("highlights"))
75+
self.assertIn("bob", result.get("highlights"))
7776

7877
# Check that search works for an unrelated message
7978
result = self.get_success(
8079
store.search_msgs([room_id], "another", ["content.body"])
8180
)
8281
self.assertEqual(result.get("count"), 1)
83-
if isinstance(store.database_engine, PostgresEngine):
84-
self.assertIn("another", result.get("highlights"))
82+
83+
self.assertIn("another", result.get("highlights"))
8584

8685
# Check that search works for a search term that overlaps with the message
8786
# containing a null byte and an unrelated message.
@@ -90,8 +89,8 @@ def test_null_byte(self) -> None:
9089
result = self.get_success(
9190
store.search_msgs([room_id], "hi alice", ["content.body"])
9291
)
93-
if isinstance(store.database_engine, PostgresEngine):
94-
self.assertIn("alice", result.get("highlights"))
92+
93+
self.assertIn("alice", result.get("highlights"))
9594

9695
def test_non_string(self) -> None:
9796
"""Test that non-string `value`s are not inserted into `event_search`.

0 commit comments

Comments
 (0)