Skip to content
This repository was archived by the owner on Feb 22, 2023. It is now read-only.

Commit 4edae23

Browse files
Make quoted queries behave as described in the API documentation (return exact matches only) (#1012)
* Fix quoted audio search example escaping
* Make quoted queries behave as described in API documentation
* Undo change breaking title match boosting
* Fix and future-proof tests against additional test data
1 parent 55f55de commit 4edae23

File tree

6 files changed

+45
-7
lines changed

6 files changed

+45
-7
lines changed

api/catalog/api/controllers/search_controller.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -340,18 +340,25 @@ def search(
340340
search_fields = ["tags.name", "title", "description"]
341341
if "q" in search_params.data:
342342
query = _quote_escape(search_params.data["q"])
343+
base_query_kwargs = {
344+
"query": query,
345+
"fields": search_fields,
346+
"default_operator": "AND",
347+
}
348+
349+
if '"' in query:
350+
base_query_kwargs["quote_field_suffix"] = ".exact"
351+
343352
s = s.query(
344353
"simple_query_string",
345-
query=query,
346-
fields=search_fields,
347-
default_operator="AND",
354+
**base_query_kwargs,
348355
)
349-
# Boost exact matches
356+
# Boost exact matches on the title
350357
quotes_stripped = query.replace('"', "")
351358
exact_match_boost = Q(
352359
"simple_query_string",
353360
fields=["title"],
354-
query=f'"{quotes_stripped}"',
361+
query=f"{quotes_stripped}",
355362
boost=10000,
356363
)
357364
s = search_client.query(Q("bool", must=s.query, should=exact_match_boost))

api/catalog/api/examples/audio_requests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
syntax_examples = {
1111
"using single query parameter": "test",
1212
"using multiple query parameters": "test&license=pdm,by&categories=illustration&page_size=1&page=1", # noqa: E501
13-
"that is an exact match of Giacomo Puccini": '"Giacomo Puccini"',
13+
"that is an exact match of Giacomo Puccini": r"%22Giacomo%20Puccini%22",
1414
"related to both dog and cat": "dog+cat",
1515
"related to dog or cat, but not necessarily both": "dog|cat",
1616
"related to dog but won't include results related to 'pug'": "dog -pug",

api/catalog/api/examples/image_requests.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
syntax_examples = {
1111
"using single query parameter": "test",
1212
"using multiple query parameters": "test&license=pdm,by&categories=illustration&page_size=1&page=1", # noqa: E501
13-
"that are an exact match of Claude Monet": '"Claude Monet"',
13+
"that are an exact match of Claude Monet": "%22Claude%20Monet%22",
1414
"related to both dog and cat": "dog+cat",
1515
"related to dog or cat, but not necessarily both": "dog|cat",
1616
"related to dog but won't include results related to 'pug'": "dog -pug",

api/test/audio_integration_test.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
search_by_category,
1616
search_consistency,
1717
search_quotes,
18+
search_quotes_exact,
1819
search_source_and_excluded,
1920
search_special_chars,
2021
stats,
@@ -101,6 +102,11 @@ def test_search_quotes():
101102
search_quotes("audio", "love")
102103

103104

105+
def test_search_quotes_exact():
106+
# ``water running`` returns different results when quoted vs unquoted
107+
search_quotes_exact("audio", "water running")
108+
109+
104110
def test_search_with_special_characters():
105111
search_special_chars("audio", "love")
106112

api/test/image_integration_test.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
search_all_excluded,
1616
search_consistency,
1717
search_quotes,
18+
search_quotes_exact,
1819
search_source_and_excluded,
1920
search_special_chars,
2021
stats,
@@ -53,6 +54,11 @@ def test_search_quotes():
5354
search_quotes("images", "dog")
5455

5556

57+
def test_search_quotes_exact():
58+
# ``bird perched`` returns different results when quoted vs unquoted
59+
search_quotes_exact("images", "bird perched")
60+
61+
5662
def test_search_with_special_characters():
5763
search_special_chars("images", "dog")
5864

api/test/media_integration.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,25 @@ def search_quotes(media_path, q="test"):
4545
assert response.status_code == 200
4646

4747

48+
def search_quotes_exact(media_path, q):
49+
"""Only returns exact matches for the given query"""
50+
unquoted_response = requests.get(f"{API_URL}/v1/{media_path}?q={q}", verify=False)
51+
assert unquoted_response.status_code == 200
52+
unquoted_result_count = unquoted_response.json()["result_count"]
53+
assert unquoted_result_count > 0
54+
55+
quoted_response = requests.get(f'{API_URL}/v1/{media_path}?q="{q}"', verify=False)
56+
assert quoted_response.status_code == 200
57+
quoted_result_count = quoted_response.json()["result_count"]
58+
assert quoted_result_count > 0
59+
60+
# The rationale here is that the unquoted results will match more records due
61+
# to the query being overall less strict. Quoting the query will make it more
62+
# strict causing it to return fewer results.
63+
# Above we check that the results are not 0 to confirm that we do still get results back.
64+
assert quoted_result_count < unquoted_result_count
65+
66+
4867
def search_special_chars(media_path, q="test"):
4968
"""Returns a response when query includes special characters."""
5069
response = requests.get(f"{API_URL}/v1/{media_path}?q={q}!", verify=False)

0 commit comments

Comments
 (0)