Skip to content
This repository was archived by the owner on Feb 22, 2023. It is now read-only.

Commit 2ea8704

Browse files
authored
Add option to sort search results by created_on (#916)
1 parent 9f2b831 commit 2ea8704

File tree

11 files changed

+10114
-10011
lines changed

11 files changed

+10114
-10011
lines changed

api/catalog/api/constants/field_order.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
json_fields = [
88
"id",
99
"title",
10+
"indexed_on",
1011
"foreign_landing_url",
1112
"url",
1213
"creator",

api/catalog/api/constants/sorting.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
RELEVANCE = "relevance"
2+
INDEXED_ON = "indexed_on"
3+
SORT_FIELDS = [
4+
(RELEVANCE, "Relevance"), # default
5+
(INDEXED_ON, "Indexing date"), # date on which media was indexed into Openverse
6+
]
7+
8+
DESCENDING = "desc"
9+
ASCENDING = "asc"
10+
SORT_DIRECTIONS = [
11+
(DESCENDING, "Descending"), # default
12+
(ASCENDING, "Ascending"),
13+
]

api/catalog/api/controllers/search_controller.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import pprint
66
from itertools import accumulate
77
from math import ceil
8-
from typing import Any, Literal
8+
from typing import Literal
99

1010
from django.conf import settings
1111
from django.core.cache import cache
@@ -17,6 +17,8 @@
1717
from elasticsearch_dsl.response import Hit, Response
1818

1919
import catalog.api.models as models
20+
from catalog.api.constants.sorting import INDEXED_ON
21+
from catalog.api.serializers import media_serializers
2022
from catalog.api.utils import tallies
2123
from catalog.api.utils.dead_link_mask import get_query_hash, get_query_mask
2224
from catalog.api.utils.validate_images import validate_images
@@ -220,8 +222,7 @@ def _post_process_results(
220222

221223
def _apply_filter(
222224
s: Search,
223-
# Any is used here to avoid a circular import
224-
search_params: Any, # MediaSearchRequestSerializer
225+
search_params: media_serializers.MediaSearchRequestSerializer,
225226
serializer_field: str,
226227
es_field: str | None = None,
227228
behaviour: Literal["filter", "exclude"] = "filter",
@@ -278,8 +279,7 @@ def _exclude_mature_by_param(s: Search, search_params):
278279

279280

280281
def search(
281-
# Any is used here to avoid a circular import
282-
search_params: Any, # MediaSearchRequestSerializer
282+
search_params: media_serializers.MediaSearchRequestSerializer,
283283
index: Literal["image", "audio"],
284284
page_size: int,
285285
ip: int,
@@ -390,6 +390,11 @@ def search(
390390
# Route users to the same Elasticsearch worker node to reduce
391391
# pagination inconsistencies and increase cache hits.
392392
s = s.params(preference=str(ip), request_timeout=7)
393+
394+
# Sort by indexing date (``created_on``) in the requested direction; otherwise no explicit sort is applied
395+
if search_params.validated_data["sort_by"] == INDEXED_ON:
396+
s = s.sort({"created_on": {"order": search_params.validated_data["sort_dir"]}})
397+
393398
# Paginate
394399
start, end = _get_query_slice(s, page_size, page, filter_dead)
395400
s = s[start:end]

api/catalog/api/examples/audio_responses.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
base_audio = {
99
"id": identifier,
1010
"title": "Wish You Were Here",
11+
"indexed_on": "2022-12-06T06:54:25Z",
1112
"foreign_landing_url": "https://www.jamendo.com/track/1214935",
1213
"url": "https://mp3d.jamendo.com/download/track/1214935/mp32",
1314
"creator": "The.madpix.project",

api/catalog/api/examples/image_responses.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
base_image = {
99
"id": identifier,
1010
"title": "Tree Bark Photo",
11+
"indexed_on": "2022-08-27T17:39:48Z",
1112
"foreign_landing_url": "https://stocksnap.io/photo/XNVBVXO3B7",
1213
"url": "https://cdn.stocksnap.io/img-thumbs/960w/XNVBVXO3B7.jpg",
1314
"creator": "Tim Sullivan",

api/catalog/api/serializers/media_serializers.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,12 @@
77
from rest_framework.exceptions import NotAuthenticated
88

99
from catalog.api.constants.licenses import LICENSE_GROUPS
10+
from catalog.api.constants.sorting import (
11+
DESCENDING,
12+
RELEVANCE,
13+
SORT_DIRECTIONS,
14+
SORT_FIELDS,
15+
)
1016
from catalog.api.controllers import search_controller
1117
from catalog.api.models.media import AbstractMedia
1218
from catalog.api.serializers.base import BaseModelSerializer
@@ -42,6 +48,8 @@ class MediaSearchRequestSerializer(serializers.Serializer):
4248
"extension",
4349
"mature",
4450
"qa",
51+
# "unstable__sort_by", # excluding unstable fields
52+
# "unstable__sort_dir", # excluding unstable fields
4553
"page_size",
4654
"page",
4755
]
@@ -109,6 +117,28 @@ class MediaSearchRequestSerializer(serializers.Serializer):
109117
required=False,
110118
default=False,
111119
)
120+
121+
# The ``unstable__`` prefix is used in the query params.
122+
# The validated data does not contain the ``unstable__`` prefix because each field sets an unprefixed ``source``.
123+
# If you rename these fields, update the following references:
124+
# - ``field_names`` in ``MediaSearchRequestSerializer``
125+
# - validators for these fields in ``MediaSearchRequestSerializer``
126+
unstable__sort_by = serializers.ChoiceField(
127+
source="sort_by",
128+
help_text="The field which should be the basis for sorting results.",
129+
choices=SORT_FIELDS,
130+
required=False,
131+
default=RELEVANCE,
132+
)
133+
unstable__sort_dir = serializers.ChoiceField(
134+
source="sort_dir",
135+
help_text="The direction of sorting. Cannot be applied when sorting by "
136+
"`relevance`.",
137+
choices=SORT_DIRECTIONS,
138+
required=False,
139+
default=DESCENDING,
140+
)
141+
112142
page_size = serializers.IntegerField(
113143
label="page_size",
114144
help_text="Number of results to return per page.",
@@ -170,6 +200,16 @@ def validate_tags(self, value):
170200
def validate_title(self, value):
171201
return self._truncate(value)
172202

203+
def validate_unstable__sort_by(self, value):
204+
request = self.context.get("request")
205+
is_anonymous = bool(request and request.user and request.user.is_anonymous)
206+
return RELEVANCE if is_anonymous else value
207+
208+
def validate_unstable__sort_dir(self, value):
209+
request = self.context.get("request")
210+
is_anonymous = bool(request and request.user and request.user.is_anonymous)
211+
return DESCENDING if is_anonymous else value
212+
173213
def validate_page_size(self, value):
174214
request = self.context.get("request")
175215
is_anonymous = bool(request and request.user and request.user.is_anonymous)
@@ -314,6 +354,7 @@ class Meta:
314354
model = AbstractMedia
315355
fields = [
316356
"id",
357+
"indexed_on",
317358
"title",
318359
"foreign_landing_url",
319360
"url",
@@ -345,6 +386,11 @@ class Meta:
345386
source="identifier",
346387
)
347388

389+
indexed_on = serializers.DateTimeField(
390+
source="created_on",
391+
help_text="The timestamp of when the media was indexed by Openverse.",
392+
)
393+
348394
tags = TagSerializer(
349395
allow_null=True, # replaced with ``[]`` in ``to_representation`` below
350396
many=True,

api/catalog/api/views/media_views.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,12 +63,17 @@ def _get_request_serializer(self, request):
6363
return req_serializer
6464

6565
def get_db_results(self, results):
66-
hit_map = {hit.identifier: hit for hit in results}
67-
results = self.get_queryset().filter(identifier__in=hit_map.keys())
68-
for obj in results:
69-
obj.fields_matched = getattr(
70-
hit_map[str(obj.identifier)], "fields_matched", None
71-
)
66+
identifiers = []
67+
hits = []
68+
for hit in results:
69+
identifiers.append(hit.identifier)
70+
hits.append(hit)
71+
72+
results = list(self.get_queryset().filter(identifier__in=identifiers))
73+
results.sort(key=lambda x: identifiers.index(str(x.identifier)))
74+
for result, hit in zip(results, hits):
75+
result.fields_matched = getattr(hit, "fields_matched", None)
76+
7277
return results
7378

7479
# Standard actions

api/catalog/templates/drf-yasg/redoc.html

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,5 +12,11 @@
1212
img[alt="logo"] {
1313
padding: 20px; /* same as other sidebar items */
1414
}
15+
16+
/* Hide fields that are unstable and likely to change */
17+
td[kind="field"][title^="unstable__"],
18+
td[kind="field"][title^="unstable__"] ~ td {
19+
display: none
20+
}
1521
</style>
1622
{% endblock %}

api/test/auth_test.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,31 @@ def test_auth_rate_limit_reporting(
9696
assert res_data["verified"] is False
9797

9898

99+
@pytest.mark.django_db
100+
@pytest.mark.parametrize(
101+
"sort_dir, exp_indexed_on",
102+
[
103+
("desc", "2022-12-31"),
104+
("asc", "2022-01-01"),
105+
],
106+
)
107+
def test_sorting_authed(
108+
client, monkeypatch, test_auth_token_exchange, sort_dir, exp_indexed_on
109+
):
110+
# Prevent DB lookup for ES results because DB is empty.
111+
monkeypatch.setattr("catalog.api.views.image_views.ImageSerializer.needs_db", False)
112+
113+
time.sleep(1)
114+
token = test_auth_token_exchange["access_token"]
115+
query_params = {"unstable__sort_by": "indexed_on", "unstable__sort_dir": sort_dir}
116+
res = client.get("/v1/images/", query_params, HTTP_AUTHORIZATION=f"Bearer {token}")
117+
assert res.status_code == 200
118+
119+
res_data = res.json()
120+
indexed_on = res_data["results"][0]["indexed_on"][:10] # ``indexed_on`` is ISO.
121+
assert indexed_on == exp_indexed_on
122+
123+
99124
@pytest.mark.django_db
100125
def test_page_size_limit_unauthed(client):
101126
query_params = {"page_size": 20}

0 commit comments

Comments
 (0)