Skip to content

Commit b574e13

Browse files
authored
move search index rebuild to celery (#62)
1 parent 4e05bb7 commit b574e13

File tree

4 files changed

+159
-9
lines changed

4 files changed

+159
-9
lines changed

papermerge/search/search_indexes.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ class DocumentIndex(indexes.SearchIndex, indexes.Indexable):
1010
last_version_text = indexes.CharField()
1111
text = indexes.CharField() # alias for `last_version_text`
1212
tags = indexes.MultiValueField()
13+
highlight = indexes.CharField()
1314
breadcrumb = indexes.MultiValueField()
1415
node_type = indexes.CharField()
1516

papermerge/search/serializers.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,11 +8,7 @@ class SearchResultSerializer(serializers.Serializer):
88
text = serializers.CharField(required=False, default='')
99
title = serializers.CharField()
1010
tags = ColoredTagListSerializerField(required=False)
11-
highlight = serializers.ListField(
12-
child=serializers.CharField(),
13-
required=False,
14-
default=['']
15-
)
11+
highlight = serializers.CharField()
1612
breadcrumb = serializers.ListField(
1713
child=serializers.CharField()
1814
)

papermerge/search/signals.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,45 @@
11
from django.db import models
22
from haystack import signals
3+
from haystack.utils import get_identifier
34

45
from papermerge.core.models import DocumentVersion, Document, Folder
6+
from papermerge.search.tasks import update_index
57

68

79
class SignalProcessor(signals.BaseSignalProcessor):
810
def setup(self):
911
for klass in (DocumentVersion, Document, Folder):
10-
models.signals.post_save.connect(self.handle_save, sender=klass)
11-
models.signals.post_delete.connect(self.handle_delete, sender=klass)
12+
models.signals.post_save.connect(
13+
self.enqueue_save, sender=klass
14+
)
15+
models.signals.post_delete.connect(
16+
self.enqueue_delete, sender=klass
17+
)
1218

1319
def teardown(self):
1420
for klass in (DocumentVersion, Document, Folder):
15-
models.signals.post_save.disconnect(self.handle_save, sender=klass)
21+
models.signals.post_save.disconnect(self.enqueue_save, sender=klass)
1622
models.signals.post_delete.disconnect(
17-
self.handle_delete,
23+
self.enqueue_delete,
1824
sender=klass
1925
)
26+
27+
def enqueue_save(self, sender, instance, **kwargs):
    """Signal receiver that queues a 'save' index update for ``instance``.

    ``sender`` is accepted because the signal supplies it; it is not used.
    Any extra signal keyword arguments are forwarded to ``enqueue``.
    """
    return self.enqueue('save', instance, **kwargs)
29+
30+
def enqueue_delete(self, sender, instance, **kwargs):
    """Signal receiver that queues a 'delete' index update for ``instance``.

    ``sender`` is accepted because the signal supplies it; it is not used.
    Any extra signal keyword arguments are forwarded to ``enqueue``.
    """
    return self.enqueue('delete', instance, **kwargs)
32+
33+
def enqueue(self, action, instance, **kwargs):
    """Queue an asynchronous ``update_index`` task for ``instance``.

    ``action`` is passed through to the task unchanged (the receivers in
    this class send 'save' or 'delete'). Extra ``kwargs`` are accepted so
    signal arguments can be forwarded, but they are not used here.
    """
    # We index only Document and Folder models, however when a
    # DocumentVersion is saved/deleted we need to update its
    # associated Document, so the task is addressed to that Document.
    # NOTE(review): with action 'delete' this sends the *Document's*
    # identifier together with 'delete' -- confirm that dropping the
    # Document from the index when one of its versions is deleted is
    # the intended behaviour.
    if isinstance(instance, DocumentVersion):
        target = instance.document
    else:
        target = instance

    update_index.apply_async(kwargs={
        'action': action,
        'identifier': get_identifier(target),
    })

papermerge/search/tasks.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,127 @@
1+
import logging
2+
3+
from django.core.exceptions import ImproperlyConfigured
4+
from django.apps import apps
5+
from celery import shared_task
6+
from haystack.exceptions import NotHandled as IndexNotFoundException
7+
from haystack import connections, connection_router
8+
9+
10+
logger = logging.getLogger(__name__)
11+
12+
13+
def split_identifier(identifier):
    """
    Break down the identifier representing the instance.

    Converts 'notes.note.23' into ('notes.note', '23'). The pk is returned
    as a string, exactly as it appears after the last dot; it is not
    converted to an integer.

    Returns (None, None), after logging an error, when the identifier
    contains no dot at all.
    """
    # Split on the LAST dot only: in case Django ever handles full paths,
    # everything before it is kept together as the object path.
    object_path, dot, pk = identifier.rpartition('.')

    if not dot:
        logger.error(
            "Unable to parse object identifer '%s'. Moving on...",
            identifier,
        )
        return (None, None)

    return (object_path, pk)
29+
30+
31+
def get_instance(model_class, pk):
    """
    Fetch the instance in a standardized way.

    Looks up ``pk`` through ``model_class._default_manager`` and returns
    the object found. When the lookup raises ``DoesNotExist`` or
    ``MultipleObjectsReturned`` the problem is logged as an error and
    None is returned instead of propagating the exception.
    """
    try:
        return model_class._default_manager.get(pk=pk)
    except model_class.DoesNotExist:
        logger.error(
            "Couldn't load %s.%s.%s. Somehow it went missing?",
            model_class._meta.app_label.lower(),
            model_class._meta.object_name.lower(),
            pk,
        )
    except model_class.MultipleObjectsReturned:
        logger.error("More than one object with pk %s. Oops?", pk)
    return None
45+
46+
47+
def get_indexes(model_class):
    """
    Fetch the model's registered ``SearchIndex`` in a standardized way.

    Yields one ``(index, using)`` pair for every backend alias returned by
    ``connection_router.for_write(models=[model_class])``, where ``index``
    comes from that connection's unified index.

    Raises ``ImproperlyConfigured`` when the lookup raises haystack's
    ``NotHandled``. This is a generator, so no lookup happens -- and
    nothing can be raised -- until it is iterated.
    """
    try:
        using_backends = connection_router.for_write(
            models=[model_class]
        )
        for using in using_backends:
            index_holder = connections[using].get_unified_index()
            yield index_holder.get_index(model_class), using
    except IndexNotFoundException as exc:
        # Chain explicitly so the original lookup failure stays visible.
        raise ImproperlyConfigured(
            "Couldn't find a SearchIndex for %s." % model_class
        ) from exc
62+
63+
64+
def get_model_class(object_path):
    """
    Fetch the model's class in a standardized way.

    ``object_path`` is a dotted '<app label>.<model name>' string; the
    part after the last dot is the model name and everything before it
    is the app label.

    Raises ``ImproperlyConfigured`` when the model cannot be loaded.
    """
    bits = object_path.split('.')
    app_name = '.'.join(bits[:-1])
    classname = bits[-1]

    try:
        model_class = apps.get_model(app_name, classname)
    except LookupError as exc:
        # The app registry signals an unknown app/model by raising
        # LookupError rather than returning None; translate it so the
        # failure surfaces as the error this function is meant to raise.
        raise ImproperlyConfigured(
            "Could not load model '%s'." % object_path
        ) from exc

    # Kept for registries that report a missing model by returning None.
    if model_class is None:
        raise ImproperlyConfigured("Could not load model '%s'." %
                                   object_path)
    return model_class
77+
78+
79+
@shared_task
def update_index(
    action,
    identifier,
):
    """
    Apply a 'save' or 'delete' to every search index of one object.

    ``identifier`` is a dotted '<app>.<model>.<pk>' string. It is split
    into model path and pk, the model class is resolved, and every
    ``(index, using)`` pair from ``get_indexes`` is processed:

    * 'delete' -- ``remove_object`` is called with the identifier only,
      so it works even when the database row is already gone.
    * 'save'   -- the instance is loaded and given to ``update_object``.

    Exceptions raised by ``remove_object`` / ``update_object`` are logged
    with their traceback and swallowed, so the remaining indexes are
    still processed.

    Raises ``ValueError`` when the identifier cannot be parsed, when the
    instance for a 'save' cannot be loaded, or when ``action`` is neither
    'save' nor 'delete'.
    """
    object_path, pk = split_identifier(identifier)
    if object_path is None or pk is None:
        msg = "Couldn't handle object with identifier %s" % identifier
        logger.error(msg)
        raise ValueError(msg)

    # Then get the model class for the object path
    model_class = get_model_class(object_path)
    for current_index, using in get_indexes(model_class):
        index_class = current_index.__class__
        current_index_name = ".".join([index_class.__module__,
                                       index_class.__name__])

        if action == 'delete':
            # If the object is gone, we'll use just the identifier
            # against the index.
            try:
                current_index.remove_object(identifier, using=using)
            except Exception:
                # logger.exception attaches the active traceback itself;
                # the message only needs to say what was being attempted.
                logger.exception("Failed deleting '%s' (with %s)",
                                 identifier, current_index_name)
            else:
                logger.debug("Deleted '%s' (with %s)",
                             identifier, current_index_name)
        elif action == 'save':
            # and the instance of the model class with the pk
            instance = get_instance(model_class, pk)
            if instance is None:
                logger.debug("Failed updating '%s' (with %s)",
                             identifier, current_index_name)
                raise ValueError("Couldn't load object '%s'" % identifier)

            # Call the appropriate handler of the current index and
            # handle exception if necessary
            try:
                current_index.update_object(instance, using=using)
            except Exception:
                logger.exception("Failed updating '%s' (with %s)",
                                 identifier, current_index_name)
            else:
                logger.debug("Updated '%s' (with %s)",
                             identifier, current_index_name)
        else:
            logger.error("Unrecognized action '%s'. Moving on...", action)
            raise ValueError("Unrecognized action %s" % action)

0 commit comments

Comments
 (0)