Skip to content

Commit 13ab201

Browse files
authored
Adds support for xapian, whoosh, elastic and solr (all via django-haystack). Fixes ciur/papermerge#473
1 parent 6fc81ef commit 13ab201

29 files changed

+413
-545
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,5 @@ queue/
3838
celerybeat-schedule
3939
launch.json
4040
.vscode
41+
xapian_index
42+
whoosh_index

docker/dev/config/settings.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,3 +28,31 @@
2828
LOCALE_PATHS = (
2929
PROJ_ROOT / Path('papermerge'),
3030
)

# Search backend wiring: depending on the engine named in the [search]
# config section, give the default Haystack connection either a filesystem
# PATH or a server URL.
search_engine = config.get('search', 'engine', default='xapian')

# Engines that receive a PATH, mapped to the directory name used under
# PROJ_ROOT when the [search] section does not set 'path'.
_LOCAL_INDEX_DIRS = {
    'xapian': 'xapian_index',
    'whoosh': 'whoosh_index',
}
_PATH_ENGINES = tuple(_LOCAL_INDEX_DIRS)

# Engines that receive a URL instead of a PATH.
_URL_ENGINES = (
    'es7',
    'es',
    'elasticsearch7',
    'elasticsearch',
    'elastic',
    'elastic7',
    'solr',
)

if search_engine in _PATH_ENGINES:
    _fallback_path = os.path.join(
        PROJ_ROOT,
        _LOCAL_INDEX_DIRS[search_engine]
    )
    HAYSTACK_CONNECTIONS['default']['PATH'] = config.get(
        'search',
        'path',
        default=_fallback_path
    )
elif search_engine in _URL_ENGINES:
    # No default is passed here: the value comes from the configuration.
    HAYSTACK_CONNECTIONS['default']['URL'] = config.get('search', 'url')

docker/dev/xapian_install.sh

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
#!/usr/bin/env bash
#
# Build Xapian core and its Python bindings from source and install them
# into the active virtualenv (--prefix=$VIRTUAL_ENV).
#
# Usage: xapian_install.sh VERSION
#   VERSION  Xapian version to download and build (e.g. 1.2.19)
#
# Requires VIRTUAL_ENV to be set. Sources are downloaded and unpacked
# under $VIRTUAL_ENV/packages.

# Stop at the first failing command, unset variable or failed pipe stage,
# so a broken download or build is never followed by later steps.
set -euo pipefail

if [ "$#" -lt 1 ] || [ -z "$1" ]; then
    echo "Usage: $0 VERSION (e.g. 1.2.19)" >&2
    exit 1
fi

if [ -z "${VIRTUAL_ENV:-}" ]; then
    echo "VIRTUAL_ENV is not set; activate a virtualenv first." >&2
    exit 1
fi

# first argument of the script is Xapian version (e.g. 1.2.19)
VERSION=$1

PACKAGES_DIR="$VIRTUAL_ENV/packages"
CORE="xapian-core-$VERSION"
BINDINGS="xapian-bindings-$VERSION"
BASE_URL="https://oligarchy.co.uk/xapian/$VERSION"

# prepare
# -p: re-running the script must not fail on an already existing directory
mkdir -p "$PACKAGES_DIR"
cd "$PACKAGES_DIR"

# download
# -f: fail on HTTP errors instead of saving the error page as a tarball
echo "Downloading source..."
curl -f -O "$BASE_URL/${CORE}.tar.xz"
curl -f -O "$BASE_URL/${BINDINGS}.tar.xz"

# extract
echo "Extracting source..."
tar xf "${CORE}.tar.xz"
tar xf "${BINDINGS}.tar.xz"

# install
# Each step is its own command: under `set -e` a failure inside an
# `a && b && c` list would not abort the script.
echo "Installing Xapian-core..."
cd "$PACKAGES_DIR/${CORE}"
./configure --prefix="$VIRTUAL_ENV"
make
make install

# Major version of the interpreter on PATH selects the bindings flag.
PYV=$(python -c "import sys; sys.stdout.write(str(sys.version_info[0]))")

if [ "$PYV" = "2" ]; then
    PYTHON_FLAG=--with-python
else
    PYTHON_FLAG=--with-python3
fi

echo "Installing Xapian-bindings..."
cd "$PACKAGES_DIR/${BINDINGS}"
./configure --prefix="$VIRTUAL_ENV" "$PYTHON_FLAG" \
    XAPIAN_CONFIG="$VIRTUAL_ENV/bin/xapian-config"
make
make install

# clean
# NOTE(review): cleanup was left disabled in the original script; the
# unpacked sources therefore stay in place. Enable if that is not intended.
# rm -rf "$PACKAGES_DIR"

# test
python -c "import xapian"

docker/prod/config/settings.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,31 @@
2424
LOCALE_PATHS = (
2525
PROJ_ROOT / Path('papermerge'),
2626
)

# Search backend wiring: depending on the engine named in the [search]
# config section, give the default Haystack connection either a filesystem
# PATH or a server URL.
search_engine = config.get('search', 'engine', default='xapian')

# Engines that receive a PATH, mapped to the directory name used under
# PROJ_ROOT when the [search] section does not set 'path'.
_LOCAL_INDEX_DIRS = {
    'xapian': 'xapian_index',
    'whoosh': 'whoosh_index',
}
_PATH_ENGINES = tuple(_LOCAL_INDEX_DIRS)

# Engines that receive a URL instead of a PATH.
_URL_ENGINES = (
    'es7',
    'es',
    'elasticsearch7',
    'elasticsearch',
    'elastic',
    'elastic7',
    'solr',
)

if search_engine in _PATH_ENGINES:
    _fallback_path = os.path.join(
        PROJ_ROOT,
        _LOCAL_INDEX_DIRS[search_engine]
    )
    HAYSTACK_CONNECTIONS['default']['PATH'] = config.get(
        'search',
        'path',
        default=_fallback_path
    )
elif search_engine in _URL_ENGINES:
    # No default is passed here: the value comes from the configuration.
    HAYSTACK_CONNECTIONS['default']['URL'] = config.get('search', 'url')
File renamed without changes.

docker/services.yml

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,9 @@ services:
1616
- 6379:6379
1717
volumes:
1818
- redisdata:/data
19-
es:
20-
image: docker.elastic.co/elasticsearch/elasticsearch:7.16.2
21-
environment:
22-
- discovery.type=single-node
23-
- "ES_JAVA_OPTS=-Xms512m -Xmx512m"
24-
ports:
25-
- 9200:9200
26-
- 9300:9300
27-
volumes:
28-
- es-data:/usr/share/elasticsearch/data
2919
volumes:
3020
postgres_data2:
3121
redisdata:
32-
es-data:
3322

3423
networks:
3524
my_local:

papermerge/conf/settings.py

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,6 @@
1919
redis_host = config.get('redis', 'host', default='127.0.0.1')
2020
redis_port = config.get('redis', 'port', default=6379)
2121

22-
es_hosts = config.get('elasticsearch', 'hosts', default=None)
23-
es_port = config.get('elasticsearch', 'port', default=None)
24-
2522
CELERY_BROKER_URL = f"redis://{redis_host}:{redis_port}/0"
2623
CELERY_WORKER_HIJACK_ROOT_LOGGER = False
2724
CELERY_ACCEPT_CONTENT = ['json']
@@ -46,17 +43,6 @@
4643

4744
DEFAULT_AUTO_FIELD = 'django.db.models.AutoField'
4845

49-
if es_hosts and es_port:
50-
ELASTICSEARCH_DSL = {
51-
'default': {
52-
'hosts': config.get(
53-
'elasticsearch',
54-
'hosts',
55-
default=f"{es_hosts}:{es_port}"
56-
)
57-
},
58-
}
59-
6046
# Custom signal processor handles connection errors (to elasticsearch)
6147
# and reports them as warnings. This way, even when no connection to ES
6248
# is available, documents, folders, pages etc can still be used
@@ -140,6 +126,7 @@
140126
'django.contrib.messages',
141127
'django.contrib.staticfiles',
142128
'papermerge.core.apps.CoreConfig',
129+
'papermerge.search.apps.SearchConfig',
143130
'papermerge.notifications.apps.NotificationsConfig',
144131
'django.contrib.contenttypes',
145132
'dynamic_preferences',
@@ -148,16 +135,12 @@
148135
'polymorphic',
149136
'mptt',
150137
'channels',
138+
'haystack',
151139
]
152140

153141
# the search app ('papermerge.search.apps.SearchConfig') and 'haystack'
154142
# are listed in INSTALLED_APPS above unconditionally; they no longer depend
155143
# on PAPERMERGE_ELASTICSEARCH_HOSTS / PAPERMERGE_ELASTICSEARCH_PORT
156-
if es_hosts and es_port:
157-
INSTALLED_APPS.extend([
158-
'papermerge.search.apps.SearchConfig',
159-
'django_elasticsearch_dsl'
160-
])
161144

162145
MIDDLEWARE = [
163146
'corsheaders.middleware.CorsMiddleware',
@@ -347,3 +330,26 @@
347330
'VERSION': '2.1.0',
348331
'APPEND_COMPONENTS': JSONAPI_COMPONENTS
349332
}

# Dotted path of the Haystack Elasticsearch 7 engine. Every elastic*/es*
# alias in SEARCH_ENGINES_MAP resolves to this single backend.
_ELASTICSEARCH7_ENGINE = (
    'haystack.backends.elasticsearch7_backend.Elasticsearch7SearchEngine'
)

# Maps the value of the [search] 'engine' setting to the dotted path of the
# Haystack engine class used for the default connection.
SEARCH_ENGINES_MAP = {
    'elastic': _ELASTICSEARCH7_ENGINE,
    'elastic7': _ELASTICSEARCH7_ENGINE,
    'elasticsearch7': _ELASTICSEARCH7_ENGINE,
    'elasticsearch': _ELASTICSEARCH7_ENGINE,
    'es7': _ELASTICSEARCH7_ENGINE,
    'es': _ELASTICSEARCH7_ENGINE,
    'solr': 'haystack.backends.solr_backend.SolrEngine',
    'whoosh': 'haystack.backends.whoosh_backend.WhooshEngine',
    'xapian': 'xapian_backend.XapianEngine',
}

HAYSTACK_DOCUMENT_FIELD = 'indexed_content'

search_engine = config.get('search', 'engine', default='xapian')

# Fail early with an actionable message. A plain dictionary lookup would
# raise KeyError('<name>') without saying which setting is wrong or which
# values are accepted. The exception type stays KeyError, as before.
if search_engine not in SEARCH_ENGINES_MAP:
    raise KeyError(
        f"Unsupported search engine {search_engine!r} in [search] 'engine'; "
        f"expected one of: {', '.join(sorted(SEARCH_ENGINES_MAP))}"
    )

HAYSTACK_CONNECTIONS = {
    'default': {
        'ENGINE': SEARCH_ENGINES_MAP[search_engine],
    },
}

papermerge/search/constants.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
TAGS_OP_ALL = 'all'
2+
TAGS_OP_ANY = 'any'

0 commit comments

Comments
 (0)