3
3
sphinx.search.nl
4
4
~~~~~~~~~~~~~~~~
5
5
6
- Danish search language: includes the JS porter stemmer.
6
+ Dutch search language: includes the JS porter stemmer.
7
7
8
8
:copyright: Copyright 2007-2013 by the Sphinx team, see AUTHORS.
9
9
:license: BSD, see LICENSE for details.
13
13
14
14
import snowballstemmer
15
15
16
- danish_stopwords = parse_stop_word (u'''
17
- | source: http://snowball.tartarus.org/algorithms/danish/stop.txt
18
- og | and
19
- i | in
20
- jeg | I
21
- det | that (dem. pronoun)/it (pers. pronoun)
22
- at | that (in front of a sentence)/to (with infinitive)
23
- en | a/an
24
- den | it (pers. pronoun)/that (dem. pronoun)
25
- til | to/at/for/until/against/by/of/into, more
26
- er | present tense of "to be"
27
- som | who, as
28
- på | on/upon/in/on/at/to/after/of/with/for, on
29
- de | they
30
- med | with/by/in, along
31
- han | he
32
- af | of/by/from/off/for/in/with/on, off
33
- for | at/for/to/from/by/of/ago, in front/before, because
34
- ikke | not
35
- der | who/which, there/those
36
- var | past tense of "to be"
37
- mig | me/myself
38
- sig | oneself/himself/herself/itself/themselves
39
- men | but
40
- et | a/an/one, one (number), someone/somebody/one
41
- har | present tense of "to have"
42
- om | round/about/for/in/a, about/around/down, if
43
- vi | we
44
- min | my
45
- havde | past tense of "to have"
46
- ham | him
47
- hun | she
48
- nu | now
49
- over | over/above/across/by/beyond/past/on/about, over/past
50
- da | then, when/as/since
51
- fra | from/off/since, off, since
52
- du | you
53
- ud | out
54
- sin | his/her/its/one's
55
- dem | them
56
- os | us/ourselves
57
- op | up
58
- man | you/one
59
- hans | his
60
- hvor | where
61
- eller | or
62
- hvad | what
63
- skal | must/shall etc.
64
- selv | myself/youself/herself/ourselves etc., even
65
- her | here
66
- alle | all/everyone/everybody etc.
67
- vil | will (verb)
68
- blev | past tense of "to stay/to remain/to get/to become"
69
- kunne | could
70
- ind | in
71
- når | when
72
- være | present tense of "to be"
73
- dog | however/yet/after all
74
- noget | something
75
- ville | would
76
- jo | you know/you see (adv), yes
77
- deres | their/theirs
78
- efter | after/behind/according to/for/by/from, later/afterwards
79
- ned | down
80
- skulle | should
81
- denne | this
82
- end | than
83
- dette | this
84
- mit | my/mine
85
- også | also
86
- under | under/beneath/below/during, below/underneath
87
- have | have
88
- dig | you
89
- anden | other
90
- hende | her
91
- mine | my
92
- alt | everything
93
- meget | much/very, plenty of
94
- sit | his, her, its, one's
95
- sine | his, her, its, one's
96
- vor | our
97
- mod | against
98
- disse | these
99
- hvis | if
100
- din | your/yours
101
- nogle | some
102
- hos | by/at
103
- blive | be/become
104
- mange | many
105
- ad | by/through
106
- bliver | present tense of "to be/to become"
107
- hendes | her/hers
108
- været | be
109
- thi | for (conj)
110
- jer | you
111
- sådan | such, like this/like that
16
+ dutch_stopwords = parse_stop_word (u'''
17
+ | source: http://snowball.tartarus.org/algorithms/dutch/stop.txt
18
+ de | the
19
+ en | and
20
+ van | of, from
21
+ ik | I, the ego
22
+ te | (1) chez, at etc, (2) to, (3) too
23
+ dat | that, which
24
+ die | that, those, who, which
25
+ in | in, inside
26
+ een | a, an, one
27
+ hij | he
28
+ het | the, it
29
+ niet | not, nothing, naught
30
+ zijn | (1) to be, being, (2) his, one's, its
31
+ is | is
32
+ was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
33
+ op | on, upon, at, in, up, used up
34
+ aan | on, upon, to (as dative)
35
+ met | with, by
36
+ als | like, such as, when
37
+ voor | (1) before, in front of, (2) furrow
38
+ had | had, past tense all persons sing. of 'hebben' (have)
39
+ er | there
40
+ maar | but, only
41
+ om | round, about, for etc
42
+ hem | him
43
+ dan | then
44
+ zou | should/would, past tense all persons sing. of 'zullen'
45
+ of | or, whether, if
46
+ wat | what, something, anything
47
+ mijn | possessive and noun 'mine'
48
+ men | people, 'one'
49
+ dit | this
50
+ zo | so, thus, in this way
51
+ door | through by
52
+ over | over, across
53
+ ze | she, her, they, them
54
+ zich | oneself
55
+ bij | (1) a bee, (2) by, near, at
56
+ ook | also, too
57
+ tot | till, until
58
+ je | you
59
+ mij | me
60
+ uit | out of, from
61
+ der | Old Dutch form of 'van der' still found in surnames
62
+ daar | (1) there, (2) because
63
+ haar | (1) her, their, them, (2) hair
64
+ naar | (1) unpleasant, unwell etc, (2) towards, (3) as
65
+ heb | present first person sing. of 'to have'
66
+ hoe | how, why
67
+ heeft | present third person sing. of 'to have'
68
+ hebben | 'to have' and various parts thereof
69
+ deze | this
70
+ u | you
71
+ want | (1) for, (2) mitten, (3) rigging
72
+ nog | yet, still
73
+ zal | 'shall', first and third person sing. of verb 'zullen' (will)
74
+ me | me
75
+ zij | she, they
76
+ nu | now
77
+ ge | 'thou', still used in Belgium and south Netherlands
78
+ geen | none
79
+ omdat | because
80
+ iets | something, somewhat
81
+ worden | to become, grow, get
82
+ toch | yet, still
83
+ al | all, every, each
84
+ waren | (1) 'were' (2) to wander, (3) wares, (3)
85
+ veel | much, many
86
+ meer | (1) more, (2) lake
87
+ doen | to do, to make
88
+ toen | then, when
89
+ moet | noun 'spot/mote' and present form of 'to must'
90
+ ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
91
+ zonder | without
92
+ kan | noun 'can' and present form of 'to be able'
93
+ hun | their, them
94
+ dus | so, consequently
95
+ alles | all, everything, anything
96
+ onder | under, beneath
97
+ ja | yes, of course
98
+ eens | once, one day
99
+ hier | here
100
+ wie | who
101
+ werd | imperfect third person sing. of 'become'
102
+ altijd | always
103
+ doch | yet, but etc
104
+ wordt | present third person sing. of 'become'
105
+ wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
106
+ kunnen | to be able
107
+ ons | us/our
108
+ zelf | self
109
+ tegen | against, towards, at
110
+ na | after, near
111
+ reeds | already
112
+ wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
113
+ kon | could; past tense of 'to be able'
114
+ niets | nothing
115
+ uw | your
116
+ iemand | somebody
117
+ geweest | been; past participle of 'be'
118
+ andere | other
112
119
''' )
113
120
114
121
js_stemmer = u"""
@@ -122,7 +129,7 @@ class SearchDutch(SearchLanguage):
122
129
language_name = 'Dutch'
123
130
js_stemmer_rawcode = 'dutch-stemmer.js'
124
131
js_stemmer_code = js_stemmer
125
- stopwords = danish_stopwords
132
+ stopwords = dutch_stopwords
126
133
127
134
def init (self , options ):
128
135
self .stemmer = snowballstemmer .stemmer ('dutch' )
0 commit comments