Skip to content

Commit d4f1ace

Browse files
committed
sk: spolocnost & stuffs
1 parent 3bdaa03 commit d4f1ace

File tree

4 files changed

+48
-0
lines changed

4 files changed

+48
-0
lines changed

src/main/java/cz/monitora/elasticsearch/analyzer/slovak/SlovakStemmer.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,23 @@ private int removePrefixes(char[] s, int len) {
9898

9999
@SuppressWarnings({ "NPathComplexity", "CyclomaticComplexity" })
100100
private int removeCase(char[] s, int len) {
101+
if (len >= 9) {
102+
if (endsWith(s, len, "osti") || endsWith(s, len, "ostí")) {
103+
s[len - 2] = 'ť';
104+
return len - 1;
105+
}
106+
if (endsWith(s, len, "osťou")) {
107+
return len - 2;
108+
}
109+
if (endsWith(s, len, "osťami")) {
110+
return len - 3;
111+
}
112+
if (endsWith(s, len, "ostiach")) {
113+
s[len - 5] = 'ť';
114+
return len - 4;
115+
}
116+
}
117+
101118
if (len > 7 && endsWith(s, len, "atoch")) {
102119
return len - 5;
103120
}

src/main/java/cz/monitora/elasticsearch/analyzer/slovak/SlovakStemmerASCIIFold.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,21 @@ private int removePrefixes(char[] s, int len) {
9898

9999
@SuppressWarnings({ "NPathComplexity", "CyclomaticComplexity" })
100100
private int removeCase(char[] s, int len) {
101+
if (len >= 9) {
102+
if (endsWith(s, len, "osti")) {
103+
return len - 1;
104+
}
105+
if (endsWith(s, len, "ostou")) {
106+
return len - 2;
107+
}
108+
if (endsWith(s, len, "ostami")) {
109+
return len - 3;
110+
}
111+
if (endsWith(s, len, "ostiach")) {
112+
return len - 4;
113+
}
114+
}
115+
101116
if (len > 7 && endsWith(s, len, "atoch")) {
102117
return len - 5;
103118
}

src/test/java/cz/monitora/elasticsearch/analyzer/slovak/SlovakStemmerASCIIFoldTest.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,14 @@ public class SlovakStemmerASCIIFoldTest {
5050
"lomnica, lomnic",
5151
"lomnice, lomnic",
5252
"lomniciam, lomnic",
53+
54+
// ASCII-folded "-ost" noun declensions (-osti, -ostou, -ostami, -ostiach) all stem to "-ost"
55+
"spolocnost, spolocnost",
56+
"spolocnosti, spolocnost",
57+
"spolocnostou, spolocnost",
58+
"spolocnostami, spolocnost",
59+
"spolocnosti, spolocnost",
60+
"spolocnostiach, spolocnost",
5361
})
5462
public void test_stem(String val, String exp) {
5563
final SlovakStemmerASCIIFold stemmer = new SlovakStemmerASCIIFold();

src/test/java/cz/monitora/elasticsearch/analyzer/slovak/SlovakStemmerTest.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,14 @@ public class SlovakStemmerTest {
5050
"lomnica, lomnic",
5151
"lomnice, lomnic",
5252
"lomniciam, lomnic",
53+
54+
// "-osť" noun declensions (-osti, -osťou, -osťami, -ostí, -ostiach) all stem to "-osť"
55+
"spoločnosť, spoločnosť",
56+
"spoločnosti, spoločnosť",
57+
"spoločnosťou, spoločnosť",
58+
"spoločnosťami, spoločnosť",
59+
"spoločností, spoločnosť",
60+
"spoločnostiach, spoločnosť",
5361
})
5462
public void test_stem(String val, String exp) {
5563
final SlovakStemmer stemmer = new SlovakStemmer();

0 commit comments

Comments
 (0)