Skip to content

Commit def5d60

Browse files
committed
Add ability to limit results retrieved from Lucene
Allows the records (rids) retrieved from the Lucene search to be limited, where it is known that the remainder of the query does not require the entire set to be loaded. This is useful when the underlying Lucene query returns many results, but the query overall is only intended to return a small number of them (usually in the ranked order from Lucene). This mode is opt-in: it is enabled by providing a "limit" metadata element to the Lucene search function. A value of "select" uses the skip/limit in the SELECT statement to determine the max hits, and an integral value specifies an explicit max hits (e.g. for a safety margin).
1 parent aaf84ba commit def5d60

File tree

8 files changed

+146
-4
lines changed

8 files changed

+146
-4
lines changed

lucene/src/main/java/com/orientechnologies/lucene/collections/OLuceneResultSet.java

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import com.orientechnologies.lucene.engine.OLuceneIndexEngineAbstract;
2525
import com.orientechnologies.lucene.engine.OLuceneIndexEngineUtils;
2626
import com.orientechnologies.lucene.exception.OLuceneIndexException;
27+
import com.orientechnologies.lucene.functions.OLuceneFunctionsUtils;
2728
import com.orientechnologies.lucene.query.OLuceneQueryContext;
2829
import com.orientechnologies.lucene.tx.OLuceneTxChangesAbstract;
2930
import com.orientechnologies.orient.core.command.OCommandContext;
@@ -67,6 +68,7 @@ public class OLuceneResultSet implements Set<OIdentifiable> {
6768
private int maxNumFragments;
6869
private TopDocs topDocs;
6970
private long deletedMatchCount = 0;
71+
private long returnedHits = 0;
7072

7173
private boolean closed = false;
7274

@@ -99,6 +101,10 @@ public OLuceneResultSet(
99101
highlighter = new Highlighter(formatter, scorer);
100102

101103
maxNumFragments = (int) Optional.ofNullable(highlight.get("maxNumFragments")).orElse(2);
104+
105+
final Long queryMaxHits = OLuceneFunctionsUtils.getResultLimit(queryContext.getContext());
106+
long maxHits = (queryMaxHits == null) ? Integer.MAX_VALUE : queryMaxHits;
107+
this.returnedHits = Math.min(maxHits, topDocs.totalHits - deletedMatchCount);
102108
}
103109

104110
protected void fetchFirstBatch() {
@@ -180,7 +186,7 @@ protected long calculateDeletedMatch() {
180186

181187
@Override
182188
public int size() {
183-
return (int) Math.max(0, topDocs.totalHits - deletedMatchCount);
189+
return (int) Math.max(0, this.returnedHits);
184190
}
185191

186192
@Override
@@ -201,12 +207,15 @@ public OLuceneResultSetIteratorTx() {
201207
localIndex = 0;
202208
scoreDocs = topDocs.scoreDocs;
203209
OLuceneIndexEngineUtils.sendTotalHits(
204-
indexName, queryContext.getContext(), topDocs.totalHits - deletedMatchCount);
210+
indexName,
211+
queryContext.getContext(),
212+
topDocs.totalHits - deletedMatchCount,
213+
returnedHits);
205214
}
206215

207216
@Override
208217
public boolean hasNext() {
209-
final boolean hasNext = index < (totalHits - deletedMatchCount);
218+
final boolean hasNext = (index < returnedHits);
210219
if (!hasNext && !closed) {
211220
final IndexSearcher searcher = queryContext.getSearcher();
212221
engine.release(searcher);

lucene/src/main/java/com/orientechnologies/lucene/engine/OLuceneIndexEngineUtils.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@
2727
/** Created by frank on 04/05/2017. */
2828
public class OLuceneIndexEngineUtils {
2929

30-
public static void sendTotalHits(String indexName, OCommandContext context, long totalHits) {
30+
public static void sendTotalHits(
31+
String indexName, OCommandContext context, long totalHits, long returnedHits) {
3132
if (context != null) {
3233

3334
if (context.getVariable("totalHits") == null) {
@@ -36,6 +37,12 @@ public static void sendTotalHits(String indexName, OCommandContext context, long
3637
context.setVariable("totalHits", null);
3738
}
3839
context.setVariable((indexName + ".totalHits").replace(".", "_"), totalHits);
40+
if (context.getVariable("returnedHits") == null) {
41+
context.setVariable("returnedHits", returnedHits);
42+
} else {
43+
context.setVariable("returnedHits", null);
44+
}
45+
context.setVariable((indexName + ".returnedHits").replace(".", "_"), returnedHits);
3946
}
4047
}
4148

lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneFunctionsUtils.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,18 @@
55
import com.orientechnologies.orient.core.db.ODatabaseDocumentInternal;
66
import com.orientechnologies.orient.core.db.record.OIdentifiable;
77
import com.orientechnologies.orient.core.metadata.OMetadataInternal;
8+
import com.orientechnologies.orient.core.record.impl.ODocument;
89
import com.orientechnologies.orient.core.sql.parser.OExpression;
10+
import com.orientechnologies.orient.core.sql.parser.OFromClause;
11+
import com.orientechnologies.orient.core.sql.parser.OSelectStatement;
912
import org.apache.lucene.index.memory.MemoryIndex;
1013

1114
/** Created by frank on 13/02/2017. */
1215
public class OLuceneFunctionsUtils {
1316
public static final String MEMORY_INDEX = "_memoryIndex";
1417

18+
private static final String MAX_HITS = "luceneMaxHits";
19+
1520
protected static OLuceneFullTextIndex searchForIndex(OExpression[] args, OCommandContext ctx) {
1621
final String indexName = (String) args[0].execute((OIdentifiable) null, ctx);
1722
return getLuceneFullTextIndex(ctx, indexName);
@@ -57,4 +62,29 @@ public static String doubleEscape(final String s) {
5762
}
5863
return sb.toString();
5964
}
65+
66+
public static void configureResultLimit(
67+
OFromClause target, OCommandContext ctx, ODocument metadata) {
68+
Object limitType = metadata.getProperty("limit");
69+
70+
long maxHits = 0;
71+
if ("select".equals(limitType) && target.jjtGetParent() instanceof OSelectStatement) {
72+
OSelectStatement select = (OSelectStatement) target.jjtGetParent();
73+
if (select.getLimit() != null) {
74+
maxHits += ((Number) select.getLimit().getValue(ctx)).longValue();
75+
}
76+
if (select.getSkip() != null) {
77+
maxHits += ((Number) select.getSkip().getValue(ctx)).longValue();
78+
}
79+
} else if (limitType instanceof Number) {
80+
maxHits = ((Number) limitType).longValue();
81+
}
82+
if (maxHits != 0) {
83+
ctx.setVariable(MAX_HITS, maxHits);
84+
}
85+
}
86+
87+
public static Long getResultLimit(OCommandContext ctx) {
88+
return (Long) ctx.getVariable(OLuceneFunctionsUtils.MAX_HITS);
89+
}
6090
}

lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchMoreLikeThisFunction.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ public Iterable<OIdentifiable> searchFromTarget(
8888
OExpression expression = args[0];
8989

9090
ODocument metadata = parseMetadata(args);
91+
OLuceneFunctionsUtils.configureResultLimit(target, ctx, metadata);
9192

9293
List<String> ridsAsString = parseRids(ctx, expression);
9394

lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchOnClassFunction.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ public Iterable<OIdentifiable> searchFromTarget(
130130
if (index != null) {
131131

132132
ODocument metadata = getMetadata(args, ctx);
133+
OLuceneFunctionsUtils.configureResultLimit(target, ctx, metadata);
133134

134135
List<OIdentifiable> luceneResultSet;
135136
try (Stream<ORID> rids =

lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchOnFieldsFunction.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ public Iterable<OIdentifiable> searchFromTarget(
117117
if (index != null) {
118118

119119
ODocument meta = getMetadata(args, ctx);
120+
OLuceneFunctionsUtils.configureResultLimit(target, ctx, meta);
120121
Set<OIdentifiable> luceneResultSet;
121122
try (Stream<ORID> rids =
122123
index

lucene/src/main/java/com/orientechnologies/lucene/functions/OLuceneSearchOnIndexFunction.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,7 @@ public Iterable<OIdentifiable> searchFromTarget(
123123
if (index != null && query != null) {
124124

125125
ODocument meta = getMetadata(args, ctx);
126+
OLuceneFunctionsUtils.configureResultLimit(target, ctx, meta);
126127

127128
List<OIdentifiable> luceneResultSet;
128129
try (Stream<ORID> rids =
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/*
2+
*
3+
* * Copyright 2010-2016 OrientDB LTD (http://orientdb.com)
4+
* *
5+
* * Licensed under the Apache License, Version 2.0 (the "License");
6+
* * you may not use this file except in compliance with the License.
7+
* * You may obtain a copy of the License at
8+
* *
9+
* * http://www.apache.org/licenses/LICENSE-2.0
10+
* *
11+
* * Unless required by applicable law or agreed to in writing, software
12+
* * distributed under the License is distributed on an "AS IS" BASIS,
13+
* * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* * See the License for the specific language governing permissions and
15+
* * limitations under the License.
16+
*
17+
*/
18+
19+
package com.orientechnologies.lucene.tests;
20+
21+
import static org.assertj.core.api.Assertions.assertThat;
22+
23+
import com.orientechnologies.orient.core.sql.executor.OResult;
24+
import com.orientechnologies.orient.core.sql.executor.OResultSet;
25+
import java.io.InputStream;
26+
import java.util.List;
27+
import java.util.stream.Collectors;
28+
import org.junit.Before;
29+
import org.junit.Test;
30+
31+
public class OLuceneLimitResultsTest extends OLuceneBaseTest {
32+
33+
@Before
34+
public void init() {
35+
InputStream stream = ClassLoader.getSystemResourceAsStream("testLuceneIndex.sql");
36+
37+
db.execute("sql", getScriptFromStream(stream));
38+
39+
db.command("create index Song.title on Song (title) FULLTEXT ENGINE LUCENE");
40+
}
41+
42+
private void checkSongTitleHits(
43+
String query, int expectedResultSetSize, int expectedTotalHits, int expectedReturnedHits) {
44+
OResultSet docs = db.query(query);
45+
46+
List<OResult> results = docs.stream().collect(Collectors.toList());
47+
assertThat(results).hasSize(expectedResultSetSize);
48+
49+
OResult doc = results.get(0);
50+
System.out.println("doc.toElement().toJSON() = " + doc.toElement().toJSON());
51+
52+
assertThat(doc.<Long>getProperty("$totalHits")).isEqualTo(expectedTotalHits);
53+
assertThat(doc.<Long>getProperty("$Song_title_totalHits")).isEqualTo(expectedTotalHits);
54+
assertThat(doc.<Long>getProperty("$returnedHits")).isEqualTo(expectedReturnedHits);
55+
assertThat(doc.<Long>getProperty("$Song_title_returnedHits")).isEqualTo(expectedReturnedHits);
56+
docs.close();
57+
}
58+
59+
@Test
60+
public void testLimitSelect() {
61+
checkSongTitleHits(
62+
"select *,$totalHits,$Song_title_totalHits,$returnedHits,$Song_title_returnedHits "
63+
+ "from Song where search_class('title:man', {\"limit\":\"select\"})= true limit 1",
64+
1,
65+
14,
66+
1);
67+
68+
checkSongTitleHits(
69+
"select *,$totalHits,$Song_title_totalHits,$returnedHits,$Song_title_returnedHits "
70+
+ "from Song where search_class('title:man', {\"limit\":\"select\"})= true skip 5 limit 5",
71+
5,
72+
14,
73+
10);
74+
}
75+
76+
@Test
77+
public void testLimitByNumber() {
78+
checkSongTitleHits(
79+
"select *,$totalHits,$Song_title_totalHits,$returnedHits,$Song_title_returnedHits from Song "
80+
+ "where search_class('title:man', {\"limit\": 5})= true limit 1",
81+
1,
82+
14,
83+
5);
84+
85+
checkSongTitleHits(
86+
"select *,$totalHits,$Song_title_totalHits,$returnedHits,$Song_title_returnedHits from Song "
87+
+ "where search_class('title:man', {\"limit\": 5})= true limit 10",
88+
5,
89+
14,
90+
5);
91+
}
92+
}

0 commit comments

Comments
 (0)