Skip to content

Commit a6d7d79

Browse files
committed
HSEARCH-3661 Use reducible results in terms collectors
1 parent 409642c commit a6d7d79

24 files changed

+450
-346
lines changed

backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java

Lines changed: 6 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -5,34 +5,26 @@
55
package org.hibernate.search.backend.lucene.lowlevel.collector.impl;
66

77
import java.io.IOException;
8-
import java.util.Comparator;
9-
import java.util.LinkedList;
10-
import java.util.List;
118

129
import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues;
1310
import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource;
14-
import org.hibernate.search.backend.lucene.types.aggregation.impl.BucketOrder;
15-
import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket;
1611

1712
import com.carrotsearch.hppc.LongHashSet;
1813
import com.carrotsearch.hppc.LongObjectHashMap;
1914
import com.carrotsearch.hppc.cursors.LongObjectCursor;
20-
import com.carrotsearch.hppc.procedures.LongObjectProcedure;
2115

2216
import org.apache.lucene.index.LeafReaderContext;
2317
import org.apache.lucene.search.Collector;
2418
import org.apache.lucene.search.CollectorManager;
25-
import org.apache.lucene.search.LeafCollector;
2619
import org.apache.lucene.search.ScoreMode;
2720
import org.apache.lucene.search.SimpleCollector;
28-
import org.apache.lucene.util.PriorityQueue;
2921

3022
public class NumericTermsCollector extends SimpleCollector implements BaseTermsCollector {
3123

3224
private final LongHashSet uniqueLeafIndicesForDocument = new LongHashSet();
3325

3426
private final LongMultiValuesSource valuesSource;
35-
private final LongObjectHashMap<SegmentValue> segmentValues = new LongObjectHashMap<>();
27+
private final LongObjectHashMap<TermCollectorSegmentValue> segmentValues = new LongObjectHashMap<>();
3628

3729
private final CollectorKey<?, ?>[] keys;
3830
private final CollectorManager<Collector, ?>[] managers;
@@ -57,9 +49,9 @@ public void collect(int doc) throws IOException {
5749
// Each document must be counted only once per range.
5850
long value = values.nextValue();
5951
if ( uniqueLeafIndicesForDocument.add( value ) ) {
60-
SegmentValue segmentValue = segmentValues.get( value );
52+
TermCollectorSegmentValue segmentValue = segmentValues.get( value );
6153
if ( segmentValue == null ) {
62-
segmentValue = new SegmentValue( managers );
54+
segmentValue = new TermCollectorSegmentValue( managers, leafReaderContext );
6355
segmentValues.put( value, segmentValue );
6456
}
6557
segmentValue.collect( doc );
@@ -68,25 +60,6 @@ public void collect(int doc) throws IOException {
6860
}
6961
}
7062

71-
public List<LongBucket> counts(BucketOrder order, int topN, int minDocCount) {
72-
int size = Math.min( topN, segmentValues.size() );
73-
PriorityQueue<LongBucket> pq = new HibernateSearchBucketOrderQueue( order, size );
74-
75-
segmentValues.forEach( (LongObjectProcedure<SegmentValue>) (key, value) -> {
76-
if ( value.count >= minDocCount ) {
77-
pq.insertWithOverflow( new LongBucket( key, value.collectors, value.count ) );
78-
}
79-
} );
80-
81-
List<LongBucket> buckets = new LinkedList<>();
82-
while ( pq.size() != 0 ) {
83-
LongBucket popped = pq.pop();
84-
buckets.add( 0, popped );
85-
}
86-
87-
return buckets;
88-
}
89-
9063
@Override
9164
public ScoreMode scoreMode() {
9265
return ScoreMode.COMPLETE_NO_SCORES;
@@ -96,7 +69,7 @@ public ScoreMode scoreMode() {
9669
protected void doSetNextReader(LeafReaderContext context) throws IOException {
9770
this.values = valuesSource.getValues( context );
9871
this.leafReaderContext = context;
99-
for ( LongObjectCursor<SegmentValue> value : segmentValues ) {
72+
for ( LongObjectCursor<TermCollectorSegmentValue> value : segmentValues ) {
10073
value.value.resetLeafCollectors( context );
10174
}
10275
}
@@ -116,46 +89,8 @@ public void finish() {
11689
return managers;
11790
}
11891

119-
private static class HibernateSearchBucketOrderQueue extends PriorityQueue<LongBucket> {
120-
private final Comparator<LongBucket> comparator;
121-
122-
public HibernateSearchBucketOrderQueue(BucketOrder order, int maxSize) {
123-
super( maxSize );
124-
this.comparator = order.toLongBucketComparator();
125-
}
126-
127-
@Override
128-
protected boolean lessThan(LongBucket t1, LongBucket t2) {
129-
return comparator.compare( t1, t2 ) > 0;
130-
}
131-
}
132-
133-
private class SegmentValue {
134-
final Collector[] collectors;
135-
final LeafCollector[] leafCollectors;
136-
long count = 0L;
137-
138-
SegmentValue(CollectorManager<Collector, ?>[] managers) throws IOException {
139-
this.collectors = new Collector[managers.length];
140-
this.leafCollectors = new LeafCollector[managers.length];
141-
for ( int i = 0; i < managers.length; i++ ) {
142-
collectors[i] = managers[i].newCollector();
143-
leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext );
144-
}
145-
}
146-
147-
void collect(int doc) throws IOException {
148-
count++;
149-
for ( LeafCollector collector : leafCollectors ) {
150-
collector.collect( doc );
151-
}
152-
}
153-
154-
void resetLeafCollectors(LeafReaderContext leafReaderContext) throws IOException {
155-
for ( int i = 0; i < leafCollectors.length; i++ ) {
156-
leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext );
157-
}
158-
}
92+
LongObjectHashMap<TermCollectorSegmentValue> segmentValues() {
93+
return segmentValues;
15994
}
16095

16196
}

backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,14 @@
1212
import org.apache.lucene.search.CollectorManager;
1313

1414
public class NumericTermsCollectorFactory
15-
implements CollectorFactory<NumericTermsCollector, NumericTermsCollector, NumericTermsCollectorManager> {
15+
implements CollectorFactory<NumericTermsCollector, TermResults, NumericTermsCollectorManager> {
1616

17-
public static CollectorFactory<NumericTermsCollector, NumericTermsCollector, NumericTermsCollectorManager> instance(
17+
public static CollectorFactory<NumericTermsCollector, TermResults, NumericTermsCollectorManager> instance(
1818
LongMultiValuesSource valuesSource, List<CollectorFactory<?, ?, ?>> collectorFactories) {
1919
return new NumericTermsCollectorFactory( valuesSource, collectorFactories );
2020
}
2121

22-
private final CollectorKey<NumericTermsCollector, NumericTermsCollector> key = CollectorKey.create();
22+
private final CollectorKey<NumericTermsCollector, TermResults> key = CollectorKey.create();
2323
private final LongMultiValuesSource valuesSource;
2424
private final List<CollectorFactory<?, ?, ?>> collectorFactories;
2525

@@ -45,7 +45,7 @@ public NumericTermsCollectorManager createCollectorManager(CollectorExecutionCon
4545
}
4646

4747
@Override
48-
public CollectorKey<NumericTermsCollector, NumericTermsCollector> getCollectorKey() {
48+
public CollectorKey<NumericTermsCollector, TermResults> getCollectorKey() {
4949
return key;
5050
}
5151
}

backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import org.apache.lucene.search.CollectorManager;
1313

1414
public class NumericTermsCollectorManager
15-
implements CollectorManager<NumericTermsCollector, NumericTermsCollector> {
15+
implements CollectorManager<NumericTermsCollector, TermResults> {
1616

1717
private final LongMultiValuesSource valuesSource;
1818
private final CollectorKey<?, ?>[] keys;
@@ -31,8 +31,14 @@ public NumericTermsCollector newCollector() {
3131
}
3232

3333
@Override
34-
public NumericTermsCollector reduce(Collection<NumericTermsCollector> collection) {
35-
// TODO: actually reduce:
36-
return collection.iterator().next();
34+
public TermResults reduce(Collection<NumericTermsCollector> collection) {
35+
if ( collection.isEmpty() ) {
36+
return TermResults.EMPTY;
37+
}
38+
TermResults results = new TermResults( keys, managers );
39+
for ( NumericTermsCollector collector : collection ) {
40+
results.add( collector.segmentValues() );
41+
}
42+
return results;
3743
}
3844
}
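backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermCollectorSegmentValue.java

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change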
@@ -0,0 +1,41 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
* Copyright Red Hat Inc. and Hibernate Authors
4+
*/
5+
package org.hibernate.search.backend.lucene.lowlevel.collector.impl;
6+
7+
import java.io.IOException;
8+
9+
import org.apache.lucene.index.LeafReaderContext;
10+
import org.apache.lucene.search.Collector;
11+
import org.apache.lucene.search.CollectorManager;
12+
import org.apache.lucene.search.LeafCollector;
13+
14+
class TermCollectorSegmentValue {
15+
final Collector[] collectors;
16+
final LeafCollector[] leafCollectors;
17+
long count = 0L;
18+
19+
TermCollectorSegmentValue(CollectorManager<Collector, ?>[] managers, LeafReaderContext leafReaderContext)
20+
throws IOException {
21+
this.collectors = new Collector[managers.length];
22+
this.leafCollectors = new LeafCollector[managers.length];
23+
for ( int i = 0; i < managers.length; i++ ) {
24+
collectors[i] = managers[i].newCollector();
25+
leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext );
26+
}
27+
}
28+
29+
void collect(int doc) throws IOException {
30+
count++;
31+
for ( LeafCollector collector : leafCollectors ) {
32+
collector.collect( doc );
33+
}
34+
}
35+
36+
void resetLeafCollectors(LeafReaderContext leafReaderContext) throws IOException {
37+
for ( int i = 0; i < leafCollectors.length; i++ ) {
38+
leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext );
39+
}
40+
}
41+
}
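backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermResults.java

Lines changed: 102 additions & 0 deletions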
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
* Copyright Red Hat Inc. and Hibernate Authors
4+
*/
5+
package org.hibernate.search.backend.lucene.lowlevel.collector.impl;
6+
7+
import java.util.Comparator;
8+
import java.util.LinkedList;
9+
import java.util.List;
10+
11+
import org.hibernate.search.backend.lucene.types.aggregation.impl.BucketOrder;
12+
import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket;
13+
14+
import com.carrotsearch.hppc.LongObjectHashMap;
15+
import com.carrotsearch.hppc.procedures.LongObjectProcedure;
16+
17+
import org.apache.lucene.search.Collector;
18+
import org.apache.lucene.search.CollectorManager;
19+
import org.apache.lucene.util.PriorityQueue;
20+
21+
public class TermResults {
22+
23+
@SuppressWarnings("unchecked")
24+
static final TermResults EMPTY = new TermResults( new CollectorKey[0], new CollectorManager[0] );
25+
26+
private final CollectorKey<?, ?>[] collectorKeys;
27+
private final CollectorManager<Collector, ?>[] managers;
28+
29+
private final LongObjectHashMap<LongBucket> buckets = new LongObjectHashMap<>();
30+
31+
TermResults(CollectorKey<?, ?>[] collectorKeys, CollectorManager<Collector, ?>[] managers) {
32+
this.collectorKeys = collectorKeys;
33+
this.managers = managers;
34+
}
35+
36+
public List<LongBucket> counts(BucketOrder order, int topN, int minDocCount) {
37+
int size = Math.min( topN, buckets.size() );
38+
PriorityQueue<LongBucket> pq = new HibernateSearchBucketOrderQueue( order, size );
39+
40+
buckets.forEach( (LongObjectProcedure<LongBucket>) (key, value) -> {
41+
if ( value.count >= minDocCount ) {
42+
pq.insertWithOverflow( value );
43+
}
44+
} );
45+
46+
List<LongBucket> results = new LinkedList<>();
47+
while ( pq.size() != 0 ) {
48+
LongBucket popped = pq.pop();
49+
results.add( 0, popped );
50+
}
51+
52+
return results;
53+
}
54+
55+
void add(LongObjectHashMap<TermCollectorSegmentValue> segmentValues) {
56+
for ( var segment : segmentValues ) {
57+
LongBucket bucket = buckets.get( segment.key );
58+
if ( bucket == null ) {
59+
bucket = new LongBucket( segment.key, segment.value.collectors, segment.value.count );
60+
buckets.put( segment.key, bucket );
61+
}
62+
else {
63+
bucket.add( segment.value.collectors, segment.value.count );
64+
}
65+
}
66+
}
67+
68+
public void merge(LongObjectHashMap<LongBucket> values) {
69+
for ( var toadd : values ) {
70+
LongBucket bucket = buckets.get( toadd.key );
71+
if ( bucket == null ) {
72+
bucket = new LongBucket( toadd.key, toadd.value.collectors, toadd.value.count );
73+
buckets.put( toadd.key, bucket );
74+
}
75+
else {
76+
bucket.add( toadd.value );
77+
}
78+
}
79+
}
80+
81+
public CollectorKey<?, ?>[] collectorKeys() {
82+
return collectorKeys;
83+
}
84+
85+
public CollectorManager<Collector, ?>[] collectorManagers() {
86+
return managers;
87+
}
88+
89+
private static class HibernateSearchBucketOrderQueue extends PriorityQueue<LongBucket> {
90+
private final Comparator<LongBucket> comparator;
91+
92+
public HibernateSearchBucketOrderQueue(BucketOrder order, int maxSize) {
93+
super( maxSize );
94+
this.comparator = order.toLongBucketComparator();
95+
}
96+
97+
@Override
98+
protected boolean lessThan(LongBucket t1, LongBucket t2) {
99+
return comparator.compare( t1, t2 ) > 0;
100+
}
101+
}
102+
}

0 commit comments

Comments
 (0)