Skip to content

Commit fa19592

Browse files
committed
HSEARCH-3661 Make necessary adjustments to the terms aggregations to support values in the Lucene backends
1 parent 924a0a5 commit fa19592

File tree

40 files changed

+1286
-488
lines changed

40 files changed

+1286
-488
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
* Copyright Red Hat Inc. and Hibernate Authors
4+
*/
5+
package org.hibernate.search.backend.lucene.lowlevel.collector.impl;
6+
7+
import org.apache.lucene.search.Collector;
8+
import org.apache.lucene.search.CollectorManager;
9+
10+
public interface BaseTermsCollector {
11+
12+
CollectorKey<?, ?>[] keys();
13+
14+
CollectorManager<Collector, ?>[] managers();
15+
16+
}

backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java

Lines changed: 70 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,25 +15,36 @@
1515
import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket;
1616

1717
import com.carrotsearch.hppc.LongHashSet;
18-
import com.carrotsearch.hppc.LongIntHashMap;
19-
import com.carrotsearch.hppc.LongIntMap;
20-
import com.carrotsearch.hppc.procedures.LongIntProcedure;
18+
import com.carrotsearch.hppc.LongObjectHashMap;
19+
import com.carrotsearch.hppc.cursors.LongObjectCursor;
20+
import com.carrotsearch.hppc.procedures.LongObjectProcedure;
2121

2222
import org.apache.lucene.index.LeafReaderContext;
23+
import org.apache.lucene.search.Collector;
24+
import org.apache.lucene.search.CollectorManager;
25+
import org.apache.lucene.search.LeafCollector;
2326
import org.apache.lucene.search.ScoreMode;
2427
import org.apache.lucene.search.SimpleCollector;
2528
import org.apache.lucene.util.PriorityQueue;
2629

27-
public class NumericTermsCollector extends SimpleCollector {
30+
public class NumericTermsCollector extends SimpleCollector implements BaseTermsCollector {
2831

2932
private final LongHashSet uniqueLeafIndicesForDocument = new LongHashSet();
3033

3134
private final LongMultiValuesSource valuesSource;
32-
private final LongIntMap hashCounts = new LongIntHashMap();
35+
private final LongObjectHashMap<SegmentValue> segmentValues = new LongObjectHashMap<>();
36+
37+
private final CollectorKey<?, ?>[] keys;
38+
private final CollectorManager<Collector, ?>[] managers;
39+
3340
private LongMultiValues values;
41+
private LeafReaderContext leafReaderContext;
3442

35-
public NumericTermsCollector(LongMultiValuesSource valuesSource) {
43+
public NumericTermsCollector(LongMultiValuesSource valuesSource, CollectorKey<?, ?>[] keys,
44+
CollectorManager<Collector, ?>[] managers) {
3645
this.valuesSource = valuesSource;
46+
this.keys = keys;
47+
this.managers = managers;
3748
}
3849

3950
@Override
@@ -46,19 +57,24 @@ public void collect(int doc) throws IOException {
4657
// Each document must be counted only once per distinct value (term).
4758
long value = values.nextValue();
4859
if ( uniqueLeafIndicesForDocument.add( value ) ) {
49-
hashCounts.addTo( value, 1 );
60+
SegmentValue segmentValue = segmentValues.get( value );
61+
if ( segmentValue == null ) {
62+
segmentValue = new SegmentValue( managers );
63+
segmentValues.put( value, segmentValue );
64+
}
65+
segmentValue.collect( doc );
5066
}
5167
}
5268
}
5369
}
5470

5571
public List<LongBucket> counts(BucketOrder order, int topN, int minDocCount) {
56-
int size = Math.min( topN, hashCounts.size() );
72+
int size = Math.min( topN, segmentValues.size() );
5773
PriorityQueue<LongBucket> pq = new HibernateSearchBucketOrderQueue( order, size );
5874

59-
hashCounts.forEach( (LongIntProcedure) (key, value) -> {
60-
if ( value >= minDocCount ) {
61-
pq.insertWithOverflow( new LongBucket( key, value ) );
75+
segmentValues.forEach( (LongObjectProcedure<SegmentValue>) (key, value) -> {
76+
if ( value.count >= minDocCount ) {
77+
pq.insertWithOverflow( new LongBucket( key, value.collectors, value.count ) );
6278
}
6379
} );
6480

@@ -77,13 +93,27 @@ public ScoreMode scoreMode() {
7793
}
7894

7995
protected void doSetNextReader(LeafReaderContext context) throws IOException {
80-
values = valuesSource.getValues( context );
96+
this.values = valuesSource.getValues( context );
97+
this.leafReaderContext = context;
98+
for ( LongObjectCursor<SegmentValue> value : segmentValues ) {
99+
value.value.resetLeafCollectors( context );
100+
}
81101
}
82102

83103
public void finish() {
84104
values = null;
85105
}
86106

107+
@Override
108+
public CollectorKey<?, ?>[] keys() {
109+
return keys;
110+
}
111+
112+
@Override
113+
public CollectorManager<Collector, ?>[] managers() {
114+
return managers;
115+
}
116+
87117
private static class HibernateSearchBucketOrderQueue extends PriorityQueue<LongBucket> {
88118
private final Comparator<LongBucket> comparator;
89119

@@ -98,4 +128,32 @@ protected boolean lessThan(LongBucket t1, LongBucket t2) {
98128
}
99129
}
100130

131+
private class SegmentValue {
132+
final Collector[] collectors;
133+
final LeafCollector[] leafCollectors;
134+
long count = 0L;
135+
136+
SegmentValue(CollectorManager<Collector, ?>[] managers) throws IOException {
137+
this.collectors = new Collector[managers.length];
138+
this.leafCollectors = new LeafCollector[managers.length];
139+
for ( int i = 0; i < managers.length; i++ ) {
140+
collectors[i] = managers[i].newCollector();
141+
leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext );
142+
}
143+
}
144+
145+
void collect(int doc) throws IOException {
146+
count++;
147+
for ( LeafCollector collector : leafCollectors ) {
148+
collector.collect( doc );
149+
}
150+
}
151+
152+
void resetLeafCollectors(LeafReaderContext leafReaderContext) throws IOException {
153+
for ( int i = 0; i < leafCollectors.length; i++ ) {
154+
leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext );
155+
}
156+
}
157+
}
158+
101159
}

backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,26 +4,44 @@
44
*/
55
package org.hibernate.search.backend.lucene.lowlevel.collector.impl;
66

7+
import java.io.IOException;
8+
import java.util.List;
9+
710
import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource;
811

12+
import org.apache.lucene.search.CollectorManager;
13+
914
public class NumericTermsCollectorFactory
1015
implements CollectorFactory<NumericTermsCollector, NumericTermsCollector, NumericTermsCollectorManager> {
1116

1217
public static CollectorFactory<NumericTermsCollector, NumericTermsCollector, NumericTermsCollectorManager> instance(
13-
LongMultiValuesSource valuesSource) {
14-
return new NumericTermsCollectorFactory( valuesSource );
18+
LongMultiValuesSource valuesSource, List<CollectorFactory<?, ?, ?>> collectorFactories) {
19+
return new NumericTermsCollectorFactory( valuesSource, collectorFactories );
1520
}
1621

17-
public final CollectorKey<NumericTermsCollector, NumericTermsCollector> key = CollectorKey.create();
22+
private final CollectorKey<NumericTermsCollector, NumericTermsCollector> key = CollectorKey.create();
1823
private final LongMultiValuesSource valuesSource;
24+
private final List<CollectorFactory<?, ?, ?>> collectorFactories;
1925

20-
public NumericTermsCollectorFactory(LongMultiValuesSource valuesSource) {
26+
public NumericTermsCollectorFactory(LongMultiValuesSource valuesSource,
27+
List<CollectorFactory<?, ?, ?>> collectorFactories) {
2128
this.valuesSource = valuesSource;
29+
this.collectorFactories = collectorFactories;
2230
}
2331

32+
@SuppressWarnings({ "unchecked" })
2433
@Override
25-
public NumericTermsCollectorManager createCollectorManager(CollectorExecutionContext context) {
26-
return new NumericTermsCollectorManager( valuesSource );
34+
public NumericTermsCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException {
35+
CollectorKey<?, ?>[] keys = new CollectorKey<?, ?>[collectorFactories.size()];
36+
var managers = new CollectorManager[collectorFactories.size()];
37+
int index = 0;
38+
for ( CollectorFactory<?, ?, ?> factory : collectorFactories ) {
39+
keys[index] = factory.getCollectorKey();
40+
CollectorManager<?, ?> collectorManager = factory.createCollectorManager( context );
41+
managers[index] = collectorManager;
42+
index++;
43+
}
44+
return new NumericTermsCollectorManager( valuesSource, keys, managers );
2745
}
2846

2947
@Override

backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,26 @@
88

99
import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource;
1010

11+
import org.apache.lucene.search.Collector;
1112
import org.apache.lucene.search.CollectorManager;
1213

1314
public class NumericTermsCollectorManager
1415
implements CollectorManager<NumericTermsCollector, NumericTermsCollector> {
1516

1617
private final LongMultiValuesSource valuesSource;
18+
private final CollectorKey<?, ?>[] keys;
19+
private final CollectorManager<Collector, ?>[] managers;
1720

18-
public NumericTermsCollectorManager(LongMultiValuesSource valuesSource) {
21+
public NumericTermsCollectorManager(LongMultiValuesSource valuesSource,
22+
CollectorKey<?, ?>[] keys, CollectorManager<Collector, ?>[] managers) {
1923
this.valuesSource = valuesSource;
24+
this.keys = keys;
25+
this.managers = managers;
2026
}
2127

2228
@Override
2329
public NumericTermsCollector newCollector() {
24-
return new NumericTermsCollector( valuesSource );
30+
return new NumericTermsCollector( valuesSource, keys, managers );
2531
}
2632

2733
@Override

backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java

Lines changed: 74 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,34 +15,43 @@
1515
import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket;
1616

1717
import com.carrotsearch.hppc.LongHashSet;
18-
import com.carrotsearch.hppc.LongIntHashMap;
19-
import com.carrotsearch.hppc.LongIntMap;
20-
import com.carrotsearch.hppc.cursors.LongIntCursor;
21-
import com.carrotsearch.hppc.procedures.LongIntProcedure;
18+
import com.carrotsearch.hppc.LongObjectHashMap;
19+
import com.carrotsearch.hppc.cursors.LongObjectCursor;
20+
import com.carrotsearch.hppc.procedures.LongObjectProcedure;
2221

2322
import org.apache.lucene.index.IndexReaderContext;
2423
import org.apache.lucene.index.LeafReaderContext;
2524
import org.apache.lucene.index.MultiDocValues;
2625
import org.apache.lucene.index.SortedSetDocValues;
26+
import org.apache.lucene.search.Collector;
27+
import org.apache.lucene.search.CollectorManager;
28+
import org.apache.lucene.search.LeafCollector;
2729
import org.apache.lucene.search.ScoreMode;
2830
import org.apache.lucene.search.SimpleCollector;
2931
import org.apache.lucene.util.PriorityQueue;
3032

31-
public class TextTermsCollector extends SimpleCollector {
33+
public class TextTermsCollector extends SimpleCollector implements BaseTermsCollector {
3234

3335
private final LongHashSet uniqueLeafIndicesForDocument = new LongHashSet();
3436

3537
private final TextMultiValuesSource valuesSource;
36-
private final LongIntMap hashCounts = new LongIntHashMap();
37-
private final LongIntMap segmentCounts = new LongIntHashMap();
38+
private final LongObjectHashMap<LongBucket> hashValues = new LongObjectHashMap<>();
39+
private final LongObjectHashMap<SegmentValue> segmentValues = new LongObjectHashMap<>();
3840
private final String field;
3941
private SortedSetDocValues sortedSetValues;
4042

43+
private final CollectorKey<?, ?>[] keys;
44+
private final CollectorManager<Collector, ?>[] managers;
45+
4146
private TextMultiValues values;
47+
private LeafReaderContext leafReaderContext;
4248

43-
public TextTermsCollector(String field, TextMultiValuesSource valuesSource) {
49+
public TextTermsCollector(String field, TextMultiValuesSource valuesSource,
50+
CollectorKey<?, ?>[] keys, CollectorManager<Collector, ?>[] managers) {
4451
this.field = field;
4552
this.valuesSource = valuesSource;
53+
this.keys = keys;
54+
this.managers = managers;
4655
}
4756

4857
@Override
@@ -55,19 +64,24 @@ public void collect(int doc) throws IOException {
5564
// Each document must be counted only once per term (ordinal).
5665
long value = values.nextOrd();
5766
if ( uniqueLeafIndicesForDocument.add( value ) ) {
58-
segmentCounts.addTo( value, 1 );
67+
SegmentValue segmentValue = segmentValues.get( value );
68+
if ( segmentValue == null ) {
69+
segmentValue = new SegmentValue( managers );
70+
segmentValues.put( value, segmentValue );
71+
}
72+
segmentValue.collect( doc );
5973
}
6074
}
6175
}
6276
}
6377

64-
public List<LongBucket> counts(BucketOrder order, int topN, int minDocCount) {
65-
int size = Math.min( topN, hashCounts.size() );
78+
public List<LongBucket> results(BucketOrder order, int topN, int minDocCount) {
79+
int size = Math.min( topN, hashValues.size() );
6680
PriorityQueue<LongBucket> pq = new HibernateSearchBucketOrderQueue( order, size );
6781

68-
hashCounts.forEach( (LongIntProcedure) (key, value) -> {
69-
if ( value >= minDocCount ) {
70-
pq.insertWithOverflow( new LongBucket( key, value ) );
82+
hashValues.forEach( (LongObjectProcedure<LongBucket>) (key, value) -> {
83+
if ( value.count >= minDocCount ) {
84+
pq.insertWithOverflow( value );
7185
}
7286
} );
7387

@@ -80,6 +94,16 @@ public List<LongBucket> counts(BucketOrder order, int topN, int minDocCount) {
8094
return buckets;
8195
}
8296

97+
@Override
98+
public CollectorKey<?, ?>[] keys() {
99+
return keys;
100+
}
101+
102+
@Override
103+
public CollectorManager<Collector, ?>[] managers() {
104+
return managers;
105+
}
106+
83107
@Override
84108
public ScoreMode scoreMode() {
85109
return ScoreMode.COMPLETE_NO_SCORES;
@@ -88,17 +112,28 @@ public ScoreMode scoreMode() {
88112
protected void doSetNextReader(LeafReaderContext context) throws IOException {
89113
initRootSortedSetDocValues( context );
90114
this.values = valuesSource.getValues( context );
115+
leafReaderContext = context;
91116
}
92117

93118
public void finish() throws IOException {
94-
for ( LongIntCursor hashCount : segmentCounts ) {
95-
hashCounts.addTo( sortedSetValues.lookupTerm( values.lookupOrd( hashCount.key ) ), hashCount.value );
119+
for ( LongObjectCursor<SegmentValue> value : segmentValues ) {
120+
long globalOrd = sortedSetValues.lookupTerm( values.lookupOrd( value.key ) );
121+
LongBucket bucket = hashValues.get( globalOrd );
122+
if ( bucket == null ) {
123+
bucket = new LongBucket( globalOrd, value.value.collectors, value.value.count );
124+
hashValues.put( globalOrd, bucket );
125+
}
126+
else {
127+
bucket.count += value.value.count;
128+
for ( int i = 0; i < bucket.collectors.length; i++ ) {
129+
bucket.collectors[i].add( value.value.collectors[i] );
130+
}
131+
}
96132
}
97133
this.values = null;
98-
this.segmentCounts.clear();
134+
this.segmentValues.clear();
99135
}
100136

101-
102137
private void initRootSortedSetDocValues(IndexReaderContext ctx) throws IOException {
103138
if ( sortedSetValues != null || ctx == null ) {
104139
return;
@@ -123,4 +158,25 @@ protected boolean lessThan(LongBucket t1, LongBucket t2) {
123158
}
124159
}
125160

161+
private class SegmentValue {
162+
final Collector[] collectors;
163+
final LeafCollector[] leafCollectors;
164+
long count = 0L;
165+
166+
public SegmentValue(CollectorManager<Collector, ?>[] managers) throws IOException {
167+
this.collectors = new Collector[managers.length];
168+
this.leafCollectors = new LeafCollector[managers.length];
169+
for ( int i = 0; i < managers.length; i++ ) {
170+
collectors[i] = managers[i].newCollector();
171+
leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext );
172+
}
173+
}
174+
175+
public void collect(int doc) throws IOException {
176+
count++;
177+
for ( LeafCollector collector : leafCollectors ) {
178+
collector.collect( doc );
179+
}
180+
}
181+
}
126182
}

0 commit comments

Comments
 (0)