From 30969c21b1ace29a36c2bc8c8631b406ebef6e55 Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Wed, 25 Jun 2025 22:13:22 +0200 Subject: [PATCH 01/23] HSEARCH-3661 Rename collector methods --- .../aggregation/impl/AggregationExtractContext.java | 2 +- .../AbstractLuceneFacetsBasedTermsAggregation.java | 2 +- .../AbstractLuceneMetricNumericFieldAggregation.java | 6 +++--- .../AbstractLuceneMetricNumericLongAggregation.java | 2 +- .../impl/LuceneAvgCompensatedSumAggregation.java | 4 ++-- .../impl/LuceneAvgNumericFieldAggregation.java | 12 ++++++------ .../impl/LuceneNumericRangeAggregation.java | 2 +- .../impl/LuceneSumCompensatedSumAggregation.java | 2 +- .../aggregation/impl/AggregationExtractContext.java | 2 +- .../AbstractLuceneFacetsBasedTermsAggregation.java | 2 +- .../AbstractLuceneMetricNumericFieldAggregation.java | 6 +++--- .../AbstractLuceneMetricNumericLongAggregation.java | 2 +- .../impl/LuceneAvgCompensatedSumAggregation.java | 4 ++-- .../impl/LuceneAvgNumericFieldAggregation.java | 12 ++++++------ .../impl/LuceneNumericRangeAggregation.java | 2 +- .../impl/LuceneSumCompensatedSumAggregation.java | 2 +- 16 files changed, 32 insertions(+), 32 deletions(-) diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java index f8ad0b4fca4..ba8fb98b0b6 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java @@ -56,7 +56,7 @@ public FromDocumentValueConvertContext fromDocumentValueConvertContext() { return fromDocumentValueConvertContext; } - public T getFacets(CollectorKey key) { + public T getCollectorResults(CollectorKey key) { return multiCollectedResults.get( key ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java index 566a92e567f..db87cd6bd7d 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java @@ -110,7 +110,7 @@ abstract Set collectFirstTerms(IndexReader reader, boolean descending, int li abstract V termToFieldValue(T key); private List> getTopBuckets(AggregationExtractContext context) throws IOException { - FacetsCollector facetsCollector = context.getFacets( FacetsCollectorFactory.KEY ); + FacetsCollector facetsCollector = context.getCollectorResults( FacetsCollectorFactory.KEY ); NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericFieldAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericFieldAggregation.java index bcdfae57cf0..5d0a866d193 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericFieldAggregation.java +++ 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericFieldAggregation.java @@ -73,7 +73,7 @@ private LuceneNumericMetricFieldAggregationExtraction(CollectorKey coll @Override public K extract(AggregationExtractContext context) { - Long collector = context.getFacets( collectorKey ); + Long collector = context.getCollectorResults( collectorKey ); E e = codec.getDomain().sortedDocValueToTerm( collector ); F decode = codec.decode( e ); @@ -111,7 +111,7 @@ private LuceneNumericMetricFieldAggregationDoubleExtraction(CollectorKey c @SuppressWarnings("unchecked") @Override public K extract(AggregationExtractContext context) { - Long collector = context.getFacets( collectorKey ); + Long collector = context.getCollectorResults( collectorKey ); return (K) numericDomain.sortedDocValueToTerm( collector ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericLongAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericLongAggregation.java index 0e0c326fe22..67c9fe5da43 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericLongAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericLongAggregation.java @@ -44,7 +44,7 @@ public Set indexNames() { private class LuceneNumericMetricLongAggregationExtraction implements Extractor { @Override public Long extract(AggregationExtractContext context) { - return context.getFacets( collectorKey ); + return context.getCollectorResults( collectorKey ); } } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java index c392eee44b8..22aa85ff9e8 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java @@ -41,8 +41,8 @@ void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestConte @Override E extractEncoded(AggregationExtractContext context, LuceneNumericDomain numericDomain) { - Double sum = context.getFacets( compensatedSumCollectorKey ); - Long counts = context.getFacets( collectorKey ); + Double sum = context.getCollectorResults( compensatedSumCollectorKey ); + Long counts = context.getCollectorResults( collectorKey ); double avg = ( sum / counts ); return numericDomain.doubleToTerm( avg ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java index 4ab59041635..589e89ea9a8 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java @@ -61,8 +61,8 @@ private LuceneNumericMetricFieldAggregationExtraction(CollectorKey coll @Override public K 
extract(AggregationExtractContext context) { - Long collector = context.getFacets( collectorKey ); - Long counts = context.getFacets( countCollectorKey ); + Long collector = context.getCollectorResults( collectorKey ); + Long counts = context.getCollectorResults( countCollectorKey ); Double avg = ( (double) collector / counts ); collector = NumberUtils.toLong( avg ); @@ -106,8 +106,8 @@ private LuceneNumericMetricFieldAggregationDoubleExtraction(CollectorKey c @Override public E extract(AggregationExtractContext context) { - Long collector = context.getFacets( collectorKey ); - Long counts = context.getFacets( countCollectorKey ); + Long collector = context.getCollectorResults( collectorKey ); + Long counts = context.getCollectorResults( countCollectorKey ); Double avg = ( (double) collector / counts ); collector = NumberUtils.toLong( avg ); return codec.getDomain().sortedDocValueToTerm( collector ); diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java index a60a074113a..cc7b70ce276 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java @@ -77,7 +77,7 @@ private class LuceneNumericRangeAggregationExtractor implements Extractor, Long> extract(AggregationExtractContext context) throws IOException { LuceneNumericDomain numericDomain = codec.getDomain(); - FacetsCollector facetsCollector = context.getFacets( FacetsCollectorFactory.KEY ); + FacetsCollector facetsCollector = context.getCollectorResults( FacetsCollectorFactory.KEY ); NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneSumCompensatedSumAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneSumCompensatedSumAggregation.java index b91a81ca70f..2eee45d884b 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneSumCompensatedSumAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneSumCompensatedSumAggregation.java @@ -36,7 +36,7 @@ void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestConte @Override E extractEncoded(AggregationExtractContext context, LuceneNumericDomain numericDomain) { - Double sum = context.getFacets( compensatedSumCollectorKey ); + Double sum = context.getCollectorResults( compensatedSumCollectorKey ); return numericDomain.doubleToTerm( sum ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java index f8ad0b4fca4..ba8fb98b0b6 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java @@ -56,7 +56,7 @@ public FromDocumentValueConvertContext 
fromDocumentValueConvertContext() { return fromDocumentValueConvertContext; } - public T getFacets(CollectorKey key) { + public T getCollectorResults(CollectorKey key) { return multiCollectedResults.get( key ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java index 566a92e567f..db87cd6bd7d 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java @@ -110,7 +110,7 @@ abstract Set collectFirstTerms(IndexReader reader, boolean descending, int li abstract V termToFieldValue(T key); private List> getTopBuckets(AggregationExtractContext context) throws IOException { - FacetsCollector facetsCollector = context.getFacets( FacetsCollectorFactory.KEY ); + FacetsCollector facetsCollector = context.getCollectorResults( FacetsCollectorFactory.KEY ); NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericFieldAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericFieldAggregation.java index bcdfae57cf0..5d0a866d193 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericFieldAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericFieldAggregation.java @@ -73,7 +73,7 @@ private LuceneNumericMetricFieldAggregationExtraction(CollectorKey coll @Override public K extract(AggregationExtractContext context) { - Long collector = context.getFacets( collectorKey ); + Long collector = context.getCollectorResults( collectorKey ); E e = codec.getDomain().sortedDocValueToTerm( collector ); F decode = codec.decode( e ); @@ -111,7 +111,7 @@ private LuceneNumericMetricFieldAggregationDoubleExtraction(CollectorKey c @SuppressWarnings("unchecked") @Override public K extract(AggregationExtractContext context) { - Long collector = context.getFacets( collectorKey ); + Long collector = context.getCollectorResults( collectorKey ); return (K) numericDomain.sortedDocValueToTerm( collector ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericLongAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericLongAggregation.java index 0e0c326fe22..67c9fe5da43 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericLongAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericLongAggregation.java @@ -44,7 +44,7 @@ public Set indexNames() { private class LuceneNumericMetricLongAggregationExtraction implements Extractor { @Override public Long 
extract(AggregationExtractContext context) { - return context.getFacets( collectorKey ); + return context.getCollectorResults( collectorKey ); } } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java index c392eee44b8..22aa85ff9e8 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java @@ -41,8 +41,8 @@ void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestConte @Override E extractEncoded(AggregationExtractContext context, LuceneNumericDomain numericDomain) { - Double sum = context.getFacets( compensatedSumCollectorKey ); - Long counts = context.getFacets( collectorKey ); + Double sum = context.getCollectorResults( compensatedSumCollectorKey ); + Long counts = context.getCollectorResults( collectorKey ); double avg = ( sum / counts ); return numericDomain.doubleToTerm( avg ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java index 4ab59041635..589e89ea9a8 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java @@ -61,8 +61,8 @@ private LuceneNumericMetricFieldAggregationExtraction(CollectorKey coll @Override public K extract(AggregationExtractContext context) { - Long collector = context.getFacets( collectorKey ); - Long counts = context.getFacets( countCollectorKey ); + Long collector = context.getCollectorResults( collectorKey ); + Long counts = context.getCollectorResults( countCollectorKey ); Double avg = ( (double) collector / counts ); collector = NumberUtils.toLong( avg ); @@ -106,8 +106,8 @@ private LuceneNumericMetricFieldAggregationDoubleExtraction(CollectorKey c @Override public E extract(AggregationExtractContext context) { - Long collector = context.getFacets( collectorKey ); - Long counts = context.getFacets( countCollectorKey ); + Long collector = context.getCollectorResults( collectorKey ); + Long counts = context.getCollectorResults( countCollectorKey ); Double avg = ( (double) collector / counts ); collector = NumberUtils.toLong( avg ); return codec.getDomain().sortedDocValueToTerm( collector ); diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java index a60a074113a..cc7b70ce276 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java @@ -77,7 
+77,7 @@ private class LuceneNumericRangeAggregationExtractor implements Extractor, Long> extract(AggregationExtractContext context) throws IOException { LuceneNumericDomain numericDomain = codec.getDomain(); - FacetsCollector facetsCollector = context.getFacets( FacetsCollectorFactory.KEY ); + FacetsCollector facetsCollector = context.getCollectorResults( FacetsCollectorFactory.KEY ); NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneSumCompensatedSumAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneSumCompensatedSumAggregation.java index b91a81ca70f..2eee45d884b 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneSumCompensatedSumAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneSumCompensatedSumAggregation.java @@ -36,7 +36,7 @@ void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestConte @Override E extractEncoded(AggregationExtractContext context, LuceneNumericDomain numericDomain) { - Double sum = context.getFacets( compensatedSumCollectorKey ); + Double sum = context.getCollectorResults( compensatedSumCollectorKey ); return numericDomain.doubleToTerm( sum ); } From ee81874578b67d978da1689a25f5f41bd87d5384 Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Wed, 25 Jun 2025 22:52:41 +0200 Subject: [PATCH 02/23] HSEARCH-3661 Do not rely on facets collector for range aggregations --- .../collector/impl/RangeCollector.java | 328 ++++++++++++++++++ .../collector/impl/RangeCollectorFactory.java | 36 ++ .../collector/impl/RangeCollectorManager.java | 34 ++ .../lowlevel/facet/impl/FacetCountsUtils.java | 63 ++-- .../impl/LongMultiValueRangeFacetCounts.java | 114 ------ .../impl/LuceneNumericRangeAggregation.java | 41 +-- .../types/lowlevel/impl/EffectiveRange.java | 8 + .../lowlevel/impl/LuceneDoubleDomain.java | 22 +- .../lowlevel/impl/LuceneFloatDomain.java | 22 +- .../lowlevel/impl/LuceneIntegerDomain.java | 15 +- .../types/lowlevel/impl/LuceneLongDomain.java | 15 +- .../lowlevel/impl/LuceneNumericDomain.java | 5 +- .../collector/impl/RangeCollector.java | 328 ++++++++++++++++++ .../collector/impl/RangeCollectorFactory.java | 36 ++ .../collector/impl/RangeCollectorManager.java | 34 ++ .../lowlevel/facet/impl/FacetCountsUtils.java | 63 ++-- .../impl/LongMultiValueRangeFacetCounts.java | 114 ------ .../aggregation/impl/EffectiveRange.java | 8 + .../impl/LuceneNumericRangeAggregation.java | 41 +-- .../lowlevel/impl/LuceneDoubleDomain.java | 23 +- .../lowlevel/impl/LuceneFloatDomain.java | 23 +- .../lowlevel/impl/LuceneIntegerDomain.java | 16 +- .../types/lowlevel/impl/LuceneLongDomain.java | 16 +- .../lowlevel/impl/LuceneNumericDomain.java | 6 +- 24 files changed, 953 insertions(+), 458 deletions(-) create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java delete mode 100644 
backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeFacetCounts.java create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java delete mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeFacetCounts.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/EffectiveRange.java diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java new file mode 100644 index 00000000000..0abcde21cf2 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java @@ -0,0 +1,328 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; +import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; + +import com.carrotsearch.hppc.IntArrayList; +import com.carrotsearch.hppc.IntHashSet; +import com.carrotsearch.hppc.LongArrayList; +import com.carrotsearch.hppc.LongIntHashMap; +import com.carrotsearch.hppc.LongIntMap; +import com.carrotsearch.hppc.cursors.IntCursor; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; + +public class RangeCollector extends SimpleCollector { + + private final LongMultiValuesSource valuesSource; + private final LongRangeNode root; + private final long[] boundaries; + private final long[] countsPerBoundaries; + + private int leafUpto; + private boolean filled = false; + private final long[] counts; + + private LongMultiValues values; + + public RangeCollector(LongMultiValuesSource valuesSource, EffectiveRange[] ranges) { + this.valuesSource = valuesSource; + + // Maps all range inclusive endpoints to int flags; 1 + // = start of interval, 2 = end of interval. 
We need to + // track the start vs end case separately because if a + // given point is both, then it must be its own + // elementary interval: + LongIntMap endsMap = new LongIntHashMap(); + + endsMap.put( Long.MIN_VALUE, 1 ); + endsMap.put( Long.MAX_VALUE, 2 ); + + for ( EffectiveRange range : ranges ) { + long min = range.min(); + long max = range.max(); + int cur = endsMap.get( min ); + if ( cur == 0 ) { + endsMap.put( min, 1 ); + } + else { + endsMap.put( min, cur | 1 ); + } + cur = endsMap.get( max ); + if ( cur == 0 ) { + endsMap.put( max, 2 ); + } + else { + endsMap.put( max, cur | 2 ); + } + } + + LongArrayList endsList = new LongArrayList( endsMap.keys() ); + Arrays.sort( endsList.buffer, 0, endsList.elementsCount ); + + // Build elementaryIntervals (a 1D Venn diagram): + List elementaryIntervals = new ArrayList<>(); + int upto0 = 1; + long v = endsList.get( 0 ); + long prev; + if ( endsMap.get( v ) == 3 ) { + elementaryIntervals.add( new InclusiveRange( v, v ) ); + prev = v + 1; + } + else { + prev = v; + } + + while ( upto0 < endsList.size() ) { + v = endsList.get( upto0 ); + int flags = endsMap.get( v ); + if ( flags == 3 ) { + // This point is both an end and a start; we need to + // separate it: + if ( v > prev ) { + elementaryIntervals.add( new InclusiveRange( prev, v - 1 ) ); + } + elementaryIntervals.add( new InclusiveRange( v, v ) ); + prev = v + 1; + } + else if ( flags == 1 ) { + // This point is only the start of an interval; + // attach it to next interval: + if ( v > prev ) { + elementaryIntervals.add( new InclusiveRange( prev, v - 1 ) ); + } + prev = v; + } + else { + assert flags == 2; + // This point is only the end of an interval; attach + // it to last interval: + elementaryIntervals.add( new InclusiveRange( prev, v ) ); + prev = v + 1; + } + upto0++; + } + + // Build binary tree on top of intervals: + root = split( 0, elementaryIntervals.size(), elementaryIntervals ); + + // Set outputs, so we know which range to output for + // each node in the tree: + for ( int i = 0; i < ranges.length; i++ ) { + root.addOutputs( i, ranges[i] ); + } + + // Set boundaries (ends of each elementary interval): + boundaries = new long[elementaryIntervals.size()]; + for ( int i = 0; i < boundaries.length; i++ ) { + boundaries[i] = elementaryIntervals.get( i ).end; + } + + countsPerBoundaries = new long[boundaries.length]; + counts = new long[ranges.length]; + } + + private void incrementCountForLeafWithIndex(int index) { + countsPerBoundaries[index]++; + } + + private int findLeafIndex(long v) { + // Binary search to find matched elementary range; we + // are guaranteed to find a match because the last + // boundary is Long.MAX_VALUE: + int lo = 0; + int hi = boundaries.length - 1; + while ( true ) { + int mid = ( lo + hi ) >>> 1; + if ( v <= boundaries[mid] ) { + if ( mid == 0 ) { + return 0; + } + else { + hi = mid - 1; + } + } + else if ( v > boundaries[mid + 1] ) { + lo = mid + 1; + } + else { + return mid + 1; + } + } + } + + /** Fills counts corresponding to the original input + * ranges, returning the missing count (how many hits + * didn't match any ranges). 
*/ + private void fillCounts(long[] counts) { + leafUpto = 0; + rollup( root, counts, false ); + } + + private long rollup(LongRangeNode node, long[] counts, boolean sawOutputs) { + long count; + sawOutputs |= node.outputs != null; + if ( node.left != null ) { + count = rollup( node.left, counts, sawOutputs ); + count += rollup( node.right, counts, sawOutputs ); + } + else { + // Leaf: + count = countsPerBoundaries[leafUpto]; + leafUpto++; + } + if ( node.outputs != null ) { + for ( IntCursor rangeIndexCursor : node.outputs ) { + counts[rangeIndexCursor.value] += count; + } + } + return count; + } + + private static LongRangeNode split(int start, int end, List elementaryIntervals) { + if ( start == end - 1 ) { + // leaf + InclusiveRange range = elementaryIntervals.get( start ); + return new LongRangeNode( range.start, range.end, null, null ); + } + else { + int mid = ( start + end ) >>> 1; + LongRangeNode left = split( start, mid, elementaryIntervals ); + LongRangeNode right = split( mid, end, elementaryIntervals ); + return new LongRangeNode( left.start, right.end, left, right ); + } + } + + private record InclusiveRange(long start, long end) { + private InclusiveRange { + assert end >= start; + } + + @Override + public String toString() { + return start + " to " + end; + } + } + + /** Holds one node of the segment tree. */ + private static class LongRangeNode { + private final LongRangeNode left; + private final LongRangeNode right; + + // Our range, inclusive: + private final long start; + private final long end; + + // Which range indices to output when a query goes + // through this node: + IntArrayList outputs; + + public LongRangeNode(long start, long end, LongRangeNode left, LongRangeNode right) { + this.start = start; + this.end = end; + this.left = left; + this.right = right; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + toString( sb, 0 ); + return sb.toString(); + } + + static void indent(StringBuilder sb, int depth) { + for ( int i = 0; i < depth; i++ ) { + sb.append( " " ); + } + } + + /** Recursively assigns range outputs to each node. */ + void addOutputs(int index, EffectiveRange range) { + if ( start >= range.min() && end <= range.max() ) { + // Our range is fully included in the incoming + // range; add to our output list: + if ( outputs == null ) { + outputs = new IntArrayList(); + } + outputs.add( index ); + } + else if ( left != null ) { + assert right != null; + // Recurse: + left.addOutputs( index, range ); + right.addOutputs( index, range ); + } + } + + void toString(StringBuilder sb, int depth) { + indent( sb, depth ); + if ( left == null ) { + assert right == null; + sb.append( "leaf: " ).append( start ).append( " to " ).append( end ); + } + else { + sb.append( "node: " ).append( start ).append( " to " ).append( end ); + } + if ( outputs != null ) { + sb.append( " outputs=" ); + sb.append( outputs ); + } + sb.append( '\n' ); + + if ( left != null ) { + assert right != null; + left.toString( sb, depth + 1 ); + right.toString( sb, depth + 1 ); + } + } + } + + @Override + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) { + IntHashSet uniqueLeafIndicesForDocument = new IntHashSet(); + while ( values.hasNextValue() ) { + // Each document must be counted only once per range. 
+ int leafIndex = findLeafIndex( values.nextValue() ); + if ( uniqueLeafIndicesForDocument.add( leafIndex ) ) { + incrementCountForLeafWithIndex( leafIndex ); + } + } + } + } + + public long[] counts() { + if ( !filled ) { + filled = true; + fillCounts( counts ); + } + return counts; + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE_NO_SCORES; + } + + protected void doSetNextReader(LeafReaderContext context) throws IOException { + values = valuesSource.getValues( context ); + } + + public void finish() throws IOException { + values = null; + } + +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java new file mode 100644 index 00000000000..cdaba62f538 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; +import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; + +public class RangeCollectorFactory + implements CollectorFactory> { + + public static CollectorFactory> instance( + LongMultiValuesSource valuesSource, EffectiveRange[] ranges) { + return new RangeCollectorFactory<>( valuesSource, ranges ); + } + + public final CollectorKey key = CollectorKey.create(); + private final LongMultiValuesSource valuesSource; + private final EffectiveRange[] ranges; + + public RangeCollectorFactory(LongMultiValuesSource valuesSource, EffectiveRange[] ranges) { + this.valuesSource = valuesSource; + this.ranges = ranges; + } + + @Override + public RangeCollectorManager createCollectorManager(CollectorExecutionContext context) { + return new RangeCollectorManager<>( valuesSource, ranges ); + } + + @Override + public CollectorKey getCollectorKey() { + return key; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java new file mode 100644 index 00000000000..92f5e854504 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.util.Collection; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; +import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; + +import org.apache.lucene.search.CollectorManager; + +public class RangeCollectorManager implements CollectorManager { + + private final LongMultiValuesSource valuesSource; + private final EffectiveRange[] ranges; + + public RangeCollectorManager(LongMultiValuesSource valuesSource, EffectiveRange[] ranges) { + this.valuesSource = valuesSource; + this.ranges = ranges; + } + + @Override + public RangeCollector newCollector() { + return new RangeCollector( valuesSource, ranges ); + } + + @Override + public RangeCollector reduce(Collection collection) { + // TODO: actually reduce: + return collection.iterator().next(); + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java index 1a62ef39013..3a233f63c5d 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java @@ -7,50 +7,63 @@ import java.util.Collection; import java.util.function.ToLongFunction; +import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; import org.hibernate.search.util.common.data.Range; import org.hibernate.search.util.common.data.RangeBoundInclusion; -import org.apache.lucene.facet.range.LongRange; - public class FacetCountsUtils { private FacetCountsUtils() { } - public static < - T extends Number> LongRange[] createLongRangesForIntegralValues(Collection> ranges) { - return createLongRanges( ranges, Number::longValue, Long.MIN_VALUE, Long.MAX_VALUE, false ); + public static EffectiveRange[] createEffectiveRangesForIntegralValues( + Collection> ranges) { + return createEffectiveRangesForIntegralValues( ranges, Number::longValue, Long.MIN_VALUE, Long.MAX_VALUE, false ); } - public static LongRange[] createLongRangesForFloatingPointValues(Collection> ranges, + public static EffectiveRange[] createEffectiveRangesForIntegralValues( + Collection> ranges, ToLongFunction encoder, T negativeInfinity, T positiveInfinity) { - return createLongRanges( ranges, encoder, negativeInfinity, positiveInfinity, true ); + return createEffectiveRangesForIntegralValues( ranges, encoder, negativeInfinity, positiveInfinity, true ); } - private static LongRange[] createLongRanges(Collection> ranges, + private static EffectiveRange[] createEffectiveRangesForIntegralValues(Collection> ranges, ToLongFunction encoder, T lowestPossibleValue, T highestPossibleValue, boolean extremaAreInfinity) { - LongRange[] longRanges = new LongRange[ranges.size()]; + EffectiveRange[] effectiveRanges = new EffectiveRange[ranges.size()]; int i = 0; for ( Range range : ranges ) { - T lowerBoundValue = range.lowerBoundValue().orElse( null ); - T upperBoundValue = range.upperBoundValue().orElse( null ); - longRanges[i] = new LongRange( - String.valueOf( i ), - encoder.applyAsLong( lowerBoundValue == null ? lowestPossibleValue : lowerBoundValue ), - // The lower bound is included if it is explicitly included - RangeBoundInclusion.INCLUDED.equals( range.lowerBoundInclusion() ) - // ... 
or if it is infinity but infinity cannot be represented - || !extremaAreInfinity && lowerBoundValue == null, - encoder.applyAsLong( upperBoundValue == null ? highestPossibleValue : upperBoundValue ), - // The upper bound is included if it is explicitly included - RangeBoundInclusion.INCLUDED.equals( range.upperBoundInclusion() ) - // ... or if it is infinity but infinity cannot be represented - || !extremaAreInfinity && upperBoundValue == null + final T lowerBoundValue = range.lowerBoundValue().orElse( null ); + final T upperBoundValue = range.upperBoundValue().orElse( null ); + + + long min = encoder.applyAsLong( lowerBoundValue == null ? lowestPossibleValue : lowerBoundValue ); + long max = encoder.applyAsLong( upperBoundValue == null ? highestPossibleValue : upperBoundValue ); + + // The lower bound is included if it is explicitly included + // ... or if it is infinity but infinity cannot be represented + // so if it's none of the above we exclude the boundary by ++ it. + if ( + RangeBoundInclusion.EXCLUDED.equals( range.lowerBoundInclusion() ) + && ( extremaAreInfinity || lowerBoundValue != null ) ) { + ++min; + } + + // The upper bound is included if it is explicitly included + // ... or if it is infinity but infinity cannot be represented + // so if it's none of the above we exclude the boundary by -- it. + if ( + RangeBoundInclusion.EXCLUDED.equals( range.upperBoundInclusion() ) + && ( extremaAreInfinity || upperBoundValue != null ) ) { + --max; + } + + effectiveRanges[i] = new EffectiveRange( + min, + max ); ++i; } - return longRanges; + return effectiveRanges; } - } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeFacetCounts.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeFacetCounts.java deleted file mode 100644 index cfaff59829c..00000000000 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeFacetCounts.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.io.IOException; -import java.util.List; - -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; - -import com.carrotsearch.hppc.IntHashSet; -import com.carrotsearch.hppc.procedures.IntProcedure; - -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.range.LongRange; -import org.apache.lucene.index.IndexReaderContext; -import org.apache.lucene.index.ReaderUtil; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Weight; - -/** - *

- * Copied with some changes from {@code org.apache.lucene.facet.range.LongRangeFacetCounts} - * of Apache Lucene project. - */ -public class LongMultiValueRangeFacetCounts extends MultiValueRangeFacetCounts { - - public LongMultiValueRangeFacetCounts(String field, LongMultiValuesSource valueSource, FacetsCollector hits, - LongRange... ranges) - throws IOException { - this( field, valueSource, hits, null, ranges ); - } - - public LongMultiValueRangeFacetCounts(String field, LongMultiValuesSource valueSource, FacetsCollector hits, - Query fastMatchQuery, LongRange... ranges) - throws IOException { - super( field, ranges, fastMatchQuery ); - count( valueSource, hits.getMatchingDocs() ); - } - - private void count(LongMultiValuesSource valueSource, List matchingDocs) throws IOException { - LongRange[] longRanges = (LongRange[]) this.ranges; - - IntHashSet uniqueLeafIndicesForDocument = new IntHashSet(); - LongMultiValueRangeCounter counter = new LongMultiValueRangeCounter( longRanges ); - IntProcedure incrementCountForLeafWithIndex = counter::incrementCountForLeafWithIndex; - - int missingCount = 0; - for ( FacetsCollector.MatchingDocs hits : matchingDocs ) { - LongMultiValues fv = valueSource.getValues( hits.context ); - - totCount += hits.totalHits; - final DocIdSetIterator fastMatchDocs; - if ( fastMatchQuery != null ) { - final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext( hits.context ); - final IndexSearcher searcher = new IndexSearcher( topLevelContext ); - searcher.setQueryCache( null ); - final Weight fastMatchWeight = - searcher.createWeight( searcher.rewrite( fastMatchQuery ), ScoreMode.COMPLETE_NO_SCORES, 1 ); - Scorer s = fastMatchWeight.scorer( hits.context ); - if ( s == null ) { - continue; - } - fastMatchDocs = s.iterator(); - } - else { - fastMatchDocs = null; - } - - DocIdSetIterator docs = hits.bits.iterator(); - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { - if ( fastMatchDocs != null ) { - int fastMatchDoc = fastMatchDocs.docID(); - if ( fastMatchDoc < doc ) { - fastMatchDoc = fastMatchDocs.advance( doc ); - } - - if ( doc != fastMatchDoc ) { - doc = docs.advance( fastMatchDoc ); - continue; - } - } - - if ( fv.advanceExact( doc ) ) { - while ( fv.hasNextValue() ) { - // Each document must be counted only once per range. 
- int leafIndex = counter.findLeafIndex( fv.nextValue() ); - uniqueLeafIndicesForDocument.add( leafIndex ); - } - - uniqueLeafIndicesForDocument.forEach( incrementCountForLeafWithIndex ); - uniqueLeafIndicesForDocument.clear(); - } - else { - missingCount++; - } - - doc = docs.nextDoc(); - } - } - - int x = counter.fillCounts( counts ); - - missingCount += x; - - totCount -= missingCount; - } -} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java index cc7b70ce276..513a8fb379c 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java @@ -4,14 +4,16 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; -import java.io.IOException; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.function.Function; -import org.hibernate.search.backend.lucene.lowlevel.collector.impl.FacetsCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollector; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; @@ -19,15 +21,10 @@ import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; import org.hibernate.search.backend.lucene.types.codec.impl.AbstractLuceneNumericFieldCodec; -import org.hibernate.search.backend.lucene.types.lowlevel.impl.LuceneNumericDomain; import org.hibernate.search.engine.search.aggregation.spi.RangeAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; import org.hibernate.search.util.common.data.Range; -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; - /** * @param The type of field values. * @param The type of encoded field values. 
@@ -42,6 +39,8 @@ public class LuceneNumericRangeAggregation private final List> rangesInOrder; private final List> encodedRangesInOrder; + private CollectorKey collectorKey; + private LuceneNumericRangeAggregation(Builder builder) { super( builder ); this.codec = builder.codec; @@ -51,7 +50,15 @@ private LuceneNumericRangeAggregation(Builder builder) { @Override public Extractor, Long>> request(AggregationRequestContext context) { - context.requireCollector( FacetsCollectorFactory.INSTANCE ); + NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); + JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( + absoluteFieldPath, nestedDocsProvider + ); + + var rangeFactory = RangeCollectorFactory.instance( source, + codec.getDomain().createEffectiveRanges( encodedRangesInOrder ) ); + collectorKey = rangeFactory.getCollectorKey(); + context.requireCollector( rangeFactory ); return new LuceneNumericRangeAggregationExtractor(); } @@ -74,23 +81,13 @@ public Factory(AbstractLuceneNumericFieldCodec codec) { private class LuceneNumericRangeAggregationExtractor implements Extractor, Long>> { @Override - public Map, Long> extract(AggregationExtractContext context) throws IOException { - LuceneNumericDomain numericDomain = codec.getDomain(); - - FacetsCollector facetsCollector = context.getCollectorResults( FacetsCollectorFactory.KEY ); - - NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); - - Facets facetsCount = numericDomain.createRangeFacetCounts( - absoluteFieldPath, facetsCollector, encodedRangesInOrder, - nestedDocsProvider - ); - - FacetResult facetResult = facetsCount.getTopChildren( rangesInOrder.size(), absoluteFieldPath ); + public Map, Long> extract(AggregationExtractContext context) { + RangeCollector rangeCollector = context.getCollectorResults( collectorKey ); + long[] counts = rangeCollector.counts(); Map, Long> result = new LinkedHashMap<>(); for ( int i = 0; i < rangesInOrder.size(); i++ ) { - result.put( rangesInOrder.get( i ), (long) (Integer) facetResult.labelValues[i].value ); + result.put( rangesInOrder.get( i ), counts[i] ); } return result; diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java new file mode 100644 index 00000000000..a5eecfc8d27 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java @@ -0,0 +1,8 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.lowlevel.impl; + +public record EffectiveRange(long min, long max) { +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java index d4ed1241e9a..cd1fa6e3274 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java @@ -14,7 +14,6 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueRangeFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.util.common.data.Range; @@ -106,24 +105,9 @@ public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector f } @Override - public Facets createRangeFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException { - // As we don't need to apply any operation to terms except sometimes a sort, - // we can simply rely on raw, long values, whose order is the same as their corresponding double value. - // Values are ultimately converted back to the Double equivalent by calling sortedDocValueToTerm. - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueRangeFacetCounts( - absoluteFieldPath, source, - facetsCollector, - FacetCountsUtils.createLongRangesForFloatingPointValues( - ranges, NumericUtils::doubleToSortableLong, - Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY - ) - ); + public EffectiveRange[] createEffectiveRanges(Collection> ranges) { + return FacetCountsUtils.createEffectiveRangesForIntegralValues( ranges, NumericUtils::doubleToSortableLong, + Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java index 48a7218ac16..141ecb237e7 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java @@ -14,7 +14,6 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueRangeFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; @@ -107,24 +106,9 @@ public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector f } @Override - public 
Facets createRangeFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException { - // As we don't need to apply any operation to terms except sometimes a sort, - // we can simply rely on raw, int values, whose order is the same as their corresponding float value. - // Values are ultimately converted back to the Float equivalent by calling sortedDocValueToTerm. - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromIntField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueRangeFacetCounts( - absoluteFieldPath, source, - facetsCollector, - FacetCountsUtils.createLongRangesForFloatingPointValues( - ranges, value -> (long) NumericUtils.floatToSortableInt( value ), - Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY - ) - ); + public EffectiveRange[] createEffectiveRanges(Collection> ranges) { + return FacetCountsUtils.createEffectiveRangesForIntegralValues( ranges, NumericUtils::floatToSortableInt, + Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java index db1a866a90b..3ce7e9e0def 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java @@ -14,7 +14,6 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueRangeFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; @@ -106,18 +105,8 @@ public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector f } @Override - public Facets createRangeFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException { - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromIntField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueRangeFacetCounts( - absoluteFieldPath, source, - facetsCollector, - FacetCountsUtils.createLongRangesForIntegralValues( ranges ) - ); + public EffectiveRange[] createEffectiveRanges(Collection> ranges) { + return FacetCountsUtils.createEffectiveRangesForIntegralValues( ranges ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java index b8df3919af0..409356559c0 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java @@ -14,7 +14,6 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; import 
org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueRangeFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; @@ -106,18 +105,8 @@ public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector f } @Override - public Facets createRangeFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException { - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueRangeFacetCounts( - absoluteFieldPath, source, - facetsCollector, - FacetCountsUtils.createLongRangesForIntegralValues( ranges ) - ); + public EffectiveRange[] createEffectiveRanges(Collection> ranges) { + return FacetCountsUtils.createEffectiveRangesForIntegralValues( ranges ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java index 2b82c7a9807..20c08cf7c3d 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java @@ -47,10 +47,7 @@ Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCo NestedDocsProvider nestedDocsProvider) throws IOException; - Facets createRangeFacetCounts(String absoluteFieldPath, - FacetsCollector facetsCollector, Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException; + EffectiveRange[] createEffectiveRanges(Collection> ranges); IndexableField createIndexField(String absoluteFieldPath, E numericValue); diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java new file mode 100644 index 00000000000..407247ec425 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java @@ -0,0 +1,328 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; +import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange; + +import com.carrotsearch.hppc.IntArrayList; +import com.carrotsearch.hppc.IntHashSet; +import com.carrotsearch.hppc.LongArrayList; +import com.carrotsearch.hppc.LongIntHashMap; +import com.carrotsearch.hppc.LongIntMap; +import com.carrotsearch.hppc.cursors.IntCursor; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; + +public class RangeCollector extends SimpleCollector { + + private final LongMultiValuesSource valuesSource; + private final LongRangeNode root; + private final long[] boundaries; + private final long[] countsPerBoundaries; + + private int leafUpto; + private boolean filled = false; + private final long[] counts; + + private LongMultiValues values; + + public RangeCollector(LongMultiValuesSource valuesSource, EffectiveRange[] ranges) { + this.valuesSource = valuesSource; + + // Maps all range inclusive endpoints to int flags; 1 + // = start of interval, 2 = end of interval. We need to + // track the start vs end case separately because if a + // given point is both, then it must be its own + // elementary interval: + LongIntMap endsMap = new LongIntHashMap(); + + endsMap.put( Long.MIN_VALUE, 1 ); + endsMap.put( Long.MAX_VALUE, 2 ); + + for ( EffectiveRange range : ranges ) { + long min = range.min(); + long max = range.max(); + int cur = endsMap.get( min ); + if ( cur == 0 ) { + endsMap.put( min, 1 ); + } + else { + endsMap.put( min, cur | 1 ); + } + cur = endsMap.get( max ); + if ( cur == 0 ) { + endsMap.put( max, 2 ); + } + else { + endsMap.put( max, cur | 2 ); + } + } + + LongArrayList endsList = new LongArrayList( endsMap.keys() ); + Arrays.sort( endsList.buffer, 0, endsList.elementsCount ); + + // Build elementaryIntervals (a 1D Venn diagram): + List elementaryIntervals = new ArrayList<>(); + int upto0 = 1; + long v = endsList.get( 0 ); + long prev; + if ( endsMap.get( v ) == 3 ) { + elementaryIntervals.add( new InclusiveRange( v, v ) ); + prev = v + 1; + } + else { + prev = v; + } + + while ( upto0 < endsList.size() ) { + v = endsList.get( upto0 ); + int flags = endsMap.get( v ); + if ( flags == 3 ) { + // This point is both an end and a start; we need to + // separate it: + if ( v > prev ) { + elementaryIntervals.add( new InclusiveRange( prev, v - 1 ) ); + } + elementaryIntervals.add( new InclusiveRange( v, v ) ); + prev = v + 1; + } + else if ( flags == 1 ) { + // This point is only the start of an interval; + // attach it to next interval: + if ( v > prev ) { + elementaryIntervals.add( new InclusiveRange( prev, v - 1 ) ); + } + prev = v; + } + else { + assert flags == 2; + // This point is only the end of an interval; attach + // it to last interval: + elementaryIntervals.add( new InclusiveRange( prev, v ) ); + prev = v + 1; + } + upto0++; + } + + // Build binary tree on top of intervals: + root = split( 0, elementaryIntervals.size(), elementaryIntervals ); + + // Set outputs, so we know which range to output for + // each node in the tree: + for ( int i = 0; i < ranges.length; i++ ) { + root.addOutputs( i, 
ranges[i] ); + } + + // Set boundaries (ends of each elementary interval): + boundaries = new long[elementaryIntervals.size()]; + for ( int i = 0; i < boundaries.length; i++ ) { + boundaries[i] = elementaryIntervals.get( i ).end; + } + + countsPerBoundaries = new long[boundaries.length]; + counts = new long[ranges.length]; + } + + private void incrementCountForLeafWithIndex(int index) { + countsPerBoundaries[index]++; + } + + private int findLeafIndex(long v) { + // Binary search to find matched elementary range; we + // are guaranteed to find a match because the last + // boundary is Long.MAX_VALUE: + int lo = 0; + int hi = boundaries.length - 1; + while ( true ) { + int mid = ( lo + hi ) >>> 1; + if ( v <= boundaries[mid] ) { + if ( mid == 0 ) { + return 0; + } + else { + hi = mid - 1; + } + } + else if ( v > boundaries[mid + 1] ) { + lo = mid + 1; + } + else { + return mid + 1; + } + } + } + + /** Fills counts corresponding to the original input + * ranges, returning the missing count (how many hits + * didn't match any ranges). */ + private void fillCounts(long[] counts) { + leafUpto = 0; + rollup( root, counts, false ); + } + + private long rollup(LongRangeNode node, long[] counts, boolean sawOutputs) { + long count; + sawOutputs |= node.outputs != null; + if ( node.left != null ) { + count = rollup( node.left, counts, sawOutputs ); + count += rollup( node.right, counts, sawOutputs ); + } + else { + // Leaf: + count = countsPerBoundaries[leafUpto]; + leafUpto++; + } + if ( node.outputs != null ) { + for ( IntCursor rangeIndexCursor : node.outputs ) { + counts[rangeIndexCursor.value] += count; + } + } + return count; + } + + private static LongRangeNode split(int start, int end, List elementaryIntervals) { + if ( start == end - 1 ) { + // leaf + InclusiveRange range = elementaryIntervals.get( start ); + return new LongRangeNode( range.start, range.end, null, null ); + } + else { + int mid = ( start + end ) >>> 1; + LongRangeNode left = split( start, mid, elementaryIntervals ); + LongRangeNode right = split( mid, end, elementaryIntervals ); + return new LongRangeNode( left.start, right.end, left, right ); + } + } + + private record InclusiveRange(long start, long end) { + private InclusiveRange { + assert end >= start; + } + + @Override + public String toString() { + return start + " to " + end; + } + } + + /** Holds one node of the segment tree. */ + private static class LongRangeNode { + private final LongRangeNode left; + private final LongRangeNode right; + + // Our range, inclusive: + private final long start; + private final long end; + + // Which range indices to output when a query goes + // through this node: + IntArrayList outputs; + + public LongRangeNode(long start, long end, LongRangeNode left, LongRangeNode right) { + this.start = start; + this.end = end; + this.left = left; + this.right = right; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + toString( sb, 0 ); + return sb.toString(); + } + + static void indent(StringBuilder sb, int depth) { + for ( int i = 0; i < depth; i++ ) { + sb.append( " " ); + } + } + + /** Recursively assigns range outputs to each node. 
*/ + void addOutputs(int index, EffectiveRange range) { + if ( start >= range.min() && end <= range.max() ) { + // Our range is fully included in the incoming + // range; add to our output list: + if ( outputs == null ) { + outputs = new IntArrayList(); + } + outputs.add( index ); + } + else if ( left != null ) { + assert right != null; + // Recurse: + left.addOutputs( index, range ); + right.addOutputs( index, range ); + } + } + + void toString(StringBuilder sb, int depth) { + indent( sb, depth ); + if ( left == null ) { + assert right == null; + sb.append( "leaf: " ).append( start ).append( " to " ).append( end ); + } + else { + sb.append( "node: " ).append( start ).append( " to " ).append( end ); + } + if ( outputs != null ) { + sb.append( " outputs=" ); + sb.append( outputs ); + } + sb.append( '\n' ); + + if ( left != null ) { + assert right != null; + left.toString( sb, depth + 1 ); + right.toString( sb, depth + 1 ); + } + } + } + + @Override + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) { + IntHashSet uniqueLeafIndicesForDocument = new IntHashSet(); + while ( values.hasNextValue() ) { + // Each document must be counted only once per range. + int leafIndex = findLeafIndex( values.nextValue() ); + if ( uniqueLeafIndicesForDocument.add( leafIndex ) ) { + incrementCountForLeafWithIndex( leafIndex ); + } + } + } + } + + public long[] counts() { + if ( !filled ) { + filled = true; + fillCounts( counts ); + } + return counts; + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE_NO_SCORES; + } + + protected void doSetNextReader(LeafReaderContext context) throws IOException { + values = valuesSource.getValues( context ); + } + + public void finish() throws IOException { + values = null; + } + +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java new file mode 100644 index 00000000000..3894c3ed3a3 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; +import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange; + +public class RangeCollectorFactory + implements CollectorFactory { + + public static CollectorFactory instance( + LongMultiValuesSource valuesSource, EffectiveRange[] ranges) { + return new RangeCollectorFactory( valuesSource, ranges ); + } + + public final CollectorKey key = CollectorKey.create(); + private final LongMultiValuesSource valuesSource; + private final EffectiveRange[] ranges; + + public RangeCollectorFactory(LongMultiValuesSource valuesSource, EffectiveRange[] ranges) { + this.valuesSource = valuesSource; + this.ranges = ranges; + } + + @Override + public RangeCollectorManager createCollectorManager(CollectorExecutionContext context) { + return new RangeCollectorManager( valuesSource, ranges ); + } + + @Override + public CollectorKey getCollectorKey() { + return key; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java new file mode 100644 index 00000000000..0e142551b6c --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.util.Collection; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; +import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange; + +import org.apache.lucene.search.CollectorManager; + +public class RangeCollectorManager implements CollectorManager { + + private final LongMultiValuesSource valuesSource; + private final EffectiveRange[] ranges; + + public RangeCollectorManager(LongMultiValuesSource valuesSource, EffectiveRange[] ranges) { + this.valuesSource = valuesSource; + this.ranges = ranges; + } + + @Override + public RangeCollector newCollector() { + return new RangeCollector( valuesSource, ranges ); + } + + @Override + public RangeCollector reduce(Collection collection) { + // TODO: actually reduce: + return collection.iterator().next(); + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java index 1a62ef39013..037a74ac3e3 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java @@ -7,50 +7,63 @@ import java.util.Collection; import java.util.function.ToLongFunction; +import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange; import org.hibernate.search.util.common.data.Range; import org.hibernate.search.util.common.data.RangeBoundInclusion; -import org.apache.lucene.facet.range.LongRange; - public class FacetCountsUtils { private FacetCountsUtils() { } - public static < - T extends Number> 
LongRange[] createLongRangesForIntegralValues(Collection> ranges) { - return createLongRanges( ranges, Number::longValue, Long.MIN_VALUE, Long.MAX_VALUE, false ); + public static EffectiveRange[] createEffectiveRangesForIntegralValues( + Collection> ranges) { + return createEffectiveRangesForIntegralValues( ranges, Number::longValue, Long.MIN_VALUE, Long.MAX_VALUE, false ); } - public static LongRange[] createLongRangesForFloatingPointValues(Collection> ranges, + public static EffectiveRange[] createEffectiveRangesForIntegralValues( + Collection> ranges, ToLongFunction encoder, T negativeInfinity, T positiveInfinity) { - return createLongRanges( ranges, encoder, negativeInfinity, positiveInfinity, true ); + return createEffectiveRangesForIntegralValues( ranges, encoder, negativeInfinity, positiveInfinity, true ); } - private static LongRange[] createLongRanges(Collection> ranges, + private static EffectiveRange[] createEffectiveRangesForIntegralValues(Collection> ranges, ToLongFunction encoder, T lowestPossibleValue, T highestPossibleValue, boolean extremaAreInfinity) { - LongRange[] longRanges = new LongRange[ranges.size()]; + EffectiveRange[] effectiveRanges = new EffectiveRange[ranges.size()]; int i = 0; for ( Range range : ranges ) { - T lowerBoundValue = range.lowerBoundValue().orElse( null ); - T upperBoundValue = range.upperBoundValue().orElse( null ); - longRanges[i] = new LongRange( - String.valueOf( i ), - encoder.applyAsLong( lowerBoundValue == null ? lowestPossibleValue : lowerBoundValue ), - // The lower bound is included if it is explicitly included - RangeBoundInclusion.INCLUDED.equals( range.lowerBoundInclusion() ) - // ... or if it is infinity but infinity cannot be represented - || !extremaAreInfinity && lowerBoundValue == null, - encoder.applyAsLong( upperBoundValue == null ? highestPossibleValue : upperBoundValue ), - // The upper bound is included if it is explicitly included - RangeBoundInclusion.INCLUDED.equals( range.upperBoundInclusion() ) - // ... or if it is infinity but infinity cannot be represented - || !extremaAreInfinity && upperBoundValue == null + final T lowerBoundValue = range.lowerBoundValue().orElse( null ); + final T upperBoundValue = range.upperBoundValue().orElse( null ); + + + long min = encoder.applyAsLong( lowerBoundValue == null ? lowestPossibleValue : lowerBoundValue ); + long max = encoder.applyAsLong( upperBoundValue == null ? highestPossibleValue : upperBoundValue ); + + // The lower bound is included if it is explicitly included + // ... or if it is infinity but infinity cannot be represented + // so if it's none of the above we exclude the boundary by ++ it. + if ( + RangeBoundInclusion.EXCLUDED.equals( range.lowerBoundInclusion() ) + && ( extremaAreInfinity || lowerBoundValue != null ) ) { + ++min; + } + + // The upper bound is included if it is explicitly included + // ... or if it is infinity but infinity cannot be represented + // so if it's none of the above we exclude the boundary by -- it. 
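			// Illustration with hypothetical bounds (not part of the original patch):
			// for an integral field, a range with lower bound 10 (EXCLUDED) and upper bound 20 (EXCLUDED)
			// ends up as min = 11 and max = 19, i.e. EffectiveRange( 11, 19 ),
			// which matches exactly the encoded values 11..19 inclusive.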
+ if ( + RangeBoundInclusion.EXCLUDED.equals( range.upperBoundInclusion() ) + && ( extremaAreInfinity || upperBoundValue != null ) ) { + --max; + } + + effectiveRanges[i] = new EffectiveRange( + min, + max ); ++i; } - return longRanges; + return effectiveRanges; } - } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeFacetCounts.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeFacetCounts.java deleted file mode 100644 index a1f34798ab8..00000000000 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeFacetCounts.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.io.IOException; -import java.util.List; - -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; - -import com.carrotsearch.hppc.IntHashSet; -import com.carrotsearch.hppc.procedures.IntProcedure; - -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.range.LongRange; -import org.apache.lucene.index.IndexReaderContext; -import org.apache.lucene.index.ReaderUtil; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Weight; - -/** - *

- * Copied with some changes from {@code org.apache.lucene.facet.range.LongRangeFacetCounts} - * of Apache Lucene project. - */ -public class LongMultiValueRangeFacetCounts extends MultiValueRangeFacetCounts { - - public LongMultiValueRangeFacetCounts(String field, LongMultiValuesSource valueSource, FacetsCollector hits, - LongRange... ranges) - throws IOException { - this( field, valueSource, hits, null, ranges ); - } - - public LongMultiValueRangeFacetCounts(String field, LongMultiValuesSource valueSource, FacetsCollector hits, - Query fastMatchQuery, LongRange... ranges) - throws IOException { - super( field, ranges, fastMatchQuery ); - count( valueSource, hits.getMatchingDocs() ); - } - - private void count(LongMultiValuesSource valueSource, List matchingDocs) throws IOException { - LongRange[] longRanges = (LongRange[]) this.ranges; - - IntHashSet uniqueLeafIndicesForDocument = new IntHashSet(); - LongMultiValueRangeCounter counter = new LongMultiValueRangeCounter( longRanges ); - IntProcedure incrementCountForLeafWithIndex = counter::incrementCountForLeafWithIndex; - - int missingCount = 0; - for ( FacetsCollector.MatchingDocs hits : matchingDocs ) { - LongMultiValues fv = valueSource.getValues( hits.context() ); - - totCount += hits.totalHits(); - final DocIdSetIterator fastMatchDocs; - if ( fastMatchQuery != null ) { - final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext( hits.context() ); - final IndexSearcher searcher = new IndexSearcher( topLevelContext ); - searcher.setQueryCache( null ); - final Weight fastMatchWeight = - searcher.createWeight( searcher.rewrite( fastMatchQuery ), ScoreMode.COMPLETE_NO_SCORES, 1 ); - Scorer s = fastMatchWeight.scorer( hits.context() ); - if ( s == null ) { - continue; - } - fastMatchDocs = s.iterator(); - } - else { - fastMatchDocs = null; - } - - DocIdSetIterator docs = hits.bits().iterator(); - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { - if ( fastMatchDocs != null ) { - int fastMatchDoc = fastMatchDocs.docID(); - if ( fastMatchDoc < doc ) { - fastMatchDoc = fastMatchDocs.advance( doc ); - } - - if ( doc != fastMatchDoc ) { - doc = docs.advance( fastMatchDoc ); - continue; - } - } - - if ( fv.advanceExact( doc ) ) { - while ( fv.hasNextValue() ) { - // Each document must be counted only once per range. - int leafIndex = counter.findLeafIndex( fv.nextValue() ); - uniqueLeafIndicesForDocument.add( leafIndex ); - } - - uniqueLeafIndicesForDocument.forEach( incrementCountForLeafWithIndex ); - uniqueLeafIndicesForDocument.clear(); - } - else { - missingCount++; - } - - doc = docs.nextDoc(); - } - } - - int x = counter.fillCounts( counts ); - - missingCount += x; - - totCount -= missingCount; - } -} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/EffectiveRange.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/EffectiveRange.java new file mode 100644 index 00000000000..042446b36d5 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/EffectiveRange.java @@ -0,0 +1,8 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +public record EffectiveRange(long min, long max) { +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java index cc7b70ce276..513a8fb379c 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java @@ -4,14 +4,16 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; -import java.io.IOException; import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.function.Function; -import org.hibernate.search.backend.lucene.lowlevel.collector.impl.FacetsCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollector; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; @@ -19,15 +21,10 @@ import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; import org.hibernate.search.backend.lucene.types.codec.impl.AbstractLuceneNumericFieldCodec; -import org.hibernate.search.backend.lucene.types.lowlevel.impl.LuceneNumericDomain; import org.hibernate.search.engine.search.aggregation.spi.RangeAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; import org.hibernate.search.util.common.data.Range; -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; - /** * @param The type of field values. * @param The type of encoded field values. 
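// A minimal usage sketch of the new range plumbing; the EffectiveRangeExample class name and the
// sample ranges below are assumptions for illustration only, while FacetCountsUtils, EffectiveRange
// and Range are the types introduced or reused by this patch. It shows how user-facing Range bounds
// are normalized into the inclusive EffectiveRange[min, max] pairs consumed by RangeCollector.
import java.util.List;

import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils;
import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange;
import org.hibernate.search.util.common.data.Range;

class EffectiveRangeExample {
	public static void main(String[] args) {
		// [0, 10) and [10, +infinity): the excluded upper bound of the first range is decremented,
		// and the open upper bound of the second becomes Long.MAX_VALUE.
		EffectiveRange[] effective = FacetCountsUtils.createEffectiveRangesForIntegralValues(
				List.of( Range.canonical( 0L, 10L ), Range.atLeast( 10L ) ) );
		System.out.println( effective[0] ); // EffectiveRange[min=0, max=9]
		System.out.println( effective[1] ); // EffectiveRange[min=10, max=9223372036854775807]
	}
}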
@@ -42,6 +39,8 @@ public class LuceneNumericRangeAggregation private final List> rangesInOrder; private final List> encodedRangesInOrder; + private CollectorKey collectorKey; + private LuceneNumericRangeAggregation(Builder builder) { super( builder ); this.codec = builder.codec; @@ -51,7 +50,15 @@ private LuceneNumericRangeAggregation(Builder builder) { @Override public Extractor, Long>> request(AggregationRequestContext context) { - context.requireCollector( FacetsCollectorFactory.INSTANCE ); + NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); + JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( + absoluteFieldPath, nestedDocsProvider + ); + + var rangeFactory = RangeCollectorFactory.instance( source, + codec.getDomain().createEffectiveRanges( encodedRangesInOrder ) ); + collectorKey = rangeFactory.getCollectorKey(); + context.requireCollector( rangeFactory ); return new LuceneNumericRangeAggregationExtractor(); } @@ -74,23 +81,13 @@ public Factory(AbstractLuceneNumericFieldCodec codec) { private class LuceneNumericRangeAggregationExtractor implements Extractor, Long>> { @Override - public Map, Long> extract(AggregationExtractContext context) throws IOException { - LuceneNumericDomain numericDomain = codec.getDomain(); - - FacetsCollector facetsCollector = context.getCollectorResults( FacetsCollectorFactory.KEY ); - - NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); - - Facets facetsCount = numericDomain.createRangeFacetCounts( - absoluteFieldPath, facetsCollector, encodedRangesInOrder, - nestedDocsProvider - ); - - FacetResult facetResult = facetsCount.getTopChildren( rangesInOrder.size(), absoluteFieldPath ); + public Map, Long> extract(AggregationExtractContext context) { + RangeCollector rangeCollector = context.getCollectorResults( collectorKey ); + long[] counts = rangeCollector.counts(); Map, Long> result = new LinkedHashMap<>(); for ( int i = 0; i < rangesInOrder.size(); i++ ) { - result.put( rangesInOrder.get( i ), (long) (Integer) facetResult.labelValues[i].value ); + result.put( rangesInOrder.get( i ), counts[i] ); } return result; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java index d4ed1241e9a..12fc6822048 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java @@ -14,8 +14,8 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueRangeFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.document.DoublePoint; @@ -106,24 +106,9 @@ public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector f } @Override - public Facets createRangeFacetCounts(String absoluteFieldPath, FacetsCollector 
facetsCollector, - Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException { - // As we don't need to apply any operation to terms except sometimes a sort, - // we can simply rely on raw, long values, whose order is the same as their corresponding double value. - // Values are ultimately converted back to the Double equivalent by calling sortedDocValueToTerm. - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueRangeFacetCounts( - absoluteFieldPath, source, - facetsCollector, - FacetCountsUtils.createLongRangesForFloatingPointValues( - ranges, NumericUtils::doubleToSortableLong, - Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY - ) - ); + public EffectiveRange[] createEffectiveRanges(Collection> ranges) { + return FacetCountsUtils.createEffectiveRangesForIntegralValues( ranges, NumericUtils::doubleToSortableLong, + Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java index 48a7218ac16..e16beb2f696 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java @@ -14,8 +14,8 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueRangeFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; @@ -107,24 +107,9 @@ public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector f } @Override - public Facets createRangeFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException { - // As we don't need to apply any operation to terms except sometimes a sort, - // we can simply rely on raw, int values, whose order is the same as their corresponding float value. - // Values are ultimately converted back to the Float equivalent by calling sortedDocValueToTerm. 
- JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromIntField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueRangeFacetCounts( - absoluteFieldPath, source, - facetsCollector, - FacetCountsUtils.createLongRangesForFloatingPointValues( - ranges, value -> (long) NumericUtils.floatToSortableInt( value ), - Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY - ) - ); + public EffectiveRange[] createEffectiveRanges(Collection> ranges) { + return FacetCountsUtils.createEffectiveRangesForIntegralValues( ranges, NumericUtils::floatToSortableInt, + Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java index db1a866a90b..feb6d9de5f8 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java @@ -14,8 +14,8 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueRangeFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; @@ -106,18 +106,8 @@ public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector f } @Override - public Facets createRangeFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException { - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromIntField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueRangeFacetCounts( - absoluteFieldPath, source, - facetsCollector, - FacetCountsUtils.createLongRangesForIntegralValues( ranges ) - ); + public EffectiveRange[] createEffectiveRanges(Collection> ranges) { + return FacetCountsUtils.createEffectiveRangesForIntegralValues( ranges ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java index b8df3919af0..d5430ed50e3 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java @@ -14,8 +14,8 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueRangeFacetCounts; import 
org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; @@ -106,18 +106,8 @@ public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector f } @Override - public Facets createRangeFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException { - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueRangeFacetCounts( - absoluteFieldPath, source, - facetsCollector, - FacetCountsUtils.createLongRangesForIntegralValues( ranges ) - ); + public EffectiveRange[] createEffectiveRanges(Collection> ranges) { + return FacetCountsUtils.createEffectiveRangesForIntegralValues( ranges ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java index 2b82c7a9807..f9ba185e9bb 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java @@ -10,6 +10,7 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.facet.Facets; @@ -47,10 +48,7 @@ Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCo NestedDocsProvider nestedDocsProvider) throws IOException; - Facets createRangeFacetCounts(String absoluteFieldPath, - FacetsCollector facetsCollector, Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException; + EffectiveRange[] createEffectiveRanges(Collection> ranges); IndexableField createIndexField(String absoluteFieldPath, E numericValue); From 2fb2212764a535d855180563d4c9c1ce341caf2c Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Thu, 26 Jun 2025 21:59:43 +0200 Subject: [PATCH 03/23] HSEARCH-3661 Do not rely on facets collector for terms aggregations --- .../collector/impl/NumericTermsCollector.java | 101 ++++++++++++++ .../impl/NumericTermsCollectorFactory.java | 33 +++++ .../impl/NumericTermsCollectorManager.java | 32 +++++ .../collector/impl/TextTermsCollector.java | 126 ++++++++++++++++++ .../impl/TextTermsCollectorFactory.java | 36 +++++ .../impl/TextTermsCollectorManager.java | 34 +++++ .../docvalues/impl/TextMultiValues.java | 13 ++ ...actLuceneMultivaluedTermsAggregation.java} | 99 +++----------- .../lucene/types/aggregation/impl/Bucket.java | 9 +- .../types/aggregation/impl/BucketOrder.java | 49 +++++-- .../types/aggregation/impl/LongBucket.java | 8 ++ .../impl/LuceneNumericTermsAggregation.java | 86 ++++++++---- .../impl/LuceneTextTermsAggregation.java | 57 +++++--- .../search/aggregation/AggregationDslIT.java | 12 ++ .../lucene/cfg/LuceneBackendSettings.java | 4 +- .../lucene/cfg/LuceneIndexSettings.java | 8 +- .../collector/impl/NumericTermsCollector.java | 101 
++++++++++++++ .../impl/NumericTermsCollectorFactory.java | 33 +++++ .../impl/NumericTermsCollectorManager.java | 32 +++++ .../collector/impl/RangeCollector.java | 2 +- .../collector/impl/RangeCollectorFactory.java | 14 +- .../collector/impl/RangeCollectorManager.java | 4 +- .../collector/impl/TextTermsCollector.java | 126 ++++++++++++++++++ .../impl/TextTermsCollectorFactory.java | 36 +++++ .../impl/TextTermsCollectorManager.java | 34 +++++ .../LongMultiValuesToSingleValuesSource.java | 2 +- .../impl/SortedNumericDoubleDocValues.java | 4 +- .../docvalues/impl/TextMultiValues.java | 13 ++ .../TextMultiValuesToSingleValuesSource.java | 5 +- .../lowlevel/facet/impl/FacetCountsUtils.java | 2 +- .../impl/LuceneUnifiedSearchHighlighter.java | 2 +- .../predicate/impl/LuceneNamedPredicate.java | 1 - .../impl/LuceneDistanceToFieldProjection.java | 2 +- .../impl/LuceneFieldProjection.java | 4 +- .../search/query/LuceneSearchQuery.java | 6 +- .../query/impl/LuceneSearchQueryBuilder.java | 3 +- .../sort/impl/LuceneSearchSortCollector.java | 2 +- ...actLuceneMultivaluedTermsAggregation.java} | 99 +++----------- .../lucene/types/aggregation/impl/Bucket.java | 9 +- .../types/aggregation/impl/BucketOrder.java | 49 +++++-- .../types/aggregation/impl/LongBucket.java | 8 ++ .../impl/LuceneNumericRangeAggregation.java | 2 +- .../impl/LuceneNumericTermsAggregation.java | 86 ++++++++---- .../impl/LuceneTextTermsAggregation.java | 57 +++++--- .../types/lowlevel/impl/EffectiveRange.java | 8 ++ .../lowlevel/impl/LuceneDoubleDomain.java | 1 - .../lowlevel/impl/LuceneFloatDomain.java | 1 - .../lowlevel/impl/LuceneIntegerDomain.java | 1 - .../types/lowlevel/impl/LuceneLongDomain.java | 1 - .../lowlevel/impl/LuceneNumericDomain.java | 1 - 50 files changed, 1122 insertions(+), 336 deletions(-) create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java rename backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/{AbstractLuceneFacetsBasedTermsAggregation.java => AbstractLuceneMultivaluedTermsAggregation.java} (54%) create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java create 
mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java rename lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/{AbstractLuceneFacetsBasedTermsAggregation.java => AbstractLuceneMultivaluedTermsAggregation.java} (54%) create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java new file mode 100644 index 00000000000..2977d48b37a --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java @@ -0,0 +1,101 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; +import org.hibernate.search.backend.lucene.types.aggregation.impl.BucketOrder; +import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket; + +import com.carrotsearch.hppc.LongHashSet; +import com.carrotsearch.hppc.LongIntHashMap; +import com.carrotsearch.hppc.LongIntMap; +import com.carrotsearch.hppc.procedures.LongIntProcedure; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; +import org.apache.lucene.util.PriorityQueue; + +public class NumericTermsCollector extends SimpleCollector { + + private final LongHashSet uniqueLeafIndicesForDocument = new LongHashSet(); + + private final LongMultiValuesSource valuesSource; + private final LongIntMap hashCounts = new LongIntHashMap(); + private LongMultiValues values; + + public NumericTermsCollector(LongMultiValuesSource valuesSource) { + this.valuesSource = valuesSource; + } + + @Override + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) { + // or just recreate it on each document? + uniqueLeafIndicesForDocument.clear(); + + while ( values.hasNextValue() ) { + // Each document must be counted only once per range. 
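				// For illustration (hypothetical values, not from the patch): a document whose field
				// holds [3, 3, 7] increments the bucket for 3 once and the bucket for 7 once, because
				// uniqueLeafIndicesForDocument deduplicates values within a single document.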
+ long value = values.nextValue(); + if ( uniqueLeafIndicesForDocument.add( value ) ) { + hashCounts.addTo( value, 1 ); + } + } + } + } + + public List counts(BucketOrder order, int topN, int minDocCount) { + int size = Math.min( topN, hashCounts.size() ); + PriorityQueue pq = new HibernateSearchBucketOrderQueue( order, size ); + + hashCounts.forEach( (LongIntProcedure) (key, value) -> { + if ( value >= minDocCount ) { + pq.insertWithOverflow( new LongBucket( key, value ) ); + } + } ); + + List buckets = new LinkedList<>(); + while ( pq.size() != 0 ) { + LongBucket popped = pq.pop(); + buckets.add( 0, popped ); + } + + return buckets; + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE_NO_SCORES; + } + + protected void doSetNextReader(LeafReaderContext context) throws IOException { + values = valuesSource.getValues( context ); + } + + public void finish() { + values = null; + } + + private static class HibernateSearchBucketOrderQueue extends PriorityQueue { + private final Comparator comparator; + + public HibernateSearchBucketOrderQueue(BucketOrder order, int maxSize) { + super( maxSize ); + this.comparator = order.toLongBucketComparator(); + } + + @Override + protected boolean lessThan(LongBucket t1, LongBucket t2) { + return comparator.compare( t1, t2 ) > 0; + } + } + +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java new file mode 100644 index 00000000000..a99503ef62a --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java @@ -0,0 +1,33 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; + +public class NumericTermsCollectorFactory + implements CollectorFactory { + + public static CollectorFactory instance( + LongMultiValuesSource valuesSource) { + return new NumericTermsCollectorFactory( valuesSource ); + } + + public final CollectorKey key = CollectorKey.create(); + private final LongMultiValuesSource valuesSource; + + public NumericTermsCollectorFactory(LongMultiValuesSource valuesSource) { + this.valuesSource = valuesSource; + } + + @Override + public NumericTermsCollectorManager createCollectorManager(CollectorExecutionContext context) { + return new NumericTermsCollectorManager( valuesSource ); + } + + @Override + public CollectorKey getCollectorKey() { + return key; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java new file mode 100644 index 00000000000..afdeab71207 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java @@ -0,0 +1,32 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.util.Collection; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; + +import org.apache.lucene.search.CollectorManager; + +public class NumericTermsCollectorManager + implements CollectorManager { + + private final LongMultiValuesSource valuesSource; + + public NumericTermsCollectorManager(LongMultiValuesSource valuesSource) { + this.valuesSource = valuesSource; + } + + @Override + public NumericTermsCollector newCollector() { + return new NumericTermsCollector( valuesSource ); + } + + @Override + public NumericTermsCollector reduce(Collection collection) { + // TODO: actually reduce: + return collection.iterator().next(); + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java new file mode 100644 index 00000000000..c6efffdd032 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java @@ -0,0 +1,126 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValues; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; +import org.hibernate.search.backend.lucene.types.aggregation.impl.BucketOrder; +import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket; + +import com.carrotsearch.hppc.LongHashSet; +import com.carrotsearch.hppc.LongIntHashMap; +import com.carrotsearch.hppc.LongIntMap; +import com.carrotsearch.hppc.cursors.LongIntCursor; +import com.carrotsearch.hppc.procedures.LongIntProcedure; + +import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; +import org.apache.lucene.util.PriorityQueue; + +public class TextTermsCollector extends SimpleCollector { + + private final LongHashSet uniqueLeafIndicesForDocument = new LongHashSet(); + + private final TextMultiValuesSource valuesSource; + private final LongIntMap hashCounts = new LongIntHashMap(); + private final LongIntMap segmentCounts = new LongIntHashMap(); + private final String field; + private SortedSetDocValues sortedSetValues; + + private TextMultiValues values; + + public TextTermsCollector(String field, TextMultiValuesSource valuesSource) { + this.field = field; + this.valuesSource = valuesSource; + } + + @Override + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) { + // or just recreate it on each document? + uniqueLeafIndicesForDocument.clear(); + + while ( values.hasNextValue() ) { + // Each document must be counted only once per range. 
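				// Note that the collected value is a segment-local ordinal: segmentCounts is keyed by
				// per-segment ords, and finish() remaps them to global ords via
				// sortedSetValues.lookupTerm( values.lookupOrd( ord ) ) before merging into hashCounts.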
+ long value = values.nextOrd(); + if ( uniqueLeafIndicesForDocument.add( value ) ) { + segmentCounts.addTo( value, 1 ); + } + } + } + } + + public List counts(BucketOrder order, int topN, int minDocCount) { + int size = Math.min( topN, hashCounts.size() ); + PriorityQueue pq = new HibernateSearchBucketOrderQueue( order, size ); + + hashCounts.forEach( (LongIntProcedure) (key, value) -> { + if ( value >= minDocCount ) { + pq.insertWithOverflow( new LongBucket( key, value ) ); + } + } ); + + List buckets = new LinkedList<>(); + while ( pq.size() != 0 ) { + LongBucket popped = pq.pop(); + buckets.add( 0, popped ); + } + + return buckets; + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE_NO_SCORES; + } + + protected void doSetNextReader(LeafReaderContext context) throws IOException { + initRootSortedSetDocValues( context ); + this.values = valuesSource.getValues( context ); + } + + public void finish() throws IOException { + for ( LongIntCursor hashCount : segmentCounts ) { + hashCounts.addTo( sortedSetValues.lookupTerm( values.lookupOrd( hashCount.key ) ), hashCount.value ); + } + this.values = null; + this.segmentCounts.clear(); + } + + + private void initRootSortedSetDocValues(IndexReaderContext ctx) throws IOException { + if ( sortedSetValues != null || ctx == null ) { + return; + } + if ( ctx.isTopLevel ) { + this.sortedSetValues = MultiDocValues.getSortedSetValues( ctx.reader(), field ); + } + initRootSortedSetDocValues( ctx.parent ); + } + + private static class HibernateSearchBucketOrderQueue extends PriorityQueue { + private final Comparator comparator; + + public HibernateSearchBucketOrderQueue(BucketOrder order, int maxSize) { + super( maxSize ); + this.comparator = order.toLongBucketComparator(); + } + + @Override + protected boolean lessThan(LongBucket t1, LongBucket t2) { + return comparator.compare( t1, t2 ) > 0; + } + } + +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java new file mode 100644 index 00000000000..11489538470 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; + +public class TextTermsCollectorFactory + implements CollectorFactory { + + public static CollectorFactory instance( + String field, TextMultiValuesSource valuesSource) { + return new TextTermsCollectorFactory( field, valuesSource ); + } + + + public final CollectorKey key = CollectorKey.create(); + private final TextMultiValuesSource valuesSource; + private final String field; + + public TextTermsCollectorFactory(String field, TextMultiValuesSource valuesSource) { + this.field = field; + this.valuesSource = valuesSource; + } + + @Override + public TextTermsCollectorManager createCollectorManager(CollectorExecutionContext context) { + return new TextTermsCollectorManager( field, valuesSource ); + } + + @Override + public CollectorKey getCollectorKey() { + return key; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java new file mode 100644 index 00000000000..b309f509b05 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.util.Collection; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; + +import org.apache.lucene.search.CollectorManager; + +public class TextTermsCollectorManager + implements CollectorManager { + + private final TextMultiValuesSource valuesSource; + private final String field; + + public TextTermsCollectorManager(String field, TextMultiValuesSource valuesSource) { + this.field = field; + this.valuesSource = valuesSource; + } + + @Override + public TextTermsCollector newCollector() { + return new TextTermsCollector( field, valuesSource ); + } + + @Override + public TextTermsCollector reduce(Collection collection) { + // TODO: actually reduce: + return collection.iterator().next(); + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValues.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValues.java index 0c50566780d..0749fff4f99 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValues.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValues.java @@ -7,6 +7,7 @@ import java.io.IOException; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.BytesRef; /** * A per-document, unordered sequence of text ordinals. @@ -46,6 +47,8 @@ protected TextMultiValues() { */ public abstract long nextOrd() throws IOException; + public abstract BytesRef lookupOrd(long ord) throws IOException; + /** * Returns the number of unique values. * @return number of unique values in this SortedDocValues. 
This is @@ -77,6 +80,11 @@ public long nextOrd() { throw new UnsupportedOperationException(); } + @Override + public BytesRef lookupOrd(long ord) { + throw new UnsupportedOperationException(); + } + @Override public long getValueCount() { return 0; @@ -113,6 +121,11 @@ public long nextOrd() throws IOException { return values.nextOrd(); } + @Override + public BytesRef lookupOrd(long ord) throws IOException { + return values.lookupOrd( ord ); + } + @Override public long getValueCount() { return values.getValueCount(); diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java similarity index 54% rename from backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java rename to backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java index db87cd6bd7d..b80563a010d 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java @@ -5,15 +5,12 @@ package org.hibernate.search.backend.lucene.types.aggregation.impl; import java.io.IOException; -import java.util.ArrayList; import java.util.Comparator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; -import org.hibernate.search.backend.lucene.lowlevel.collector.impl.FacetsCollectorFactory; -import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; @@ -23,9 +20,6 @@ import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.LabelAndValue; import org.apache.lucene.index.IndexReader; /** @@ -34,16 +28,16 @@ * @param The type of keys in the returned map. It can be {@code F} * or a different type if value converters are used. 
*/ -public abstract class AbstractLuceneFacetsBasedTermsAggregation +public abstract class AbstractLuceneMultivaluedTermsAggregation extends AbstractLuceneBucketAggregation { - private final ProjectionConverter fromFieldValueConverter; + protected final ProjectionConverter fromFieldValueConverter; - private final BucketOrder order; - private final int maxTermCount; - private final int minDocCount; + protected final BucketOrder order; + protected final int maxTermCount; + protected final int minDocCount; - AbstractLuceneFacetsBasedTermsAggregation(AbstractBuilder builder) { + AbstractLuceneMultivaluedTermsAggregation(AbstractBuilder builder) { super( builder ); this.fromFieldValueConverter = builder.fromFieldValueConverter; this.order = builder.order; @@ -51,103 +45,40 @@ public abstract class AbstractLuceneFacetsBasedTermsAggregation this.minDocCount = builder.minDocCount; } - @Override - public Extractor> request(AggregationRequestContext context) { - context.requireCollector( FacetsCollectorFactory.INSTANCE ); - - return extractor( context ); - } - protected abstract Extractor> extractor(AggregationRequestContext context); protected abstract class AbstractExtractor implements Extractor> { @Override public final Map extract(AggregationExtractContext context) throws IOException { - FromDocumentValueConvertContext convertContext = context.fromDocumentValueConvertContext(); - List> buckets = getTopBuckets( context ); - if ( BucketOrder.COUNT_DESC.equals( order ) && ( minDocCount > 0 || buckets.size() >= maxTermCount ) ) { - /* - * Optimization: in this case, minDocCount and sorting can be safely ignored. - * We already have all the buckets we need, and they are already sorted. - */ - return toMap( convertContext, buckets ); - } - - if ( minDocCount <= 0 ) { + if ( minDocCount == 0 && buckets.size() < maxTermCount ) { Set firstTerms = collectFirstTerms( context.getIndexReader(), order.isTermOrderDescending(), maxTermCount ); - // If some of the first terms are already in non-zero buckets, ignore them in the next step for ( Bucket bucket : buckets ) { - firstTerms.remove( bucket.term ); + firstTerms.remove( bucket.term() ); } - // Complete the list of buckets with zero-count terms - for ( T term : firstTerms ) { - buckets.add( new Bucket<>( term, 0L ) ); - } - } - - // Sort the list of buckets and trim it if necessary (there may be more buckets than we want in some cases) - buckets.sort( order.toBucketComparator( getAscendingTermComparator() ) ); - if ( buckets.size() > maxTermCount ) { - buckets.subList( maxTermCount, buckets.size() ).clear(); + firstTerms.forEach( term -> buckets.add( new Bucket<>( term, 0 ) ) ); + buckets.sort( order.toBucketComparator( getAscendingTermComparator() ) ); } - return toMap( convertContext, buckets ); + return toMap( context.fromDocumentValueConvertContext(), buckets ); } - abstract FacetResult getTopChildren(IndexReader reader, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider, int limit) - throws IOException; - abstract Set collectFirstTerms(IndexReader reader, boolean descending, int limit) throws IOException; abstract Comparator getAscendingTermComparator(); - abstract T labelToTerm(String label); - abstract V termToFieldValue(T key); - private List> getTopBuckets(AggregationExtractContext context) throws IOException { - FacetsCollector facetsCollector = context.getCollectorResults( FacetsCollectorFactory.KEY ); - - NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); - - /* - * TODO HSEARCH-3666 What if the 
sort order is by term value? - * Lucene returns facets in descending count order. - * If that's what we need, then we can ask Lucene to apply the "maxTermCount" limit directly. - * This is what we do here. - * But if we need a different sort, then having to retrieve the "top N" facets by document count - * becomes clearly sub-optimal: to properly implement this, we would need to retrieve - * *all* facets, and Lucene would allocate an array of Integer.MAX_VALUE elements. - * To improve on this, we would need to re-implement the facet collections. - */ - int limit = maxTermCount; - FacetResult facetResult = getTopChildren( context.getIndexReader(), facetsCollector, nestedDocsProvider, limit ); - - List> buckets = new ArrayList<>(); - - if ( facetResult != null ) { - // Add results for matching documents - for ( LabelAndValue labelAndValue : facetResult.labelValues ) { - long count = (Integer) labelAndValue.value; - if ( count >= minDocCount ) { - buckets.add( new Bucket<>( labelToTerm( labelAndValue.label ), count ) ); - } - } - } - - return buckets; - } + abstract List> getTopBuckets(AggregationExtractContext context) throws IOException; private Map toMap(FromDocumentValueConvertContext convertContext, List> buckets) { Map result = new LinkedHashMap<>(); // LinkedHashMap to preserve ordering for ( Bucket bucket : buckets ) { - V decoded = termToFieldValue( bucket.term ); + V decoded = termToFieldValue( bucket.term() ); K key = fromFieldValueConverter.fromDocumentValue( decoded, convertContext ); - result.put( key, bucket.count ); + result.put( key, bucket.count() ); } return result; } @@ -213,7 +144,7 @@ public void maxTermCount(int maxTermCount) { } @Override - public abstract AbstractLuceneFacetsBasedTermsAggregation build(); + public abstract AbstractLuceneMultivaluedTermsAggregation build(); protected final void order(BucketOrder order) { this.order = order; diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java index 6c1c27c662a..ce9b0a13ce3 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java @@ -4,12 +4,5 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; -final class Bucket { - final F term; - final long count; - - Bucket(F term, long count) { - this.term = term; - this.count = count; - } +public record Bucket(F term, long count) { } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java index 4767879901c..31233e5ad39 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java @@ -6,29 +6,46 @@ import java.util.Comparator; -enum BucketOrder { +public enum BucketOrder { COUNT_ASC { @Override Comparator> toBucketComparatorInternal(Comparator termComparator) { return (left, right) -> { - int order = Long.compare( left.count, right.count ); + int order = Long.compare( left.count(), right.count() ); if ( order != 0 ) { return order; } - order = termComparator.compare( left.term, right.term ); + order = 
termComparator.compare( left.term(), right.term() ); return order; }; } + + @Override + Comparator toLongBucketComparatorInternal() { + return Comparator.comparingLong( LongBucket::count ).thenComparingLong( LongBucket::term ); + } }, COUNT_DESC { @Override Comparator> toBucketComparatorInternal(Comparator termComparator) { return (left, right) -> { - int order = Long.compare( right.count, left.count ); // reversed, because desc + int order = Long.compare( right.count(), left.count() ); // reversed, because desc if ( order != 0 ) { return order; } - order = termComparator.compare( left.term, right.term ); + order = termComparator.compare( left.term(), right.term() ); + return order; + }; + } + + @Override + Comparator toLongBucketComparatorInternal() { + return (left, right) -> { + int order = Long.compare( right.count(), left.count() ); // reversed, because desc + if ( order != 0 ) { + return order; + } + order = Long.compare( left.term(), right.term() ); return order; }; } @@ -36,7 +53,12 @@ Comparator> toBucketComparatorInternal(Comparator termComparato TERM_ASC { @Override Comparator> toBucketComparatorInternal(Comparator termComparator) { - return (left, right) -> termComparator.compare( left.term, right.term ); + return (left, right) -> termComparator.compare( left.term(), right.term() ); + } + + @Override + Comparator toLongBucketComparatorInternal() { + return Comparator.comparingLong( LongBucket::term ); } }, TERM_DESC { @@ -47,17 +69,28 @@ boolean isTermOrderDescending() { @Override Comparator> toBucketComparatorInternal(Comparator termComparator) { - return (left, right) -> termComparator.compare( left.term, right.term ); + return (left, right) -> termComparator.compare( left.term(), right.term() ); + } + + @Override + Comparator toLongBucketComparatorInternal() { + return Comparator.comparingLong( LongBucket::term ).reversed(); } }; - Comparator> toBucketComparator(Comparator termAscendingComparator) { + public Comparator> toBucketComparator(Comparator termAscendingComparator) { return toBucketComparatorInternal( isTermOrderDescending() ? termAscendingComparator.reversed() : termAscendingComparator ); } + public Comparator toLongBucketComparator() { + return toLongBucketComparatorInternal(); + } + abstract Comparator> toBucketComparatorInternal(Comparator termComparator); + abstract Comparator toLongBucketComparatorInternal(); + boolean isTermOrderDescending() { return false; } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java new file mode 100644 index 00000000000..4d980d2c5ab --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java @@ -0,0 +1,8 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +public record LongBucket(long term, long count) { +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java index e32d2da2d29..5ec21a7054f 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java @@ -5,13 +5,21 @@ package org.hibernate.search.backend.lucene.types.aggregation.impl; import java.io.IOException; +import java.util.ArrayList; import java.util.Comparator; +import java.util.List; import java.util.Map; +import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import java.util.function.Function; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.NumericTermsCollector; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.NumericTermsCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; @@ -22,14 +30,12 @@ import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.PriorityQueue; /** * @param The type of field values. @@ -37,12 +43,13 @@ * or a different type if value converters are used. 
*/ public class LuceneNumericTermsAggregation - extends AbstractLuceneFacetsBasedTermsAggregation { + extends AbstractLuceneMultivaluedTermsAggregation { private final LuceneNumericDomain numericDomain; private final Comparator termComparator; private final Function decoder; + private CollectorKey collectorKey; private LuceneNumericTermsAggregation(Builder builder) { super( builder ); @@ -51,6 +58,19 @@ private LuceneNumericTermsAggregation(Builder builder) { this.decoder = builder.decoder; } + @Override + public Extractor> request(AggregationRequestContext context) { + NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); + JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( + absoluteFieldPath, nestedDocsProvider + ); + var termsCollectorFactory = NumericTermsCollectorFactory.instance( source ); + context.requireCollector( termsCollectorFactory ); + collectorKey = termsCollectorFactory.getCollectorKey(); + + return extractor( context ); + } + @Override protected Extractor> extractor(AggregationRequestContext context) { return new LuceneNumericTermsAggregationExtractor(); @@ -73,20 +93,33 @@ public TermsAggregationBuilder.TypeSelector create(LuceneSearchIndexScope sco } private class LuceneNumericTermsAggregationExtractor extends AbstractExtractor { + @Override - FacetResult getTopChildren(IndexReader reader, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider, int limit) - throws IOException { - Facets facetCounts = numericDomain.createTermsFacetCounts( - absoluteFieldPath, facetsCollector, nestedDocsProvider - ); - return facetCounts.getTopChildren( limit, absoluteFieldPath ); + Comparator getAscendingTermComparator() { + return termComparator; + } + + @Override + V termToFieldValue(E key) { + return decoder.apply( key ); + } + + @Override + List> getTopBuckets(AggregationExtractContext context) { + var termsCollector = context.getCollectorResults( collectorKey ); + + List counts = termsCollector.counts( order, maxTermCount, minDocCount ); + List> buckets = new ArrayList<>(); + for ( LongBucket bucket : counts ) { + buckets.add( new Bucket<>( numericDomain.sortedDocValueToTerm( bucket.term() ), bucket.count() ) ); + } + return buckets; } @Override - SortedSet collectFirstTerms(IndexReader reader, boolean descending, int limit) + Set collectFirstTerms(IndexReader reader, boolean descending, int limit) throws IOException { - TreeSet collectedTerms = new TreeSet<>( descending ? termComparator.reversed() : termComparator ); + SortedSet collectedTerms = new TreeSet<>( descending ? 
termComparator.reversed() : termComparator ); for ( LeafReaderContext leaf : reader.leaves() ) { final LeafReader atomicReader = leaf.reader(); SortedNumericDocValues docValues = atomicReader.getSortedNumericDocValues( absoluteFieldPath ); @@ -107,20 +140,6 @@ SortedSet collectFirstTerms(IndexReader reader, boolean descending, int limit return collectedTerms; } - @Override - Comparator getAscendingTermComparator() { - return termComparator; - } - - @Override - E labelToTerm(String termAsString) { - return numericDomain.sortedDocValueToTerm( Long.parseLong( termAsString ) ); - } - - @Override - V termToFieldValue(E term) { - return decoder.apply( term ); - } } private static class TypeSelector extends AbstractTypeSelector { @@ -171,4 +190,17 @@ public LuceneNumericTermsAggregation build() { } } + private static class HibernateSearchTermsQueue extends PriorityQueue { + private final Comparator comparator; + + public HibernateSearchTermsQueue(Comparator comparator, int maxSize) { + super( maxSize ); + this.comparator = comparator; + } + + @Override + protected boolean lessThan(E t1, E t2) { + return comparator.compare( t1, t2 ) > 0; + } + } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java index f8428ade788..bf20eff0559 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java @@ -5,14 +5,19 @@ package org.hibernate.search.backend.lucene.types.aggregation.impl; import java.io.IOException; +import java.util.ArrayList; import java.util.Comparator; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TextTermsCollector; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TextTermsCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningTextMultiValuesSource; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.TextMultiValueFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneValueFieldSearchQueryElementFactory; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; @@ -21,11 +26,10 @@ import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.SortedSetDocValues; /** @@ -33,33 +37,35 @@ * or a different type if value converters are used. 
*/ public class LuceneTextTermsAggregation - extends AbstractLuceneFacetsBasedTermsAggregation { + extends AbstractLuceneMultivaluedTermsAggregation { private static final Comparator STRING_COMPARATOR = Comparator.naturalOrder(); + private CollectorKey collectorKey; + private LuceneTextTermsAggregation(Builder builder) { super( builder ); } + @Override + public Extractor> request(AggregationRequestContext context) { + NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); + JoiningTextMultiValuesSource source = JoiningTextMultiValuesSource.fromField( + absoluteFieldPath, nestedDocsProvider + ); + var termsCollectorFactory = TextTermsCollectorFactory.instance( absoluteFieldPath, source ); + context.requireCollector( termsCollectorFactory ); + collectorKey = termsCollectorFactory.getCollectorKey(); + + return extractor( context ); + } + @Override protected Extractor> extractor(AggregationRequestContext context) { return new LuceneTextTermsAggregationExtractor(); } private class LuceneTextTermsAggregationExtractor extends AbstractExtractor { - @Override - FacetResult getTopChildren(IndexReader reader, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider, int limit) - throws IOException { - JoiningTextMultiValuesSource valueSource = JoiningTextMultiValuesSource.fromField( - absoluteFieldPath, nestedDocsProvider - ); - TextMultiValueFacetCounts facetCounts = new TextMultiValueFacetCounts( - reader, absoluteFieldPath, valueSource, facetsCollector - ); - - return facetCounts.getTopChildren( limit, absoluteFieldPath ); - } @Override Set collectFirstTerms(IndexReader reader, boolean descending, int limit) @@ -95,13 +101,22 @@ Comparator getAscendingTermComparator() { } @Override - String labelToTerm(String label) { - return label; + String termToFieldValue(String key) { + return key; } @Override - String termToFieldValue(String key) { - return key; + List> getTopBuckets(AggregationExtractContext context) throws IOException { + var termsCollector = context.getCollectorResults( collectorKey ); + + List counts = termsCollector.counts( order, maxTermCount, minDocCount ); + + var dv = MultiDocValues.getSortedSetValues( context.getIndexReader(), absoluteFieldPath ); + List> buckets = new ArrayList<>(); + for ( LongBucket bucket : counts ) { + buckets.add( new Bucket<>( dv.lookupOrd( bucket.term() ).utf8ToString(), bucket.count() ) ); + } + return buckets; } } diff --git a/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java b/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java index fb1e2ef83a1..aa761363377 100644 --- a/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java +++ b/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java @@ -248,6 +248,18 @@ void terms() { entry( Genre.SCIENCE_FICTION, 3L ) ); } ); + + withinSearchSession( searchSession -> { + AggregationKey> countsByGenreKey = AggregationKey.of( "countsByPrice" ); + SearchResult result = searchSession.search( Book.class ) + .where( f -> f.matchAll() ) + .aggregation( countsByGenreKey, f -> f.terms() + .field( "price", Double.class ) + .orderByCountAscending() ) + .fetch( 20 ); + Map countsByGenre = result.aggregation( countsByGenreKey ); + System.err.println( countsByGenre ); + } ); } @Test diff --git 
a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/cfg/LuceneBackendSettings.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/cfg/LuceneBackendSettings.java index 5ba2a7e108e..43c7c070297 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/cfg/LuceneBackendSettings.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/cfg/LuceneBackendSettings.java @@ -36,8 +36,8 @@ private LuceneBackendSettings() { *

* This should be set in order to get consistent behavior when Lucene is upgraded. *

- * Expects a Lucene {@link org.apache.lucene.util.Version} object, - * or a String accepted by {@link org.apache.lucene.util.Version#parseLeniently(java.lang.String)} + * Expects a Lucene {@link Version} object, + * or a String accepted by {@link Version#parseLeniently(String)} *

* Defaults to {@link Defaults#LUCENE_VERSION}, which may change when Hibernate Search or Lucene is upgraded, * and therefore does not offer any backwards-compatibility guarantees. diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/cfg/LuceneIndexSettings.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/cfg/LuceneIndexSettings.java index b98bb46e226..a0a5f322ed6 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/cfg/LuceneIndexSettings.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/cfg/LuceneIndexSettings.java @@ -87,7 +87,7 @@ private LuceneIndexSettings() { *

* Expects a {@link IOStrategyName} value, or a String representation of such value. *

- * Defaults to {@link LuceneIndexSettings.Defaults#IO_STRATEGY}. + * Defaults to {@link Defaults#IO_STRATEGY}. */ public static final String IO_STRATEGY = IO_PREFIX + IORadicals.STRATEGY; @@ -115,7 +115,7 @@ private LuceneIndexSettings() { * Expects a positive Integer value in milliseconds, such as {@code 1000}, * or a String that can be parsed into such Integer value. *

- * Defaults to {@link LuceneIndexSettings.Defaults#IO_COMMIT_INTERVAL}. + * Defaults to {@link Defaults#IO_COMMIT_INTERVAL}. */ public static final String IO_COMMIT_INTERVAL = IO_PREFIX + IORadicals.COMMIT_INTERVAL; @@ -140,7 +140,7 @@ private LuceneIndexSettings() { * Expects a positive Integer value in milliseconds, such as {@code 1000}, * or a String that can be parsed into such Integer value. *

- * Defaults to {@link LuceneIndexSettings.Defaults#IO_REFRESH_INTERVAL}. + * Defaults to {@link Defaults#IO_REFRESH_INTERVAL}. */ public static final String IO_REFRESH_INTERVAL = IO_PREFIX + IORadicals.REFRESH_INTERVAL; @@ -276,7 +276,7 @@ private LuceneIndexSettings() { * Expects a String, such as "hash". * See the reference documentation for a list of available values. *

- * Defaults to {@link LuceneIndexSettings.Defaults#SHARDING_STRATEGY} (no sharding). + * Defaults to {@link Defaults#SHARDING_STRATEGY} (no sharding). */ public static final String SHARDING_STRATEGY = SHARDING_PREFIX + ShardingRadicals.STRATEGY; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java new file mode 100644 index 00000000000..641ba3265d0 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java @@ -0,0 +1,101 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; +import org.hibernate.search.backend.lucene.types.aggregation.impl.BucketOrder; +import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket; + +import com.carrotsearch.hppc.LongHashSet; +import com.carrotsearch.hppc.LongIntHashMap; +import com.carrotsearch.hppc.LongIntMap; +import com.carrotsearch.hppc.procedures.LongIntProcedure; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; +import org.apache.lucene.util.PriorityQueue; + +public class NumericTermsCollector extends SimpleCollector { + + private final LongHashSet uniqueLeafIndicesForDocument = new LongHashSet(); + + private final LongMultiValuesSource valuesSource; + private final LongIntMap hashCounts = new LongIntHashMap(); + private LongMultiValues values; + + public NumericTermsCollector(LongMultiValuesSource valuesSource) { + this.valuesSource = valuesSource; + } + + @Override + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) { + // or just recreate it on each document? + uniqueLeafIndicesForDocument.clear(); + + while ( values.hasNextValue() ) { + // Each document must be counted only once per range. 
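+ // The uniqueLeafIndicesForDocument set deduplicates the values of the current (multi-valued) document,
+ // so each distinct term value increments its count at most once per document.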
+ long value = values.nextValue(); + if ( uniqueLeafIndicesForDocument.add( value ) ) { + hashCounts.addTo( value, 1 ); + } + } + } + } + + public List counts(BucketOrder order, int topN, int minDocCount) { + int size = Math.min( topN, hashCounts.size() ); + PriorityQueue pq = new HibernateSearchBucketOrderQueue( order, size ); + + hashCounts.forEach( (LongIntProcedure) (key, value) -> { + if ( value >= minDocCount ) { + pq.insertWithOverflow( new LongBucket( key, value ) ); + } + } ); + + List buckets = new LinkedList<>(); + while ( pq.size() != 0 ) { + LongBucket popped = pq.pop(); + buckets.addFirst( popped ); + } + + return buckets; + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE_NO_SCORES; + } + + protected void doSetNextReader(LeafReaderContext context) throws IOException { + values = valuesSource.getValues( context ); + } + + public void finish() { + values = null; + } + + private static class HibernateSearchBucketOrderQueue extends PriorityQueue { + private final Comparator comparator; + + public HibernateSearchBucketOrderQueue(BucketOrder order, int maxSize) { + super( maxSize ); + this.comparator = order.toLongBucketComparator(); + } + + @Override + protected boolean lessThan(LongBucket t1, LongBucket t2) { + return comparator.compare( t1, t2 ) > 0; + } + } + +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java new file mode 100644 index 00000000000..a99503ef62a --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java @@ -0,0 +1,33 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; + +public class NumericTermsCollectorFactory + implements CollectorFactory { + + public static CollectorFactory instance( + LongMultiValuesSource valuesSource) { + return new NumericTermsCollectorFactory( valuesSource ); + } + + public final CollectorKey key = CollectorKey.create(); + private final LongMultiValuesSource valuesSource; + + public NumericTermsCollectorFactory(LongMultiValuesSource valuesSource) { + this.valuesSource = valuesSource; + } + + @Override + public NumericTermsCollectorManager createCollectorManager(CollectorExecutionContext context) { + return new NumericTermsCollectorManager( valuesSource ); + } + + @Override + public CollectorKey getCollectorKey() { + return key; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java new file mode 100644 index 00000000000..afdeab71207 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java @@ -0,0 +1,32 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.util.Collection; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; + +import org.apache.lucene.search.CollectorManager; + +public class NumericTermsCollectorManager + implements CollectorManager { + + private final LongMultiValuesSource valuesSource; + + public NumericTermsCollectorManager(LongMultiValuesSource valuesSource) { + this.valuesSource = valuesSource; + } + + @Override + public NumericTermsCollector newCollector() { + return new NumericTermsCollector( valuesSource ); + } + + @Override + public NumericTermsCollector reduce(Collection collection) { + // TODO: actually reduce: + return collection.iterator().next(); + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java index 407247ec425..0abcde21cf2 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java @@ -11,7 +11,7 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; -import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange; +import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; import com.carrotsearch.hppc.IntArrayList; import com.carrotsearch.hppc.IntHashSet; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java index 3894c3ed3a3..cdaba62f538 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java @@ -5,14 +5,14 @@ package org.hibernate.search.backend.lucene.lowlevel.collector.impl; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; -import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange; +import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; -public class RangeCollectorFactory - implements CollectorFactory { +public class RangeCollectorFactory + implements CollectorFactory> { - public static CollectorFactory instance( + public static CollectorFactory> instance( LongMultiValuesSource valuesSource, EffectiveRange[] ranges) { - return new RangeCollectorFactory( valuesSource, ranges ); + return new RangeCollectorFactory<>( valuesSource, ranges ); } public final CollectorKey key = CollectorKey.create(); @@ -25,8 +25,8 @@ public RangeCollectorFactory(LongMultiValuesSource valuesSource, EffectiveRange[ } @Override - public RangeCollectorManager createCollectorManager(CollectorExecutionContext context) { - return new RangeCollectorManager( valuesSource, ranges ); + public RangeCollectorManager createCollectorManager(CollectorExecutionContext context) { + return new RangeCollectorManager<>( 
valuesSource, ranges ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java index 0e142551b6c..92f5e854504 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java @@ -7,11 +7,11 @@ import java.util.Collection; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; -import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange; +import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; import org.apache.lucene.search.CollectorManager; -public class RangeCollectorManager implements CollectorManager { +public class RangeCollectorManager implements CollectorManager { private final LongMultiValuesSource valuesSource; private final EffectiveRange[] ranges; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java new file mode 100644 index 00000000000..e72f44900be --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java @@ -0,0 +1,126 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValues; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; +import org.hibernate.search.backend.lucene.types.aggregation.impl.BucketOrder; +import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket; + +import com.carrotsearch.hppc.LongHashSet; +import com.carrotsearch.hppc.LongIntHashMap; +import com.carrotsearch.hppc.LongIntMap; +import com.carrotsearch.hppc.cursors.LongIntCursor; +import com.carrotsearch.hppc.procedures.LongIntProcedure; + +import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; +import org.apache.lucene.util.PriorityQueue; + +public class TextTermsCollector extends SimpleCollector { + + private final LongHashSet uniqueLeafIndicesForDocument = new LongHashSet(); + + private final TextMultiValuesSource valuesSource; + private final LongIntMap hashCounts = new LongIntHashMap(); + private final LongIntMap segmentCounts = new LongIntHashMap(); + private final String field; + private SortedSetDocValues sortedSetValues; + + private TextMultiValues values; + + public TextTermsCollector(String field, TextMultiValuesSource valuesSource) { + this.field = field; + this.valuesSource = valuesSource; + } + + @Override + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) 
{ + // or just recreate it on each document? + uniqueLeafIndicesForDocument.clear(); + + while ( values.hasNextValue() ) { + // Each document must be counted only once per range. + long value = values.nextOrd(); + if ( uniqueLeafIndicesForDocument.add( value ) ) { + segmentCounts.addTo( value, 1 ); + } + } + } + } + + public List counts(BucketOrder order, int topN, int minDocCount) { + int size = Math.min( topN, hashCounts.size() ); + PriorityQueue pq = new HibernateSearchBucketOrderQueue( order, size ); + + hashCounts.forEach( (LongIntProcedure) (key, value) -> { + if ( value >= minDocCount ) { + pq.insertWithOverflow( new LongBucket( key, value ) ); + } + } ); + + List buckets = new LinkedList<>(); + while ( pq.size() != 0 ) { + LongBucket popped = pq.pop(); + buckets.addFirst( popped ); + } + + return buckets; + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE_NO_SCORES; + } + + protected void doSetNextReader(LeafReaderContext context) throws IOException { + initRootSortedSetDocValues( context ); + this.values = valuesSource.getValues( context ); + } + + public void finish() throws IOException { + for ( LongIntCursor hashCount : segmentCounts ) { + hashCounts.addTo( sortedSetValues.lookupTerm( values.lookupOrd( hashCount.key ) ), hashCount.value ); + } + this.values = null; + this.segmentCounts.clear(); + } + + + private void initRootSortedSetDocValues(IndexReaderContext ctx) throws IOException { + if ( sortedSetValues != null || ctx == null ) { + return; + } + if ( ctx.isTopLevel ) { + this.sortedSetValues = MultiDocValues.getSortedSetValues( ctx.reader(), field ); + } + initRootSortedSetDocValues( ctx.parent ); + } + + private static class HibernateSearchBucketOrderQueue extends PriorityQueue { + private final Comparator comparator; + + public HibernateSearchBucketOrderQueue(BucketOrder order, int maxSize) { + super( maxSize ); + this.comparator = order.toLongBucketComparator(); + } + + @Override + protected boolean lessThan(LongBucket t1, LongBucket t2) { + return comparator.compare( t1, t2 ) > 0; + } + } + +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java new file mode 100644 index 00000000000..11489538470 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; + +public class TextTermsCollectorFactory + implements CollectorFactory { + + public static CollectorFactory instance( + String field, TextMultiValuesSource valuesSource) { + return new TextTermsCollectorFactory( field, valuesSource ); + } + + + public final CollectorKey key = CollectorKey.create(); + private final TextMultiValuesSource valuesSource; + private final String field; + + public TextTermsCollectorFactory(String field, TextMultiValuesSource valuesSource) { + this.field = field; + this.valuesSource = valuesSource; + } + + @Override + public TextTermsCollectorManager createCollectorManager(CollectorExecutionContext context) { + return new TextTermsCollectorManager( field, valuesSource ); + } + + @Override + public CollectorKey getCollectorKey() { + return key; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java new file mode 100644 index 00000000000..b309f509b05 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.util.Collection; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; + +import org.apache.lucene.search.CollectorManager; + +public class TextTermsCollectorManager + implements CollectorManager { + + private final TextMultiValuesSource valuesSource; + private final String field; + + public TextTermsCollectorManager(String field, TextMultiValuesSource valuesSource) { + this.field = field; + this.valuesSource = valuesSource; + } + + @Override + public TextTermsCollector newCollector() { + return new TextTermsCollector( field, valuesSource ); + } + + @Override + public TextTermsCollector reduce(Collection collection) { + // TODO: actually reduce: + return collection.iterator().next(); + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/LongMultiValuesToSingleValuesSource.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/LongMultiValuesToSingleValuesSource.java index 1414e8d59a5..0f652963463 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/LongMultiValuesToSingleValuesSource.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/LongMultiValuesToSingleValuesSource.java @@ -106,7 +106,7 @@ public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws I * @param ctx the ctx * @param scores the scores * @return NumericDocValues - * @throws java.io.IOException + * @throws IOException */ public NumericDocValues getRawNumericDocValues(LeafReaderContext ctx, DoubleValues scores) throws IOException { return new RawNumericDocValues( getValues( ctx, scores ) ); diff --git 
a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/SortedNumericDoubleDocValues.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/SortedNumericDoubleDocValues.java index d4087aaf4f1..8d2e4086645 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/SortedNumericDoubleDocValues.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/SortedNumericDoubleDocValues.java @@ -37,7 +37,7 @@ protected SortedNumericDoubleDocValues() { * * @param target the target * @return the next value - * @throws java.io.IOException + * @throws IOException */ public abstract boolean advanceExact(int target) throws IOException; @@ -46,7 +46,7 @@ protected SortedNumericDoubleDocValues() { * {@link #docValueCount} times for the document. * * @return next value - * @throws java.io.IOException + * @throws IOException */ public abstract double nextValue() throws IOException; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValues.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValues.java index 0c50566780d..0749fff4f99 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValues.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValues.java @@ -7,6 +7,7 @@ import java.io.IOException; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.BytesRef; /** * A per-document, unordered sequence of text ordinals. @@ -46,6 +47,8 @@ protected TextMultiValues() { */ public abstract long nextOrd() throws IOException; + public abstract BytesRef lookupOrd(long ord) throws IOException; + /** * Returns the number of unique values. * @return number of unique values in this SortedDocValues. 
This is @@ -77,6 +80,11 @@ public long nextOrd() { throw new UnsupportedOperationException(); } + @Override + public BytesRef lookupOrd(long ord) { + throw new UnsupportedOperationException(); + } + @Override public long getValueCount() { return 0; @@ -113,6 +121,11 @@ public long nextOrd() throws IOException { return values.nextOrd(); } + @Override + public BytesRef lookupOrd(long ord) throws IOException { + return values.lookupOrd( ord ); + } + @Override public long getValueCount() { return values.getValueCount(); diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValuesToSingleValuesSource.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValuesToSingleValuesSource.java index 2fc3bfc90f6..b75253df044 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValuesToSingleValuesSource.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValuesToSingleValuesSource.java @@ -17,7 +17,7 @@ import org.apache.lucene.util.BytesRef; /** - * A source of {@link org.apache.lucene.index.SortedDocValues} (text doc values) with multiple values per document, + * A source of {@link SortedDocValues} (text doc values) with multiple values per document, * where multiple values are "aggregated" into a single value * according to a given {@link MultiValueMode}. *

@@ -136,15 +136,14 @@ public boolean advanceExact(int parentDoc) throws IOException { if ( parentDoc == lastSeenParentDoc ) { return result; } - lastSeenParentDoc = parentDoc; + if ( !childDocsWithValues.advanceExactParent( parentDoc ) ) { // No child of this parent has a value result = false; return false; } - lastSeenParentDoc = parentDoc; lastEmittedOrd = (int) mode.pick( values, childDocsWithValues ); result = true; return true; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java index 037a74ac3e3..3a233f63c5d 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java @@ -7,7 +7,7 @@ import java.util.Collection; import java.util.function.ToLongFunction; -import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange; +import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; import org.hibernate.search.util.common.data.Range; import org.hibernate.search.util.common.data.RangeBoundInclusion; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/highlighter/impl/LuceneUnifiedSearchHighlighter.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/highlighter/impl/LuceneUnifiedSearchHighlighter.java index a4a601f9488..220ca48d8cc 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/highlighter/impl/LuceneUnifiedSearchHighlighter.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/highlighter/impl/LuceneUnifiedSearchHighlighter.java @@ -166,7 +166,7 @@ public List highlight(int doc) throws IOException { private static class MultiValueUnifiedHighlighter extends UnifiedHighlighter { - private MultiValueUnifiedHighlighter(MultiValueUnifiedHighlighter.Builder builder) { + private MultiValueUnifiedHighlighter(Builder builder) { super( builder ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/predicate/impl/LuceneNamedPredicate.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/predicate/impl/LuceneNamedPredicate.java index 570cc97fb71..61a8ba5850b 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/predicate/impl/LuceneNamedPredicate.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/predicate/impl/LuceneNamedPredicate.java @@ -131,7 +131,6 @@ private static class BasicBuilder extends Builder { this.definition = definition; } - @SuppressWarnings("unchecked") @Override public void factory(ExtendedSearchPredicateFactory factory) { this.factory = factory.withScopeRoot( NonStaticMetamodelScope.class ); diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/projection/impl/LuceneDistanceToFieldProjection.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/projection/impl/LuceneDistanceToFieldProjection.java index c0ab5665c16..ac91547cefe 100644 --- 
a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/projection/impl/LuceneDistanceToFieldProjection.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/projection/impl/LuceneDistanceToFieldProjection.java @@ -196,7 +196,7 @@ public Builder create(LuceneSearchIndexScope scope, LuceneSearchIndexValueFie } } - public static class Builder extends AbstractLuceneProjection.AbstractBuilder + public static class Builder extends AbstractBuilder implements DistanceToFieldProjectionBuilder { private final LuceneFieldCodec codec; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/projection/impl/LuceneFieldProjection.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/projection/impl/LuceneFieldProjection.java index 3cb1e06b5ea..373d86d1d7f 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/projection/impl/LuceneFieldProjection.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/projection/impl/LuceneFieldProjection.java @@ -86,7 +86,7 @@ public ValueFieldExtractor request(ProjectionRequestContext context) { /** * @param The type of the temporary storage for accumulated values, before and after being transformed. */ - private class ValueFieldExtractor implements LuceneSearchProjection.Extractor { + private class ValueFieldExtractor implements Extractor { private final String contextAbsoluteFieldPath; private final ProjectionCollector collector; @@ -193,7 +193,7 @@ private TypeSelector(LuceneFieldCodec codec, } } - private static class Builder extends AbstractLuceneProjection.AbstractBuilder + private static class Builder extends AbstractBuilder implements FieldProjectionBuilder { private final Function decodeFunction; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/LuceneSearchQuery.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/LuceneSearchQuery.java index d3cafdfbc97..35eca043321 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/LuceneSearchQuery.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/LuceneSearchQuery.java @@ -20,7 +20,7 @@ public interface LuceneSearchQuery * @param id The id of the entity whose score should be explained. * This is the entity ID, which may be of any type ({@code long}, ...), * not the document ID which is always a string. - * @return An {@link org.apache.lucene.search.Explanation} describing the score computation for the hit. + * @return An {@link Explanation} describing the score computation for the hit. * @throws org.hibernate.search.util.common.SearchException If the query targets multiple mapped types, * or if the explain request fails. */ @@ -35,14 +35,14 @@ public interface LuceneSearchQuery * @param id The id of the entity whose score should be explained. * This is the entity ID, which may be of any type ({@code long}, ...), * not the document ID which is always a string. - * @return An {@link org.apache.lucene.search.Explanation} describing the score computation for the hit. + * @return An {@link Explanation} describing the score computation for the hit. 
* @throws org.hibernate.search.util.common.SearchException If the given index name does not refer to a mapped name targeted by this query, * or if the explain request fails. */ Explanation explain(String typeName, Object id); /** - * @return The Lucene {@link org.apache.lucene.search.Sort} this query relies on. + * @return The Lucene {@link Sort} this query relies on. */ Sort luceneSort(); diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java index 6bcf85e9ac6..2d1fb77dd7b 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java @@ -42,7 +42,6 @@ import org.hibernate.search.engine.search.sort.SearchSort; import org.hibernate.search.engine.search.timeout.spi.TimeoutManager; -import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; @@ -214,7 +213,7 @@ public LuceneSearchQuery build() { Query filter = scope.filterOrNull( sessionContext.tenantIdentifier() ); if ( filter != null ) { - luceneQueryBuilder.add( filter, BooleanClause.Occur.FILTER ); + luceneQueryBuilder.add( filter, Occur.FILTER ); } Query definitiveLuceneQuery = luceneQueryBuilder.build(); diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/sort/impl/LuceneSearchSortCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/sort/impl/LuceneSearchSortCollector.java index c04a9ef22e6..7a9e2adac8b 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/sort/impl/LuceneSearchSortCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/sort/impl/LuceneSearchSortCollector.java @@ -11,7 +11,7 @@ *

* Used by Lucene-specific sort contributors. * - * @see LuceneSearchSort#toSortFields(org.hibernate.search.backend.lucene.search.sort.impl.LuceneSearchSortCollector) + * @see LuceneSearchSort#toSortFields(LuceneSearchSortCollector) */ public interface LuceneSearchSortCollector extends SortRequestContext { diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java similarity index 54% rename from lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java rename to lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java index db87cd6bd7d..b80563a010d 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java @@ -5,15 +5,12 @@ package org.hibernate.search.backend.lucene.types.aggregation.impl; import java.io.IOException; -import java.util.ArrayList; import java.util.Comparator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; -import org.hibernate.search.backend.lucene.lowlevel.collector.impl.FacetsCollectorFactory; -import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; @@ -23,9 +20,6 @@ import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.LabelAndValue; import org.apache.lucene.index.IndexReader; /** @@ -34,16 +28,16 @@ * @param The type of keys in the returned map. It can be {@code F} * or a different type if value converters are used. 
*/ -public abstract class AbstractLuceneFacetsBasedTermsAggregation +public abstract class AbstractLuceneMultivaluedTermsAggregation extends AbstractLuceneBucketAggregation { - private final ProjectionConverter fromFieldValueConverter; + protected final ProjectionConverter fromFieldValueConverter; - private final BucketOrder order; - private final int maxTermCount; - private final int minDocCount; + protected final BucketOrder order; + protected final int maxTermCount; + protected final int minDocCount; - AbstractLuceneFacetsBasedTermsAggregation(AbstractBuilder builder) { + AbstractLuceneMultivaluedTermsAggregation(AbstractBuilder builder) { super( builder ); this.fromFieldValueConverter = builder.fromFieldValueConverter; this.order = builder.order; @@ -51,103 +45,40 @@ public abstract class AbstractLuceneFacetsBasedTermsAggregation this.minDocCount = builder.minDocCount; } - @Override - public Extractor> request(AggregationRequestContext context) { - context.requireCollector( FacetsCollectorFactory.INSTANCE ); - - return extractor( context ); - } - protected abstract Extractor> extractor(AggregationRequestContext context); protected abstract class AbstractExtractor implements Extractor> { @Override public final Map extract(AggregationExtractContext context) throws IOException { - FromDocumentValueConvertContext convertContext = context.fromDocumentValueConvertContext(); - List> buckets = getTopBuckets( context ); - if ( BucketOrder.COUNT_DESC.equals( order ) && ( minDocCount > 0 || buckets.size() >= maxTermCount ) ) { - /* - * Optimization: in this case, minDocCount and sorting can be safely ignored. - * We already have all the buckets we need, and they are already sorted. - */ - return toMap( convertContext, buckets ); - } - - if ( minDocCount <= 0 ) { + if ( minDocCount == 0 && buckets.size() < maxTermCount ) { Set firstTerms = collectFirstTerms( context.getIndexReader(), order.isTermOrderDescending(), maxTermCount ); - // If some of the first terms are already in non-zero buckets, ignore them in the next step for ( Bucket bucket : buckets ) { - firstTerms.remove( bucket.term ); + firstTerms.remove( bucket.term() ); } - // Complete the list of buckets with zero-count terms - for ( T term : firstTerms ) { - buckets.add( new Bucket<>( term, 0L ) ); - } - } - - // Sort the list of buckets and trim it if necessary (there may be more buckets than we want in some cases) - buckets.sort( order.toBucketComparator( getAscendingTermComparator() ) ); - if ( buckets.size() > maxTermCount ) { - buckets.subList( maxTermCount, buckets.size() ).clear(); + firstTerms.forEach( term -> buckets.add( new Bucket<>( term, 0 ) ) ); + buckets.sort( order.toBucketComparator( getAscendingTermComparator() ) ); } - return toMap( convertContext, buckets ); + return toMap( context.fromDocumentValueConvertContext(), buckets ); } - abstract FacetResult getTopChildren(IndexReader reader, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider, int limit) - throws IOException; - abstract Set collectFirstTerms(IndexReader reader, boolean descending, int limit) throws IOException; abstract Comparator getAscendingTermComparator(); - abstract T labelToTerm(String label); - abstract V termToFieldValue(T key); - private List> getTopBuckets(AggregationExtractContext context) throws IOException { - FacetsCollector facetsCollector = context.getCollectorResults( FacetsCollectorFactory.KEY ); - - NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); - - /* - * TODO HSEARCH-3666 What if the 
sort order is by term value? - * Lucene returns facets in descending count order. - * If that's what we need, then we can ask Lucene to apply the "maxTermCount" limit directly. - * This is what we do here. - * But if we need a different sort, then having to retrieve the "top N" facets by document count - * becomes clearly sub-optimal: to properly implement this, we would need to retrieve - * *all* facets, and Lucene would allocate an array of Integer.MAX_VALUE elements. - * To improve on this, we would need to re-implement the facet collections. - */ - int limit = maxTermCount; - FacetResult facetResult = getTopChildren( context.getIndexReader(), facetsCollector, nestedDocsProvider, limit ); - - List> buckets = new ArrayList<>(); - - if ( facetResult != null ) { - // Add results for matching documents - for ( LabelAndValue labelAndValue : facetResult.labelValues ) { - long count = (Integer) labelAndValue.value; - if ( count >= minDocCount ) { - buckets.add( new Bucket<>( labelToTerm( labelAndValue.label ), count ) ); - } - } - } - - return buckets; - } + abstract List> getTopBuckets(AggregationExtractContext context) throws IOException; private Map toMap(FromDocumentValueConvertContext convertContext, List> buckets) { Map result = new LinkedHashMap<>(); // LinkedHashMap to preserve ordering for ( Bucket bucket : buckets ) { - V decoded = termToFieldValue( bucket.term ); + V decoded = termToFieldValue( bucket.term() ); K key = fromFieldValueConverter.fromDocumentValue( decoded, convertContext ); - result.put( key, bucket.count ); + result.put( key, bucket.count() ); } return result; } @@ -213,7 +144,7 @@ public void maxTermCount(int maxTermCount) { } @Override - public abstract AbstractLuceneFacetsBasedTermsAggregation build(); + public abstract AbstractLuceneMultivaluedTermsAggregation build(); protected final void order(BucketOrder order) { this.order = order; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java index 6c1c27c662a..ce9b0a13ce3 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java @@ -4,12 +4,5 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; -final class Bucket { - final F term; - final long count; - - Bucket(F term, long count) { - this.term = term; - this.count = count; - } +public record Bucket(F term, long count) { } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java index 4767879901c..31233e5ad39 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java @@ -6,29 +6,46 @@ import java.util.Comparator; -enum BucketOrder { +public enum BucketOrder { COUNT_ASC { @Override Comparator> toBucketComparatorInternal(Comparator termComparator) { return (left, right) -> { - int order = Long.compare( left.count, right.count ); + int order = Long.compare( left.count(), right.count() ); if ( order != 0 ) { 
return order; } - order = termComparator.compare( left.term, right.term ); + order = termComparator.compare( left.term(), right.term() ); return order; }; } + + @Override + Comparator toLongBucketComparatorInternal() { + return Comparator.comparingLong( LongBucket::count ).thenComparingLong( LongBucket::term ); + } }, COUNT_DESC { @Override Comparator> toBucketComparatorInternal(Comparator termComparator) { return (left, right) -> { - int order = Long.compare( right.count, left.count ); // reversed, because desc + int order = Long.compare( right.count(), left.count() ); // reversed, because desc if ( order != 0 ) { return order; } - order = termComparator.compare( left.term, right.term ); + order = termComparator.compare( left.term(), right.term() ); + return order; + }; + } + + @Override + Comparator toLongBucketComparatorInternal() { + return (left, right) -> { + int order = Long.compare( right.count(), left.count() ); // reversed, because desc + if ( order != 0 ) { + return order; + } + order = Long.compare( left.term(), right.term() ); return order; }; } @@ -36,7 +53,12 @@ Comparator> toBucketComparatorInternal(Comparator termComparato TERM_ASC { @Override Comparator> toBucketComparatorInternal(Comparator termComparator) { - return (left, right) -> termComparator.compare( left.term, right.term ); + return (left, right) -> termComparator.compare( left.term(), right.term() ); + } + + @Override + Comparator toLongBucketComparatorInternal() { + return Comparator.comparingLong( LongBucket::term ); } }, TERM_DESC { @@ -47,17 +69,28 @@ boolean isTermOrderDescending() { @Override Comparator> toBucketComparatorInternal(Comparator termComparator) { - return (left, right) -> termComparator.compare( left.term, right.term ); + return (left, right) -> termComparator.compare( left.term(), right.term() ); + } + + @Override + Comparator toLongBucketComparatorInternal() { + return Comparator.comparingLong( LongBucket::term ).reversed(); } }; - Comparator> toBucketComparator(Comparator termAscendingComparator) { + public Comparator> toBucketComparator(Comparator termAscendingComparator) { return toBucketComparatorInternal( isTermOrderDescending() ? termAscendingComparator.reversed() : termAscendingComparator ); } + public Comparator toLongBucketComparator() { + return toLongBucketComparatorInternal(); + } + abstract Comparator> toBucketComparatorInternal(Comparator termComparator); + abstract Comparator toLongBucketComparatorInternal(); + boolean isTermOrderDescending() { return false; } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java new file mode 100644 index 00000000000..4d980d2c5ab --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java @@ -0,0 +1,8 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +public record LongBucket(long term, long count) { +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java index 513a8fb379c..4d639ecf19c 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java @@ -115,7 +115,7 @@ private TypeSelector(AbstractLuceneNumericFieldCodec codec, } public static class Builder - extends AbstractLuceneBucketAggregation.AbstractBuilder, Long> + extends AbstractBuilder, Long> implements RangeAggregationBuilder { private final AbstractLuceneNumericFieldCodec codec; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java index e32d2da2d29..5ec21a7054f 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java @@ -5,13 +5,21 @@ package org.hibernate.search.backend.lucene.types.aggregation.impl; import java.io.IOException; +import java.util.ArrayList; import java.util.Comparator; +import java.util.List; import java.util.Map; +import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import java.util.function.Function; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.NumericTermsCollector; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.NumericTermsCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; @@ -22,14 +30,12 @@ import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.PriorityQueue; /** * @param The type of field values. @@ -37,12 +43,13 @@ * or a different type if value converters are used. 
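For reference, a minimal usage sketch of the reworked ordering types above: BucketOrder is now public and exposes comparators both for the generic Bucket record and for the new LongBucket record that collectors produce before terms are decoded. The class below (and its name, BucketOrderUsageSketch) is made up for illustration and is not part of the patch; it only relies on the public types shown in the hunks above.

package org.hibernate.search.backend.lucene.types.aggregation.impl;

import java.util.ArrayList;
import java.util.List;

// Illustrative only: exercises the ordering contract of the new public
// BucketOrder comparators over LongBucket records.
public final class BucketOrderUsageSketch {

    public static void main(String[] args) {
        List<LongBucket> buckets = new ArrayList<>( List.of(
                new LongBucket( 3L, 7L ),  // term ordinal 3, 7 matching docs
                new LongBucket( 1L, 7L ),  // same count, lower term ordinal
                new LongBucket( 2L, 42L )  // highest count
        ) );

        // COUNT_DESC sorts by descending count and breaks ties by ascending term:
        buckets.sort( BucketOrder.COUNT_DESC.toLongBucketComparator() );
        System.out.println( buckets ); // (2,42), (1,7), (3,7)

        // TERM_ASC orders by the long term representation alone:
        buckets.sort( BucketOrder.TERM_ASC.toLongBucketComparator() );
        System.out.println( buckets ); // (1,7), (2,42), (3,7)
    }
}

The same orders drive the zero-count bucket completion in AbstractLuceneMultivaluedTermsAggregation.extract() above, via order.toBucketComparator( getAscendingTermComparator() ).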
*/ public class LuceneNumericTermsAggregation - extends AbstractLuceneFacetsBasedTermsAggregation { + extends AbstractLuceneMultivaluedTermsAggregation { private final LuceneNumericDomain numericDomain; private final Comparator termComparator; private final Function decoder; + private CollectorKey collectorKey; private LuceneNumericTermsAggregation(Builder builder) { super( builder ); @@ -51,6 +58,19 @@ private LuceneNumericTermsAggregation(Builder builder) { this.decoder = builder.decoder; } + @Override + public Extractor> request(AggregationRequestContext context) { + NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); + JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( + absoluteFieldPath, nestedDocsProvider + ); + var termsCollectorFactory = NumericTermsCollectorFactory.instance( source ); + context.requireCollector( termsCollectorFactory ); + collectorKey = termsCollectorFactory.getCollectorKey(); + + return extractor( context ); + } + @Override protected Extractor> extractor(AggregationRequestContext context) { return new LuceneNumericTermsAggregationExtractor(); @@ -73,20 +93,33 @@ public TermsAggregationBuilder.TypeSelector create(LuceneSearchIndexScope sco } private class LuceneNumericTermsAggregationExtractor extends AbstractExtractor { + @Override - FacetResult getTopChildren(IndexReader reader, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider, int limit) - throws IOException { - Facets facetCounts = numericDomain.createTermsFacetCounts( - absoluteFieldPath, facetsCollector, nestedDocsProvider - ); - return facetCounts.getTopChildren( limit, absoluteFieldPath ); + Comparator getAscendingTermComparator() { + return termComparator; + } + + @Override + V termToFieldValue(E key) { + return decoder.apply( key ); + } + + @Override + List> getTopBuckets(AggregationExtractContext context) { + var termsCollector = context.getCollectorResults( collectorKey ); + + List counts = termsCollector.counts( order, maxTermCount, minDocCount ); + List> buckets = new ArrayList<>(); + for ( LongBucket bucket : counts ) { + buckets.add( new Bucket<>( numericDomain.sortedDocValueToTerm( bucket.term() ), bucket.count() ) ); + } + return buckets; } @Override - SortedSet collectFirstTerms(IndexReader reader, boolean descending, int limit) + Set collectFirstTerms(IndexReader reader, boolean descending, int limit) throws IOException { - TreeSet collectedTerms = new TreeSet<>( descending ? termComparator.reversed() : termComparator ); + SortedSet collectedTerms = new TreeSet<>( descending ? 
termComparator.reversed() : termComparator ); for ( LeafReaderContext leaf : reader.leaves() ) { final LeafReader atomicReader = leaf.reader(); SortedNumericDocValues docValues = atomicReader.getSortedNumericDocValues( absoluteFieldPath ); @@ -107,20 +140,6 @@ SortedSet collectFirstTerms(IndexReader reader, boolean descending, int limit return collectedTerms; } - @Override - Comparator getAscendingTermComparator() { - return termComparator; - } - - @Override - E labelToTerm(String termAsString) { - return numericDomain.sortedDocValueToTerm( Long.parseLong( termAsString ) ); - } - - @Override - V termToFieldValue(E term) { - return decoder.apply( term ); - } } private static class TypeSelector extends AbstractTypeSelector { @@ -171,4 +190,17 @@ public LuceneNumericTermsAggregation build() { } } + private static class HibernateSearchTermsQueue extends PriorityQueue { + private final Comparator comparator; + + public HibernateSearchTermsQueue(Comparator comparator, int maxSize) { + super( maxSize ); + this.comparator = comparator; + } + + @Override + protected boolean lessThan(E t1, E t2) { + return comparator.compare( t1, t2 ) > 0; + } + } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java index f8428ade788..bf20eff0559 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java @@ -5,14 +5,19 @@ package org.hibernate.search.backend.lucene.types.aggregation.impl; import java.io.IOException; +import java.util.ArrayList; import java.util.Comparator; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TextTermsCollector; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TextTermsCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningTextMultiValuesSource; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.TextMultiValueFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneValueFieldSearchQueryElementFactory; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; @@ -21,11 +26,10 @@ import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.SortedSetDocValues; /** @@ -33,33 +37,35 @@ * or a different type if value converters are used. 
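A note on the HibernateSearchTermsQueue helper added above (declared here but not referenced in these hunks): Lucene's org.apache.lucene.util.PriorityQueue keeps the least element, per lessThan, at the top, and insertWithOverflow evicts that element first. Returning comparator.compare( t1, t2 ) > 0 therefore inverts the order so the queue retains the maxSize entries that come first in the comparator's order. A minimal sketch of that inversion, using only the stock Lucene PriorityQueue API; the class and its names are made up for illustration.

import java.util.Comparator;

import org.apache.lucene.util.PriorityQueue;

// Illustrative only: a comparator-driven top-N queue using the same inverted
// lessThan as HibernateSearchTermsQueue.
public final class TopNQueueSketch {

    private static final class ComparatorQueue<E> extends PriorityQueue<E> {
        private final Comparator<E> comparator;

        ComparatorQueue(Comparator<E> comparator, int maxSize) {
            super( maxSize );
            this.comparator = comparator;
        }

        @Override
        protected boolean lessThan(E t1, E t2) {
            // Entries that rank worse (greater per the comparator) sit at the
            // top of the heap, so they are evicted first on overflow.
            return comparator.compare( t1, t2 ) > 0;
        }
    }

    public static void main(String[] args) {
        // Natural order, maxSize 2: the queue keeps the 2 smallest values.
        ComparatorQueue<Long> queue = new ComparatorQueue<>( Comparator.naturalOrder(), 2 );
        for ( long value : new long[] { 5L, 1L, 4L, 2L } ) {
            queue.insertWithOverflow( value );
        }
        while ( queue.size() > 0 ) {
            System.out.println( queue.pop() ); // prints 2, then 1 (worst retained first)
        }
    }
}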
*/ public class LuceneTextTermsAggregation - extends AbstractLuceneFacetsBasedTermsAggregation { + extends AbstractLuceneMultivaluedTermsAggregation { private static final Comparator STRING_COMPARATOR = Comparator.naturalOrder(); + private CollectorKey collectorKey; + private LuceneTextTermsAggregation(Builder builder) { super( builder ); } + @Override + public Extractor> request(AggregationRequestContext context) { + NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); + JoiningTextMultiValuesSource source = JoiningTextMultiValuesSource.fromField( + absoluteFieldPath, nestedDocsProvider + ); + var termsCollectorFactory = TextTermsCollectorFactory.instance( absoluteFieldPath, source ); + context.requireCollector( termsCollectorFactory ); + collectorKey = termsCollectorFactory.getCollectorKey(); + + return extractor( context ); + } + @Override protected Extractor> extractor(AggregationRequestContext context) { return new LuceneTextTermsAggregationExtractor(); } private class LuceneTextTermsAggregationExtractor extends AbstractExtractor { - @Override - FacetResult getTopChildren(IndexReader reader, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider, int limit) - throws IOException { - JoiningTextMultiValuesSource valueSource = JoiningTextMultiValuesSource.fromField( - absoluteFieldPath, nestedDocsProvider - ); - TextMultiValueFacetCounts facetCounts = new TextMultiValueFacetCounts( - reader, absoluteFieldPath, valueSource, facetsCollector - ); - - return facetCounts.getTopChildren( limit, absoluteFieldPath ); - } @Override Set collectFirstTerms(IndexReader reader, boolean descending, int limit) @@ -95,13 +101,22 @@ Comparator getAscendingTermComparator() { } @Override - String labelToTerm(String label) { - return label; + String termToFieldValue(String key) { + return key; } @Override - String termToFieldValue(String key) { - return key; + List> getTopBuckets(AggregationExtractContext context) throws IOException { + var termsCollector = context.getCollectorResults( collectorKey ); + + List counts = termsCollector.counts( order, maxTermCount, minDocCount ); + + var dv = MultiDocValues.getSortedSetValues( context.getIndexReader(), absoluteFieldPath ); + List> buckets = new ArrayList<>(); + for ( LongBucket bucket : counts ) { + buckets.add( new Bucket<>( dv.lookupOrd( bucket.term() ).utf8ToString(), bucket.count() ) ); + } + return buckets; } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java new file mode 100644 index 00000000000..a5eecfc8d27 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java @@ -0,0 +1,8 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.lowlevel.impl; + +public record EffectiveRange(long min, long max) { +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java index 12fc6822048..cd1fa6e3274 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java @@ -15,7 +15,6 @@ import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; -import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.document.DoublePoint; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java index e16beb2f696..141ecb237e7 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java @@ -15,7 +15,6 @@ import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; -import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java index feb6d9de5f8..3ce7e9e0def 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java @@ -15,7 +15,6 @@ import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; -import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java index d5430ed50e3..409356559c0 100644 --- 
a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java @@ -15,7 +15,6 @@ import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; -import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java index f9ba185e9bb..20c08cf7c3d 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java @@ -10,7 +10,6 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; -import org.hibernate.search.backend.lucene.types.aggregation.impl.EffectiveRange; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.facet.Facets; From 6703b18a086026a82986408b574857e3f2172a40 Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Fri, 27 Jun 2025 12:09:58 +0200 Subject: [PATCH 04/23] HSEARCH-3661 Remove more code dependant on Lucene facets and lucene-facet dependency overall --- backend/lucene/pom.xml | 4 - .../impl/FacetsCollectorFactory.java | 25 -- .../lowlevel/facet/impl/FacetCountsUtils.java | 69 ----- .../facet/impl/LongMultiValueFacetCounts.java | 194 ------------ .../impl/LongMultiValueRangeCounter.java | 293 ------------------ .../impl/MultiValueRangeFacetCounts.java | 83 ----- .../facet/impl/TextMultiValueFacetCounts.java | 258 --------------- .../types/lowlevel/impl/EffectiveRange.java | 56 ++++ .../lowlevel/impl/LuceneDoubleDomain.java | 21 +- .../lowlevel/impl/LuceneFloatDomain.java | 21 +- .../lowlevel/impl/LuceneIntegerDomain.java | 21 +- .../types/lowlevel/impl/LuceneLongDomain.java | 21 +- .../lowlevel/impl/LuceneNumericDomain.java | 7 - build/parents/build/pom.xml | 5 - lucene-next/backend/lucene/pom.xml | 4 - .../impl/FacetsCollectorFactory.java | 25 -- .../lowlevel/facet/impl/FacetCountsUtils.java | 69 ----- .../facet/impl/LongMultiValueFacetCounts.java | 194 ------------ .../impl/LongMultiValueRangeCounter.java | 293 ------------------ .../impl/MultiValueRangeFacetCounts.java | 83 ----- .../facet/impl/TextMultiValueFacetCounts.java | 275 ---------------- .../aggregation/impl/EffectiveRange.java | 8 - .../types/lowlevel/impl/EffectiveRange.java | 56 ++++ .../lowlevel/impl/LuceneDoubleDomain.java | 21 +- .../lowlevel/impl/LuceneFloatDomain.java | 21 +- .../lowlevel/impl/LuceneIntegerDomain.java | 21 +- .../types/lowlevel/impl/LuceneLongDomain.java | 21 +- .../lowlevel/impl/LuceneNumericDomain.java | 7 - .../build/parents/integrationtest/pom.xml | 5 - lucene-next/build/parents/internal/pom.xml | 5 - lucene-next/build/parents/public/pom.xml | 5 - lucene-next/build/parents/springtest/pom.xml | 5 - 32 files changed, 120 
insertions(+), 2076 deletions(-) delete mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/FacetsCollectorFactory.java delete mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java delete mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueFacetCounts.java delete mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeCounter.java delete mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/MultiValueRangeFacetCounts.java delete mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/TextMultiValueFacetCounts.java delete mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/FacetsCollectorFactory.java delete mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java delete mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueFacetCounts.java delete mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeCounter.java delete mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/MultiValueRangeFacetCounts.java delete mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/TextMultiValueFacetCounts.java delete mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/EffectiveRange.java diff --git a/backend/lucene/pom.xml b/backend/lucene/pom.xml index 1be33671f77..07551706e0b 100644 --- a/backend/lucene/pom.xml +++ b/backend/lucene/pom.xml @@ -42,10 +42,6 @@ org.apache.lucene lucene-join - - org.apache.lucene - lucene-facet - com.carrotsearch hppc diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/FacetsCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/FacetsCollectorFactory.java deleted file mode 100644 index ec3bf698880..00000000000 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/FacetsCollectorFactory.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. 
and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.collector.impl; - -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.FacetsCollectorManager; - -public class FacetsCollectorFactory implements CollectorFactory { - public static final CollectorKey KEY = CollectorKey.create(); - - public static final CollectorFactory INSTANCE = - new FacetsCollectorFactory(); - - @Override - public FacetsCollectorManager createCollectorManager(CollectorExecutionContext context) { - return new FacetsCollectorManager(); - } - - @Override - public CollectorKey getCollectorKey() { - return KEY; - } -} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java deleted file mode 100644 index 3a233f63c5d..00000000000 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.util.Collection; -import java.util.function.ToLongFunction; - -import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; -import org.hibernate.search.util.common.data.Range; -import org.hibernate.search.util.common.data.RangeBoundInclusion; - -public class FacetCountsUtils { - - private FacetCountsUtils() { - } - - public static EffectiveRange[] createEffectiveRangesForIntegralValues( - Collection> ranges) { - return createEffectiveRangesForIntegralValues( ranges, Number::longValue, Long.MIN_VALUE, Long.MAX_VALUE, false ); - } - - public static EffectiveRange[] createEffectiveRangesForIntegralValues( - Collection> ranges, - ToLongFunction encoder, T negativeInfinity, T positiveInfinity) { - return createEffectiveRangesForIntegralValues( ranges, encoder, negativeInfinity, positiveInfinity, true ); - } - - private static EffectiveRange[] createEffectiveRangesForIntegralValues(Collection> ranges, - ToLongFunction encoder, - T lowestPossibleValue, T highestPossibleValue, boolean extremaAreInfinity) { - EffectiveRange[] effectiveRanges = new EffectiveRange[ranges.size()]; - int i = 0; - for ( Range range : ranges ) { - final T lowerBoundValue = range.lowerBoundValue().orElse( null ); - final T upperBoundValue = range.upperBoundValue().orElse( null ); - - - long min = encoder.applyAsLong( lowerBoundValue == null ? lowestPossibleValue : lowerBoundValue ); - long max = encoder.applyAsLong( upperBoundValue == null ? highestPossibleValue : upperBoundValue ); - - // The lower bound is included if it is explicitly included - // ... or if it is infinity but infinity cannot be represented - // so if it's none of the above we exclude the boundary by ++ it. - if ( - RangeBoundInclusion.EXCLUDED.equals( range.lowerBoundInclusion() ) - && ( extremaAreInfinity || lowerBoundValue != null ) ) { - ++min; - } - - // The upper bound is included if it is explicitly included - // ... or if it is infinity but infinity cannot be represented - // so if it's none of the above we exclude the boundary by -- it. 
- if ( - RangeBoundInclusion.EXCLUDED.equals( range.upperBoundInclusion() ) - && ( extremaAreInfinity || upperBoundValue != null ) ) { - --max; - } - - effectiveRanges[i] = new EffectiveRange( - min, - max - ); - ++i; - } - return effectiveRanges; - } -} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueFacetCounts.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueFacetCounts.java deleted file mode 100644 index f1662ffdb12..00000000000 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueFacetCounts.java +++ /dev/null @@ -1,194 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.io.IOException; -import java.util.Collections; -import java.util.List; - -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; - -import com.carrotsearch.hppc.LongHashSet; -import com.carrotsearch.hppc.LongIntHashMap; -import com.carrotsearch.hppc.LongIntMap; -import com.carrotsearch.hppc.cursors.LongIntCursor; -import com.carrotsearch.hppc.procedures.LongProcedure; - -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.LabelAndValue; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.PriorityQueue; - -/** - *

- * Copied with some changes from {@code org.apache.lucene.facet.LongValueFacetCounts} - * of Apache Lucene project. - */ -public class LongMultiValueFacetCounts extends Facets { - - private final int[] counts = new int[1024]; - - private final LongIntMap hashCounts = new LongIntHashMap(); - - private final String field; - - private int totCount; - - public LongMultiValueFacetCounts(String field, LongMultiValuesSource valueSource, FacetsCollector hits) throws IOException { - this.field = field; - count( valueSource, hits.getMatchingDocs() ); - } - - private void count(LongMultiValuesSource valueSource, List matchingDocs) throws IOException { - LongHashSet uniqueValuesForDocument = new LongHashSet(); - LongProcedure incrementCountForDocumentId = this::increment; - - for ( FacetsCollector.MatchingDocs hits : matchingDocs ) { - LongMultiValues fv = valueSource.getValues( hits.context ); - - DocIdSetIterator docs = hits.bits.iterator(); - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc() ) { - if ( fv.advanceExact( doc ) ) { - totCount++; - while ( fv.hasNextValue() ) { - // Each document must be counted only once per value. - uniqueValuesForDocument.add( fv.nextValue() ); - } - - uniqueValuesForDocument.forEach( incrementCountForDocumentId ); - uniqueValuesForDocument.clear(); - } - } - } - } - - private void increment(long value) { - if ( value >= 0 && value < counts.length ) { - counts[(int) value]++; - } - else { - hashCounts.addTo( value, 1 ); - } - } - - @Override - public FacetResult getAllChildren(String dim, String... path) { - throw new UnsupportedOperationException( - "Getting all children is not supported by " + this.getClass().getSimpleName() ); - } - - @Override - public FacetResult getTopChildren(int topN, String dim, String... 
path) { - if ( !dim.equals( field ) ) { - throw new IllegalArgumentException( "invalid dim \"" + dim + "\"; should be \"" + field + "\"" ); - } - if ( path.length != 0 ) { - throw new IllegalArgumentException( "path.length should be 0" ); - } - return getTopChildrenSortByCount( topN ); - } - - private static class Entry { - int count; - long value; - } - - public FacetResult getTopChildrenSortByCount(int topN) { - PriorityQueue pq = new PriorityQueue( Math.min( topN, counts.length + hashCounts.size() ) ) { - @Override - protected boolean lessThan(Entry a, Entry b) { - // sort by count descending, breaking ties by value ascending: - return a.count < b.count || ( a.count == b.count && a.value > b.value ); - } - }; - - int childCount = 0; - Entry e = null; - for ( int i = 0; i < counts.length; i++ ) { - if ( counts[i] != 0 ) { - childCount++; - if ( e == null ) { - e = new Entry(); - } - e.value = i; - e.count = counts[i]; - e = pq.insertWithOverflow( e ); - } - } - - if ( hashCounts.size() != 0 ) { - childCount += hashCounts.size(); - for ( LongIntCursor c : hashCounts ) { - int count = c.value; - if ( count != 0 ) { - e = insertEntry( pq, e, c, count ); - } - } - } - - LabelAndValue[] results = new LabelAndValue[pq.size()]; - while ( pq.size() != 0 ) { - Entry entry = pq.pop(); - results[pq.size()] = new LabelAndValue( Long.toString( entry.value ), entry.count ); - } - - return new FacetResult( field, new String[0], totCount, results, childCount ); - } - - private Entry insertEntry(PriorityQueue pq, - Entry e, LongIntCursor c, int count) { - if ( e == null ) { - e = new Entry(); - } - e.value = c.key; - e.count = count; - e = pq.insertWithOverflow( e ); - return e; - } - - @Override - public Number getSpecificValue(String dim, String... path) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public List getAllDims(int topN) throws IOException { - return Collections.singletonList( getTopChildren( topN, field ) ); - } - - @Override - public String toString() { - StringBuilder b = new StringBuilder(); - b.append( "LongValueFacetCounts totCount=" ); - b.append( totCount ); - b.append( ":\n" ); - for ( int i = 0; i < counts.length; i++ ) { - if ( counts[i] != 0 ) { - b.append( " " ); - b.append( i ); - b.append( " -> count=" ); - b.append( counts[i] ); - b.append( '\n' ); - } - } - - if ( hashCounts.size() != 0 ) { - for ( LongIntCursor c : hashCounts ) { - if ( c.value != 0 ) { - b.append( " " ); - b.append( c.key ); - b.append( " -> count=" ); - b.append( c.value ); - b.append( '\n' ); - } - } - } - - return b.toString(); - } -} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeCounter.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeCounter.java deleted file mode 100644 index b5f8673b46f..00000000000 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeCounter.java +++ /dev/null @@ -1,293 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. 
and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import com.carrotsearch.hppc.IntArrayList; -import com.carrotsearch.hppc.LongArrayList; -import com.carrotsearch.hppc.LongIntHashMap; -import com.carrotsearch.hppc.LongIntMap; -import com.carrotsearch.hppc.cursors.IntCursor; - -import org.apache.lucene.facet.range.LongRange; - -/** - *

- * Copied with some changes from {@code org.apache.lucene.facet.range.LongRangeCounter} - * of Apache Lucene project. - */ -class LongMultiValueRangeCounter { - - final LongRangeNode root; - final long[] boundaries; - final int[] leafCounts; - - // Used during rollup - private int leafUpto; - private int missingCount; - - public LongMultiValueRangeCounter(LongRange[] ranges) { - // Maps all range inclusive endpoints to int flags; 1 - // = start of interval, 2 = end of interval. We need to - // track the start vs end case separately because if a - // given point is both, then it must be its own - // elementary interval: - LongIntMap endsMap = new LongIntHashMap(); - - endsMap.put( Long.MIN_VALUE, 1 ); - endsMap.put( Long.MAX_VALUE, 2 ); - - for ( LongRange range : ranges ) { - int cur = endsMap.get( range.min ); - if ( cur == 0 ) { - endsMap.put( range.min, 1 ); - } - else { - endsMap.put( range.min, cur | 1 ); - } - cur = endsMap.get( range.max ); - if ( cur == 0 ) { - endsMap.put( range.max, 2 ); - } - else { - endsMap.put( range.max, cur | 2 ); - } - } - - LongArrayList endsList = new LongArrayList( endsMap.keys() ); - Arrays.sort( endsList.buffer, 0, endsList.elementsCount ); - - // Build elementaryIntervals (a 1D Venn diagram): - List elementaryIntervals = new ArrayList<>(); - int upto0 = 1; - long v = endsList.get( 0 ); - long prev; - if ( endsMap.get( v ) == 3 ) { - elementaryIntervals.add( new InclusiveRange( v, v ) ); - prev = v + 1; - } - else { - prev = v; - } - - while ( upto0 < endsList.size() ) { - v = endsList.get( upto0 ); - int flags = endsMap.get( v ); - if ( flags == 3 ) { - // This point is both an end and a start; we need to - // separate it: - if ( v > prev ) { - elementaryIntervals.add( new InclusiveRange( prev, v - 1 ) ); - } - elementaryIntervals.add( new InclusiveRange( v, v ) ); - prev = v + 1; - } - else if ( flags == 1 ) { - // This point is only the start of an interval; - // attach it to next interval: - if ( v > prev ) { - elementaryIntervals.add( new InclusiveRange( prev, v - 1 ) ); - } - prev = v; - } - else { - assert flags == 2; - // This point is only the end of an interval; attach - // it to last interval: - elementaryIntervals.add( new InclusiveRange( prev, v ) ); - prev = v + 1; - } - upto0++; - } - - // Build binary tree on top of intervals: - root = split( 0, elementaryIntervals.size(), elementaryIntervals ); - - // Set outputs, so we know which range to output for - // each node in the tree: - for ( int i = 0; i < ranges.length; i++ ) { - root.addOutputs( i, ranges[i] ); - } - - // Set boundaries (ends of each elementary interval): - boundaries = new long[elementaryIntervals.size()]; - for ( int i = 0; i < boundaries.length; i++ ) { - boundaries[i] = elementaryIntervals.get( i ).end; - } - - leafCounts = new int[boundaries.length]; - } - - public void incrementCountForLeafWithIndex(int index) { - leafCounts[index]++; - } - - public int findLeafIndex(long v) { - // Binary search to find matched elementary range; we - // are guaranteed to find a match because the last - // boundary is Long.MAX_VALUE: - int lo = 0; - int hi = boundaries.length - 1; - while ( true ) { - int mid = ( lo + hi ) >>> 1; - if ( v <= boundaries[mid] ) { - if ( mid == 0 ) { - return 0; - } - else { - hi = mid - 1; - } - } - else if ( v > boundaries[mid + 1] ) { - lo = mid + 1; - } - else { - return mid + 1; - } - } - } - - /** Fills counts corresponding to the original input - * ranges, returning the missing count (how many hits - * didn't match any ranges). 
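To make the removed logic easier to review, here is a worked sketch of the decomposition the constructor above performs; it is illustrative only, self-contained, and does not depend on the deleted class. Two overlapping requested ranges are split into disjoint elementary intervals, each leaf count is later rolled up into every requested range that fully covers it, and findLeafIndex locates a value's leaf via the interval end points (the sketch uses a linear scan where the removed class used a binary search).

// Illustrative only: decomposition for the requested ranges [0, 10] and [5, 15].
// Inclusive endpoints plus the Long.MIN_VALUE / Long.MAX_VALUE sentinels split
// the long domain into disjoint elementary intervals:
//   leaf 0: [MIN_VALUE, -1]  (covered by no requested range)
//   leaf 1: [0, 4]           (covered by [0, 10] only)
//   leaf 2: [5, 10]          (covered by [0, 10] and [5, 15])
//   leaf 3: [11, 15]         (covered by [5, 15] only)
//   leaf 4: [16, MAX_VALUE]  (covered by no requested range)
public final class RangeCounterSketch {

    // Inclusive upper ends of the elementary intervals; the trailing
    // Long.MAX_VALUE guarantees that every value maps to some leaf.
    private static final long[] BOUNDARIES = { -1L, 4L, 10L, 15L, Long.MAX_VALUE };

    // Same contract as the removed findLeafIndex: the index of the first
    // boundary that is >= the value.
    static int findLeafIndex(long value) {
        int i = 0;
        while ( value > BOUNDARIES[i] ) {
            i++;
        }
        return i;
    }

    public static void main(String[] args) {
        // 7 lands in leaf 2 ([5, 10]), so at rollup time it counts towards both
        // requested ranges; 12 lands in leaf 3 and counts towards [5, 15] only.
        System.out.println( findLeafIndex( 7L ) );  // 2
        System.out.println( findLeafIndex( 12L ) ); // 3
    }
}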
*/ - public int fillCounts(int[] counts) { - missingCount = 0; - leafUpto = 0; - rollup( root, counts, false ); - return missingCount; - } - - private int rollup(LongRangeNode node, int[] counts, boolean sawOutputs) { - int count; - sawOutputs |= node.outputs != null; - if ( node.left != null ) { - count = rollup( node.left, counts, sawOutputs ); - count += rollup( node.right, counts, sawOutputs ); - } - else { - // Leaf: - count = leafCounts[leafUpto]; - leafUpto++; - if ( !sawOutputs ) { - // This is a missing count (no output ranges were - // seen "above" us): - missingCount += count; - } - } - if ( node.outputs != null ) { - for ( IntCursor rangeIndexCursor : node.outputs ) { - counts[rangeIndexCursor.value] += count; - } - } - return count; - } - - private static LongRangeNode split(int start, int end, List elementaryIntervals) { - if ( start == end - 1 ) { - // leaf - InclusiveRange range = elementaryIntervals.get( start ); - return new LongRangeNode( range.start, range.end, null, null ); - } - else { - int mid = ( start + end ) >>> 1; - LongRangeNode left = split( start, mid, elementaryIntervals ); - LongRangeNode right = split( mid, end, elementaryIntervals ); - return new LongRangeNode( left.start, right.end, left, right ); - } - } - - private static final class InclusiveRange { - public final long start; - public final long end; - - public InclusiveRange(long start, long end) { - assert end >= start; - this.start = start; - this.end = end; - } - - @Override - public String toString() { - return start + " to " + end; - } - } - - /** Holds one node of the segment tree. */ - public static final class LongRangeNode { - final LongRangeNode left; - final LongRangeNode right; - - // Our range, inclusive: - final long start; - final long end; - - // Which range indices to output when a query goes - // through this node: - IntArrayList outputs; - - public LongRangeNode(long start, long end, LongRangeNode left, LongRangeNode right) { - this.start = start; - this.end = end; - this.left = left; - this.right = right; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - toString( sb, 0 ); - return sb.toString(); - } - - static void indent(StringBuilder sb, int depth) { - for ( int i = 0; i < depth; i++ ) { - sb.append( " " ); - } - } - - /** Recursively assigns range outputs to each node. 
*/ - void addOutputs(int index, LongRange range) { - if ( start >= range.min && end <= range.max ) { - // Our range is fully included in the incoming - // range; add to our output list: - if ( outputs == null ) { - outputs = new IntArrayList(); - } - outputs.add( index ); - } - else if ( left != null ) { - assert right != null; - // Recurse: - left.addOutputs( index, range ); - right.addOutputs( index, range ); - } - } - - void toString(StringBuilder sb, int depth) { - indent( sb, depth ); - if ( left == null ) { - assert right == null; - sb.append( "leaf: " ).append( start ).append( " to " ).append( end ); - } - else { - sb.append( "node: " ).append( start ).append( " to " ).append( end ); - } - if ( outputs != null ) { - sb.append( " outputs=" ); - sb.append( outputs ); - } - sb.append( '\n' ); - - if ( left != null ) { - assert right != null; - left.toString( sb, depth + 1 ); - right.toString( sb, depth + 1 ); - } - } - } -} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/MultiValueRangeFacetCounts.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/MultiValueRangeFacetCounts.java deleted file mode 100644 index 43cf7269294..00000000000 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/MultiValueRangeFacetCounts.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.io.IOException; -import java.util.Collections; -import java.util.List; - -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.LabelAndValue; -import org.apache.lucene.facet.range.Range; -import org.apache.lucene.search.Query; - -/** - *

- * Copied with some changes from {@code org.apache.lucene.facet.range.RangeFacetCounts} - * of Apache Lucene project. - */ -public class MultiValueRangeFacetCounts extends Facets { - - protected final Range[] ranges; - protected final int[] counts; - protected final Query fastMatchQuery; - protected final String field; - protected int totCount; - - protected MultiValueRangeFacetCounts(String field, Range[] ranges, Query fastMatchQuery) { - this.field = field; - this.ranges = ranges; - this.fastMatchQuery = fastMatchQuery; - counts = new int[ranges.length]; - } - - @Override - public FacetResult getAllChildren(String dim, String... path) { - throw new UnsupportedOperationException( - "Getting all children is not supported by " + this.getClass().getSimpleName() ); - } - - @Override - public FacetResult getTopChildren(int topN, String dim, String... path) { - if ( !dim.equals( field ) ) { - throw new IllegalArgumentException( "invalid dim \"" + dim + "\"; should be \"" + field + "\"" ); - } - if ( path.length != 0 ) { - throw new IllegalArgumentException( "path.length should be 0" ); - } - LabelAndValue[] labelValues = new LabelAndValue[counts.length]; - for ( int i = 0; i < counts.length; i++ ) { - labelValues[i] = new LabelAndValue( ranges[i].label, counts[i] ); - } - return new FacetResult( dim, path, totCount, labelValues, labelValues.length ); - } - - @Override - public Number getSpecificValue(String dim, String... path) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public List getAllDims(int topN) throws IOException { - return Collections.singletonList( getTopChildren( topN, field ) ); - } - - @Override - public String toString() { - StringBuilder b = new StringBuilder(); - b.append( "MultiValueRangeFacetCounts totCount=" ); - b.append( totCount ); - b.append( ":\n" ); - for ( int i = 0; i < ranges.length; i++ ) { - b.append( " " ); - b.append( ranges[i].label ); - b.append( " -> count=" ); - b.append( counts[i] ); - b.append( '\n' ); - } - return b.toString(); - } -} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/TextMultiValueFacetCounts.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/TextMultiValueFacetCounts.java deleted file mode 100644 index ee9d41c575e..00000000000 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/TextMultiValueFacetCounts.java +++ /dev/null @@ -1,258 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. 
and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.io.IOException; -import java.util.Collections; -import java.util.List; - -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValues; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; - -import com.carrotsearch.hppc.IntHashSet; -import com.carrotsearch.hppc.procedures.IntProcedure; - -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.FacetsCollector.MatchingDocs; -import org.apache.lucene.facet.LabelAndValue; -import org.apache.lucene.facet.TopOrdAndIntQueue; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.MultiDocValues; -import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; -import org.apache.lucene.index.OrdinalMap; -import org.apache.lucene.index.ReaderUtil; -import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.LongValues; - -/** - * Copied with some changes from {@code org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts} - * of Apache Lucene project. - */ -public class TextMultiValueFacetCounts extends Facets { - - final SortedSetDocValues dv; - final String field; - final int ordCount; - final int[] counts; - - public TextMultiValueFacetCounts(IndexReader reader, String field, TextMultiValuesSource valuesSource, FacetsCollector hits) - throws IOException { - this.field = field; - dv = MultiDocValues.getSortedSetValues( reader, field ); - if ( dv != null && dv.getValueCount() > Integer.MAX_VALUE ) { - // We may want to remove this limitation? - // Note that DefaultSortedSetDocValuesReaderState has the same limitation, - // so this is no worse than the "legacy" facets from Search 5. - throw new IllegalStateException( - "Cannot aggregate when more than " + Integer.MAX_VALUE + " terms are indexed" ); - } - ordCount = dv == null ? 0 : (int) dv.getValueCount(); - counts = new int[ordCount]; - count( reader, valuesSource, hits.getMatchingDocs() ); - } - - @Override - public FacetResult getAllChildren(String dim, String... path) { - throw new UnsupportedOperationException( - "Getting all children is not supported by " + this.getClass().getSimpleName() ); - } - - @Override - public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { - if ( topN <= 0 ) { - throw new IllegalArgumentException( "topN must be > 0 (got: " + topN + ")" ); - } - if ( !dim.equals( field ) ) { - throw new IllegalArgumentException( "invalid dim \"" + dim + "\"; should be \"" + field + "\"" ); - } - if ( path.length != 0 ) { - throw new IllegalArgumentException( "path.length should be 0" ); - } - return getTopChildrenSortByCount( topN ); - } - - private FacetResult getTopChildrenSortByCount(int topN) throws IOException { - if ( topN > ordCount ) { - // HSEARCH-4544 Avoid OutOfMemoryError when passing crazy high topN values - // We know there will never be more than "ordCount" values anyway. 
- topN = ordCount; - } - - TopOrdAndIntQueue q = null; - - int bottomCount = 0; - - int totCount = 0; - int childCount = 0; - - TopOrdAndIntQueue.OrdAndValue reuse = null; - - for ( int ord = 0; ord < ordCount; ord++ ) { - if ( counts[ord] > 0 ) { - totCount += counts[ord]; - childCount++; - if ( counts[ord] > bottomCount ) { - if ( reuse == null ) { - reuse = new TopOrdAndIntQueue.OrdAndValue(); - } - reuse.ord = ord; - reuse.value = counts[ord]; - if ( q == null ) { - // Lazy init, so we don't create this for the - // sparse case unnecessarily - q = new TopOrdAndIntQueue( topN ); - } - reuse = q.insertWithOverflow( reuse ); - if ( q.size() == topN ) { - bottomCount = q.top().value; - } - } - } - } - - if ( q == null ) { - return null; - } - - LabelAndValue[] labelValues = new LabelAndValue[q.size()]; - for ( int i = labelValues.length - 1; i >= 0; i-- ) { - TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop(); - final BytesRef term = dv.lookupOrd( ordAndValue.ord ); - labelValues[i] = new LabelAndValue( term.utf8ToString(), ordAndValue.value ); - } - - return new FacetResult( field, new String[0], totCount, labelValues, childCount ); - } - - private void countOneSegment(OrdinalMap ordinalMap, TextMultiValues segValues, int segOrd, MatchingDocs hits) - throws IOException { - if ( segValues == null ) { - // nothing to count - return; - } - IntHashSet uniqueOrdinalsForDocument = new IntHashSet(); - - DocIdSetIterator docs = hits.bits.iterator(); - - // TODO: yet another option is to count all segs - // first, only in seg-ord space, and then do a - // merge-sort-PQ in the end to only "resolve to - // global" those seg ords that can compete, if we know - // we just want top K? ie, this is the same algo - // that'd be used for merging facets across shards - // (distributed faceting). but this has much higher - // temp ram req'ts (sum of number of ords across all - // segs) - if ( ordinalMap != null ) { - final LongValues ordMap = ordinalMap.getGlobalOrds( segOrd ); - - int numSegOrds = (int) segValues.getValueCount(); - - if ( hits.totalHits < numSegOrds / 10 ) { - IntProcedure incrementCountForOrdinal = ord -> counts[ord]++; - // Remap every ord to global ord as we iterate: - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc() ) { - if ( !segValues.advanceExact( doc ) ) { - continue; // No value for this doc - } - while ( segValues.hasNextValue() ) { - int term = (int) segValues.nextOrd(); - int globalOrd = (int) ordMap.get( term ); - uniqueOrdinalsForDocument.add( globalOrd ); - } - uniqueOrdinalsForDocument.forEach( incrementCountForOrdinal ); - uniqueOrdinalsForDocument.clear(); - } - } - else { - // First count in seg-ord space: - final int[] segCounts = new int[numSegOrds]; - IntProcedure incrementCountForOrdinal = ord -> segCounts[ord]++; - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc() ) { - if ( !segValues.advanceExact( doc ) ) { - continue; // No value for this doc - } - while ( segValues.hasNextValue() ) { - int term = (int) segValues.nextOrd(); - uniqueOrdinalsForDocument.add( term ); - } - uniqueOrdinalsForDocument.forEach( incrementCountForOrdinal ); - uniqueOrdinalsForDocument.clear(); - } - - // Then, migrate to global ords: - for ( int ord = 0; ord < numSegOrds; ord++ ) { - int count = segCounts[ord]; - if ( count != 0 ) { - counts[(int) ordMap.get( ord )] += count; - } - } - } - } - else { - // No ord mapping (e.g., single segment index): - // just aggregate directly into counts. 
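One detail of the counting loops above (shared with the removed LongMultiValueFacetCounts) is worth calling out for reviewers: a multi-valued field can repeat the same value or ordinal within a single document, so values are first gathered into a per-document set and each distinct value is incremented at most once per matching document. A stripped-down, Lucene-free sketch of that rule; the class name is made up for illustration.

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

// Illustrative only: "count each value at most once per document", the rule the
// removed facet-count classes enforce via uniqueValuesForDocument /
// uniqueOrdinalsForDocument.
public final class PerDocumentCountingSketch {

    public static void main(String[] args) {
        // Each inner list stands for the values of one matching document.
        List<List<String>> matchingDocs = List.of(
                List.of( "red", "red", "blue" ), // "red" occurs twice in this document
                List.of( "red" ),
                List.of( "blue", "green" )
        );

        Map<String, Integer> counts = new HashMap<>();
        for ( List<String> documentValues : matchingDocs ) {
            // Deduplicate within the document before counting:
            Set<String> uniqueValuesForDocument = new HashSet<>( documentValues );
            for ( String value : uniqueValuesForDocument ) {
                counts.merge( value, 1, Integer::sum );
            }
        }

        System.out.println( counts ); // red=2, blue=2, green=1 (iteration order may vary)
    }
}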
- IntProcedure incrementCountForOrdinal = ord -> counts[ord]++; - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc() ) { - if ( !segValues.advanceExact( doc ) ) { - continue; // No value for this doc - } - while ( segValues.hasNextValue() ) { - int term = (int) segValues.nextOrd(); - uniqueOrdinalsForDocument.add( term ); - } - uniqueOrdinalsForDocument.forEach( incrementCountForOrdinal ); - uniqueOrdinalsForDocument.clear(); - } - } - } - - /** - * Does all the "real work" of tallying up the counts. - */ - private void count(IndexReader reader, TextMultiValuesSource valuesSource, List matchingDocs) - throws IOException { - OrdinalMap ordinalMap; - - // TODO: is this right? really, we need a way to - // verify that this ordinalMap "matches" the leaves in - // matchingDocs... - if ( dv instanceof MultiSortedSetDocValues && matchingDocs.size() > 1 ) { - ordinalMap = ( (MultiSortedSetDocValues) dv ).mapping; - } - else { - ordinalMap = null; - } - - for ( MatchingDocs hits : matchingDocs ) { - - // LUCENE-5090: make sure the provided reader context "matches" - // the top-level reader passed to the - // SortedSetDocValuesReaderState, else cryptic - // AIOOBE can happen: - if ( ReaderUtil.getTopLevelContext( hits.context ).reader() != reader ) { - throw new IllegalStateException( - "the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader" ); - } - - countOneSegment( ordinalMap, valuesSource.getValues( hits.context ), hits.context.ord, hits ); - } - } - - @Override - public Number getSpecificValue(String dim, String... path) { - throw new UnsupportedOperationException(); - } - - @Override - public List getAllDims(int topN) throws IOException { - return Collections.singletonList( getTopChildren( topN, field ) ); - } - -} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java index a5eecfc8d27..f04c116c3d5 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java @@ -4,5 +4,61 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; +import java.util.Collection; +import java.util.function.ToLongFunction; + +import org.hibernate.search.util.common.data.Range; +import org.hibernate.search.util.common.data.RangeBoundInclusion; + public record EffectiveRange(long min, long max) { + public static EffectiveRange[] createEffectiveRangesForIntegralValues( + Collection> ranges) { + return createEffectiveRangesForIntegralValues( ranges, Number::longValue, Long.MIN_VALUE, Long.MAX_VALUE, false ); + } + + public static EffectiveRange[] createEffectiveRangesForIntegralValues( + Collection> ranges, + ToLongFunction encoder, T negativeInfinity, T positiveInfinity) { + return createEffectiveRangesForIntegralValues( ranges, encoder, negativeInfinity, positiveInfinity, true ); + } + + private static EffectiveRange[] createEffectiveRangesForIntegralValues(Collection> ranges, + ToLongFunction encoder, + T lowestPossibleValue, T highestPossibleValue, boolean extremaAreInfinity) { + EffectiveRange[] effectiveRanges = new EffectiveRange[ranges.size()]; + int i = 0; + for ( Range range : ranges ) { + 
final T lowerBoundValue = range.lowerBoundValue().orElse( null ); + final T upperBoundValue = range.upperBoundValue().orElse( null ); + + + long min = encoder.applyAsLong( lowerBoundValue == null ? lowestPossibleValue : lowerBoundValue ); + long max = encoder.applyAsLong( upperBoundValue == null ? highestPossibleValue : upperBoundValue ); + + // The lower bound is included if it is explicitly included + // ... or if it is infinity but infinity cannot be represented + // so if it's none of the above we exclude the boundary by ++ it. + if ( + RangeBoundInclusion.EXCLUDED.equals( range.lowerBoundInclusion() ) + && ( extremaAreInfinity || lowerBoundValue != null ) ) { + ++min; + } + + // The upper bound is included if it is explicitly included + // ... or if it is infinity but infinity cannot be represented + // so if it's none of the above we exclude the boundary by -- it. + if ( + RangeBoundInclusion.EXCLUDED.equals( range.upperBoundInclusion() ) + && ( extremaAreInfinity || upperBoundValue != null ) ) { + --max; + } + + effectiveRanges[i] = new EffectiveRange( + min, + max + ); + ++i; + } + return effectiveRanges; + } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java index cd1fa6e3274..12f319907b2 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java @@ -4,23 +4,17 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; import org.hibernate.search.backend.lucene.lowlevel.comparator.impl.DoubleValuesSourceComparator; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.DoubleMultiValuesToSingleValuesSource; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -91,22 +85,9 @@ public Double doubleToTerm(double doubleValue) { return doubleValue; } - @Override - public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException { - // As we don't need to apply any operation to terms except sometimes a sort, - // we can simply rely on raw, long values, whose order is the same as their corresponding double value. - // Values are ultimately converted back to the Double equivalent by calling sortedDocValueToTerm. 
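The removed comment above leans on org.apache.lucene.util.NumericUtils.doubleToSortableLong being order-preserving, so counting the raw sortable long doc values is equivalent to counting the decoded doubles. A small self-contained check of that property (the sample values are arbitrary):

import org.apache.lucene.util.NumericUtils;

public class SortableLongOrderCheck {

    public static void main(String[] args) {
        double[] values = { -42.5, -0.0, 0.0, 1.25, 3.5, 1e308 }; // already in ascending order
        for ( int i = 1; i < values.length; i++ ) {
            long previous = NumericUtils.doubleToSortableLong( values[i - 1] );
            long current = NumericUtils.doubleToSortableLong( values[i] );
            // The encoding is order-preserving, so the longs compare like the doubles:
            if ( previous > current ) {
                throw new AssertionError( values[i - 1] + " encoded after " + values[i] );
            }
        }
        // And it round-trips, so values can be decoded back when building facet labels:
        System.out.println( NumericUtils.sortableLongToDouble( NumericUtils.doubleToSortableLong( 1.25 ) ) );
        // prints "1.25"
    }
}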
- JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueFacetCounts( absoluteFieldPath, source, facetsCollector ); - } - @Override public EffectiveRange[] createEffectiveRanges(Collection> ranges) { - return FacetCountsUtils.createEffectiveRangesForIntegralValues( ranges, NumericUtils::doubleToSortableLong, + return EffectiveRange.createEffectiveRangesForIntegralValues( ranges, NumericUtils::doubleToSortableLong, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java index 141ecb237e7..333f1799939 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java @@ -4,24 +4,18 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; import org.hibernate.search.backend.lucene.lowlevel.comparator.impl.FloatValuesSourceComparator; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.DoubleMultiValuesToSingleValuesSource; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.document.FloatPoint; import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -92,22 +86,9 @@ public Float doubleToTerm(double doubleValue) { return NumberUtils.toFloat( doubleValue ); } - @Override - public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException { - // As we don't need to apply any operation to terms except sometimes a sort, - // we can simply rely on raw, int values, whose order is the same as their corresponding float value. - // Values are ultimately converted back to the Float equivalent by calling sortedDocValueToTerm. 
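In the new EffectiveRange.createEffectiveRangesForIntegralValues above, an excluded bound on an integral encoding is replaced by the adjacent representable value (++min or --max), so every effective range ends up closed on both sides; the extremaAreInfinity flag only decides whether an unbounded end, encoded as the lowest or highest possible value, may be nudged the same way. A minimal sketch of that rule on plain longs, independent of the Hibernate Search Range API:

public class ClosedRangeAdjustmentSketch {

    // Converts a possibly half-open integral range into the equivalent closed [min, max] range.
    // lowerExcluded/upperExcluded stand in for RangeBoundInclusion.EXCLUDED on explicit bounds.
    static long[] toClosed(long min, boolean lowerExcluded, long max, boolean upperExcluded) {
        if ( lowerExcluded ) {
            ++min; // (10, x] matches the same longs as [11, x]
        }
        if ( upperExcluded ) {
            --max; // [x, 20) matches the same longs as [x, 19]
        }
        return new long[] { min, max };
    }

    public static void main(String[] args) {
        long[] closed = toClosed( 10, true, 20, false );
        System.out.println( closed[0] + ".." + closed[1] ); // prints "11..20"
    }
}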
- JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromIntField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueFacetCounts( absoluteFieldPath, source, facetsCollector ); - } - @Override public EffectiveRange[] createEffectiveRanges(Collection> ranges) { - return FacetCountsUtils.createEffectiveRangesForIntegralValues( ranges, NumericUtils::floatToSortableInt, + return EffectiveRange.createEffectiveRangesForIntegralValues( ranges, NumericUtils::floatToSortableInt, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java index 3ce7e9e0def..e57cb0f0ae0 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java @@ -4,24 +4,18 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; import org.hibernate.search.backend.lucene.lowlevel.comparator.impl.IntValuesSourceComparator; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesToSingleValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -91,22 +85,9 @@ public double sortedDocValueToDouble(long longValue) { return sortedDocValueToTerm( longValue ).doubleValue(); } - @Override - public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException { - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromIntField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueFacetCounts( - absoluteFieldPath, source, - facetsCollector - ); - } - @Override public EffectiveRange[] createEffectiveRanges(Collection> ranges) { - return FacetCountsUtils.createEffectiveRangesForIntegralValues( ranges ); + return EffectiveRange.createEffectiveRangesForIntegralValues( ranges ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java index 409356559c0..6aab8e3a5d5 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java +++ 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java @@ -4,24 +4,18 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; import org.hibernate.search.backend.lucene.lowlevel.comparator.impl.LongValuesSourceComparator; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesToSingleValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -91,22 +85,9 @@ public Long doubleToTerm(double doubleValue) { return NumberUtils.toLong( doubleValue ); } - @Override - public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException { - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueFacetCounts( - absoluteFieldPath, source, - facetsCollector - ); - } - @Override public EffectiveRange[] createEffectiveRanges(Collection> ranges) { - return FacetCountsUtils.createEffectiveRangesForIntegralValues( ranges ); + return EffectiveRange.createEffectiveRangesForIntegralValues( ranges ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java index 20c08cf7c3d..a25ade3387a 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java @@ -4,7 +4,6 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; @@ -12,8 +11,6 @@ import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.util.common.data.Range; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -43,10 +40,6 @@ public interface LuceneNumericDomain { E doubleToTerm(double doubleValue); - Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException; - EffectiveRange[] createEffectiveRanges(Collection> ranges); IndexableField createIndexField(String absoluteFieldPath, E numericValue); diff 
--git a/build/parents/build/pom.xml b/build/parents/build/pom.xml index 39bc988ac80..7aeb35bb7b1 100644 --- a/build/parents/build/pom.xml +++ b/build/parents/build/pom.xml @@ -448,11 +448,6 @@ lucene-join ${version.org.apache.lucene} - - org.apache.lucene - lucene-facet - ${version.org.apache.lucene} - org.apache.lucene lucene-highlighter diff --git a/lucene-next/backend/lucene/pom.xml b/lucene-next/backend/lucene/pom.xml index 04ab0dc105f..048b6e89136 100644 --- a/lucene-next/backend/lucene/pom.xml +++ b/lucene-next/backend/lucene/pom.xml @@ -44,10 +44,6 @@ org.apache.lucene lucene-join - - org.apache.lucene - lucene-facet - com.carrotsearch hppc diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/FacetsCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/FacetsCollectorFactory.java deleted file mode 100644 index ec3bf698880..00000000000 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/FacetsCollectorFactory.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.collector.impl; - -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.FacetsCollectorManager; - -public class FacetsCollectorFactory implements CollectorFactory { - public static final CollectorKey KEY = CollectorKey.create(); - - public static final CollectorFactory INSTANCE = - new FacetsCollectorFactory(); - - @Override - public FacetsCollectorManager createCollectorManager(CollectorExecutionContext context) { - return new FacetsCollectorManager(); - } - - @Override - public CollectorKey getCollectorKey() { - return KEY; - } -} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java deleted file mode 100644 index 3a233f63c5d..00000000000 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. 
and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.util.Collection; -import java.util.function.ToLongFunction; - -import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; -import org.hibernate.search.util.common.data.Range; -import org.hibernate.search.util.common.data.RangeBoundInclusion; - -public class FacetCountsUtils { - - private FacetCountsUtils() { - } - - public static EffectiveRange[] createEffectiveRangesForIntegralValues( - Collection> ranges) { - return createEffectiveRangesForIntegralValues( ranges, Number::longValue, Long.MIN_VALUE, Long.MAX_VALUE, false ); - } - - public static EffectiveRange[] createEffectiveRangesForIntegralValues( - Collection> ranges, - ToLongFunction encoder, T negativeInfinity, T positiveInfinity) { - return createEffectiveRangesForIntegralValues( ranges, encoder, negativeInfinity, positiveInfinity, true ); - } - - private static EffectiveRange[] createEffectiveRangesForIntegralValues(Collection> ranges, - ToLongFunction encoder, - T lowestPossibleValue, T highestPossibleValue, boolean extremaAreInfinity) { - EffectiveRange[] effectiveRanges = new EffectiveRange[ranges.size()]; - int i = 0; - for ( Range range : ranges ) { - final T lowerBoundValue = range.lowerBoundValue().orElse( null ); - final T upperBoundValue = range.upperBoundValue().orElse( null ); - - - long min = encoder.applyAsLong( lowerBoundValue == null ? lowestPossibleValue : lowerBoundValue ); - long max = encoder.applyAsLong( upperBoundValue == null ? highestPossibleValue : upperBoundValue ); - - // The lower bound is included if it is explicitly included - // ... or if it is infinity but infinity cannot be represented - // so if it's none of the above we exclude the boundary by ++ it. - if ( - RangeBoundInclusion.EXCLUDED.equals( range.lowerBoundInclusion() ) - && ( extremaAreInfinity || lowerBoundValue != null ) ) { - ++min; - } - - // The upper bound is included if it is explicitly included - // ... or if it is infinity but infinity cannot be represented - // so if it's none of the above we exclude the boundary by -- it. - if ( - RangeBoundInclusion.EXCLUDED.equals( range.upperBoundInclusion() ) - && ( extremaAreInfinity || upperBoundValue != null ) ) { - --max; - } - - effectiveRanges[i] = new EffectiveRange( - min, - max - ); - ++i; - } - return effectiveRanges; - } -} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueFacetCounts.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueFacetCounts.java deleted file mode 100644 index f6b1ca94c25..00000000000 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueFacetCounts.java +++ /dev/null @@ -1,194 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. 
and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.io.IOException; -import java.util.Collections; -import java.util.List; - -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; - -import com.carrotsearch.hppc.LongHashSet; -import com.carrotsearch.hppc.LongIntHashMap; -import com.carrotsearch.hppc.LongIntMap; -import com.carrotsearch.hppc.cursors.LongIntCursor; -import com.carrotsearch.hppc.procedures.LongProcedure; - -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.LabelAndValue; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.PriorityQueue; - -/** - *
- * Copied with some changes from {@code org.apache.lucene.facet.LongValueFacetCounts} - * of Apache Lucene project. - */ -public class LongMultiValueFacetCounts extends Facets { - - private final int[] counts = new int[1024]; - - private final LongIntMap hashCounts = new LongIntHashMap(); - - private final String field; - - private int totCount; - - public LongMultiValueFacetCounts(String field, LongMultiValuesSource valueSource, FacetsCollector hits) throws IOException { - this.field = field; - count( valueSource, hits.getMatchingDocs() ); - } - - private void count(LongMultiValuesSource valueSource, List matchingDocs) throws IOException { - LongHashSet uniqueValuesForDocument = new LongHashSet(); - LongProcedure incrementCountForDocumentId = this::increment; - - for ( FacetsCollector.MatchingDocs hits : matchingDocs ) { - LongMultiValues fv = valueSource.getValues( hits.context() ); - - DocIdSetIterator docs = hits.bits().iterator(); - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc() ) { - if ( fv.advanceExact( doc ) ) { - totCount++; - while ( fv.hasNextValue() ) { - // Each document must be counted only once per value. - uniqueValuesForDocument.add( fv.nextValue() ); - } - - uniqueValuesForDocument.forEach( incrementCountForDocumentId ); - uniqueValuesForDocument.clear(); - } - } - } - } - - private void increment(long value) { - if ( value >= 0 && value < counts.length ) { - counts[(int) value]++; - } - else { - hashCounts.addTo( value, 1 ); - } - } - - @Override - public FacetResult getAllChildren(String dim, String... path) { - throw new UnsupportedOperationException( - "Getting all children is not supported by " + this.getClass().getSimpleName() ); - } - - @Override - public FacetResult getTopChildren(int topN, String dim, String... 
path) { - if ( !dim.equals( field ) ) { - throw new IllegalArgumentException( "invalid dim \"" + dim + "\"; should be \"" + field + "\"" ); - } - if ( path.length != 0 ) { - throw new IllegalArgumentException( "path.length should be 0" ); - } - return getTopChildrenSortByCount( topN ); - } - - private static class Entry { - int count; - long value; - } - - public FacetResult getTopChildrenSortByCount(int topN) { - PriorityQueue pq = new PriorityQueue( Math.min( topN, counts.length + hashCounts.size() ) ) { - @Override - protected boolean lessThan(Entry a, Entry b) { - // sort by count descending, breaking ties by value ascending: - return a.count < b.count || ( a.count == b.count && a.value > b.value ); - } - }; - - int childCount = 0; - Entry e = null; - for ( int i = 0; i < counts.length; i++ ) { - if ( counts[i] != 0 ) { - childCount++; - if ( e == null ) { - e = new Entry(); - } - e.value = i; - e.count = counts[i]; - e = pq.insertWithOverflow( e ); - } - } - - if ( hashCounts.size() != 0 ) { - childCount += hashCounts.size(); - for ( LongIntCursor c : hashCounts ) { - int count = c.value; - if ( count != 0 ) { - e = insertEntry( pq, e, c, count ); - } - } - } - - LabelAndValue[] results = new LabelAndValue[pq.size()]; - while ( pq.size() != 0 ) { - Entry entry = pq.pop(); - results[pq.size()] = new LabelAndValue( Long.toString( entry.value ), entry.count ); - } - - return new FacetResult( field, new String[0], totCount, results, childCount ); - } - - private Entry insertEntry(PriorityQueue pq, - Entry e, LongIntCursor c, int count) { - if ( e == null ) { - e = new Entry(); - } - e.value = c.key; - e.count = count; - e = pq.insertWithOverflow( e ); - return e; - } - - @Override - public Number getSpecificValue(String dim, String... path) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public List getAllDims(int topN) throws IOException { - return Collections.singletonList( getTopChildren( topN, field ) ); - } - - @Override - public String toString() { - StringBuilder b = new StringBuilder(); - b.append( "LongValueFacetCounts totCount=" ); - b.append( totCount ); - b.append( ":\n" ); - for ( int i = 0; i < counts.length; i++ ) { - if ( counts[i] != 0 ) { - b.append( " " ); - b.append( i ); - b.append( " -> count=" ); - b.append( counts[i] ); - b.append( '\n' ); - } - } - - if ( hashCounts.size() != 0 ) { - for ( LongIntCursor c : hashCounts ) { - if ( c.value != 0 ) { - b.append( " " ); - b.append( c.key ); - b.append( " -> count=" ); - b.append( c.value ); - b.append( '\n' ); - } - } - } - - return b.toString(); - } -} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeCounter.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeCounter.java deleted file mode 100644 index b5f8673b46f..00000000000 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeCounter.java +++ /dev/null @@ -1,293 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. 
and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import com.carrotsearch.hppc.IntArrayList; -import com.carrotsearch.hppc.LongArrayList; -import com.carrotsearch.hppc.LongIntHashMap; -import com.carrotsearch.hppc.LongIntMap; -import com.carrotsearch.hppc.cursors.IntCursor; - -import org.apache.lucene.facet.range.LongRange; - -/** - *
- * Copied with some changes from {@code org.apache.lucene.facet.range.LongRangeCounter} - * of Apache Lucene project. - */ -class LongMultiValueRangeCounter { - - final LongRangeNode root; - final long[] boundaries; - final int[] leafCounts; - - // Used during rollup - private int leafUpto; - private int missingCount; - - public LongMultiValueRangeCounter(LongRange[] ranges) { - // Maps all range inclusive endpoints to int flags; 1 - // = start of interval, 2 = end of interval. We need to - // track the start vs end case separately because if a - // given point is both, then it must be its own - // elementary interval: - LongIntMap endsMap = new LongIntHashMap(); - - endsMap.put( Long.MIN_VALUE, 1 ); - endsMap.put( Long.MAX_VALUE, 2 ); - - for ( LongRange range : ranges ) { - int cur = endsMap.get( range.min ); - if ( cur == 0 ) { - endsMap.put( range.min, 1 ); - } - else { - endsMap.put( range.min, cur | 1 ); - } - cur = endsMap.get( range.max ); - if ( cur == 0 ) { - endsMap.put( range.max, 2 ); - } - else { - endsMap.put( range.max, cur | 2 ); - } - } - - LongArrayList endsList = new LongArrayList( endsMap.keys() ); - Arrays.sort( endsList.buffer, 0, endsList.elementsCount ); - - // Build elementaryIntervals (a 1D Venn diagram): - List elementaryIntervals = new ArrayList<>(); - int upto0 = 1; - long v = endsList.get( 0 ); - long prev; - if ( endsMap.get( v ) == 3 ) { - elementaryIntervals.add( new InclusiveRange( v, v ) ); - prev = v + 1; - } - else { - prev = v; - } - - while ( upto0 < endsList.size() ) { - v = endsList.get( upto0 ); - int flags = endsMap.get( v ); - if ( flags == 3 ) { - // This point is both an end and a start; we need to - // separate it: - if ( v > prev ) { - elementaryIntervals.add( new InclusiveRange( prev, v - 1 ) ); - } - elementaryIntervals.add( new InclusiveRange( v, v ) ); - prev = v + 1; - } - else if ( flags == 1 ) { - // This point is only the start of an interval; - // attach it to next interval: - if ( v > prev ) { - elementaryIntervals.add( new InclusiveRange( prev, v - 1 ) ); - } - prev = v; - } - else { - assert flags == 2; - // This point is only the end of an interval; attach - // it to last interval: - elementaryIntervals.add( new InclusiveRange( prev, v ) ); - prev = v + 1; - } - upto0++; - } - - // Build binary tree on top of intervals: - root = split( 0, elementaryIntervals.size(), elementaryIntervals ); - - // Set outputs, so we know which range to output for - // each node in the tree: - for ( int i = 0; i < ranges.length; i++ ) { - root.addOutputs( i, ranges[i] ); - } - - // Set boundaries (ends of each elementary interval): - boundaries = new long[elementaryIntervals.size()]; - for ( int i = 0; i < boundaries.length; i++ ) { - boundaries[i] = elementaryIntervals.get( i ).end; - } - - leafCounts = new int[boundaries.length]; - } - - public void incrementCountForLeafWithIndex(int index) { - leafCounts[index]++; - } - - public int findLeafIndex(long v) { - // Binary search to find matched elementary range; we - // are guaranteed to find a match because the last - // boundary is Long.MAX_VALUE: - int lo = 0; - int hi = boundaries.length - 1; - while ( true ) { - int mid = ( lo + hi ) >>> 1; - if ( v <= boundaries[mid] ) { - if ( mid == 0 ) { - return 0; - } - else { - hi = mid - 1; - } - } - else if ( v > boundaries[mid + 1] ) { - lo = mid + 1; - } - else { - return mid + 1; - } - } - } - - /** Fills counts corresponding to the original input - * ranges, returning the missing count (how many hits - * didn't match any ranges). 
*/ - public int fillCounts(int[] counts) { - missingCount = 0; - leafUpto = 0; - rollup( root, counts, false ); - return missingCount; - } - - private int rollup(LongRangeNode node, int[] counts, boolean sawOutputs) { - int count; - sawOutputs |= node.outputs != null; - if ( node.left != null ) { - count = rollup( node.left, counts, sawOutputs ); - count += rollup( node.right, counts, sawOutputs ); - } - else { - // Leaf: - count = leafCounts[leafUpto]; - leafUpto++; - if ( !sawOutputs ) { - // This is a missing count (no output ranges were - // seen "above" us): - missingCount += count; - } - } - if ( node.outputs != null ) { - for ( IntCursor rangeIndexCursor : node.outputs ) { - counts[rangeIndexCursor.value] += count; - } - } - return count; - } - - private static LongRangeNode split(int start, int end, List elementaryIntervals) { - if ( start == end - 1 ) { - // leaf - InclusiveRange range = elementaryIntervals.get( start ); - return new LongRangeNode( range.start, range.end, null, null ); - } - else { - int mid = ( start + end ) >>> 1; - LongRangeNode left = split( start, mid, elementaryIntervals ); - LongRangeNode right = split( mid, end, elementaryIntervals ); - return new LongRangeNode( left.start, right.end, left, right ); - } - } - - private static final class InclusiveRange { - public final long start; - public final long end; - - public InclusiveRange(long start, long end) { - assert end >= start; - this.start = start; - this.end = end; - } - - @Override - public String toString() { - return start + " to " + end; - } - } - - /** Holds one node of the segment tree. */ - public static final class LongRangeNode { - final LongRangeNode left; - final LongRangeNode right; - - // Our range, inclusive: - final long start; - final long end; - - // Which range indices to output when a query goes - // through this node: - IntArrayList outputs; - - public LongRangeNode(long start, long end, LongRangeNode left, LongRangeNode right) { - this.start = start; - this.end = end; - this.left = left; - this.right = right; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - toString( sb, 0 ); - return sb.toString(); - } - - static void indent(StringBuilder sb, int depth) { - for ( int i = 0; i < depth; i++ ) { - sb.append( " " ); - } - } - - /** Recursively assigns range outputs to each node. 
*/ - void addOutputs(int index, LongRange range) { - if ( start >= range.min && end <= range.max ) { - // Our range is fully included in the incoming - // range; add to our output list: - if ( outputs == null ) { - outputs = new IntArrayList(); - } - outputs.add( index ); - } - else if ( left != null ) { - assert right != null; - // Recurse: - left.addOutputs( index, range ); - right.addOutputs( index, range ); - } - } - - void toString(StringBuilder sb, int depth) { - indent( sb, depth ); - if ( left == null ) { - assert right == null; - sb.append( "leaf: " ).append( start ).append( " to " ).append( end ); - } - else { - sb.append( "node: " ).append( start ).append( " to " ).append( end ); - } - if ( outputs != null ) { - sb.append( " outputs=" ); - sb.append( outputs ); - } - sb.append( '\n' ); - - if ( left != null ) { - assert right != null; - left.toString( sb, depth + 1 ); - right.toString( sb, depth + 1 ); - } - } - } -} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/MultiValueRangeFacetCounts.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/MultiValueRangeFacetCounts.java deleted file mode 100644 index 43cf7269294..00000000000 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/MultiValueRangeFacetCounts.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.io.IOException; -import java.util.Collections; -import java.util.List; - -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.LabelAndValue; -import org.apache.lucene.facet.range.Range; -import org.apache.lucene.search.Query; - -/** - *
- * Copied with some changes from {@code org.apache.lucene.facet.range.RangeFacetCounts} - * of Apache Lucene project. - */ -public class MultiValueRangeFacetCounts extends Facets { - - protected final Range[] ranges; - protected final int[] counts; - protected final Query fastMatchQuery; - protected final String field; - protected int totCount; - - protected MultiValueRangeFacetCounts(String field, Range[] ranges, Query fastMatchQuery) { - this.field = field; - this.ranges = ranges; - this.fastMatchQuery = fastMatchQuery; - counts = new int[ranges.length]; - } - - @Override - public FacetResult getAllChildren(String dim, String... path) { - throw new UnsupportedOperationException( - "Getting all children is not supported by " + this.getClass().getSimpleName() ); - } - - @Override - public FacetResult getTopChildren(int topN, String dim, String... path) { - if ( !dim.equals( field ) ) { - throw new IllegalArgumentException( "invalid dim \"" + dim + "\"; should be \"" + field + "\"" ); - } - if ( path.length != 0 ) { - throw new IllegalArgumentException( "path.length should be 0" ); - } - LabelAndValue[] labelValues = new LabelAndValue[counts.length]; - for ( int i = 0; i < counts.length; i++ ) { - labelValues[i] = new LabelAndValue( ranges[i].label, counts[i] ); - } - return new FacetResult( dim, path, totCount, labelValues, labelValues.length ); - } - - @Override - public Number getSpecificValue(String dim, String... path) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public List getAllDims(int topN) throws IOException { - return Collections.singletonList( getTopChildren( topN, field ) ); - } - - @Override - public String toString() { - StringBuilder b = new StringBuilder(); - b.append( "MultiValueRangeFacetCounts totCount=" ); - b.append( totCount ); - b.append( ":\n" ); - for ( int i = 0; i < ranges.length; i++ ) { - b.append( " " ); - b.append( ranges[i].label ); - b.append( " -> count=" ); - b.append( counts[i] ); - b.append( '\n' ); - } - return b.toString(); - } -} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/TextMultiValueFacetCounts.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/TextMultiValueFacetCounts.java deleted file mode 100644 index 89db37ffcd3..00000000000 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/TextMultiValueFacetCounts.java +++ /dev/null @@ -1,275 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. 
and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.io.IOException; -import java.util.Collections; -import java.util.List; - -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValues; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; - -import com.carrotsearch.hppc.IntHashSet; -import com.carrotsearch.hppc.procedures.IntProcedure; - -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.FacetsCollector.MatchingDocs; -import org.apache.lucene.facet.LabelAndValue; -import org.apache.lucene.facet.TopOrdAndIntQueue; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.MultiDocValues; -import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; -import org.apache.lucene.index.OrdinalMap; -import org.apache.lucene.index.ReaderUtil; -import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.LongValues; -import org.apache.lucene.util.PriorityQueue; - -/** - * Copied with some changes from {@code org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts} - * of Apache Lucene project. - */ -public class TextMultiValueFacetCounts extends Facets { - - final SortedSetDocValues dv; - final String field; - final int ordCount; - final int[] counts; - - public TextMultiValueFacetCounts(IndexReader reader, String field, TextMultiValuesSource valuesSource, FacetsCollector hits) - throws IOException { - this.field = field; - dv = MultiDocValues.getSortedSetValues( reader, field ); - if ( dv != null && dv.getValueCount() > Integer.MAX_VALUE ) { - // We may want to remove this limitation? - // Note that DefaultSortedSetDocValuesReaderState has the same limitation, - // so this is no worse than the "legacy" facets from Search 5. - throw new IllegalStateException( - "Cannot aggregate when more than " + Integer.MAX_VALUE + " terms are indexed" ); - } - ordCount = dv == null ? 0 : (int) dv.getValueCount(); - counts = new int[ordCount]; - count( reader, valuesSource, hits.getMatchingDocs() ); - } - - @Override - public FacetResult getAllChildren(String dim, String... path) { - throw new UnsupportedOperationException( - "Getting all children is not supported by " + this.getClass().getSimpleName() ); - } - - @Override - public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { - if ( topN <= 0 ) { - throw new IllegalArgumentException( "topN must be > 0 (got: " + topN + ")" ); - } - if ( !dim.equals( field ) ) { - throw new IllegalArgumentException( "invalid dim \"" + dim + "\"; should be \"" + field + "\"" ); - } - if ( path.length != 0 ) { - throw new IllegalArgumentException( "path.length should be 0" ); - } - return getTopChildrenSortByCount( topN ); - } - - private FacetResult getTopChildrenSortByCount(int topN) throws IOException { - if ( topN > ordCount ) { - // HSEARCH-4544 Avoid OutOfMemoryError when passing crazy high topN values - // We know there will never be more than "ordCount" values anyway. 
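As the HSEARCH-4544 comment above notes, topN is clamped to the number of known ordinals before any top-N structure is sized from it (the statement just below applies the clamp), so an absurdly large topN cannot trigger an oversized allocation. The same guard in isolation, with made-up numbers:

public class TopNClampSketch {

    public static void main(String[] args) {
        int ordCount = 1_000;              // distinct indexed terms
        int topN = Integer.MAX_VALUE;      // caller-provided, possibly absurd
        topN = Math.min( topN, ordCount ); // never size a queue beyond ordCount entries
        System.out.println( topN );        // prints "1000"
    }
}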
- topN = ordCount; - } - - HibernateSearchTopOrdAndIntQueue q = null; - - int bottomCount = 0; - - int totCount = 0; - int childCount = 0; - - TopOrdAndIntQueue.OrdAndInt reuse = null; - - for ( int ord = 0; ord < ordCount; ord++ ) { - if ( counts[ord] > 0 ) { - totCount += counts[ord]; - childCount++; - if ( counts[ord] > bottomCount ) { - if ( reuse == null ) { - reuse = new TopOrdAndIntQueue.OrdAndInt(); - } - reuse.ord = ord; - reuse.value = counts[ord]; - if ( q == null ) { - // Lazy init, so we don't create this for the - // sparse case unnecessarily - q = new HibernateSearchTopOrdAndIntQueue( topN ); - } - reuse = q.insertWithOverflow( reuse ); - if ( q.size() == topN ) { - bottomCount = ( q.top() ).value; - } - } - } - } - - if ( q == null ) { - return null; - } - - LabelAndValue[] labelValues = new LabelAndValue[q.size()]; - for ( int i = labelValues.length - 1; i >= 0; i-- ) { - TopOrdAndIntQueue.OrdAndInt ordAndValue = q.pop(); - final BytesRef term = dv.lookupOrd( ordAndValue.ord ); - labelValues[i] = new LabelAndValue( term.utf8ToString(), ordAndValue.value ); - } - - return new FacetResult( field, new String[0], totCount, labelValues, childCount ); - } - - private void countOneSegment(OrdinalMap ordinalMap, TextMultiValues segValues, int segOrd, MatchingDocs hits) - throws IOException { - if ( segValues == null ) { - // nothing to count - return; - } - IntHashSet uniqueOrdinalsForDocument = new IntHashSet(); - - DocIdSetIterator docs = hits.bits().iterator(); - - // TODO: yet another option is to count all segs - // first, only in seg-ord space, and then do a - // merge-sort-PQ in the end to only "resolve to - // global" those seg ords that can compete, if we know - // we just want top K? ie, this is the same algo - // that'd be used for merging facets across shards - // (distributed faceting). 
but this has much higher - // temp ram req'ts (sum of number of ords across all - // segs) - if ( ordinalMap != null ) { - final LongValues ordMap = ordinalMap.getGlobalOrds( segOrd ); - - int numSegOrds = (int) segValues.getValueCount(); - - if ( hits.totalHits() < numSegOrds / 10 ) { - IntProcedure incrementCountForOrdinal = ord -> counts[ord]++; - // Remap every ord to global ord as we iterate: - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc() ) { - if ( !segValues.advanceExact( doc ) ) { - continue; // No value for this doc - } - while ( segValues.hasNextValue() ) { - int term = (int) segValues.nextOrd(); - int globalOrd = (int) ordMap.get( term ); - uniqueOrdinalsForDocument.add( globalOrd ); - } - uniqueOrdinalsForDocument.forEach( incrementCountForOrdinal ); - uniqueOrdinalsForDocument.clear(); - } - } - else { - // First count in seg-ord space: - final int[] segCounts = new int[numSegOrds]; - IntProcedure incrementCountForOrdinal = ord -> segCounts[ord]++; - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc() ) { - if ( !segValues.advanceExact( doc ) ) { - continue; // No value for this doc - } - while ( segValues.hasNextValue() ) { - int term = (int) segValues.nextOrd(); - uniqueOrdinalsForDocument.add( term ); - } - uniqueOrdinalsForDocument.forEach( incrementCountForOrdinal ); - uniqueOrdinalsForDocument.clear(); - } - - // Then, migrate to global ords: - for ( int ord = 0; ord < numSegOrds; ord++ ) { - int count = segCounts[ord]; - if ( count != 0 ) { - counts[(int) ordMap.get( ord )] += count; - } - } - } - } - else { - // No ord mapping (e.g., single segment index): - // just aggregate directly into counts. - IntProcedure incrementCountForOrdinal = ord -> counts[ord]++; - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc() ) { - if ( !segValues.advanceExact( doc ) ) { - continue; // No value for this doc - } - while ( segValues.hasNextValue() ) { - int term = (int) segValues.nextOrd(); - uniqueOrdinalsForDocument.add( term ); - } - uniqueOrdinalsForDocument.forEach( incrementCountForOrdinal ); - uniqueOrdinalsForDocument.clear(); - } - } - } - - /** - * Does all the "real work" of tallying up the counts. - */ - private void count(IndexReader reader, TextMultiValuesSource valuesSource, List matchingDocs) - throws IOException { - OrdinalMap ordinalMap; - - // TODO: is this right? really, we need a way to - // verify that this ordinalMap "matches" the leaves in - // matchingDocs... - if ( dv instanceof MultiSortedSetDocValues && matchingDocs.size() > 1 ) { - ordinalMap = ( (MultiSortedSetDocValues) dv ).mapping; - } - else { - ordinalMap = null; - } - - for ( MatchingDocs hits : matchingDocs ) { - - // LUCENE-5090: make sure the provided reader context "matches" - // the top-level reader passed to the - // SortedSetDocValuesReaderState, else cryptic - // AIOOBE can happen: - if ( ReaderUtil.getTopLevelContext( hits.context() ).reader() != reader ) { - throw new IllegalStateException( - "the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader" ); - } - - countOneSegment( ordinalMap, valuesSource.getValues( hits.context() ), hits.context().ord, hits ); - } - } - - @Override - public Number getSpecificValue(String dim, String... 
path) { - throw new UnsupportedOperationException(); - } - - @Override - public List getAllDims(int topN) throws IOException { - return Collections.singletonList( getTopChildren( topN, field ) ); - } - - /** - * While there is a `TopOrdAndIntQueue` in Lucene, unfortunately it works with OrdAndValue objects (in API). - * And there's no access to the value, leading to casting any type value has to be accessed. Hence, this impl: - */ - private static class HibernateSearchTopOrdAndIntQueue extends PriorityQueue { - - public HibernateSearchTopOrdAndIntQueue(int maxSize) { - super( maxSize ); - } - - @Override - protected boolean lessThan(TopOrdAndIntQueue.OrdAndInt a, TopOrdAndIntQueue.OrdAndInt b) { - return a.lessThan( b ); - } - } - -} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/EffectiveRange.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/EffectiveRange.java deleted file mode 100644 index 042446b36d5..00000000000 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/EffectiveRange.java +++ /dev/null @@ -1,8 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.types.aggregation.impl; - -public record EffectiveRange(long min, long max) { -} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java index a5eecfc8d27..f04c116c3d5 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java @@ -4,5 +4,61 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; +import java.util.Collection; +import java.util.function.ToLongFunction; + +import org.hibernate.search.util.common.data.Range; +import org.hibernate.search.util.common.data.RangeBoundInclusion; + public record EffectiveRange(long min, long max) { + public static EffectiveRange[] createEffectiveRangesForIntegralValues( + Collection> ranges) { + return createEffectiveRangesForIntegralValues( ranges, Number::longValue, Long.MIN_VALUE, Long.MAX_VALUE, false ); + } + + public static EffectiveRange[] createEffectiveRangesForIntegralValues( + Collection> ranges, + ToLongFunction encoder, T negativeInfinity, T positiveInfinity) { + return createEffectiveRangesForIntegralValues( ranges, encoder, negativeInfinity, positiveInfinity, true ); + } + + private static EffectiveRange[] createEffectiveRangesForIntegralValues(Collection> ranges, + ToLongFunction encoder, + T lowestPossibleValue, T highestPossibleValue, boolean extremaAreInfinity) { + EffectiveRange[] effectiveRanges = new EffectiveRange[ranges.size()]; + int i = 0; + for ( Range range : ranges ) { + final T lowerBoundValue = range.lowerBoundValue().orElse( null ); + final T upperBoundValue = range.upperBoundValue().orElse( null ); + + + long min = encoder.applyAsLong( lowerBoundValue == null ? lowestPossibleValue : lowerBoundValue ); + long max = encoder.applyAsLong( upperBoundValue == null ? highestPossibleValue : upperBoundValue ); + + // The lower bound is included if it is explicitly included + // ... 
or if it is infinity but infinity cannot be represented + // so if it's none of the above we exclude the boundary by ++ it. + if ( + RangeBoundInclusion.EXCLUDED.equals( range.lowerBoundInclusion() ) + && ( extremaAreInfinity || lowerBoundValue != null ) ) { + ++min; + } + + // The upper bound is included if it is explicitly included + // ... or if it is infinity but infinity cannot be represented + // so if it's none of the above we exclude the boundary by -- it. + if ( + RangeBoundInclusion.EXCLUDED.equals( range.upperBoundInclusion() ) + && ( extremaAreInfinity || upperBoundValue != null ) ) { + --max; + } + + effectiveRanges[i] = new EffectiveRange( + min, + max + ); + ++i; + } + return effectiveRanges; + } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java index cd1fa6e3274..12f319907b2 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java @@ -4,23 +4,17 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; import org.hibernate.search.backend.lucene.lowlevel.comparator.impl.DoubleValuesSourceComparator; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.DoubleMultiValuesToSingleValuesSource; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -91,22 +85,9 @@ public Double doubleToTerm(double doubleValue) { return doubleValue; } - @Override - public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException { - // As we don't need to apply any operation to terms except sometimes a sort, - // we can simply rely on raw, long values, whose order is the same as their corresponding double value. - // Values are ultimately converted back to the Double equivalent by calling sortedDocValueToTerm. 
- JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueFacetCounts( absoluteFieldPath, source, facetsCollector ); - } - @Override public EffectiveRange[] createEffectiveRanges(Collection> ranges) { - return FacetCountsUtils.createEffectiveRangesForIntegralValues( ranges, NumericUtils::doubleToSortableLong, + return EffectiveRange.createEffectiveRangesForIntegralValues( ranges, NumericUtils::doubleToSortableLong, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java index 141ecb237e7..333f1799939 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java @@ -4,24 +4,18 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; import org.hibernate.search.backend.lucene.lowlevel.comparator.impl.FloatValuesSourceComparator; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.DoubleMultiValuesToSingleValuesSource; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.document.FloatPoint; import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -92,22 +86,9 @@ public Float doubleToTerm(double doubleValue) { return NumberUtils.toFloat( doubleValue ); } - @Override - public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException { - // As we don't need to apply any operation to terms except sometimes a sort, - // we can simply rely on raw, int values, whose order is the same as their corresponding float value. - // Values are ultimately converted back to the Float equivalent by calling sortedDocValueToTerm. 
- JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromIntField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueFacetCounts( absoluteFieldPath, source, facetsCollector ); - } - @Override public EffectiveRange[] createEffectiveRanges(Collection> ranges) { - return FacetCountsUtils.createEffectiveRangesForIntegralValues( ranges, NumericUtils::floatToSortableInt, + return EffectiveRange.createEffectiveRangesForIntegralValues( ranges, NumericUtils::floatToSortableInt, Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java index 3ce7e9e0def..e57cb0f0ae0 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java @@ -4,24 +4,18 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; import org.hibernate.search.backend.lucene.lowlevel.comparator.impl.IntValuesSourceComparator; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesToSingleValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -91,22 +85,9 @@ public double sortedDocValueToDouble(long longValue) { return sortedDocValueToTerm( longValue ).doubleValue(); } - @Override - public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException { - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromIntField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueFacetCounts( - absoluteFieldPath, source, - facetsCollector - ); - } - @Override public EffectiveRange[] createEffectiveRanges(Collection> ranges) { - return FacetCountsUtils.createEffectiveRangesForIntegralValues( ranges ); + return EffectiveRange.createEffectiveRangesForIntegralValues( ranges ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java index 409356559c0..6aab8e3a5d5 100644 --- 
a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java @@ -4,24 +4,18 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; import org.hibernate.search.backend.lucene.lowlevel.comparator.impl.LongValuesSourceComparator; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesToSingleValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -91,22 +85,9 @@ public Long doubleToTerm(double doubleValue) { return NumberUtils.toLong( doubleValue ); } - @Override - public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException { - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueFacetCounts( - absoluteFieldPath, source, - facetsCollector - ); - } - @Override public EffectiveRange[] createEffectiveRanges(Collection> ranges) { - return FacetCountsUtils.createEffectiveRangesForIntegralValues( ranges ); + return EffectiveRange.createEffectiveRangesForIntegralValues( ranges ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java index 20c08cf7c3d..a25ade3387a 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java @@ -4,7 +4,6 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; @@ -12,8 +11,6 @@ import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.util.common.data.Range; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -43,10 +40,6 @@ public interface LuceneNumericDomain { E doubleToTerm(double doubleValue); - Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - 
NestedDocsProvider nestedDocsProvider) - throws IOException; - EffectiveRange[] createEffectiveRanges(Collection> ranges); IndexableField createIndexField(String absoluteFieldPath, E numericValue); diff --git a/lucene-next/build/parents/integrationtest/pom.xml b/lucene-next/build/parents/integrationtest/pom.xml index 893e99dbf36..bab17b24ea3 100644 --- a/lucene-next/build/parents/integrationtest/pom.xml +++ b/lucene-next/build/parents/integrationtest/pom.xml @@ -61,11 +61,6 @@ lucene-join ${version.org.apache.lucene.next.updatable} - - org.apache.lucene - lucene-facet - ${version.org.apache.lucene.next.updatable} - org.apache.lucene lucene-highlighter diff --git a/lucene-next/build/parents/internal/pom.xml b/lucene-next/build/parents/internal/pom.xml index 21b7292c444..24564511e08 100644 --- a/lucene-next/build/parents/internal/pom.xml +++ b/lucene-next/build/parents/internal/pom.xml @@ -49,11 +49,6 @@ lucene-join ${version.org.apache.lucene.next.updatable} - - org.apache.lucene - lucene-facet - ${version.org.apache.lucene.next.updatable} - org.apache.lucene lucene-highlighter diff --git a/lucene-next/build/parents/public/pom.xml b/lucene-next/build/parents/public/pom.xml index fee8d321c27..4783c83132c 100644 --- a/lucene-next/build/parents/public/pom.xml +++ b/lucene-next/build/parents/public/pom.xml @@ -49,11 +49,6 @@ lucene-join ${version.org.apache.lucene.next.updatable} - - org.apache.lucene - lucene-facet - ${version.org.apache.lucene.next.updatable} - org.apache.lucene lucene-highlighter diff --git a/lucene-next/build/parents/springtest/pom.xml b/lucene-next/build/parents/springtest/pom.xml index 15df0a75b5a..cd1be1c3e31 100644 --- a/lucene-next/build/parents/springtest/pom.xml +++ b/lucene-next/build/parents/springtest/pom.xml @@ -60,11 +60,6 @@ lucene-join ${version.org.apache.lucene.next.updatable} - - org.apache.lucene - lucene-facet - ${version.org.apache.lucene.next.updatable} - org.apache.lucene lucene-highlighter From e411cea7fa0f957bab3e5898e91481bd091af4cc Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Fri, 27 Jun 2025 16:26:31 +0200 Subject: [PATCH 05/23] HSEARCH-3661 Do not do rollup for range aggregation --- .../collector/impl/RangeCollector.java | 173 ++---------------- .../collector/impl/RangeCollector.java | 173 ++---------------- 2 files changed, 40 insertions(+), 306 deletions(-) diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java index 0abcde21cf2..7f4a6627eda 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java @@ -27,12 +27,9 @@ public class RangeCollector extends SimpleCollector { private final LongMultiValuesSource valuesSource; - private final LongRangeNode root; private final long[] boundaries; - private final long[] countsPerBoundaries; + private final IntArrayList[] countsPerBoundaries; - private int leafUpto; - private boolean filled = false; private final long[] counts; private LongMultiValues values; @@ -73,12 +70,12 @@ public RangeCollector(LongMultiValuesSource valuesSource, EffectiveRange[] range Arrays.sort( endsList.buffer, 0, endsList.elementsCount ); // Build elementaryIntervals (a 1D Venn diagram): - List elementaryIntervals = new ArrayList<>(); + List elementaryIntervals = new 
ArrayList<>(); int upto0 = 1; long v = endsList.get( 0 ); long prev; if ( endsMap.get( v ) == 3 ) { - elementaryIntervals.add( new InclusiveRange( v, v ) ); + elementaryIntervals.add( new EffectiveRange( v, v ) ); prev = v + 1; } else { @@ -92,16 +89,16 @@ public RangeCollector(LongMultiValuesSource valuesSource, EffectiveRange[] range // This point is both an end and a start; we need to // separate it: if ( v > prev ) { - elementaryIntervals.add( new InclusiveRange( prev, v - 1 ) ); + elementaryIntervals.add( new EffectiveRange( prev, v - 1 ) ); } - elementaryIntervals.add( new InclusiveRange( v, v ) ); + elementaryIntervals.add( new EffectiveRange( v, v ) ); prev = v + 1; } else if ( flags == 1 ) { // This point is only the start of an interval; // attach it to next interval: if ( v > prev ) { - elementaryIntervals.add( new InclusiveRange( prev, v - 1 ) ); + elementaryIntervals.add( new EffectiveRange( prev, v - 1 ) ); } prev = v; } @@ -109,33 +106,34 @@ else if ( flags == 1 ) { assert flags == 2; // This point is only the end of an interval; attach // it to last interval: - elementaryIntervals.add( new InclusiveRange( prev, v ) ); + elementaryIntervals.add( new EffectiveRange( prev, v ) ); prev = v + 1; } upto0++; } - // Build binary tree on top of intervals: - root = split( 0, elementaryIntervals.size(), elementaryIntervals ); - - // Set outputs, so we know which range to output for - // each node in the tree: - for ( int i = 0; i < ranges.length; i++ ) { - root.addOutputs( i, ranges[i] ); - } - // Set boundaries (ends of each elementary interval): boundaries = new long[elementaryIntervals.size()]; + countsPerBoundaries = new IntArrayList[boundaries.length]; for ( int i = 0; i < boundaries.length; i++ ) { - boundaries[i] = elementaryIntervals.get( i ).end; + EffectiveRange interval = elementaryIntervals.get( i ); + boundaries[i] = interval.max(); + IntArrayList list = new IntArrayList(); + countsPerBoundaries[i] = list; + for ( int j = 0; j < ranges.length; j++ ) { + if ( interval.min() >= ranges[j].min() && interval.max() <= ranges[j].max() ) { + list.add( j ); + } + } } - countsPerBoundaries = new long[boundaries.length]; counts = new long[ranges.length]; } private void incrementCountForLeafWithIndex(int index) { - countsPerBoundaries[index]++; + for ( IntCursor cursor : countsPerBoundaries[index] ) { + counts[cursor.value]++; + } } private int findLeafIndex(long v) { @@ -163,133 +161,6 @@ else if ( v > boundaries[mid + 1] ) { } } - /** Fills counts corresponding to the original input - * ranges, returning the missing count (how many hits - * didn't match any ranges). 
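The constructor above replaces the old segment-tree rollup: for every elementary interval it precomputes the indices of the requested ranges that fully contain it, so collecting a value becomes a leaf lookup plus direct counter increments. A simplified, self-contained sketch of that idea (hypothetical names, not the actual RangeCollector API):

public final class RangeCountingSketch {
    // inclusive upper bound of each elementary interval
    private static final long[] BOUNDARIES = { 9L, 19L, Long.MAX_VALUE };
    // indices of the requested ranges that fully contain each elementary interval
    private static final int[][] RANGES_PER_INTERVAL = { { 0 }, { 0, 1 }, { 1 } };
    private static final long[] COUNTS = new long[2];

    static void count(long value) {
        int leaf = 0;
        while ( BOUNDARIES[leaf] < value ) { // RangeCollector does this with a binary search
            leaf++;
        }
        for ( int rangeIndex : RANGES_PER_INTERVAL[leaf] ) {
            COUNTS[rangeIndex]++; // counted directly, no rollup pass needed afterwards
        }
    }

    public static void main(String[] args) {
        count( 5L );
        count( 15L );
        count( 25L );
        // COUNTS is now { 2, 2 }: each of the two ranges matched two of the three values
    }
}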
*/ - private void fillCounts(long[] counts) { - leafUpto = 0; - rollup( root, counts, false ); - } - - private long rollup(LongRangeNode node, long[] counts, boolean sawOutputs) { - long count; - sawOutputs |= node.outputs != null; - if ( node.left != null ) { - count = rollup( node.left, counts, sawOutputs ); - count += rollup( node.right, counts, sawOutputs ); - } - else { - // Leaf: - count = countsPerBoundaries[leafUpto]; - leafUpto++; - } - if ( node.outputs != null ) { - for ( IntCursor rangeIndexCursor : node.outputs ) { - counts[rangeIndexCursor.value] += count; - } - } - return count; - } - - private static LongRangeNode split(int start, int end, List elementaryIntervals) { - if ( start == end - 1 ) { - // leaf - InclusiveRange range = elementaryIntervals.get( start ); - return new LongRangeNode( range.start, range.end, null, null ); - } - else { - int mid = ( start + end ) >>> 1; - LongRangeNode left = split( start, mid, elementaryIntervals ); - LongRangeNode right = split( mid, end, elementaryIntervals ); - return new LongRangeNode( left.start, right.end, left, right ); - } - } - - private record InclusiveRange(long start, long end) { - private InclusiveRange { - assert end >= start; - } - - @Override - public String toString() { - return start + " to " + end; - } - } - - /** Holds one node of the segment tree. */ - private static class LongRangeNode { - private final LongRangeNode left; - private final LongRangeNode right; - - // Our range, inclusive: - private final long start; - private final long end; - - // Which range indices to output when a query goes - // through this node: - IntArrayList outputs; - - public LongRangeNode(long start, long end, LongRangeNode left, LongRangeNode right) { - this.start = start; - this.end = end; - this.left = left; - this.right = right; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - toString( sb, 0 ); - return sb.toString(); - } - - static void indent(StringBuilder sb, int depth) { - for ( int i = 0; i < depth; i++ ) { - sb.append( " " ); - } - } - - /** Recursively assigns range outputs to each node. 
*/ - void addOutputs(int index, EffectiveRange range) { - if ( start >= range.min() && end <= range.max() ) { - // Our range is fully included in the incoming - // range; add to our output list: - if ( outputs == null ) { - outputs = new IntArrayList(); - } - outputs.add( index ); - } - else if ( left != null ) { - assert right != null; - // Recurse: - left.addOutputs( index, range ); - right.addOutputs( index, range ); - } - } - - void toString(StringBuilder sb, int depth) { - indent( sb, depth ); - if ( left == null ) { - assert right == null; - sb.append( "leaf: " ).append( start ).append( " to " ).append( end ); - } - else { - sb.append( "node: " ).append( start ).append( " to " ).append( end ); - } - if ( outputs != null ) { - sb.append( " outputs=" ); - sb.append( outputs ); - } - sb.append( '\n' ); - - if ( left != null ) { - assert right != null; - left.toString( sb, depth + 1 ); - right.toString( sb, depth + 1 ); - } - } - } - @Override public void collect(int doc) throws IOException { if ( values.advanceExact( doc ) ) { @@ -305,10 +176,6 @@ public void collect(int doc) throws IOException { } public long[] counts() { - if ( !filled ) { - filled = true; - fillCounts( counts ); - } return counts; } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java index 0abcde21cf2..7f4a6627eda 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java @@ -27,12 +27,9 @@ public class RangeCollector extends SimpleCollector { private final LongMultiValuesSource valuesSource; - private final LongRangeNode root; private final long[] boundaries; - private final long[] countsPerBoundaries; + private final IntArrayList[] countsPerBoundaries; - private int leafUpto; - private boolean filled = false; private final long[] counts; private LongMultiValues values; @@ -73,12 +70,12 @@ public RangeCollector(LongMultiValuesSource valuesSource, EffectiveRange[] range Arrays.sort( endsList.buffer, 0, endsList.elementsCount ); // Build elementaryIntervals (a 1D Venn diagram): - List elementaryIntervals = new ArrayList<>(); + List elementaryIntervals = new ArrayList<>(); int upto0 = 1; long v = endsList.get( 0 ); long prev; if ( endsMap.get( v ) == 3 ) { - elementaryIntervals.add( new InclusiveRange( v, v ) ); + elementaryIntervals.add( new EffectiveRange( v, v ) ); prev = v + 1; } else { @@ -92,16 +89,16 @@ public RangeCollector(LongMultiValuesSource valuesSource, EffectiveRange[] range // This point is both an end and a start; we need to // separate it: if ( v > prev ) { - elementaryIntervals.add( new InclusiveRange( prev, v - 1 ) ); + elementaryIntervals.add( new EffectiveRange( prev, v - 1 ) ); } - elementaryIntervals.add( new InclusiveRange( v, v ) ); + elementaryIntervals.add( new EffectiveRange( v, v ) ); prev = v + 1; } else if ( flags == 1 ) { // This point is only the start of an interval; // attach it to next interval: if ( v > prev ) { - elementaryIntervals.add( new InclusiveRange( prev, v - 1 ) ); + elementaryIntervals.add( new EffectiveRange( prev, v - 1 ) ); } prev = v; } @@ -109,33 +106,34 @@ else if ( flags == 1 ) { assert flags == 2; // This point is only the end of an interval; attach // it to last 
interval: - elementaryIntervals.add( new InclusiveRange( prev, v ) ); + elementaryIntervals.add( new EffectiveRange( prev, v ) ); prev = v + 1; } upto0++; } - // Build binary tree on top of intervals: - root = split( 0, elementaryIntervals.size(), elementaryIntervals ); - - // Set outputs, so we know which range to output for - // each node in the tree: - for ( int i = 0; i < ranges.length; i++ ) { - root.addOutputs( i, ranges[i] ); - } - // Set boundaries (ends of each elementary interval): boundaries = new long[elementaryIntervals.size()]; + countsPerBoundaries = new IntArrayList[boundaries.length]; for ( int i = 0; i < boundaries.length; i++ ) { - boundaries[i] = elementaryIntervals.get( i ).end; + EffectiveRange interval = elementaryIntervals.get( i ); + boundaries[i] = interval.max(); + IntArrayList list = new IntArrayList(); + countsPerBoundaries[i] = list; + for ( int j = 0; j < ranges.length; j++ ) { + if ( interval.min() >= ranges[j].min() && interval.max() <= ranges[j].max() ) { + list.add( j ); + } + } } - countsPerBoundaries = new long[boundaries.length]; counts = new long[ranges.length]; } private void incrementCountForLeafWithIndex(int index) { - countsPerBoundaries[index]++; + for ( IntCursor cursor : countsPerBoundaries[index] ) { + counts[cursor.value]++; + } } private int findLeafIndex(long v) { @@ -163,133 +161,6 @@ else if ( v > boundaries[mid + 1] ) { } } - /** Fills counts corresponding to the original input - * ranges, returning the missing count (how many hits - * didn't match any ranges). */ - private void fillCounts(long[] counts) { - leafUpto = 0; - rollup( root, counts, false ); - } - - private long rollup(LongRangeNode node, long[] counts, boolean sawOutputs) { - long count; - sawOutputs |= node.outputs != null; - if ( node.left != null ) { - count = rollup( node.left, counts, sawOutputs ); - count += rollup( node.right, counts, sawOutputs ); - } - else { - // Leaf: - count = countsPerBoundaries[leafUpto]; - leafUpto++; - } - if ( node.outputs != null ) { - for ( IntCursor rangeIndexCursor : node.outputs ) { - counts[rangeIndexCursor.value] += count; - } - } - return count; - } - - private static LongRangeNode split(int start, int end, List elementaryIntervals) { - if ( start == end - 1 ) { - // leaf - InclusiveRange range = elementaryIntervals.get( start ); - return new LongRangeNode( range.start, range.end, null, null ); - } - else { - int mid = ( start + end ) >>> 1; - LongRangeNode left = split( start, mid, elementaryIntervals ); - LongRangeNode right = split( mid, end, elementaryIntervals ); - return new LongRangeNode( left.start, right.end, left, right ); - } - } - - private record InclusiveRange(long start, long end) { - private InclusiveRange { - assert end >= start; - } - - @Override - public String toString() { - return start + " to " + end; - } - } - - /** Holds one node of the segment tree. 
*/ - private static class LongRangeNode { - private final LongRangeNode left; - private final LongRangeNode right; - - // Our range, inclusive: - private final long start; - private final long end; - - // Which range indices to output when a query goes - // through this node: - IntArrayList outputs; - - public LongRangeNode(long start, long end, LongRangeNode left, LongRangeNode right) { - this.start = start; - this.end = end; - this.left = left; - this.right = right; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - toString( sb, 0 ); - return sb.toString(); - } - - static void indent(StringBuilder sb, int depth) { - for ( int i = 0; i < depth; i++ ) { - sb.append( " " ); - } - } - - /** Recursively assigns range outputs to each node. */ - void addOutputs(int index, EffectiveRange range) { - if ( start >= range.min() && end <= range.max() ) { - // Our range is fully included in the incoming - // range; add to our output list: - if ( outputs == null ) { - outputs = new IntArrayList(); - } - outputs.add( index ); - } - else if ( left != null ) { - assert right != null; - // Recurse: - left.addOutputs( index, range ); - right.addOutputs( index, range ); - } - } - - void toString(StringBuilder sb, int depth) { - indent( sb, depth ); - if ( left == null ) { - assert right == null; - sb.append( "leaf: " ).append( start ).append( " to " ).append( end ); - } - else { - sb.append( "node: " ).append( start ).append( " to " ).append( end ); - } - if ( outputs != null ) { - sb.append( " outputs=" ); - sb.append( outputs ); - } - sb.append( '\n' ); - - if ( left != null ) { - assert right != null; - left.toString( sb, depth + 1 ); - right.toString( sb, depth + 1 ); - } - } - } - @Override public void collect(int doc) throws IOException { if ( values.advanceExact( doc ) ) { @@ -305,10 +176,6 @@ public void collect(int doc) throws IOException { } public long[] counts() { - if ( !filled ) { - filled = true; - fillCounts( counts ); - } return counts; } From 75be8ac4a72b4d2bd919d2a607224777fb77ec82 Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Mon, 30 Jun 2025 16:04:54 +0200 Subject: [PATCH 06/23] HSEARCH-3661 WIP making aggregation value work --- .../impl/ElasticsearchRangeAggregation.java | 8 +- .../collector/impl/RangeCollector.java | 41 +++- .../collector/impl/RangeCollectorFactory.java | 35 +++- .../collector/impl/RangeCollectorManager.java | 12 +- .../impl/AggregationExtractContext.java | 52 +---- .../impl/AggregationExtractContextImpl.java | 72 +++++++ .../impl/AggregationRequestContext.java | 45 +---- .../impl/AggregationRequestContextImpl.java | 63 ++++++ .../impl/LuceneExtractableSearchResult.java | 3 +- .../query/impl/LuceneSearchQueryBuilder.java | 3 +- .../impl/LuceneNumericRangeAggregation.java | 186 +++++++++++++++--- .../search/aggregation/AggregationDslIT.java | 41 +++- .../dsl/RangeAggregationFieldStep.java | 9 +- .../dsl/RangeAggregationRangeMoreStep.java | 14 +- .../dsl/RangeAggregationRangeStep.java | 6 +- .../dsl/RangeAggregationRangeValueStep.java | 32 +++ .../impl/RangeAggregationFieldStepImpl.java | 4 +- .../impl/RangeAggregationRangeStepImpl.java | 30 +-- .../spi/RangeAggregationBuilder.java | 7 +- .../SingleFieldAggregationBaseIT.java | 7 +- .../impl/StubSearchAggregation.java | 11 +- .../query/dsl/impl/RangeFacetRequest.java | 4 +- 22 files changed, 527 insertions(+), 158 deletions(-) create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContextImpl.java create 
mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContextImpl.java create mode 100644 engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeValueStep.java diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java index 2ae01c81718..985309b8831 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java @@ -16,6 +16,7 @@ import org.hibernate.search.backend.elasticsearch.search.common.impl.ElasticsearchSearchIndexValueFieldContext; import org.hibernate.search.backend.elasticsearch.search.predicate.impl.ElasticsearchSearchPredicate; import org.hibernate.search.backend.elasticsearch.types.codec.impl.ElasticsearchFieldCodec; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.spi.RangeAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; import org.hibernate.search.util.common.data.Range; @@ -113,7 +114,7 @@ protected Map, Long> doExtract(AggregationExtractContext context, JsonE } private static class Builder extends AbstractBuilder, Long> - implements RangeAggregationBuilder { + implements RangeAggregationBuilder { private final Function encoder; @@ -150,6 +151,11 @@ public void range(Range range) { rangesJson.add( rangeJson ); } + @Override + public RangeAggregationBuilder withValue(SearchAggregation aggregation) { + throw new UnsupportedOperationException(); + } + @Override public ElasticsearchRangeAggregation build() { return new ElasticsearchRangeAggregation<>( this ); diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java index 7f4a6627eda..93cf5ccda06 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java @@ -21,6 +21,9 @@ import com.carrotsearch.hppc.cursors.IntCursor; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.SimpleCollector; @@ -31,11 +34,19 @@ public class RangeCollector extends SimpleCollector { private final IntArrayList[] countsPerBoundaries; private final long[] counts; + private final Collector[][] collectors; + private final CollectorKey[] keys; + private final LeafCollector[][] leafCollectors; + private final CollectorManager[] managers; private LongMultiValues values; - public RangeCollector(LongMultiValuesSource valuesSource, EffectiveRange[] ranges) { + public RangeCollector(LongMultiValuesSource valuesSource, EffectiveRange[] ranges, Collector[][] collectors, + CollectorKey[] keys, CollectorManager[] managers) { this.valuesSource = 
valuesSource; + this.collectors = collectors; + this.keys = keys; + this.managers = managers; // Maps all range inclusive endpoints to int flags; 1 // = start of interval, 2 = end of interval. We need to @@ -128,11 +139,18 @@ else if ( flags == 1 ) { } counts = new long[ranges.length]; + leafCollectors = new LeafCollector[keys.length][]; + for ( int i = 0; i < leafCollectors.length; i++ ) { + leafCollectors[i] = new LeafCollector[ranges.length]; + } } - private void incrementCountForLeafWithIndex(int index) { + private void processLeafWithIndex(int index, int doc) throws IOException { for ( IntCursor cursor : countsPerBoundaries[index] ) { counts[cursor.value]++; + for ( int i = 0; i < keys.length; i++ ) { + leafCollectors[i][cursor.value].collect( doc ); + } } } @@ -169,7 +187,7 @@ public void collect(int doc) throws IOException { // Each document must be counted only once per range. int leafIndex = findLeafIndex( values.nextValue() ); if ( uniqueLeafIndicesForDocument.add( leafIndex ) ) { - incrementCountForLeafWithIndex( leafIndex ); + processLeafWithIndex( leafIndex, doc ); } } } @@ -179,6 +197,18 @@ public long[] counts() { return counts; } + public Collector[][] collectors() { + return collectors; + } + + public CollectorKey[] collectorKeys() { + return keys; + } + + public CollectorManager[] managers() { + return managers; + } + @Override public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; @@ -186,6 +216,11 @@ public ScoreMode scoreMode() { protected void doSetNextReader(LeafReaderContext context) throws IOException { values = valuesSource.getValues( context ); + for ( int i = 0; i < collectors.length; i++ ) { + for ( int j = 0; j < collectors[i].length; j++ ) { + leafCollectors[i][j] = collectors[i][j].getLeafCollector( context ); + } + } } public void finish() throws IOException { diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java index cdaba62f538..f3a6c606b90 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java @@ -4,29 +4,54 @@ */ package org.hibernate.search.backend.lucene.lowlevel.collector.impl; +import java.io.IOException; +import java.util.List; + import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; + public class RangeCollectorFactory implements CollectorFactory> { public static CollectorFactory> instance( - LongMultiValuesSource valuesSource, EffectiveRange[] ranges) { - return new RangeCollectorFactory<>( valuesSource, ranges ); + LongMultiValuesSource valuesSource, EffectiveRange[] ranges, List> collectorFactories) { + return new RangeCollectorFactory<>( valuesSource, ranges, collectorFactories ); } public final CollectorKey key = CollectorKey.create(); private final LongMultiValuesSource valuesSource; private final EffectiveRange[] ranges; + private final List> collectorFactories; - public RangeCollectorFactory(LongMultiValuesSource valuesSource, EffectiveRange[] ranges) { + public RangeCollectorFactory(LongMultiValuesSource valuesSource, EffectiveRange[] 
ranges, + List> collectorFactories) { this.valuesSource = valuesSource; this.ranges = ranges; + this.collectorFactories = collectorFactories; } + @SuppressWarnings({ "rawtypes", "unchecked" }) @Override - public RangeCollectorManager createCollectorManager(CollectorExecutionContext context) { - return new RangeCollectorManager<>( valuesSource, ranges ); + public RangeCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { + Collector[][] collectors = new Collector[collectorFactories.size()][]; + CollectorKey[] keys = new CollectorKey[collectorFactories.size()]; + var managers = new CollectorManager[collectorFactories.size()]; + int index = 0; + for ( CollectorFactory collectorFactory : collectorFactories ) { + CollectorManager collectorManager = collectorFactory.createCollectorManager( context ); + keys[index] = collectorFactory.getCollectorKey(); + managers[index] = collectorManager; + Collector[] c = new Collector[ranges.length]; + collectors[index] = c; + for ( int i = 0; i < c.length; i++ ) { + c[i] = collectorManager.newCollector(); + } + index++; + } + return new RangeCollectorManager<>( valuesSource, ranges, collectors, keys, managers ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java index 92f5e854504..d1056204096 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java @@ -9,21 +9,29 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; +import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; public class RangeCollectorManager implements CollectorManager { private final LongMultiValuesSource valuesSource; private final EffectiveRange[] ranges; + private final Collector[][] collectors; + private final CollectorKey[] keys; + private final CollectorManager[] managers; - public RangeCollectorManager(LongMultiValuesSource valuesSource, EffectiveRange[] ranges) { + public RangeCollectorManager(LongMultiValuesSource valuesSource, EffectiveRange[] ranges, Collector[][] collectors, + CollectorKey[] keys, CollectorManager[] managers) { this.valuesSource = valuesSource; this.ranges = ranges; + this.collectors = collectors; + this.keys = keys; + this.managers = managers; } @Override public RangeCollector newCollector() { - return new RangeCollector( valuesSource, ranges ); + return new RangeCollector( valuesSource, ranges, collectors, keys, managers ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java index ba8fb98b0b6..edb3f90d4f3 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java @@ -4,63 +4,23 @@ */ package org.hibernate.search.backend.lucene.search.aggregation.impl; -import 
java.util.Set; - import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; -import org.hibernate.search.backend.lucene.search.extraction.impl.HibernateSearchMultiCollectorManager; import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; -import org.hibernate.search.backend.lucene.search.query.impl.LuceneSearchQueryIndexScope; -import org.hibernate.search.engine.backend.session.spi.BackendSessionContext; import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; -import org.hibernate.search.engine.search.query.spi.QueryParameters; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Collector; import org.apache.lucene.search.Query; -public class AggregationExtractContext { - - private final LuceneSearchQueryIndexScope queryIndexScope; - private final BackendSessionContext sessionContext; - private final IndexReader indexReader; - private final FromDocumentValueConvertContext fromDocumentValueConvertContext; - private final HibernateSearchMultiCollectorManager.MultiCollectedResults multiCollectedResults; - private final Set routingKeys; - private final QueryParameters parameters; - - public AggregationExtractContext(LuceneSearchQueryIndexScope queryIndexScope, BackendSessionContext sessionContext, - IndexReader indexReader, - FromDocumentValueConvertContext fromDocumentValueConvertContext, - HibernateSearchMultiCollectorManager.MultiCollectedResults multiCollectedResults, Set routingKeys, - QueryParameters parameters) { - this.queryIndexScope = queryIndexScope; - this.sessionContext = sessionContext; - this.indexReader = indexReader; - this.fromDocumentValueConvertContext = fromDocumentValueConvertContext; - this.multiCollectedResults = multiCollectedResults; - this.routingKeys = routingKeys; - this.parameters = parameters; - } - - public PredicateRequestContext toPredicateRequestContext(String absolutePath) { - return PredicateRequestContext.withSession( queryIndexScope, sessionContext, routingKeys, parameters ) - .withNestedPath( absolutePath ); - } +public interface AggregationExtractContext { + PredicateRequestContext toPredicateRequestContext(String absolutePath); - public IndexReader getIndexReader() { - return indexReader; - } + IndexReader getIndexReader(); - public FromDocumentValueConvertContext fromDocumentValueConvertContext() { - return fromDocumentValueConvertContext; - } + FromDocumentValueConvertContext fromDocumentValueConvertContext(); - public T getCollectorResults(CollectorKey key) { - return multiCollectedResults.get( key ); - } + T getCollectorResults(CollectorKey key); - public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { - return new NestedDocsProvider( nestedDocumentPath, nestedFilter ); - } + NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContextImpl.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContextImpl.java new file mode 100644 index 00000000000..55f9021edad --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContextImpl.java @@ -0,0 +1,72 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.search.aggregation.impl; + +import java.util.Set; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.extraction.impl.HibernateSearchMultiCollectorManager; +import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; +import org.hibernate.search.backend.lucene.search.query.impl.LuceneSearchQueryIndexScope; +import org.hibernate.search.engine.backend.session.spi.BackendSessionContext; +import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; +import org.hibernate.search.engine.search.query.spi.QueryParameters; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Query; + +public class AggregationExtractContextImpl implements AggregationExtractContext { + + private final LuceneSearchQueryIndexScope queryIndexScope; + private final BackendSessionContext sessionContext; + private final IndexReader indexReader; + private final FromDocumentValueConvertContext fromDocumentValueConvertContext; + private final HibernateSearchMultiCollectorManager.MultiCollectedResults multiCollectedResults; + private final Set routingKeys; + private final QueryParameters parameters; + + public AggregationExtractContextImpl(LuceneSearchQueryIndexScope queryIndexScope, + BackendSessionContext sessionContext, + IndexReader indexReader, + FromDocumentValueConvertContext fromDocumentValueConvertContext, + HibernateSearchMultiCollectorManager.MultiCollectedResults multiCollectedResults, Set routingKeys, + QueryParameters parameters) { + this.queryIndexScope = queryIndexScope; + this.sessionContext = sessionContext; + this.indexReader = indexReader; + this.fromDocumentValueConvertContext = fromDocumentValueConvertContext; + this.multiCollectedResults = multiCollectedResults; + this.routingKeys = routingKeys; + this.parameters = parameters; + } + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return PredicateRequestContext.withSession( queryIndexScope, sessionContext, routingKeys, parameters ) + .withNestedPath( absolutePath ); + } + + @Override + public IndexReader getIndexReader() { + return indexReader; + } + + @Override + public FromDocumentValueConvertContext fromDocumentValueConvertContext() { + return fromDocumentValueConvertContext; + } + + @Override + public T getCollectorResults(CollectorKey key) { + return multiCollectedResults.get( key ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return new NestedDocsProvider( nestedDocumentPath, nestedFilter ); + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContext.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContext.java index b7b87fc573a..fbd865f17b7 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContext.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContext.java @@ -4,54 +4,23 @@ */ package org.hibernate.search.backend.lucene.search.aggregation.impl; -import java.util.Set; - import 
org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; -import org.hibernate.search.backend.lucene.search.extraction.impl.ExtractionRequirements; import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; -import org.hibernate.search.backend.lucene.search.query.impl.LuceneSearchQueryIndexScope; -import org.hibernate.search.engine.backend.session.spi.BackendSessionContext; import org.hibernate.search.engine.search.common.NamedValues; -import org.hibernate.search.engine.search.query.spi.QueryParameters; import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.Query; -public final class AggregationRequestContext { - - private final LuceneSearchQueryIndexScope queryIndexScope; - private final BackendSessionContext sessionContext; - private final Set routingKeys; - private final ExtractionRequirements.Builder extractionRequirementsBuilder; - private final QueryParameters parameters; - - public AggregationRequestContext(LuceneSearchQueryIndexScope queryIndexScope, BackendSessionContext sessionContext, - Set routingKeys, ExtractionRequirements.Builder extractionRequirementsBuilder, - QueryParameters parameters) { - this.queryIndexScope = queryIndexScope; - this.sessionContext = sessionContext; - this.routingKeys = routingKeys; - this.extractionRequirementsBuilder = extractionRequirementsBuilder; - this.parameters = parameters; - } - - public > void requireCollector( - CollectorFactory collectorFactory) { - extractionRequirementsBuilder.requireCollectorForAllMatchingDocs( collectorFactory ); - } +public interface AggregationRequestContext { + > void requireCollector( + CollectorFactory collectorFactory + ); - public NamedValues queryParameters() { - return parameters; - } + NamedValues queryParameters(); - public PredicateRequestContext toPredicateRequestContext(String absolutePath) { - return PredicateRequestContext.withSession( queryIndexScope, sessionContext, routingKeys, parameters ) - .withNestedPath( absolutePath ); - } + PredicateRequestContext toPredicateRequestContext(String absolutePath); - public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { - return new NestedDocsProvider( nestedDocumentPath, nestedFilter ); - } + NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContextImpl.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContextImpl.java new file mode 100644 index 00000000000..43cdc046a45 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContextImpl.java @@ -0,0 +1,63 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.search.aggregation.impl; + +import java.util.Set; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.extraction.impl.ExtractionRequirements; +import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; +import org.hibernate.search.backend.lucene.search.query.impl.LuceneSearchQueryIndexScope; +import org.hibernate.search.engine.backend.session.spi.BackendSessionContext; +import org.hibernate.search.engine.search.common.NamedValues; +import org.hibernate.search.engine.search.query.spi.QueryParameters; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.Query; + +public final class AggregationRequestContextImpl implements AggregationRequestContext { + + private final LuceneSearchQueryIndexScope queryIndexScope; + private final BackendSessionContext sessionContext; + private final Set routingKeys; + private final ExtractionRequirements.Builder extractionRequirementsBuilder; + private final QueryParameters parameters; + + public AggregationRequestContextImpl(LuceneSearchQueryIndexScope queryIndexScope, + BackendSessionContext sessionContext, + Set routingKeys, ExtractionRequirements.Builder extractionRequirementsBuilder, + QueryParameters parameters) { + this.queryIndexScope = queryIndexScope; + this.sessionContext = sessionContext; + this.routingKeys = routingKeys; + this.extractionRequirementsBuilder = extractionRequirementsBuilder; + this.parameters = parameters; + } + + @Override + public > void requireCollector( + CollectorFactory collectorFactory + ) { + extractionRequirementsBuilder.requireCollectorForAllMatchingDocs( collectorFactory ); + } + + @Override + public NamedValues queryParameters() { + return parameters; + } + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return PredicateRequestContext.withSession( queryIndexScope, sessionContext, routingKeys, parameters ) + .withNestedPath( absolutePath ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return new NestedDocsProvider( nestedDocumentPath, nestedFilter ); + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java index 9b84c33d833..119a51f1042 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java @@ -14,6 +14,7 @@ import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TopDocsDataCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TopDocsDataCollectorExecutionContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContextImpl; import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; import org.hibernate.search.backend.lucene.search.extraction.impl.LuceneCollectors; 
import org.hibernate.search.backend.lucene.search.projection.impl.LuceneSearchProjection; @@ -112,7 +113,7 @@ private List extractHits(ProjectionHitMapper projectionHitMapper, int } private Map, ?> extractAggregations() throws IOException { - AggregationExtractContext aggregationExtractContext = new AggregationExtractContext( + AggregationExtractContext aggregationExtractContext = new AggregationExtractContextImpl( requestContext.getQueryIndexScope(), requestContext.getSessionContext(), indexSearcher.getIndexReader(), diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java index 6bcf85e9ac6..9bf310b041d 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java @@ -19,6 +19,7 @@ import org.hibernate.search.backend.lucene.lowlevel.query.impl.Queries; import org.hibernate.search.backend.lucene.orchestration.impl.LuceneSyncWorkOrchestrator; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContextImpl; import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; import org.hibernate.search.backend.lucene.search.extraction.impl.ExtractionRequirements; import org.hibernate.search.backend.lucene.search.highlighter.impl.LuceneAbstractSearchHighlighter; @@ -266,7 +267,7 @@ public LuceneSearchQuery build() { if ( aggregations != null ) { aggregationExtractors = new LinkedHashMap<>(); AggregationRequestContext aggregationRequestContext = - new AggregationRequestContext( scope, sessionContext, routingKeys, extractionRequirementsBuilder, + new AggregationRequestContextImpl( scope, sessionContext, routingKeys, extractionRequirementsBuilder, parameters ); for ( Map.Entry, LuceneSearchAggregation> entry : aggregations.entrySet() ) { aggregationExtractors.put( entry.getKey(), entry.getValue().request( aggregationRequestContext ) ); diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java index 513a8fb379c..7be366efd53 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java @@ -4,12 +4,17 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; +import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Function; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollectorFactory; @@ -17,23 +22,35 @@ import 
org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; +import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; import org.hibernate.search.backend.lucene.types.codec.impl.AbstractLuceneNumericFieldCodec; +import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.spi.RangeAggregationBuilder; +import org.hibernate.search.engine.search.common.NamedValues; import org.hibernate.search.engine.search.common.ValueModel; import org.hibernate.search.util.common.data.Range; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.Query; + /** * @param The type of field values. * @param The type of encoded field values. * @param The type of keys in the returned map. It can be {@code F} + * @param The type of aggregated values. * or a different type if value converters are used. */ -public class LuceneNumericRangeAggregation - extends AbstractLuceneBucketAggregation, Long> { +public class LuceneNumericRangeAggregation + extends AbstractLuceneBucketAggregation, V> { + private final LuceneSearchAggregation aggregation; private final AbstractLuceneNumericFieldCodec codec; private final List> rangesInOrder; @@ -41,26 +58,32 @@ public class LuceneNumericRangeAggregation private CollectorKey collectorKey; - private LuceneNumericRangeAggregation(Builder builder) { + private LuceneNumericRangeAggregation(Builder builder) { super( builder ); + this.aggregation = builder.aggregation; this.codec = builder.codec; this.rangesInOrder = builder.rangesInOrder; this.encodedRangesInOrder = builder.encodedRangesInOrder; } @Override - public Extractor, Long>> request(AggregationRequestContext context) { + public Extractor, V>> request(AggregationRequestContext context) { NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( absoluteFieldPath, nestedDocsProvider ); + LocalAggregationRequestContext localAggregationContext = new LocalAggregationRequestContext( context ); + Extractor extractor = aggregation.request( localAggregationContext ); + var rangeFactory = RangeCollectorFactory.instance( source, - codec.getDomain().createEffectiveRanges( encodedRangesInOrder ) ); + codec.getDomain().createEffectiveRanges( encodedRangesInOrder ), + localAggregationContext.localCollectorFactories() ); + collectorKey = rangeFactory.getCollectorKey(); context.requireCollector( rangeFactory ); - return new LuceneNumericRangeAggregationExtractor(); + return new LuceneNumericRangeAggregationExtractor( extractor ); } public static class Factory @@ -78,20 +101,38 @@ public Factory(AbstractLuceneNumericFieldCodec codec) { } } - private class 
LuceneNumericRangeAggregationExtractor implements Extractor, Long>> { + private class LuceneNumericRangeAggregationExtractor implements Extractor, V>> { + private final Extractor extractor; + + public LuceneNumericRangeAggregationExtractor(Extractor extractor) { + this.extractor = extractor; + } @Override - public Map, Long> extract(AggregationExtractContext context) { + public Map, V> extract(AggregationExtractContext context) throws IOException { RangeCollector rangeCollector = context.getCollectorResults( collectorKey ); - long[] counts = rangeCollector.counts(); - Map, Long> result = new LinkedHashMap<>(); + LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); + + Map, V> result = new LinkedHashMap<>(); for ( int i = 0; i < rangesInOrder.size(); i++ ) { - result.put( rangesInOrder.get( i ), counts[i] ); + localContext.setResults( prepareResults( i, rangeCollector ) ); + result.put( rangesInOrder.get( i ), extractor.extract( localContext ) ); } return result; } + + private Map, Object> prepareResults(int index, RangeCollector rangeCollector) throws IOException { + Map, Object> result = new HashMap<>(); + Collector[][] collectors = rangeCollector.collectors(); + CollectorKey[] collectorKeys = rangeCollector.collectorKeys(); + CollectorManager[] managers = rangeCollector.managers(); + for ( int i = 0; i < collectorKeys.length; i++ ) { + result.put( collectorKeys[i], managers[i].reduce( List.of( collectors[i][index] ) ) ); + } + return result; + } } public static class TypeSelector implements RangeAggregationBuilder.TypeSelector { @@ -107,28 +148,34 @@ private TypeSelector(AbstractLuceneNumericFieldCodec codec, } @Override - public Builder type(Class expectedType, ValueModel valueModel) { - return new Builder<>( codec, - field.encodingContext().encoder( scope, field, codec, expectedType, valueModel ), - scope, field ); + public Builder type(Class expectedType, ValueModel valueModel) { + return new CountBuilder<>( + codec, field.encodingContext().encoder( scope, field, codec, expectedType, valueModel ), + scope, field + ); } } - public static class Builder - extends AbstractLuceneBucketAggregation.AbstractBuilder, Long> - implements RangeAggregationBuilder { + public static class Builder + extends AbstractLuceneBucketAggregation.AbstractBuilder, V> + implements RangeAggregationBuilder { private final AbstractLuceneNumericFieldCodec codec; private final Function convertAndEncode; - private final List> rangesInOrder = new ArrayList<>(); - private final List> encodedRangesInOrder = new ArrayList<>(); + private LuceneSearchAggregation aggregation; + private final List> rangesInOrder; + private final List> encodedRangesInOrder; - public Builder(AbstractLuceneNumericFieldCodec codec, Function convertAndEncode, - LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field) { + protected Builder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + AbstractLuceneNumericFieldCodec codec, Function convertAndEncode, + LuceneSearchAggregation aggregation, List> rangesInOrder, List> encodedRangesInOrder) { super( scope, field ); this.codec = codec; this.convertAndEncode = convertAndEncode; + this.aggregation = aggregation; + this.rangesInOrder = rangesInOrder; + this.encodedRangesInOrder = encodedRangesInOrder; } @Override @@ -138,8 +185,101 @@ public void range(Range range) { } @Override - public LuceneNumericRangeAggregation build() { + public RangeAggregationBuilder withValue(SearchAggregation aggregation) { + return new 
Builder<>( scope, field, codec, convertAndEncode, LuceneSearchAggregation.from( scope, aggregation ), + new ArrayList<>( rangesInOrder ), new ArrayList<>( encodedRangesInOrder ) ); + } + + @Override + public LuceneNumericRangeAggregation build() { return new LuceneNumericRangeAggregation<>( this ); } } + + public static class CountBuilder extends Builder { + + protected CountBuilder(AbstractLuceneNumericFieldCodec codec, Function convertAndEncode, + LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field) { + super( scope, field, codec, convertAndEncode, + LuceneSearchAggregation.from( scope, + LuceneCountNumericLongAggregation.factory( codec ).create( scope, field ).build() ), + new ArrayList<>(), new ArrayList<>() ); + } + } + + private static class LocalAggregationRequestContext implements AggregationRequestContext { + + private final AggregationRequestContext delegate; + private final Set> localCollectorFactories = new LinkedHashSet<>(); + + private LocalAggregationRequestContext(AggregationRequestContext delegate) { + this.delegate = delegate; + } + + @Override + public > void requireCollector( + CollectorFactory collectorFactory) { + localCollectorFactories.add( collectorFactory ); + } + + @Override + public NamedValues queryParameters() { + return delegate.queryParameters(); + } + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return delegate.toPredicateRequestContext( absolutePath ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return delegate.createNestedDocsProvider( nestedDocumentPath, nestedFilter ); + } + + public List> localCollectorFactories() { + return new ArrayList<>( localCollectorFactories ); + } + } + + private static class LocalAggregationExtractContext implements AggregationExtractContext { + + private final AggregationExtractContext delegate; + + private Map, Object> results; + + private LocalAggregationExtractContext(AggregationExtractContext delegate) { + this.delegate = delegate; + } + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return delegate.toPredicateRequestContext( absolutePath ); + } + + @Override + public IndexReader getIndexReader() { + return delegate.getIndexReader(); + } + + @Override + public FromDocumentValueConvertContext fromDocumentValueConvertContext() { + return delegate.fromDocumentValueConvertContext(); + } + + @SuppressWarnings("unchecked") + @Override + public T getCollectorResults(CollectorKey key) { + return (T) results.get( key ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return delegate.createNestedDocsProvider( nestedDocumentPath, nestedFilter ); + } + + public void setResults(Map, Object> results) { + this.results = results; + } + } } diff --git a/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java b/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java index aa761363377..3b8ea1a763f 100644 --- a/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java +++ b/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java @@ -250,15 +250,48 @@ void terms() { } ); withinSearchSession( searchSession -> { - AggregationKey> countsByGenreKey = AggregationKey.of( "countsByPrice" ); + 
AggregationKey> countsByPriceKey = AggregationKey.of( "countsByPrice" ); SearchResult result = searchSession.search( Book.class ) .where( f -> f.matchAll() ) - .aggregation( countsByGenreKey, f -> f.terms() + .aggregation( countsByPriceKey, f -> f.terms() .field( "price", Double.class ) .orderByCountAscending() ) .fetch( 20 ); - Map countsByGenre = result.aggregation( countsByGenreKey ); - System.err.println( countsByGenre ); + Map countsByPrice = result.aggregation( countsByPriceKey ); + assertThat( countsByPrice ) + .containsExactly( + entry( 7.99, 1L ), + entry( 15.99, 1L ), + entry( 19.99, 1L ), + entry( 24.99, 1L ) + ); + } ); + } + + @Test + void range_value() { + withinSearchSession( searchSession -> { + // tag::range-sum[] + AggregationKey, Double>> countsByPriceKey = AggregationKey.of( "countsByPrice" ); + SearchResult result = searchSession.search( Book.class ) + .where( f -> f.matchAll() ) + .aggregation( + countsByPriceKey, f -> f.range() + .field( "price", Double.class ) // <1> + .range( 0.0, 10.0 ) // <2> + .range( 10.0, 20.0 ) + .range( 20.0, null ) // <3> + .value( f.sum().field( "price", Double.class ).toAggregation() ) + ) + .fetch( 20 ); + Map, Double> countsByPrice = result.aggregation( countsByPriceKey ); + // end::range-sum[] + assertThat( countsByPrice ) + .containsExactly( + entry( Range.canonical( 0.0, 10.0 ), 7.99 ), + entry( Range.canonical( 10.0, 20.0 ), 35.98 ), + entry( Range.canonical( 20.0, null ), 24.99 ) + ); } ); } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationFieldStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationFieldStep.java index 6c4c05a3a63..e717e361954 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationFieldStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationFieldStep.java @@ -26,7 +26,7 @@ public interface RangeAggregationFieldStep The type of field values. * @return The next step. */ - default RangeAggregationRangeStep field(String fieldPath, Class type) { + default RangeAggregationRangeStep field(String fieldPath, Class type) { return field( fieldPath, type, ValueModel.MAPPING ); } @@ -42,7 +42,7 @@ public interface RangeAggregationFieldStep RangeAggregationRangeStep field(String fieldPath, Class type, + default RangeAggregationRangeStep field(String fieldPath, Class type, org.hibernate.search.engine.search.common.ValueConvert convert) { return field( fieldPath, type, org.hibernate.search.engine.search.common.ValueConvert.toValueModel( convert ) ); @@ -58,7 +58,7 @@ public interface RangeAggregationFieldStep RangeAggregationRangeStep field(String fieldPath, Class type, ValueModel valueModel); + RangeAggregationRangeStep field(String fieldPath, Class type, ValueModel valueModel); /** * Target the given field in the range aggregation. @@ -67,7 +67,8 @@ public interface RangeAggregationFieldStep The type of field values. * @return The next step. 
*/ - default RangeAggregationRangeStep field(RangeAggregationFieldReference fieldReference) { + default RangeAggregationRangeStep field( + RangeAggregationFieldReference fieldReference) { return field( fieldReference.absolutePath(), fieldReference.aggregationType(), fieldReference.valueModel() ); } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeMoreStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeMoreStep.java index a9ec19feda3..7d6d561293a 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeMoreStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeMoreStep.java @@ -20,14 +20,16 @@ * @param The type of factory used to create predicates in {@link #filter(Function)}. * @param The type of the next step. * @param The type of the targeted field. + * @param The type of the aggregated value. */ public interface RangeAggregationRangeMoreStep< SR, - S extends RangeAggregationRangeMoreStep, - N extends RangeAggregationOptionsStep, Long>>, + S extends RangeAggregationRangeMoreStep, + N extends RangeAggregationOptionsStep, A>>, PDF extends TypedSearchPredicateFactory, - F> - extends RangeAggregationOptionsStep, Long>>, - RangeAggregationRangeStep { - + F, + A> + extends RangeAggregationOptionsStep, A>>, + RangeAggregationRangeStep, + RangeAggregationRangeValueStep { } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeStep.java index 42e99fc17dd..c1a6c1f8e0e 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeStep.java @@ -17,12 +17,14 @@ * @param The type of the next step. * @param The type of factory used to create predicates in {@link AggregationFilterStep#filter(Function)}. * @param The type of the targeted field. + * @param The type of the aggregated value. */ public interface RangeAggregationRangeStep< SR, - N extends RangeAggregationRangeMoreStep, + N extends RangeAggregationRangeMoreStep, PDF extends TypedSearchPredicateFactory, - F> { + F, + A> { /** * Add a bucket for the range {@code [lowerBound, upperBound)} (lower bound included, upper bound excluded), diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeValueStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeValueStep.java new file mode 100644 index 00000000000..503c3a090db --- /dev/null +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeValueStep.java @@ -0,0 +1,32 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.engine.search.aggregation.dsl; + +import java.util.Map; +import java.util.function.Function; + +import org.hibernate.search.engine.search.aggregation.SearchAggregation; +import org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; +import org.hibernate.search.util.common.data.Range; + +/** + * The step in a "range" aggregation definition where the aggregation value for the range can be set. + * + * @param Scope root type. 
+ * @param The "self" type (the actual exposed type of this step). + * @param The type of factory used to create predicates in {@link #filter(Function)}. + * @param The type of the targeted field. + */ +public interface RangeAggregationRangeValueStep< + SR, + PDF extends TypedSearchPredicateFactory, + F> { + + RangeAggregationOptionsStep, T>> value(SearchAggregation aggregation); + + default RangeAggregationOptionsStep, T>> value(AggregationFinalStep aggregation) { + return value( aggregation.toAggregation() ); + } +} diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/RangeAggregationFieldStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/RangeAggregationFieldStepImpl.java index dcfb149186d..f696fa7337d 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/RangeAggregationFieldStepImpl.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/RangeAggregationFieldStepImpl.java @@ -22,11 +22,11 @@ public RangeAggregationFieldStepImpl(SearchAggregationDslContext RangeAggregationRangeStep field(String fieldPath, Class type, + public RangeAggregationRangeStep field(String fieldPath, Class type, ValueModel valueModel) { Contracts.assertNotNull( fieldPath, "fieldPath" ); Contracts.assertNotNull( type, "type" ); - RangeAggregationBuilder builder = dslContext.scope() + RangeAggregationBuilder builder = dslContext.scope() .fieldQueryElement( fieldPath, AggregationTypeKeys.RANGE ).type( type, valueModel ); return new RangeAggregationRangeStepImpl<>( builder, dslContext ); } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/RangeAggregationRangeStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/RangeAggregationRangeStepImpl.java index ff5e3c575f3..cd18a0f8580 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/RangeAggregationRangeStepImpl.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/RangeAggregationRangeStepImpl.java @@ -19,31 +19,32 @@ import org.hibernate.search.util.common.data.Range; import org.hibernate.search.util.common.impl.Contracts; -class RangeAggregationRangeStepImpl, F> - implements RangeAggregationRangeStep, PDF, F>, +class RangeAggregationRangeStepImpl, F, A> + implements RangeAggregationRangeStep, PDF, F, A>, RangeAggregationRangeMoreStep, - RangeAggregationRangeStepImpl, + RangeAggregationRangeStepImpl, + RangeAggregationRangeStepImpl, PDF, - F> { - private final RangeAggregationBuilder builder; + F, + A> { + private final RangeAggregationBuilder builder; private final SearchAggregationDslContext dslContext; - RangeAggregationRangeStepImpl(RangeAggregationBuilder builder, + RangeAggregationRangeStepImpl(RangeAggregationBuilder builder, SearchAggregationDslContext dslContext) { this.builder = builder; this.dslContext = dslContext; } @Override - public RangeAggregationRangeStepImpl range(Range range) { + public RangeAggregationRangeStepImpl range(Range range) { Contracts.assertNotNull( range, "range" ); builder.range( range ); return this; } @Override - public RangeAggregationRangeStepImpl ranges(Collection> ranges) { + public RangeAggregationRangeStepImpl ranges(Collection> ranges) { Contracts.assertNotNull( ranges, "ranges" ); for ( Range range : ranges ) { range( range ); @@ -52,7 +53,7 @@ public RangeAggregationRangeStepImpl ranges(Collection filter( + public 
RangeAggregationRangeStepImpl filter( Function clauseContributor) { SearchPredicate predicate = clauseContributor.apply( dslContext.predicateFactory() ).toPredicate(); @@ -60,13 +61,18 @@ public RangeAggregationRangeStepImpl filter( } @Override - public RangeAggregationRangeStepImpl filter(SearchPredicate searchPredicate) { + public RangeAggregationRangeStepImpl filter(SearchPredicate searchPredicate) { builder.filter( searchPredicate ); return this; } @Override - public SearchAggregation, Long>> toAggregation() { + public SearchAggregation, A>> toAggregation() { return builder.build(); } + + @Override + public RangeAggregationRangeStepImpl value(SearchAggregation aggregation) { + return new RangeAggregationRangeStepImpl<>( builder.withValue( aggregation ), dslContext ); + } } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/RangeAggregationBuilder.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/RangeAggregationBuilder.java index 18cb723153d..15c3258e2c3 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/RangeAggregationBuilder.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/RangeAggregationBuilder.java @@ -6,17 +6,20 @@ import java.util.Map; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.common.ValueModel; import org.hibernate.search.engine.search.predicate.SearchPredicate; import org.hibernate.search.util.common.data.Range; -public interface RangeAggregationBuilder extends SearchAggregationBuilder, Long>> { +public interface RangeAggregationBuilder extends SearchAggregationBuilder, A>> { interface TypeSelector { - RangeAggregationBuilder type(Class expectedType, ValueModel valueModel); + RangeAggregationBuilder type(Class expectedType, ValueModel valueModel); } void filter(SearchPredicate filter); void range(Range range); + + RangeAggregationBuilder withValue(SearchAggregation aggregation); } diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/SingleFieldAggregationBaseIT.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/SingleFieldAggregationBaseIT.java index 673095287e0..3a0381984b3 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/SingleFieldAggregationBaseIT.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/SingleFieldAggregationBaseIT.java @@ -32,6 +32,7 @@ import org.hibernate.search.integrationtest.backend.tck.testsupport.operations.expectations.AggregationScenario; import org.hibernate.search.integrationtest.backend.tck.testsupport.operations.expectations.SupportedSingleFieldAggregationExpectations; import org.hibernate.search.integrationtest.backend.tck.testsupport.types.FieldTypeDescriptor; +import org.hibernate.search.integrationtest.backend.tck.testsupport.types.LocalDateFieldTypeDescriptor; import org.hibernate.search.integrationtest.backend.tck.testsupport.types.StandardFieldTypeDescriptor; import org.hibernate.search.integrationtest.backend.tck.testsupport.util.TestedFieldStructure; import org.hibernate.search.integrationtest.backend.tck.testsupport.util.extension.SearchSetupHelper; @@ -66,8 +67,12 @@ class SingleFieldAggregationBaseIT { Optional> expectations = 
aggregationDescriptor.getSingleFieldAggregationExpectations( fieldTypeDescriptor ).getSupported(); if ( expectations.isPresent() ) { + if ( !LocalDateFieldTypeDescriptor.INSTANCE.equals( fieldTypeDescriptor ) ) { + continue; + } + + supportedFieldTypes.add( fieldTypeDescriptor ); for ( TestedFieldStructure fieldStructure : TestedFieldStructure.all() ) { - supportedFieldTypes.add( fieldTypeDescriptor ); DataSet dataSet = new DataSet<>( expectations.get(), fieldStructure ); dataSets.add( dataSet ); parameters.add( Arguments.of( expectations.get(), fieldStructure, dataSet ) ); diff --git a/util/internal/integrationtest/common/src/main/java/org/hibernate/search/util/impl/integrationtest/common/stub/backend/search/aggregation/impl/StubSearchAggregation.java b/util/internal/integrationtest/common/src/main/java/org/hibernate/search/util/impl/integrationtest/common/stub/backend/search/aggregation/impl/StubSearchAggregation.java index 61df7b21096..68e34db27af 100644 --- a/util/internal/integrationtest/common/src/main/java/org/hibernate/search/util/impl/integrationtest/common/stub/backend/search/aggregation/impl/StubSearchAggregation.java +++ b/util/internal/integrationtest/common/src/main/java/org/hibernate/search/util/impl/integrationtest/common/stub/backend/search/aggregation/impl/StubSearchAggregation.java @@ -54,7 +54,7 @@ public TermsBuilder type(Class expectedType, ValueModel valueModel) { public static class RangeTypeSelector implements RangeAggregationBuilder.TypeSelector { @Override - public RangeBuilder type(Class expectedType, ValueModel valueModel) { + public RangeBuilder type(Class expectedType, ValueModel valueModel) { return new RangeBuilder<>(); } } @@ -102,7 +102,7 @@ public SearchAggregation> build() { } } - static class RangeBuilder implements RangeAggregationBuilder { + static class RangeBuilder implements RangeAggregationBuilder { @Override public void range(Range range) { @@ -115,7 +115,12 @@ public void filter(SearchPredicate filter) { } @Override - public SearchAggregation, Long>> build() { + public RangeAggregationBuilder withValue(SearchAggregation aggregation) { + return new RangeBuilder<>(); + } + + @Override + public SearchAggregation, A>> build() { return new StubSearchAggregation<>(); } } diff --git a/v5migrationhelper/engine/src/main/java/org/hibernate/search/query/dsl/impl/RangeFacetRequest.java b/v5migrationhelper/engine/src/main/java/org/hibernate/search/query/dsl/impl/RangeFacetRequest.java index c1594b1d59c..61e22cb2d86 100644 --- a/v5migrationhelper/engine/src/main/java/org/hibernate/search/query/dsl/impl/RangeFacetRequest.java +++ b/v5migrationhelper/engine/src/main/java/org/hibernate/search/query/dsl/impl/RangeFacetRequest.java @@ -33,9 +33,9 @@ public class RangeFacetRequest extends FacetingRequestImpl, Long @Override public AggregationFinalStep, Long>> requestAggregation(TypedSearchAggregationFactory factory) { - RangeAggregationRangeStep rangeStep = factory + RangeAggregationRangeStep rangeStep = factory .range().field( getFieldName(), getFacetValueType() ); - RangeAggregationRangeMoreStep rangeMoreStep = null; + RangeAggregationRangeMoreStep rangeMoreStep = null; for ( FacetRange facetRange : facetRangeList ) { rangeMoreStep = rangeStep.range( facetRange.range() ); rangeStep = rangeMoreStep; From ed874dc4f1d312bc3831c5db3624c57ddc65a955 Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Tue, 1 Jul 2025 18:36:08 +0200 Subject: [PATCH 07/23] HSEARCH-3661 Make it more clear that count aggregations are targeting values and not documents --- ...Distinct.java => 
CountDistinctValues.java} | 8 +-- ... CountDistinctValuesCollectorFactory.java} | 16 ++--- .../impl/{Count.java => CountValues.java} | 6 +- ....java => CountValuesCollectorFactory.java} | 14 ++--- .../LuceneAvgCompensatedSumAggregation.java | 8 +-- .../LuceneAvgNumericFieldAggregation.java | 12 ++-- ...neCountDistinctNumericLongAggregation.java | 4 +- .../LuceneCountNumericLongAggregation.java | 4 +- .../src/main/asciidoc/migration/index.adoc | 3 + .../reference/_search-dsl-aggregation.adoc | 10 ++-- .../search/aggregation/AggregationDslIT.java | 4 +- ...ntDistinctValuesAggregationFieldStep.java} | 12 ++-- ...DistinctValuesAggregationOptionsStep.java} | 4 +- ...a => CountValuesAggregationFieldStep.java} | 14 ++--- ...=> CountValuesAggregationOptionsStep.java} | 4 +- .../dsl/ExtendedSearchAggregationFactory.java | 18 +++++- .../dsl/MaxAggregationFieldStep.java | 2 +- .../dsl/MinAggregationFieldStep.java | 2 +- .../dsl/SearchAggregationFactory.java | 55 ++++++++++++++++-- .../dsl/SumAggregationFieldStep.java | 2 +- .../dsl/TypedSearchAggregationFactory.java | 58 +++++++++++++++++-- ...stinctValuesAggregationFieldStepImpl.java} | 14 ++--- ...inctValuesAggregationOptionsStepImpl.java} | 12 ++-- ... CountValuesAggregationFieldStepImpl.java} | 14 ++--- ...ountValuesAggregationOptionsStepImpl.java} | 12 ++-- .../spi/AbstractSearchAggregationFactory.java | 16 ++--- .../aggregation/AnyAggregationReference.java | 4 +- ...tinctValuesAggregationFieldReference.java} | 2 +- ...CountValuesAggregationFieldReference.java} | 2 +- .../writer/impl/TraitReferenceMapping.java | 8 +-- 30 files changed, 228 insertions(+), 116 deletions(-) rename backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/{CountDistinct.java => CountDistinctValues.java} (78%) rename backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/{CountDistinctCollectorFactory.java => CountDistinctValuesCollectorFactory.java} (59%) rename backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/{Count.java => CountValues.java} (70%) rename backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/{CountCollectorFactory.java => CountValuesCollectorFactory.java} (56%) rename engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/{CountDistinctAggregationFieldStep.java => CountDistinctValuesAggregationFieldStep.java} (69%) rename engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/{CountDistinctAggregationOptionsStep.java => CountDistinctValuesAggregationOptionsStep.java} (86%) rename engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/{CountAggregationFieldStep.java => CountValuesAggregationFieldStep.java} (64%) rename engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/{CountAggregationOptionsStep.java => CountValuesAggregationOptionsStep.java} (87%) rename engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/{CountDistinctAggregationFieldStepImpl.java => CountDistinctValuesAggregationFieldStepImpl.java} (63%) rename engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/{CountDistinctAggregationOptionsStepImpl.java => CountDistinctValuesAggregationOptionsStepImpl.java} (72%) rename engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/{CountAggregationFieldStepImpl.java => CountValuesAggregationFieldStepImpl.java} (56%) rename 
engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/{CountAggregationOptionsStepImpl.java => CountValuesAggregationOptionsStepImpl.java} (71%) rename engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/{CountDistinctAggregationFieldReference.java => CountDistinctValuesAggregationFieldReference.java} (69%) rename engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/{CountAggregationFieldReference.java => CountValuesAggregationFieldReference.java} (70%) diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinct.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValues.java similarity index 78% rename from backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinct.java rename to backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValues.java index ff903d7c4b4..daa0852e4ae 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinct.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValues.java @@ -13,7 +13,7 @@ * The algorithm to collect distinct elements is inspired by {@code org.apache.lucene.facet.LongValueFacetCounts} * of Apache Lucene project. */ -public class CountDistinct implements AggregationFunction { +public class CountDistinctValues implements AggregationFunction { private final BitSet counts = new BitSet( 1024 ); private final LongHashSet hashCounts = new LongHashSet(); @@ -29,8 +29,8 @@ public void apply(long value) { } @Override - public void merge(AggregationFunction sibling) { - CountDistinct other = sibling.implementation(); + public void merge(AggregationFunction sibling) { + CountDistinctValues other = sibling.implementation(); counts.or( other.counts ); hashCounts.addAll( other.hashCounts ); } @@ -41,7 +41,7 @@ public Long result() { } @Override - public CountDistinct implementation() { + public CountDistinctValues implementation() { return this; } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValuesCollectorFactory.java similarity index 59% rename from backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctCollectorFactory.java rename to backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValuesCollectorFactory.java index 93736311be9..e1300ef9fcf 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctCollectorFactory.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValuesCollectorFactory.java @@ -9,26 +9,26 @@ import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; -public class CountDistinctCollectorFactory +public class CountDistinctValuesCollectorFactory implements - CollectorFactory, + 
CollectorFactory, Long, - AggregationFunctionCollectorManager> { + AggregationFunctionCollectorManager> { private final JoiningLongMultiValuesSource source; - private final CollectorKey, Long> key = CollectorKey.create(); + private final CollectorKey, Long> key = CollectorKey.create(); - public CountDistinctCollectorFactory(JoiningLongMultiValuesSource source) { + public CountDistinctValuesCollectorFactory(JoiningLongMultiValuesSource source) { this.source = source; } @Override - public AggregationFunctionCollectorManager createCollectorManager(CollectorExecutionContext context) { - return new AggregationFunctionCollectorManager<>( source, CountDistinct::new ); + public AggregationFunctionCollectorManager createCollectorManager(CollectorExecutionContext context) { + return new AggregationFunctionCollectorManager<>( source, CountDistinctValues::new ); } @Override - public CollectorKey, Long> getCollectorKey() { + public CollectorKey, Long> getCollectorKey() { return key; } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/Count.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValues.java similarity index 70% rename from backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/Count.java rename to backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValues.java index 5afcf3c6a1f..fb4b7dedce0 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/Count.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValues.java @@ -4,7 +4,7 @@ */ package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; -public class Count implements AggregationFunction { +public class CountValues implements AggregationFunction { private long count = 0L; @@ -14,7 +14,7 @@ public void apply(long value) { } @Override - public void merge(AggregationFunction sibling) { + public void merge(AggregationFunction sibling) { count += sibling.implementation().count; } @@ -24,7 +24,7 @@ public Long result() { } @Override - public Count implementation() { + public CountValues implementation() { return this; } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValuesCollectorFactory.java similarity index 56% rename from backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountCollectorFactory.java rename to backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValuesCollectorFactory.java index 0180c571b58..337615e81d2 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountCollectorFactory.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValuesCollectorFactory.java @@ -9,24 +9,24 @@ import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; -public class CountCollectorFactory +public class CountValuesCollectorFactory implements 
- CollectorFactory, Long, AggregationFunctionCollectorManager> { + CollectorFactory, Long, AggregationFunctionCollectorManager> { private final JoiningLongMultiValuesSource source; - private final CollectorKey, Long> key = CollectorKey.create(); + private final CollectorKey, Long> key = CollectorKey.create(); - public CountCollectorFactory(JoiningLongMultiValuesSource source) { + public CountValuesCollectorFactory(JoiningLongMultiValuesSource source) { this.source = source; } @Override - public AggregationFunctionCollectorManager createCollectorManager(CollectorExecutionContext context) { - return new AggregationFunctionCollectorManager<>( source, Count::new ); + public AggregationFunctionCollectorManager createCollectorManager(CollectorExecutionContext context) { + return new AggregationFunctionCollectorManager<>( source, CountValues::new ); } @Override - public CollectorKey, Long> getCollectorKey() { + public CollectorKey, Long> getCollectorKey() { return key; } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java index 22aa85ff9e8..964419b735e 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java @@ -5,7 +5,7 @@ package org.hibernate.search.backend.lucene.types.aggregation.impl; import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CompensatedSumCollectorFactory; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountValuesCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; @@ -34,9 +34,9 @@ void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestConte compensatedSumCollectorKey = sumCollectorFactory.getCollectorKey(); context.requireCollector( sumCollectorFactory ); - CountCollectorFactory countCollectorFactory = new CountCollectorFactory( source ); - collectorKey = countCollectorFactory.getCollectorKey(); - context.requireCollector( countCollectorFactory ); + CountValuesCollectorFactory countValuesCollectorFactory = new CountValuesCollectorFactory( source ); + collectorKey = countValuesCollectorFactory.getCollectorKey(); + context.requireCollector( countValuesCollectorFactory ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java index 589e89ea9a8..597f8d57f9f 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java @@ -5,8 +5,8 @@ package org.hibernate.search.backend.lucene.types.aggregation.impl; 
import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.AggregationFunctionCollector; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.Count; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountValues; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountValuesCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.SumCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; @@ -28,7 +28,7 @@ public static Factory factory(AbstractLuceneNumericFieldCodec codec } // Supplementary collector used by the avg function - protected CollectorKey, Long> countCollectorKey; + protected CollectorKey, Long> countCollectorKey; LuceneAvgNumericFieldAggregation(Builder builder) { super( builder ); @@ -37,11 +37,11 @@ public static Factory factory(AbstractLuceneNumericFieldCodec codec @Override void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestContext context) { SumCollectorFactory sumCollectorFactory = new SumCollectorFactory( source ); - CountCollectorFactory countCollectorFactory = new CountCollectorFactory( source ); + CountValuesCollectorFactory countValuesCollectorFactory = new CountValuesCollectorFactory( source ); collectorKey = sumCollectorFactory.getCollectorKey(); - countCollectorKey = countCollectorFactory.getCollectorKey(); + countCollectorKey = countValuesCollectorFactory.getCollectorKey(); context.requireCollector( sumCollectorFactory ); - context.requireCollector( countCollectorFactory ); + context.requireCollector( countValuesCollectorFactory ); } private static class LuceneNumericMetricFieldAggregationExtraction implements Extractor { diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDistinctNumericLongAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDistinctNumericLongAggregation.java index 7e055d18150..f83e3d09acc 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDistinctNumericLongAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDistinctNumericLongAggregation.java @@ -4,7 +4,7 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountDistinctCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountDistinctValuesCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; @@ -25,7 +25,7 @@ public static Factory factory(AbstractLuceneNumericFieldCodec codec @Override void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestContext context) { - CountDistinctCollectorFactory collectorFactory = new CountDistinctCollectorFactory( source ); + CountDistinctValuesCollectorFactory collectorFactory = 
new CountDistinctValuesCollectorFactory( source ); collectorKey = collectorFactory.getCollectorKey(); context.requireCollector( collectorFactory ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountNumericLongAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountNumericLongAggregation.java index 7369a8fd21c..b9128f0eae4 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountNumericLongAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountNumericLongAggregation.java @@ -4,7 +4,7 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountValuesCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; @@ -25,7 +25,7 @@ public static Factory factory(AbstractLuceneNumericFieldCodec codec @Override void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestContext context) { - CountCollectorFactory collectorFactory = new CountCollectorFactory( source ); + CountValuesCollectorFactory collectorFactory = new CountValuesCollectorFactory( source ); collectorKey = collectorFactory.getCollectorKey(); context.requireCollector( collectorFactory ); } diff --git a/documentation/src/main/asciidoc/migration/index.adoc b/documentation/src/main/asciidoc/migration/index.adoc index 52b9ec7974b..26a50389cbf 100644 --- a/documentation/src/main/asciidoc/migration/index.adoc +++ b/documentation/src/main/asciidoc/migration/index.adoc @@ -84,6 +84,9 @@ The https://hibernate.org/community/compatibility-policy/#code-categorization[AP in Hibernate Search {hibernateSearchVersion} is, in general, backward-compatible with Hibernate Search {hibernateSearchPreviousStableVersionShort}. +* Metic aggregations `count()` and `countDistinct()` are deprecated in favor of more +descriptive `countValues()` and `countDistinctValues()`, as these aggregations are counting the field values rather than documents. + [[spi]] == SPI diff --git a/documentation/src/main/asciidoc/public/reference/_search-dsl-aggregation.adoc b/documentation/src/main/asciidoc/public/reference/_search-dsl-aggregation.adoc index 2833ced27ba..e6156785e12 100644 --- a/documentation/src/main/asciidoc/public/reference/_search-dsl-aggregation.adoc +++ b/documentation/src/main/asciidoc/public/reference/_search-dsl-aggregation.adoc @@ -390,9 +390,9 @@ include::{sourcedir}/org/hibernate/search/documentation/search/aggregation/Aggre <1> Define the target field path to which you want to apply the aggregation function and the expected returned type. ==== -=== Count metric aggregation +=== Count values metric aggregation -The `count` aggregation counts the number of documents in which the field is not empty. +The `count values` aggregation counts the number of non-empty field values. 
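A minimal sketch of the renamed entry point, in the spirit of the `AggregationDslIT` changes in this series; `Book`, its `price` field, `searchSession` and the `matchAll()` predicate are the documentation's existing example fixtures, not new API:

[source, JAVA]
----
AggregationKey<Long> countPricesKey = AggregationKey.of( "countPrices" );
SearchResult<Book> result = searchSession.search( Book.class )
		.where( f -> f.matchAll() )
		// countValues() counts non-empty field values, not matched documents
		.aggregation( countPricesKey, f -> f.countValues().field( "price" ) )
		.fetch( 20 );
Long countPrices = result.aggregation( countPricesKey ); // one count per non-empty "price" value
----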
.Count the number of the science fiction books with prices ==== @@ -403,9 +403,9 @@ include::{sourcedir}/org/hibernate/search/documentation/search/aggregation/Aggre <1> Define the target field path to which you want to apply the aggregation function. For this function a `Long.class` value is always returned. ==== -=== Count distinct metric aggregation +=== Count distinct values metric aggregation -The `count distinct` aggregation counts the number of unique field values. +The `count distinct values` aggregation counts the number of unique field values. .Count anytime the price field has a different value among all the science fiction books ==== @@ -418,7 +418,7 @@ include::{sourcedir}/org/hibernate/search/documentation/search/aggregation/Aggre === Avg metric aggregation -The `avg` aggregation calculates the average value of a given numeric or temporal field among the selected documents. +The `avg` aggregation calculates the average value of a given numeric or temporal field among the matched documents. .Compute the average price of all science fiction books ==== diff --git a/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java b/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java index 3b8ea1a763f..e32a076b5cf 100644 --- a/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java +++ b/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java @@ -534,7 +534,7 @@ void count() { AggregationKey countPricesKey = AggregationKey.of( "countPrices" ); SearchResult result = searchSession.search( Book.class ) .where( f -> f.match().field( "genre" ).matching( Genre.SCIENCE_FICTION ) ) - .aggregation( countPricesKey, f -> f.count().field( "price" ) ) // <1> + .aggregation( countPricesKey, f -> f.countValues().field( "price" ) ) // <1> .fetch( 20 ); Long countPrices = result.aggregation( countPricesKey ); assertThat( countPrices ).isEqualTo( 3L ); @@ -549,7 +549,7 @@ void countDistinct() { AggregationKey countDistinctPricesKey = AggregationKey.of( "countDistinctPrices" ); SearchResult result = searchSession.search( Book.class ) .where( f -> f.match().field( "genre" ).matching( Genre.SCIENCE_FICTION ) ) - .aggregation( countDistinctPricesKey, f -> f.countDistinct().field( "price" ) ) // <1> + .aggregation( countDistinctPricesKey, f -> f.countDistinctValues().field( "price" ) ) // <1> .fetch( 20 ); Long countDistinctPrices = result.aggregation( countDistinctPricesKey ); assertThat( countDistinctPrices ).isEqualTo( 3L ); diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctAggregationFieldStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctValuesAggregationFieldStep.java similarity index 69% rename from engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctAggregationFieldStep.java rename to engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctValuesAggregationFieldStep.java index ab189dde2b3..c7a1a3be38c 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctAggregationFieldStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctValuesAggregationFieldStep.java @@ -7,7 +7,7 @@ import java.util.function.Function; import 
org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; -import org.hibernate.search.engine.search.reference.aggregation.CountAggregationFieldReference; +import org.hibernate.search.engine.search.reference.aggregation.CountValuesAggregationFieldReference; import org.hibernate.search.util.common.annotation.Incubating; /** @@ -17,24 +17,24 @@ * @param The type of factory used to create predicates in {@link AggregationFilterStep#filter(Function)}. */ @Incubating -public interface CountDistinctAggregationFieldStep> { +public interface CountDistinctValuesAggregationFieldStep> { /** - * Target the given field in the count distinct aggregation. + * Target the given field in the count distinct values aggregation. * * @param fieldPath The path to the index field to aggregate. * @return The next step. */ - CountDistinctAggregationOptionsStep field(String fieldPath); + CountDistinctValuesAggregationOptionsStep field(String fieldPath); /** - * Target the given field in the avg aggregation. + * Target the given field in the count distinct values aggregation. * * @param fieldReference The field reference representing a definition of the index field to aggregate. * @return The next step. */ @Incubating - default CountDistinctAggregationOptionsStep field(CountAggregationFieldReference fieldReference) { + default CountDistinctValuesAggregationOptionsStep field(CountValuesAggregationFieldReference fieldReference) { return field( fieldReference.absolutePath() ); } } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctAggregationOptionsStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctValuesAggregationOptionsStep.java similarity index 86% rename from engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctAggregationOptionsStep.java rename to engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctValuesAggregationOptionsStep.java index 2aca785be30..10eeda388ca 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctAggregationOptionsStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctValuesAggregationOptionsStep.java @@ -17,9 +17,9 @@ * @param The type of factory used to create predicates in {@link #filter(Function)}. 
*/ @Incubating -public interface CountDistinctAggregationOptionsStep< +public interface CountDistinctValuesAggregationOptionsStep< SR, - S extends CountDistinctAggregationOptionsStep, + S extends CountDistinctValuesAggregationOptionsStep, PDF extends TypedSearchPredicateFactory> extends AggregationFinalStep, AggregationFilterStep { diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountAggregationFieldStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountValuesAggregationFieldStep.java similarity index 64% rename from engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountAggregationFieldStep.java rename to engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountValuesAggregationFieldStep.java index a2ccf66f9f9..0f6a699173e 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountAggregationFieldStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountValuesAggregationFieldStep.java @@ -7,34 +7,34 @@ import java.util.function.Function; import org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; -import org.hibernate.search.engine.search.reference.aggregation.CountAggregationFieldReference; +import org.hibernate.search.engine.search.reference.aggregation.CountValuesAggregationFieldReference; import org.hibernate.search.util.common.annotation.Incubating; /** - * The initial step in a "count" aggregation definition, where the target field can be set. + * The initial step in a "count values" aggregation definition, where the target field can be set. * * @param Scope root type. * @param The type of factory used to create predicates in {@link AggregationFilterStep#filter(Function)}. */ @Incubating -public interface CountAggregationFieldStep> { +public interface CountValuesAggregationFieldStep> { /** - * Target the given field in the count aggregation. + * Target the given field in the count values aggregation. * * @param fieldPath The path to the index field to aggregate. * @return The next step. */ - CountAggregationOptionsStep field(String fieldPath); + CountValuesAggregationOptionsStep field(String fieldPath); /** - * Target the given field in the avg aggregation. + * Target the given field in the count values aggregation. * * @param fieldReference The field reference representing a definition of the index field to aggregate. * @return The next step. 
*/ @Incubating - default CountAggregationOptionsStep field(CountAggregationFieldReference fieldReference) { + default CountValuesAggregationOptionsStep field(CountValuesAggregationFieldReference fieldReference) { return field( fieldReference.absolutePath() ); } } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountAggregationOptionsStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountValuesAggregationOptionsStep.java similarity index 87% rename from engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountAggregationOptionsStep.java rename to engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountValuesAggregationOptionsStep.java index 1e960e720f5..f11ce3cd017 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountAggregationOptionsStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountValuesAggregationOptionsStep.java @@ -17,9 +17,9 @@ * @param The type of factory used to create predicates in {@link #filter(Function)}. */ @Incubating -public interface CountAggregationOptionsStep< +public interface CountValuesAggregationOptionsStep< SR, - S extends CountAggregationOptionsStep, + S extends CountValuesAggregationOptionsStep, PDF extends TypedSearchPredicateFactory> extends AggregationFinalStep, AggregationFilterStep { diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/ExtendedSearchAggregationFactory.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/ExtendedSearchAggregationFactory.java index 35d76c953f1..6e0ac7c4757 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/ExtendedSearchAggregationFactory.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/ExtendedSearchAggregationFactory.java @@ -43,11 +43,25 @@ public interface ExtendedSearchAggregationFactory< @Override MaxAggregationFieldStep max(); + @SuppressWarnings("removal") + @Deprecated(since = "8.1", forRemoval = true) @Override - CountAggregationFieldStep count(); + default CountValuesAggregationFieldStep count() { + return countValues(); + } @Override - CountDistinctAggregationFieldStep countDistinct(); + CountValuesAggregationFieldStep countValues(); + + @SuppressWarnings("removal") + @Deprecated(since = "8.1", forRemoval = true) + @Override + default CountDistinctValuesAggregationFieldStep countDistinct() { + return countDistinctValues(); + } + + @Override + CountDistinctValuesAggregationFieldStep countDistinctValues(); @Override AvgAggregationFieldStep avg(); diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/MaxAggregationFieldStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/MaxAggregationFieldStep.java index 3d2c8bf4977..c26d6c8e2d7 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/MaxAggregationFieldStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/MaxAggregationFieldStep.java @@ -45,7 +45,7 @@ public interface MaxAggregationFieldStep MaxAggregationOptionsStep field(String fieldPath, Class type, ValueModel valueModel); /** - * Target the given field in the avg aggregation. + * Target the given field in the max aggregation. * * @param fieldReference The field reference representing a definition of the index field to aggregate. * @param The type of field values. 
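For callers migrating off the deprecated factory methods, a short sketch under the same assumptions (the `f` aggregation factory lambda parameter and the `price` field come from the documentation examples); since the deprecated defaults simply delegate, the migration is a rename:

[source, JAVA]
----
// Deprecated since 8.1: kept as default methods that forward to the new names
f.count().field( "price" );           // delegates to countValues()
f.countDistinct().field( "price" );   // delegates to countDistinctValues()

// Preferred: the new names make explicit that field values, not documents, are counted
f.countValues().field( "price" );
f.countDistinctValues().field( "price" );
----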
diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/MinAggregationFieldStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/MinAggregationFieldStep.java index 22e9a6991fa..4e5447b07fe 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/MinAggregationFieldStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/MinAggregationFieldStep.java @@ -45,7 +45,7 @@ public interface MinAggregationFieldStep MinAggregationOptionsStep field(String fieldPath, Class type, ValueModel valueModel); /** - * Target the given field in the avg aggregation. + * Target the given field in the min aggregation. * * @param fieldReference The field reference representing a definition of the index field to aggregate. * @param The type of field values. diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SearchAggregationFactory.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SearchAggregationFactory.java index d2cac72618b..eb6d7363c2e 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SearchAggregationFactory.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SearchAggregationFactory.java @@ -72,6 +72,8 @@ public interface SearchAggregationFactory { /** * Perform the sum metric aggregation. + *
<p>
+ * Sums up the field values. * * @return The next step. */ @@ -80,6 +82,8 @@ public interface SearchAggregationFactory { /** * Perform the min metric aggregation. + *
<p>
+ * Provides the minimum value among the field values. * * @return The next step. */ @@ -88,6 +92,8 @@ public interface SearchAggregationFactory { /** * Perform the max metric aggregation. + *
<p>
+ * Provides the maximum value among the field values. * * @return The next step. */ @@ -95,23 +101,62 @@ public interface SearchAggregationFactory { MaxAggregationFieldStep max(); /** - * Perform the count metric aggregation. + * Perform the count values metric aggregation. + *
<p>
+ * Counts the number of non-empty field values. + *
<p>
+ * As this aggregation counts the field values for a multi-valued fields the resulting count + * may be greater as the number of the matched documents. * * @return The next step. + * @deprecated Use {@link #countValues()} instead. */ + @Deprecated(since = "8.1", forRemoval = true) @Incubating - CountAggregationFieldStep count(); + default CountValuesAggregationFieldStep count() { + return countValues(); + } /** - * Perform the count distinct metric aggregation. + * Perform the count values metric aggregation. + *
<p>
+ * Counts the number of non-empty field values. + *
<p>
+ * As this aggregation counts the field values for a multi-valued fields the resulting count + * may be greater as the number of the matched documents. + * + * @return The next step. + */ + CountValuesAggregationFieldStep countValues(); + + /** + * Perform the count distinct values metric aggregation. + *
<p>
+ * Counts the number of unique field values. + * + * @return The next step. + * @deprecated Use {@link #countDistinctValues()} instead. + */ + @Deprecated(since = "8.1", forRemoval = true) + @Incubating + default CountDistinctValuesAggregationFieldStep countDistinct() { + return countDistinctValues(); + } + + /** + * Perform the count distinct values metric aggregation. + *

+ * Counts the number of unique field values. * * @return The next step. */ @Incubating - CountDistinctAggregationFieldStep countDistinct(); + CountDistinctValuesAggregationFieldStep countDistinctValues(); /** * Perform the avg metric aggregation. + *

+ * Calculates the average value of a given numeric or temporal field among the matched documents. * * @return the next step. */ diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SumAggregationFieldStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SumAggregationFieldStep.java index 9d7e1aba7c5..d4570a76200 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SumAggregationFieldStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SumAggregationFieldStep.java @@ -45,7 +45,7 @@ public interface SumAggregationFieldStep SumAggregationOptionsStep field(String fieldPath, Class type, ValueModel valueModel); /** - * Target the given field in the avg aggregation. + * Target the given field in the sum aggregation. * * @param fieldReference The field reference representing a definition of the index field to aggregate. * @param The type of field values. diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TypedSearchAggregationFactory.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TypedSearchAggregationFactory.java index 517e72a8e4d..10b81bcb44c 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TypedSearchAggregationFactory.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TypedSearchAggregationFactory.java @@ -73,6 +73,8 @@ public interface TypedSearchAggregationFactory extends SearchAggregationFact /** * Perform the sum metric aggregation. + *

+ * Sums up the field values. * * @return The next step. */ @@ -82,6 +84,8 @@ public interface TypedSearchAggregationFactory extends SearchAggregationFact /** * Perform the min metric aggregation. + *

+ * Provides the minimum value among the field values. * * @return The next step. */ @@ -91,6 +95,8 @@ public interface TypedSearchAggregationFactory extends SearchAggregationFact /** * Perform the max metric aggregation. + *

+ * Provides the maximum value among the field values. * * @return The next step. */ @@ -98,26 +104,70 @@ public interface TypedSearchAggregationFactory extends SearchAggregationFact @Incubating MaxAggregationFieldStep max(); + + /** + * Perform the count values metric aggregation. + *

+ * Counts the number of non-empty field values. + *

+ * As this aggregation counts the field values, for multi-valued fields the resulting count + may be greater than the number of matched documents. + * + * @return The next step. + * @deprecated Use {@link #countValues()} instead. + */ + @SuppressWarnings("removal") + @Deprecated(since = "8.1", forRemoval = true) + @Incubating + @Override + default CountValuesAggregationFieldStep count() { + return countValues(); + } /** - * Perform the count metric aggregation. + * Perform the count values metric aggregation. + *

+ * Counts the number of non-empty field values. + *

+ * As this aggregation counts the field values, for multi-valued fields the resulting count + may be greater than the number of matched documents. * * @return The next step. */ @Override @Incubating - CountAggregationFieldStep count(); + CountValuesAggregationFieldStep countValues(); + + /** + * Perform the count distinct values metric aggregation. + *

+ * Counts the number of unique field values. + * + * @return The next step. + * @deprecated Use {@link #countDistinctValues()} instead. + */ + @SuppressWarnings("removal") + @Deprecated(since = "8.1", forRemoval = true) + @Incubating + @Override + default CountDistinctValuesAggregationFieldStep countDistinct() { + return countDistinctValues(); + } /** - * Perform the count distinct metric aggregation. + * Perform the count distinct values metric aggregation. + *

+ * Counts the number of unique field values. * * @return The next step. */ @Override @Incubating - CountDistinctAggregationFieldStep countDistinct(); + CountDistinctValuesAggregationFieldStep countDistinctValues(); /** * Perform the avg metric aggregation. + *

+ * Calculates the average value of a given numeric or temporal field among the matched documents. * * @return the next step. */ diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctAggregationFieldStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctValuesAggregationFieldStepImpl.java similarity index 63% rename from engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctAggregationFieldStepImpl.java rename to engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctValuesAggregationFieldStepImpl.java index fd67730f29f..4c6590acdfa 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctAggregationFieldStepImpl.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctValuesAggregationFieldStepImpl.java @@ -4,25 +4,25 @@ */ package org.hibernate.search.engine.search.aggregation.dsl.impl; -import org.hibernate.search.engine.search.aggregation.dsl.CountDistinctAggregationFieldStep; -import org.hibernate.search.engine.search.aggregation.dsl.CountDistinctAggregationOptionsStep; +import org.hibernate.search.engine.search.aggregation.dsl.CountDistinctValuesAggregationFieldStep; +import org.hibernate.search.engine.search.aggregation.dsl.CountDistinctValuesAggregationOptionsStep; import org.hibernate.search.engine.search.aggregation.dsl.spi.SearchAggregationDslContext; import org.hibernate.search.engine.search.aggregation.spi.AggregationTypeKeys; import org.hibernate.search.engine.search.aggregation.spi.SearchFilterableAggregationBuilder; import org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; -public class CountDistinctAggregationFieldStepImpl> - implements CountDistinctAggregationFieldStep { +public class CountDistinctValuesAggregationFieldStepImpl> + implements CountDistinctValuesAggregationFieldStep { private final SearchAggregationDslContext dslContext; - public CountDistinctAggregationFieldStepImpl(SearchAggregationDslContext dslContext) { + public CountDistinctValuesAggregationFieldStepImpl(SearchAggregationDslContext dslContext) { this.dslContext = dslContext; } @Override - public CountDistinctAggregationOptionsStep field(String fieldPath) { + public CountDistinctValuesAggregationOptionsStep field(String fieldPath) { SearchFilterableAggregationBuilder builder = dslContext.scope() .fieldQueryElement( fieldPath, AggregationTypeKeys.COUNT_DISTINCT ); - return new CountDistinctAggregationOptionsStepImpl<>( builder, dslContext ); + return new CountDistinctValuesAggregationOptionsStepImpl<>( builder, dslContext ); } } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctAggregationOptionsStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctValuesAggregationOptionsStepImpl.java similarity index 72% rename from engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctAggregationOptionsStepImpl.java rename to engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctValuesAggregationOptionsStepImpl.java index 293349b2e5a..f776f109a61 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctAggregationOptionsStepImpl.java +++ 
b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctValuesAggregationOptionsStepImpl.java @@ -7,33 +7,33 @@ import java.util.function.Function; import org.hibernate.search.engine.search.aggregation.SearchAggregation; -import org.hibernate.search.engine.search.aggregation.dsl.CountDistinctAggregationOptionsStep; +import org.hibernate.search.engine.search.aggregation.dsl.CountDistinctValuesAggregationOptionsStep; import org.hibernate.search.engine.search.aggregation.dsl.spi.SearchAggregationDslContext; import org.hibernate.search.engine.search.aggregation.spi.SearchFilterableAggregationBuilder; import org.hibernate.search.engine.search.predicate.SearchPredicate; import org.hibernate.search.engine.search.predicate.dsl.PredicateFinalStep; import org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; -class CountDistinctAggregationOptionsStepImpl> - implements CountDistinctAggregationOptionsStep, PDF> { +class CountDistinctValuesAggregationOptionsStepImpl> + implements CountDistinctValuesAggregationOptionsStep, PDF> { private final SearchFilterableAggregationBuilder builder; private final SearchAggregationDslContext dslContext; - CountDistinctAggregationOptionsStepImpl(SearchFilterableAggregationBuilder builder, + CountDistinctValuesAggregationOptionsStepImpl(SearchFilterableAggregationBuilder builder, SearchAggregationDslContext dslContext) { this.builder = builder; this.dslContext = dslContext; } @Override - public CountDistinctAggregationOptionsStepImpl filter( + public CountDistinctValuesAggregationOptionsStepImpl filter( Function clauseContributor) { SearchPredicate predicate = clauseContributor.apply( dslContext.predicateFactory() ).toPredicate(); return filter( predicate ); } @Override - public CountDistinctAggregationOptionsStepImpl filter(SearchPredicate searchPredicate) { + public CountDistinctValuesAggregationOptionsStepImpl filter(SearchPredicate searchPredicate) { builder.filter( searchPredicate ); return this; } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountAggregationFieldStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountValuesAggregationFieldStepImpl.java similarity index 56% rename from engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountAggregationFieldStepImpl.java rename to engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountValuesAggregationFieldStepImpl.java index 4a2ee3c98dd..4cad32e88d5 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountAggregationFieldStepImpl.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountValuesAggregationFieldStepImpl.java @@ -4,25 +4,25 @@ */ package org.hibernate.search.engine.search.aggregation.dsl.impl; -import org.hibernate.search.engine.search.aggregation.dsl.CountAggregationFieldStep; -import org.hibernate.search.engine.search.aggregation.dsl.CountAggregationOptionsStep; +import org.hibernate.search.engine.search.aggregation.dsl.CountValuesAggregationFieldStep; +import org.hibernate.search.engine.search.aggregation.dsl.CountValuesAggregationOptionsStep; import org.hibernate.search.engine.search.aggregation.dsl.spi.SearchAggregationDslContext; import org.hibernate.search.engine.search.aggregation.spi.AggregationTypeKeys; import org.hibernate.search.engine.search.aggregation.spi.SearchFilterableAggregationBuilder; import 
org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; -public class CountAggregationFieldStepImpl> - implements CountAggregationFieldStep { +public class CountValuesAggregationFieldStepImpl> + implements CountValuesAggregationFieldStep { private final SearchAggregationDslContext dslContext; - public CountAggregationFieldStepImpl(SearchAggregationDslContext dslContext) { + public CountValuesAggregationFieldStepImpl(SearchAggregationDslContext dslContext) { this.dslContext = dslContext; } @Override - public CountAggregationOptionsStep field(String fieldPath) { + public CountValuesAggregationOptionsStep field(String fieldPath) { SearchFilterableAggregationBuilder builder = dslContext.scope() .fieldQueryElement( fieldPath, AggregationTypeKeys.COUNT ); - return new CountAggregationOptionsStepImpl<>( builder, dslContext ); + return new CountValuesAggregationOptionsStepImpl<>( builder, dslContext ); } } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountAggregationOptionsStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountValuesAggregationOptionsStepImpl.java similarity index 71% rename from engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountAggregationOptionsStepImpl.java rename to engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountValuesAggregationOptionsStepImpl.java index 58cb8bad195..b1547ef39f3 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountAggregationOptionsStepImpl.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountValuesAggregationOptionsStepImpl.java @@ -7,33 +7,33 @@ import java.util.function.Function; import org.hibernate.search.engine.search.aggregation.SearchAggregation; -import org.hibernate.search.engine.search.aggregation.dsl.CountAggregationOptionsStep; +import org.hibernate.search.engine.search.aggregation.dsl.CountValuesAggregationOptionsStep; import org.hibernate.search.engine.search.aggregation.dsl.spi.SearchAggregationDslContext; import org.hibernate.search.engine.search.aggregation.spi.SearchFilterableAggregationBuilder; import org.hibernate.search.engine.search.predicate.SearchPredicate; import org.hibernate.search.engine.search.predicate.dsl.PredicateFinalStep; import org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; -class CountAggregationOptionsStepImpl> - implements CountAggregationOptionsStep, PDF> { +class CountValuesAggregationOptionsStepImpl> + implements CountValuesAggregationOptionsStep, PDF> { private final SearchFilterableAggregationBuilder builder; private final SearchAggregationDslContext dslContext; - CountAggregationOptionsStepImpl(SearchFilterableAggregationBuilder builder, + CountValuesAggregationOptionsStepImpl(SearchFilterableAggregationBuilder builder, SearchAggregationDslContext dslContext) { this.builder = builder; this.dslContext = dslContext; } @Override - public CountAggregationOptionsStepImpl filter( + public CountValuesAggregationOptionsStepImpl filter( Function clauseContributor) { SearchPredicate predicate = clauseContributor.apply( dslContext.predicateFactory() ).toPredicate(); return filter( predicate ); } @Override - public CountAggregationOptionsStepImpl filter(SearchPredicate searchPredicate) { + public CountValuesAggregationOptionsStepImpl filter(SearchPredicate searchPredicate) { builder.filter( searchPredicate ); return this; } diff --git 
a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/spi/AbstractSearchAggregationFactory.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/spi/AbstractSearchAggregationFactory.java index 61c1d352c07..8c7b688ad94 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/spi/AbstractSearchAggregationFactory.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/spi/AbstractSearchAggregationFactory.java @@ -9,8 +9,8 @@ import org.hibernate.search.engine.common.dsl.spi.DslExtensionState; import org.hibernate.search.engine.search.aggregation.dsl.AggregationFinalStep; import org.hibernate.search.engine.search.aggregation.dsl.AvgAggregationFieldStep; -import org.hibernate.search.engine.search.aggregation.dsl.CountAggregationFieldStep; -import org.hibernate.search.engine.search.aggregation.dsl.CountDistinctAggregationFieldStep; +import org.hibernate.search.engine.search.aggregation.dsl.CountValuesAggregationFieldStep; +import org.hibernate.search.engine.search.aggregation.dsl.CountDistinctValuesAggregationFieldStep; import org.hibernate.search.engine.search.aggregation.dsl.ExtendedSearchAggregationFactory; import org.hibernate.search.engine.search.aggregation.dsl.MaxAggregationFieldStep; import org.hibernate.search.engine.search.aggregation.dsl.MinAggregationFieldStep; @@ -19,8 +19,8 @@ import org.hibernate.search.engine.search.aggregation.dsl.SumAggregationFieldStep; import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationFieldStep; import org.hibernate.search.engine.search.aggregation.dsl.impl.AvgAggregationFieldStepImpl; -import org.hibernate.search.engine.search.aggregation.dsl.impl.CountAggregationFieldStepImpl; -import org.hibernate.search.engine.search.aggregation.dsl.impl.CountDistinctAggregationFieldStepImpl; +import org.hibernate.search.engine.search.aggregation.dsl.impl.CountValuesAggregationFieldStepImpl; +import org.hibernate.search.engine.search.aggregation.dsl.impl.CountDistinctValuesAggregationFieldStepImpl; import org.hibernate.search.engine.search.aggregation.dsl.impl.MaxAggregationFieldStepImpl; import org.hibernate.search.engine.search.aggregation.dsl.impl.MinAggregationFieldStepImpl; import org.hibernate.search.engine.search.aggregation.dsl.impl.RangeAggregationFieldStepImpl; @@ -70,13 +70,13 @@ public MaxAggregationFieldStep max() { } @Override - public CountAggregationFieldStep count() { - return new CountAggregationFieldStepImpl<>( dslContext ); + public CountValuesAggregationFieldStep countValues() { + return new CountValuesAggregationFieldStepImpl<>( dslContext ); } @Override - public CountDistinctAggregationFieldStep countDistinct() { - return new CountDistinctAggregationFieldStepImpl<>( dslContext ); + public CountDistinctValuesAggregationFieldStep countDistinctValues() { + return new CountDistinctValuesAggregationFieldStepImpl<>( dslContext ); } public AvgAggregationFieldStep avg() { diff --git a/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/AnyAggregationReference.java b/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/AnyAggregationReference.java index 0e148348e39..e8886faf11c 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/AnyAggregationReference.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/AnyAggregationReference.java @@ -26,8 +26,8 @@ public record AnyAggregationReference( String absolutePath, 
Class scopeRootType, ValueModel valueModel, Class aggregationType) implements AvgAggregationFieldReference, - CountAggregationFieldReference, - CountDistinctAggregationFieldReference, + CountValuesAggregationFieldReference, + CountDistinctValuesAggregationFieldReference, MaxAggregationFieldReference, MinAggregationFieldReference, RangeAggregationFieldReference, diff --git a/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountDistinctAggregationFieldReference.java b/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountDistinctValuesAggregationFieldReference.java similarity index 69% rename from engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountDistinctAggregationFieldReference.java rename to engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountDistinctValuesAggregationFieldReference.java index 34902cc99b2..bce6e722387 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountDistinctAggregationFieldReference.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountDistinctValuesAggregationFieldReference.java @@ -7,5 +7,5 @@ import org.hibernate.search.util.common.annotation.Incubating; @Incubating -public interface CountDistinctAggregationFieldReference extends AggregationFieldReference { +public interface CountDistinctValuesAggregationFieldReference extends AggregationFieldReference { } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountAggregationFieldReference.java b/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountValuesAggregationFieldReference.java similarity index 70% rename from engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountAggregationFieldReference.java rename to engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountValuesAggregationFieldReference.java index 68e64836aab..7b1fd725532 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountAggregationFieldReference.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountValuesAggregationFieldReference.java @@ -7,5 +7,5 @@ import org.hibernate.search.util.common.annotation.Incubating; @Incubating -public interface CountAggregationFieldReference extends AggregationFieldReference { +public interface CountValuesAggregationFieldReference extends AggregationFieldReference { } diff --git a/metamodel/metamodel-processor/src/main/java/org/hibernate/search/processor/writer/impl/TraitReferenceMapping.java b/metamodel/metamodel-processor/src/main/java/org/hibernate/search/processor/writer/impl/TraitReferenceMapping.java index 8126cbfe5c7..9dbae56ebbf 100644 --- a/metamodel/metamodel-processor/src/main/java/org/hibernate/search/processor/writer/impl/TraitReferenceMapping.java +++ b/metamodel/metamodel-processor/src/main/java/org/hibernate/search/processor/writer/impl/TraitReferenceMapping.java @@ -10,8 +10,8 @@ import org.hibernate.search.engine.backend.types.IndexFieldTraits; import org.hibernate.search.engine.search.reference.aggregation.AvgAggregationFieldReference; -import org.hibernate.search.engine.search.reference.aggregation.CountAggregationFieldReference; -import org.hibernate.search.engine.search.reference.aggregation.CountDistinctAggregationFieldReference; +import 
org.hibernate.search.engine.search.reference.aggregation.CountValuesAggregationFieldReference; +import org.hibernate.search.engine.search.reference.aggregation.CountDistinctValuesAggregationFieldReference; import org.hibernate.search.engine.search.reference.aggregation.MaxAggregationFieldReference; import org.hibernate.search.engine.search.reference.aggregation.MinAggregationFieldReference; import org.hibernate.search.engine.search.reference.aggregation.RangeAggregationFieldReference; @@ -117,9 +117,9 @@ private TraitReferenceMapping() { TraitKind.TYPED_OUTPUT, EXTRA_PROPERTY_AGGREGATION_TYPE ) ); traits.put( IndexFieldTraits.Aggregations.COUNT, - new TraitReferenceDetails( CountAggregationFieldReference.class, "A5", TraitKind.UNTYPED ) ); + new TraitReferenceDetails( CountValuesAggregationFieldReference.class, "A5", TraitKind.UNTYPED ) ); traits.put( IndexFieldTraits.Aggregations.COUNT_DISTINCT, - new TraitReferenceDetails( CountDistinctAggregationFieldReference.class, "A6", TraitKind.UNTYPED ) ); + new TraitReferenceDetails( CountDistinctValuesAggregationFieldReference.class, "A6", TraitKind.UNTYPED ) ); traits.put( IndexFieldTraits.Aggregations.AVG, new TraitReferenceDetails( AvgAggregationFieldReference.class, "A7", TraitKind.TYPED_OUTPUT, EXTRA_PROPERTY_AGGREGATION_TYPE ) ); From cd4f113235fd0298deab6d2481b25efd50c84643 Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Tue, 1 Jul 2025 21:30:10 +0200 Subject: [PATCH 08/23] HSEARCH-3661 Introduce count documents aggregation --- ...sticsearchNumericFieldTypeOptionsStep.java | 4 +- ...archTemporalIndexFieldTypeOptionsStep.java | 4 +- .../collector/impl/AggregationFunction.java | 4 + .../impl/AggregationFunctionCollector.java | 3 + .../impl/CountDocuemntsCollectorFactory.java | 31 +++++++ .../impl/CountDocumentsCollector.java | 29 ++++++ .../impl/CountDocumentsCollectorManager.java | 27 ++++++ .../impl/LuceneCountDocumentAggregation.java | 89 +++++++++++++++++++ ...uceneNumericIndexFieldTypeOptionsStep.java | 4 +- ...ceneTemporalIndexFieldTypeOptionsStep.java | 4 +- .../backend/types/IndexFieldTraits.java | 15 +++- ...istinctValuesAggregationFieldStepImpl.java | 2 +- .../CountValuesAggregationFieldStepImpl.java | 2 +- .../aggregation/spi/AggregationTypeKeys.java | 11 ++- .../spi/CountDocumentAggregationBuilder.java | 13 +++ ...uceneNumericIndexFieldTypeOptionsStep.java | 4 +- ...ceneTemporalIndexFieldTypeOptionsStep.java | 4 +- .../writer/impl/TraitReferenceMapping.java | 4 +- .../impl/TraitReferenceMappingTest.java | 4 + 19 files changed, 236 insertions(+), 22 deletions(-) create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocuemntsCollectorFactory.java create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollector.java create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollectorManager.java create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java create mode 100644 engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/CountDocumentAggregationBuilder.java diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/types/dsl/impl/AbstractElasticsearchNumericFieldTypeOptionsStep.java 
b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/types/dsl/impl/AbstractElasticsearchNumericFieldTypeOptionsStep.java index 59fe97f552f..4976e0b9ebc 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/types/dsl/impl/AbstractElasticsearchNumericFieldTypeOptionsStep.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/types/dsl/impl/AbstractElasticsearchNumericFieldTypeOptionsStep.java @@ -69,8 +69,8 @@ protected final void complete() { builder.queryElementFactory( AggregationTypeKeys.MIN, ElasticsearchMetricFieldAggregation.min( codec ) ); builder.queryElementFactory( AggregationTypeKeys.MAX, ElasticsearchMetricFieldAggregation.max( codec ) ); builder.queryElementFactory( AggregationTypeKeys.AVG, ElasticsearchMetricFieldAggregation.avg( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT, ElasticsearchMetricLongAggregation.count( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT, + builder.queryElementFactory( AggregationTypeKeys.COUNT_VALUES, ElasticsearchMetricLongAggregation.count( codec ) ); + builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT_VALUES, ElasticsearchMetricLongAggregation.countDistinct( codec ) ); } } diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/types/dsl/impl/AbstractElasticsearchTemporalIndexFieldTypeOptionsStep.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/types/dsl/impl/AbstractElasticsearchTemporalIndexFieldTypeOptionsStep.java index 7d480ac29d2..a929b11767b 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/types/dsl/impl/AbstractElasticsearchTemporalIndexFieldTypeOptionsStep.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/types/dsl/impl/AbstractElasticsearchTemporalIndexFieldTypeOptionsStep.java @@ -88,8 +88,8 @@ protected final void complete() { builder.queryElementFactory( AggregationTypeKeys.MIN, ElasticsearchMetricFieldAggregation.min( codec ) ); builder.queryElementFactory( AggregationTypeKeys.MAX, ElasticsearchMetricFieldAggregation.max( codec ) ); builder.queryElementFactory( AggregationTypeKeys.AVG, ElasticsearchMetricFieldAggregation.avg( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT, ElasticsearchMetricLongAggregation.count( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT, + builder.queryElementFactory( AggregationTypeKeys.COUNT_VALUES, ElasticsearchMetricLongAggregation.count( codec ) ); + builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT_VALUES, ElasticsearchMetricLongAggregation.countDistinct( codec ) ); } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunction.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunction.java index 843a4717f93..bdac2557c0f 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunction.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunction.java @@ -14,4 +14,8 @@ public interface AggregationFunction> { R implementation(); + default boolean acceptMultipleValues() { + return true; + } + } diff --git 
a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java index e9d0ddf3bdc..fb577604054 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java @@ -56,6 +56,9 @@ public void collect(int doc) throws IOException { while ( values.hasNextValue() ) { long value = values.nextValue(); aggregationFunction.apply( value ); + if ( !aggregationFunction.acceptMultipleValues() ) { + break; + } } } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocuemntsCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocuemntsCollectorFactory.java new file mode 100644 index 00000000000..6e3c0e419d1 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocuemntsCollectorFactory.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +import java.io.IOException; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorExecutionContext; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; + +public class CountDocuemntsCollectorFactory + implements CollectorFactory { + + private final CollectorKey key = CollectorKey.create(); + + public static CountDocuemntsCollectorFactory instance() { + return new CountDocuemntsCollectorFactory(); + } + + @Override + public CountDocumentsCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { + return new CountDocumentsCollectorManager(); + } + + @Override + public CollectorKey getCollectorKey() { + return key; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollector.java new file mode 100644 index 00000000000..b494f0e45c8 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollector.java @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +import java.io.IOException; + +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; + +public class CountDocumentsCollector extends SimpleCollector { + + private long count = 0L; + + @Override + public void collect(int doc) throws IOException { + count++; + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE; + } + + public long count() { + return count; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollectorManager.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollectorManager.java new file mode 100644 index 00000000000..5f0c875d08c --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollectorManager.java @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +import java.io.IOException; +import java.util.Collection; + +import org.apache.lucene.search.CollectorManager; + +public class CountDocumentsCollectorManager implements CollectorManager { + + @Override + public CountDocumentsCollector newCollector() throws IOException { + return new CountDocumentsCollector(); + } + + @Override + public Long reduce(Collection collectors) throws IOException { + long count = 0L; + for ( CountDocumentsCollector collector : collectors ) { + count += collector.count(); + } + return count; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java new file mode 100644 index 00000000000..cbf2cb8787c --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java @@ -0,0 +1,89 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +import java.util.Set; + +import org.hibernate.search.backend.lucene.logging.impl.QueryLog; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountDocuemntsCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; +import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; +import org.hibernate.search.engine.search.aggregation.spi.CountDocumentAggregationBuilder; +import org.hibernate.search.engine.search.common.spi.SearchQueryElementFactory; + +public class LuceneCountDocumentAggregation implements LuceneSearchAggregation { + + @SuppressWarnings("unchecked") + public static SearchQueryElementFactory, F> factory() { + return Factory.INSTANCE; + } + + private final Set indexNames; + + LuceneCountDocumentAggregation(Builder builder) { + this.indexNames = builder.scope.hibernateSearchIndexNames(); + } + + @Override + public Extractor request(AggregationRequestContext context) { + CountDocuemntsCollectorFactory collectorFactory = CountDocuemntsCollectorFactory.instance(); + var collectorKey = collectorFactory.getCollectorKey(); + + context.requireCollector( collectorFactory ); + return new CountDocumentsExtractor( collectorKey ); + } + + private record CountDocumentsExtractor(CollectorKey collectorKey) implements Extractor { + + @Override + public Long extract(AggregationExtractContext context) { + return context.getCollectorResults( collectorKey ); + } + } + + @Override + public Set indexNames() { + return indexNames; + } + + protected static class Factory + implements SearchQueryElementFactory, N> { + + @SuppressWarnings("rawtypes") + private static final Factory INSTANCE = new Factory(); + + private Factory() { + } + + @Override + public CountDocumentAggregationBuilder create(LuceneSearchIndexScope scope, N node) { + return new Builder( scope ); + } + + @Override + public void checkCompatibleWith(SearchQueryElementFactory other) { + if ( !getClass().equals( other.getClass() ) ) { + throw QueryLog.INSTANCE.differentImplementationClassForQueryElement( getClass(), other.getClass() ); + } + } + } + + public static class Builder implements CountDocumentAggregationBuilder { + + protected final LuceneSearchIndexScope scope; + + public Builder(LuceneSearchIndexScope scope) { + this.scope = scope; + } + + @Override + public LuceneCountDocumentAggregation build() { + return new LuceneCountDocumentAggregation( this ); + } + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java index a5e8dfa4b42..250387a6fba 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java @@ -96,8 +96,8 @@ public LuceneIndexValueFieldType toIndexFieldType() { builder.queryElementFactory( 
AggregationTypeKeys.SUM, sumMetricAggregationFactory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.MIN, LuceneMinNumericFieldAggregation.factory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.MAX, LuceneMaxNumericFieldAggregation.factory( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT, LuceneCountNumericLongAggregation.factory( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT, + builder.queryElementFactory( AggregationTypeKeys.COUNT_VALUES, LuceneCountNumericLongAggregation.factory( codec ) ); + builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT_VALUES, LuceneCountDistinctNumericLongAggregation.factory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.AVG, avgMetricAggregationFactory( codec ) ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneTemporalIndexFieldTypeOptionsStep.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneTemporalIndexFieldTypeOptionsStep.java index c4a777fa8b9..08e8311752f 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneTemporalIndexFieldTypeOptionsStep.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneTemporalIndexFieldTypeOptionsStep.java @@ -102,8 +102,8 @@ public LuceneIndexValueFieldType toIndexFieldType() { } builder.queryElementFactory( AggregationTypeKeys.MIN, LuceneMinNumericFieldAggregation.factory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.MAX, LuceneMaxNumericFieldAggregation.factory( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT, LuceneCountNumericLongAggregation.factory( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT, + builder.queryElementFactory( AggregationTypeKeys.COUNT_VALUES, LuceneCountNumericLongAggregation.factory( codec ) ); + builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT_VALUES, LuceneCountDistinctNumericLongAggregation.factory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.AVG, LuceneAvgNumericFieldAggregation.factory( codec ) ); } diff --git a/engine/src/main/java/org/hibernate/search/engine/backend/types/IndexFieldTraits.java b/engine/src/main/java/org/hibernate/search/engine/backend/types/IndexFieldTraits.java index e020d46c071..70a7811f6a0 100644 --- a/engine/src/main/java/org/hibernate/search/engine/backend/types/IndexFieldTraits.java +++ b/engine/src/main/java/org/hibernate/search/engine/backend/types/IndexFieldTraits.java @@ -103,8 +103,19 @@ private Aggregations() { public static final String SUM = "aggregation:sum"; public static final String MIN = "aggregation:min"; public static final String MAX = "aggregation:max"; - public static final String COUNT = "aggregation:count"; - public static final String COUNT_DISTINCT = "aggregation:countDistinct"; + /** + * @deprecated Use {@link #COUNT_VALUES} instead. + */ + @Deprecated(since = "8.1", forRemoval = true) + public static final String COUNT = "aggregation:countValues"; + /** + * @deprecated Use {@link #COUNT_DISTINCT_VALUES} instead. 
+ */ + @Deprecated(since = "8.1", forRemoval = true) + public static final String COUNT_DISTINCT = "aggregation:countDistinctValues"; + public static final String COUNT_VALUES = "aggregation:countValues"; + public static final String COUNT_DISTINCT_VALUES = "aggregation:countDistinctValues"; + public static final String COUNT_DOCUMENTS = "aggregation:countDocuments"; public static final String AVG = "aggregation:avg"; } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctValuesAggregationFieldStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctValuesAggregationFieldStepImpl.java index 4c6590acdfa..d98d0ffbaeb 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctValuesAggregationFieldStepImpl.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctValuesAggregationFieldStepImpl.java @@ -22,7 +22,7 @@ public CountDistinctValuesAggregationFieldStepImpl(SearchAggregationDslContext field(String fieldPath) { SearchFilterableAggregationBuilder builder = dslContext.scope() - .fieldQueryElement( fieldPath, AggregationTypeKeys.COUNT_DISTINCT ); + .fieldQueryElement( fieldPath, AggregationTypeKeys.COUNT_DISTINCT_VALUES ); return new CountDistinctValuesAggregationOptionsStepImpl<>( builder, dslContext ); } } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountValuesAggregationFieldStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountValuesAggregationFieldStepImpl.java index 4cad32e88d5..3f890b8b6d3 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountValuesAggregationFieldStepImpl.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountValuesAggregationFieldStepImpl.java @@ -22,7 +22,7 @@ public CountValuesAggregationFieldStepImpl(SearchAggregationDslContext field(String fieldPath) { SearchFilterableAggregationBuilder builder = dslContext.scope() - .fieldQueryElement( fieldPath, AggregationTypeKeys.COUNT ); + .fieldQueryElement( fieldPath, AggregationTypeKeys.COUNT_VALUES ); return new CountValuesAggregationOptionsStepImpl<>( builder, dslContext ); } } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/AggregationTypeKeys.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/AggregationTypeKeys.java index 0c791a21ece..3edc3e6a5e5 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/AggregationTypeKeys.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/AggregationTypeKeys.java @@ -24,11 +24,14 @@ private AggregationTypeKeys() { of( IndexFieldTraits.Aggregations.MIN ); public static final SearchQueryElementTypeKey MAX = of( IndexFieldTraits.Aggregations.MAX ); - public static final SearchQueryElementTypeKey> COUNT = - of( IndexFieldTraits.Aggregations.COUNT ); - public static final SearchQueryElementTypeKey> COUNT_DISTINCT = - of( IndexFieldTraits.Aggregations.COUNT_DISTINCT ); + public static final SearchQueryElementTypeKey> COUNT_VALUES = + of( IndexFieldTraits.Aggregations.COUNT_VALUES ); + public static final SearchQueryElementTypeKey> COUNT_DISTINCT_VALUES = + of( IndexFieldTraits.Aggregations.COUNT_DISTINCT_VALUES ); public static final SearchQueryElementTypeKey AVG = of( IndexFieldTraits.Aggregations.AVG ); + public 
static final SearchQueryElementTypeKey COUNT_DOCUMENTS = + of( IndexFieldTraits.Aggregations.COUNT_DOCUMENTS ); + } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/CountDocumentAggregationBuilder.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/CountDocumentAggregationBuilder.java new file mode 100644 index 00000000000..29c7a79f423 --- /dev/null +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/CountDocumentAggregationBuilder.java @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.engine.search.aggregation.spi; + +public interface CountDocumentAggregationBuilder extends SearchAggregationBuilder { + + interface TypeSelector { + CountDocumentAggregationBuilder type(); + } + +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java index a5e8dfa4b42..250387a6fba 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java @@ -96,8 +96,8 @@ public LuceneIndexValueFieldType toIndexFieldType() { builder.queryElementFactory( AggregationTypeKeys.SUM, sumMetricAggregationFactory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.MIN, LuceneMinNumericFieldAggregation.factory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.MAX, LuceneMaxNumericFieldAggregation.factory( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT, LuceneCountNumericLongAggregation.factory( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT, + builder.queryElementFactory( AggregationTypeKeys.COUNT_VALUES, LuceneCountNumericLongAggregation.factory( codec ) ); + builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT_VALUES, LuceneCountDistinctNumericLongAggregation.factory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.AVG, avgMetricAggregationFactory( codec ) ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneTemporalIndexFieldTypeOptionsStep.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneTemporalIndexFieldTypeOptionsStep.java index c4a777fa8b9..08e8311752f 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneTemporalIndexFieldTypeOptionsStep.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneTemporalIndexFieldTypeOptionsStep.java @@ -102,8 +102,8 @@ public LuceneIndexValueFieldType toIndexFieldType() { } builder.queryElementFactory( AggregationTypeKeys.MIN, LuceneMinNumericFieldAggregation.factory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.MAX, LuceneMaxNumericFieldAggregation.factory( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT, LuceneCountNumericLongAggregation.factory( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT, + 
builder.queryElementFactory( AggregationTypeKeys.COUNT_VALUES, LuceneCountNumericLongAggregation.factory( codec ) ); + builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT_VALUES, LuceneCountDistinctNumericLongAggregation.factory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.AVG, LuceneAvgNumericFieldAggregation.factory( codec ) ); } diff --git a/metamodel/metamodel-processor/src/main/java/org/hibernate/search/processor/writer/impl/TraitReferenceMapping.java b/metamodel/metamodel-processor/src/main/java/org/hibernate/search/processor/writer/impl/TraitReferenceMapping.java index 9dbae56ebbf..97c50ce6942 100644 --- a/metamodel/metamodel-processor/src/main/java/org/hibernate/search/processor/writer/impl/TraitReferenceMapping.java +++ b/metamodel/metamodel-processor/src/main/java/org/hibernate/search/processor/writer/impl/TraitReferenceMapping.java @@ -116,9 +116,9 @@ private TraitReferenceMapping() { traits.put( IndexFieldTraits.Aggregations.MAX, new TraitReferenceDetails( MaxAggregationFieldReference.class, "A4", TraitKind.TYPED_OUTPUT, EXTRA_PROPERTY_AGGREGATION_TYPE ) ); - traits.put( IndexFieldTraits.Aggregations.COUNT, + traits.put( IndexFieldTraits.Aggregations.COUNT_VALUES, new TraitReferenceDetails( CountValuesAggregationFieldReference.class, "A5", TraitKind.UNTYPED ) ); - traits.put( IndexFieldTraits.Aggregations.COUNT_DISTINCT, + traits.put( IndexFieldTraits.Aggregations.COUNT_DISTINCT_VALUES, new TraitReferenceDetails( CountDistinctValuesAggregationFieldReference.class, "A6", TraitKind.UNTYPED ) ); traits.put( IndexFieldTraits.Aggregations.AVG, new TraitReferenceDetails( AvgAggregationFieldReference.class, "A7", TraitKind.TYPED_OUTPUT, EXTRA_PROPERTY_AGGREGATION_TYPE diff --git a/metamodel/metamodel-processor/src/test/java/org/hibernate/search/processor/writer/impl/TraitReferenceMappingTest.java b/metamodel/metamodel-processor/src/test/java/org/hibernate/search/processor/writer/impl/TraitReferenceMappingTest.java index eefbcbc9234..5c1523049c7 100644 --- a/metamodel/metamodel-processor/src/test/java/org/hibernate/search/processor/writer/impl/TraitReferenceMappingTest.java +++ b/metamodel/metamodel-processor/src/test/java/org/hibernate/search/processor/writer/impl/TraitReferenceMappingTest.java @@ -45,6 +45,10 @@ private static Stream traitNames() { traitNames.addAll( traitNames( IndexFieldTraits.Projections.class ) ); traitNames.addAll( traitNames( IndexFieldTraits.Sorts.class ) ); traitNames.addAll( traitNames( IndexFieldTraits.Aggregations.class ) ); + + // count documents is an aggregation that does not require a field and as a result does not require the field reference: + traitNames.remove( IndexFieldTraits.Aggregations.COUNT_DOCUMENTS ); + return traitNames.stream().map( Arguments::of ); } From 0d71e9f4b2ecf9013d33fd80e63f6a9dae032c39 Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Wed, 2 Jul 2025 19:20:11 +0200 Subject: [PATCH 09/23] HSEARCH-3661 Add count documents aggregation --- .../dsl/impl/LuceneIndexRootBuilder.java | 4 + ...ava => RootAggregationExtractContext.java} | 4 +- ...ava => RootAggregationRequestContext.java} | 4 +- .../impl/LuceneExtractableSearchResult.java | 4 +- .../query/impl/LuceneSearchQueryBuilder.java | 4 +- .../impl/LuceneCountDocumentAggregation.java | 23 ++- .../impl/LuceneNumericRangeAggregation.java | 2 +- ...untDistinctValuesAggregationFieldStep.java | 3 +- .../CountDocumentsAggregationFinalStep.java | 15 ++ .../dsl/ExtendedSearchAggregationFactory.java | 3 + .../dsl/SearchAggregationFactory.java | 11 ++ 
.../dsl/TypedSearchAggregationFactory.java | 13 ++ ...ountDocumentsAggregationFinalStepImpl.java | 25 +++ .../spi/AbstractSearchAggregationFactory.java | 11 +- .../MetricNumericFieldsAggregationsIT.java | 12 +- .../MetricTemporalFieldsAggregationsIT.java | 8 +- .../MetricAggregationsTestCase.java | 4 +- .../orm/elasticsearch/AggregationTypesIT.java | 4 +- .../orm/lucene/AggregationTypesIT.java | 4 +- .../elasticsearch/AggregationTypesIT.java | 4 +- .../standalone/lucene/AggregationTypesIT.java | 4 +- .../dsl/impl/LuceneIndexRootBuilder.java | 4 + .../collector/impl/AggregationFunction.java | 4 + .../impl/AggregationFunctionCollector.java | 3 + .../collector/impl/CountDistinctValues.java | 47 +++++ .../CountDistinctValuesCollectorFactory.java | 34 ++++ .../impl/CountDocuemntsCollectorFactory.java | 31 +++ .../impl/CountDocumentsCollector.java | 29 +++ .../impl/CountDocumentsCollectorManager.java | 27 +++ .../collector/impl/CountValues.java | 30 +++ .../impl/CountValuesCollectorFactory.java | 32 +++ .../collector/impl/NumericTermsCollector.java | 2 +- .../collector/impl/RangeCollector.java | 41 +++- .../collector/impl/RangeCollectorFactory.java | 35 +++- .../collector/impl/RangeCollectorManager.java | 12 +- .../collector/impl/TextTermsCollector.java | 2 +- .../impl/AggregationExtractContext.java | 52 +---- .../impl/AggregationRequestContext.java | 45 +---- .../impl/RootAggregationExtractContext.java | 72 +++++++ .../impl/RootAggregationRequestContext.java | 63 ++++++ .../impl/LuceneExtractableSearchResult.java | 3 +- .../query/impl/LuceneSearchQueryBuilder.java | 3 +- .../LuceneAvgCompensatedSumAggregation.java | 8 +- .../LuceneAvgNumericFieldAggregation.java | 12 +- ...neCountDistinctNumericLongAggregation.java | 4 +- .../impl/LuceneCountDocumentAggregation.java | 100 ++++++++++ .../LuceneCountNumericLongAggregation.java | 4 +- .../impl/LuceneNumericRangeAggregation.java | 186 +++++++++++++++--- .../writer/impl/TraitReferenceMapping.java | 2 +- 49 files changed, 881 insertions(+), 172 deletions(-) rename backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/{AggregationExtractContextImpl.java => RootAggregationExtractContext.java} (95%) rename backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/{AggregationRequestContextImpl.java => RootAggregationRequestContext.java} (94%) create mode 100644 engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDocumentsAggregationFinalStep.java create mode 100644 engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDocumentsAggregationFinalStepImpl.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValues.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValuesCollectorFactory.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocuemntsCollectorFactory.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollector.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollectorManager.java create mode 100644 
lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValues.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValuesCollectorFactory.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationExtractContext.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationRequestContext.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java index 5b4099e7b14..fcdbc7238a3 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java @@ -19,6 +19,7 @@ import org.hibernate.search.backend.lucene.document.model.impl.LuceneIndexRoot; import org.hibernate.search.backend.lucene.document.model.impl.LuceneIndexValueField; import org.hibernate.search.backend.lucene.document.model.impl.LuceneIndexValueFieldTemplate; +import org.hibernate.search.backend.lucene.types.aggregation.impl.LuceneCountDocumentAggregation; import org.hibernate.search.backend.lucene.types.dsl.LuceneIndexFieldTypeFactory; import org.hibernate.search.backend.lucene.types.dsl.impl.LuceneIndexFieldTypeFactoryImpl; import org.hibernate.search.backend.lucene.types.impl.LuceneIndexCompositeNodeType; @@ -33,6 +34,7 @@ import org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; import org.hibernate.search.engine.mapper.mapping.building.spi.IndexFieldTypeDefaultsProvider; import org.hibernate.search.engine.reporting.spi.EventContexts; +import org.hibernate.search.engine.search.aggregation.spi.AggregationTypeKeys; import org.hibernate.search.util.common.reporting.EventContext; public class LuceneIndexRootBuilder extends AbstractLuceneIndexCompositeNodeBuilder @@ -55,6 +57,8 @@ public LuceneIndexRootBuilder(EventContext indexEventContext, this.backendMapperContext = backendMapperContext; this.mappedTypeName = mappedTypeName; this.analysisDefinitionRegistry = analysisDefinitionRegistry; + + typeBuilder.queryElementFactory( AggregationTypeKeys.COUNT_DOCUMENTS, LuceneCountDocumentAggregation.factory() ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContextImpl.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationExtractContext.java similarity index 95% rename from backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContextImpl.java rename to backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationExtractContext.java index 55f9021edad..c285af570d4 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContextImpl.java +++ 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationExtractContext.java @@ -19,7 +19,7 @@ import org.apache.lucene.search.Collector; import org.apache.lucene.search.Query; -public class AggregationExtractContextImpl implements AggregationExtractContext { +public class RootAggregationExtractContext implements AggregationExtractContext { private final LuceneSearchQueryIndexScope queryIndexScope; private final BackendSessionContext sessionContext; @@ -29,7 +29,7 @@ public class AggregationExtractContextImpl implements AggregationExtractContext private final Set routingKeys; private final QueryParameters parameters; - public AggregationExtractContextImpl(LuceneSearchQueryIndexScope queryIndexScope, + public RootAggregationExtractContext(LuceneSearchQueryIndexScope queryIndexScope, BackendSessionContext sessionContext, IndexReader indexReader, FromDocumentValueConvertContext fromDocumentValueConvertContext, diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContextImpl.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationRequestContext.java similarity index 94% rename from backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContextImpl.java rename to backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationRequestContext.java index 43cdc046a45..39e8fcd3bb6 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContextImpl.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationRequestContext.java @@ -19,7 +19,7 @@ import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.Query; -public final class AggregationRequestContextImpl implements AggregationRequestContext { +public final class RootAggregationRequestContext implements AggregationRequestContext { private final LuceneSearchQueryIndexScope queryIndexScope; private final BackendSessionContext sessionContext; @@ -27,7 +27,7 @@ public final class AggregationRequestContextImpl implements AggregationRequestCo private final ExtractionRequirements.Builder extractionRequirementsBuilder; private final QueryParameters parameters; - public AggregationRequestContextImpl(LuceneSearchQueryIndexScope queryIndexScope, + public RootAggregationRequestContext(LuceneSearchQueryIndexScope queryIndexScope, BackendSessionContext sessionContext, Set routingKeys, ExtractionRequirements.Builder extractionRequirementsBuilder, QueryParameters parameters) { diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java index 119a51f1042..9a90f34eaba 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java @@ -14,8 +14,8 @@ import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TopDocsDataCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TopDocsDataCollectorExecutionContext; import 
org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; -import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContextImpl; import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; +import org.hibernate.search.backend.lucene.search.aggregation.impl.RootAggregationExtractContext; import org.hibernate.search.backend.lucene.search.extraction.impl.LuceneCollectors; import org.hibernate.search.backend.lucene.search.projection.impl.LuceneSearchProjection; import org.hibernate.search.backend.lucene.search.projection.impl.ProjectionExtractContext; @@ -113,7 +113,7 @@ private List extractHits(ProjectionHitMapper projectionHitMapper, int } private Map, ?> extractAggregations() throws IOException { - AggregationExtractContext aggregationExtractContext = new AggregationExtractContextImpl( + AggregationExtractContext aggregationExtractContext = new RootAggregationExtractContext( requestContext.getQueryIndexScope(), requestContext.getSessionContext(), indexSearcher.getIndexReader(), diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java index 9bf310b041d..18e904ce1d2 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java @@ -19,8 +19,8 @@ import org.hibernate.search.backend.lucene.lowlevel.query.impl.Queries; import org.hibernate.search.backend.lucene.orchestration.impl.LuceneSyncWorkOrchestrator; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; -import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContextImpl; import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; +import org.hibernate.search.backend.lucene.search.aggregation.impl.RootAggregationRequestContext; import org.hibernate.search.backend.lucene.search.extraction.impl.ExtractionRequirements; import org.hibernate.search.backend.lucene.search.highlighter.impl.LuceneAbstractSearchHighlighter; import org.hibernate.search.backend.lucene.search.predicate.impl.LuceneSearchPredicate; @@ -267,7 +267,7 @@ public LuceneSearchQuery build() { if ( aggregations != null ) { aggregationExtractors = new LinkedHashMap<>(); AggregationRequestContext aggregationRequestContext = - new AggregationRequestContextImpl( scope, sessionContext, routingKeys, extractionRequirementsBuilder, + new RootAggregationRequestContext( scope, sessionContext, routingKeys, extractionRequirementsBuilder, parameters ); for ( Map.Entry, LuceneSearchAggregation> entry : aggregations.entrySet() ) { aggregationExtractors.put( entry.getKey(), entry.getValue().request( aggregationRequestContext ) ); diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java index cbf2cb8787c..657a23bf319 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java +++ 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java @@ -12,6 +12,7 @@ import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; +import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexCompositeNodeContext; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; import org.hibernate.search.engine.search.aggregation.spi.CountDocumentAggregationBuilder; import org.hibernate.search.engine.search.common.spi.SearchQueryElementFactory; @@ -19,7 +20,7 @@ public class LuceneCountDocumentAggregation implements LuceneSearchAggregation { @SuppressWarnings("unchecked") - public static SearchQueryElementFactory, F> factory() { + public static Factory factory() { return Factory.INSTANCE; } @@ -51,18 +52,21 @@ public Set indexNames() { return indexNames; } - protected static class Factory - implements SearchQueryElementFactory, N> { + protected static class Factory + implements + SearchQueryElementFactory, + LuceneSearchIndexCompositeNodeContext> { - @SuppressWarnings("rawtypes") private static final Factory INSTANCE = new Factory(); private Factory() { } @Override - public CountDocumentAggregationBuilder create(LuceneSearchIndexScope scope, N node) { - return new Builder( scope ); + public CountDocumentAggregationBuilder.TypeSelector create(LuceneSearchIndexScope scope, + LuceneSearchIndexCompositeNodeContext node) { + return new TypeSelector( scope ); } @Override @@ -73,6 +77,13 @@ public void checkCompatibleWith(SearchQueryElementFactory other) { } } + protected record TypeSelector(LuceneSearchIndexScope scope) implements CountDocumentAggregationBuilder.TypeSelector { + @Override + public CountDocumentAggregationBuilder type() { + return new Builder( scope ); + } + } + public static class Builder implements CountDocumentAggregationBuilder { protected final LuceneSearchIndexScope scope; diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java index 7be366efd53..1558c51efb8 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java @@ -163,7 +163,7 @@ public static class Builder private final AbstractLuceneNumericFieldCodec codec; private final Function convertAndEncode; - private LuceneSearchAggregation aggregation; + private final LuceneSearchAggregation aggregation; private final List> rangesInOrder; private final List> encodedRangesInOrder; diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctValuesAggregationFieldStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctValuesAggregationFieldStep.java index c7a1a3be38c..a5661cc6e35 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctValuesAggregationFieldStep.java +++ 
b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctValuesAggregationFieldStep.java @@ -34,7 +34,8 @@ public interface CountDistinctValuesAggregationFieldStep field(CountValuesAggregationFieldReference fieldReference) { + default CountDistinctValuesAggregationOptionsStep field( + CountValuesAggregationFieldReference fieldReference) { return field( fieldReference.absolutePath() ); } } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDocumentsAggregationFinalStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDocumentsAggregationFinalStep.java new file mode 100644 index 00000000000..dd92e01c396 --- /dev/null +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDocumentsAggregationFinalStep.java @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.engine.search.aggregation.dsl; + +import org.hibernate.search.util.common.annotation.Incubating; + +/** + * The initial and final step in a "count documents" aggregation definition. + */ +@Incubating +public interface CountDocumentsAggregationFinalStep extends AggregationFinalStep { + +} diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/ExtendedSearchAggregationFactory.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/ExtendedSearchAggregationFactory.java index 6e0ac7c4757..dfbaad2d0c5 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/ExtendedSearchAggregationFactory.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/ExtendedSearchAggregationFactory.java @@ -63,6 +63,9 @@ default CountDistinctValuesAggregationFieldStep countDistinct() { @Override CountDistinctValuesAggregationFieldStep countDistinctValues(); + @Override + CountDocumentsAggregationFinalStep countDocuments(); + @Override AvgAggregationFieldStep avg(); diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SearchAggregationFactory.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SearchAggregationFactory.java index eb6d7363c2e..d4066a83569 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SearchAggregationFactory.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SearchAggregationFactory.java @@ -153,6 +153,17 @@ public interface SearchAggregationFactory { @Incubating CountDistinctValuesAggregationFieldStep countDistinctValues(); + /** + * Perform the count documents metric aggregation. + *
<p>
+ * Counts the number of matched documents. + * This aggregation may be useful for building {@link #range()} or {@link #terms()} aggregations. + * + * @return The next step. + */ + @Incubating + CountDocumentsAggregationFinalStep countDocuments(); + /** * Perform the count distinct values metric aggregation. *
<p>
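For context, the new entry point plugs into the query DSL like any other aggregation. A minimal usage sketch, assuming the ORM mapper's SearchSession, a hypothetical indexed Book entity and an already-opened searchSession variable (none of which are part of this patch):

    import org.hibernate.search.engine.search.aggregation.AggregationKey;
    import org.hibernate.search.engine.search.query.SearchResult;

    // Request the document count alongside the hits themselves.
    AggregationKey<Long> docCount = AggregationKey.of( "docCount" );
    SearchResult<Book> result = searchSession.search( Book.class )
            .where( f -> f.matchAll() )
            .aggregation( docCount, f -> f.countDocuments() )
            .fetch( 20 );
    // As asserted in MetricNumericFieldsAggregationsIT further down,
    // the aggregated value equals result.total().hitCount().
    Long totalMatched = result.aggregation( docCount );
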
diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TypedSearchAggregationFactory.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TypedSearchAggregationFactory.java index 10b81bcb44c..3139f2eab7f 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TypedSearchAggregationFactory.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TypedSearchAggregationFactory.java @@ -123,6 +123,7 @@ public interface TypedSearchAggregationFactory extends SearchAggregationFact default CountValuesAggregationFieldStep count() { return countValues(); } + /** * Perform the count values metric aggregation. *
<p>
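Note that the pre-existing count() entry point shown in the hunk above is now a default method that simply delegates to countValues(), so legacy call sites keep compiling while the updated tests below switch to the explicit names. A small sketch of the equivalence, assuming the same hypothetical Book entity and searchSession as above:

    AggregationKey<Long> countIntegers = AggregationKey.of( "countIntegers" );
    SearchResult<Book> result = searchSession.search( Book.class )
            .where( f -> f.matchAll() )
            // f.count().field( "integer" ) would build the exact same aggregation,
            // since count() just calls countValues().
            .aggregation( countIntegers, f -> f.countValues().field( "integer" ) )
            .fetch( 20 );
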
@@ -164,6 +165,18 @@ public interface TypedSearchAggregationFactory extends SearchAggregationFact @Incubating CountDistinctValuesAggregationFieldStep countDistinctValues(); + /** + * Perform the count documents metric aggregation. + *
<p>
+ * Counts the number of matched documents. + * This aggregation may be useful for building {@link #range()} or {@link #terms()} aggregations. + * + * @return The next step. + */ + @Override + @Incubating + CountDocumentsAggregationFinalStep countDocuments(); + /** * Perform the avg metric aggregation. *
<p>
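The CountDocumentsAggregationFinalStepImpl introduced next backs this DSL method: the step is both initial and final, so its only job is to produce a SearchAggregation. A sketch of building such an aggregation up front and reusing it, again assuming a hypothetical Book entity and the ORM mapper API (not part of this patch):

    import org.hibernate.search.engine.search.aggregation.AggregationKey;
    import org.hibernate.search.engine.search.aggregation.SearchAggregation;

    var scope = searchSession.scope( Book.class );
    // Build the aggregation once through the scope's aggregation factory...
    SearchAggregation<Long> countDocs = scope.aggregation().countDocuments().toAggregation();
    AggregationKey<Long> docCountKey = AggregationKey.of( "docCount" );
    // ...then pass it to any query created from the same scope.
    Long total = searchSession.search( scope )
            .where( f -> f.matchAll() )
            .aggregation( docCountKey, countDocs )
            .fetch( 20 )
            .aggregation( docCountKey );
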
diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDocumentsAggregationFinalStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDocumentsAggregationFinalStepImpl.java new file mode 100644 index 00000000000..ef32043df96 --- /dev/null +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDocumentsAggregationFinalStepImpl.java @@ -0,0 +1,25 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.engine.search.aggregation.dsl.impl; + +import org.hibernate.search.engine.search.aggregation.SearchAggregation; +import org.hibernate.search.engine.search.aggregation.dsl.CountDocumentsAggregationFinalStep; +import org.hibernate.search.engine.search.aggregation.dsl.spi.SearchAggregationDslContext; +import org.hibernate.search.engine.search.aggregation.spi.AggregationTypeKeys; + +public class CountDocumentsAggregationFinalStepImpl + implements CountDocumentsAggregationFinalStep { + private final SearchAggregationDslContext dslContext; + + public CountDocumentsAggregationFinalStepImpl(SearchAggregationDslContext dslContext) { + this.dslContext = dslContext; + } + + @Override + public SearchAggregation toAggregation() { + return dslContext.scope() + .rootQueryElement( AggregationTypeKeys.COUNT_DOCUMENTS ).type().build(); + } +} diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/spi/AbstractSearchAggregationFactory.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/spi/AbstractSearchAggregationFactory.java index 8c7b688ad94..582964579c2 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/spi/AbstractSearchAggregationFactory.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/spi/AbstractSearchAggregationFactory.java @@ -9,8 +9,9 @@ import org.hibernate.search.engine.common.dsl.spi.DslExtensionState; import org.hibernate.search.engine.search.aggregation.dsl.AggregationFinalStep; import org.hibernate.search.engine.search.aggregation.dsl.AvgAggregationFieldStep; -import org.hibernate.search.engine.search.aggregation.dsl.CountValuesAggregationFieldStep; import org.hibernate.search.engine.search.aggregation.dsl.CountDistinctValuesAggregationFieldStep; +import org.hibernate.search.engine.search.aggregation.dsl.CountDocumentsAggregationFinalStep; +import org.hibernate.search.engine.search.aggregation.dsl.CountValuesAggregationFieldStep; import org.hibernate.search.engine.search.aggregation.dsl.ExtendedSearchAggregationFactory; import org.hibernate.search.engine.search.aggregation.dsl.MaxAggregationFieldStep; import org.hibernate.search.engine.search.aggregation.dsl.MinAggregationFieldStep; @@ -19,8 +20,9 @@ import org.hibernate.search.engine.search.aggregation.dsl.SumAggregationFieldStep; import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationFieldStep; import org.hibernate.search.engine.search.aggregation.dsl.impl.AvgAggregationFieldStepImpl; -import org.hibernate.search.engine.search.aggregation.dsl.impl.CountValuesAggregationFieldStepImpl; import org.hibernate.search.engine.search.aggregation.dsl.impl.CountDistinctValuesAggregationFieldStepImpl; +import org.hibernate.search.engine.search.aggregation.dsl.impl.CountDocumentsAggregationFinalStepImpl; +import org.hibernate.search.engine.search.aggregation.dsl.impl.CountValuesAggregationFieldStepImpl; import 
org.hibernate.search.engine.search.aggregation.dsl.impl.MaxAggregationFieldStepImpl; import org.hibernate.search.engine.search.aggregation.dsl.impl.MinAggregationFieldStepImpl; import org.hibernate.search.engine.search.aggregation.dsl.impl.RangeAggregationFieldStepImpl; @@ -79,6 +81,11 @@ public CountDistinctValuesAggregationFieldStep countDistinctValues() { return new CountDistinctValuesAggregationFieldStepImpl<>( dslContext ); } + @Override + public CountDocumentsAggregationFinalStep countDocuments() { + return new CountDocumentsAggregationFinalStepImpl( dslContext ); + } + public AvgAggregationFieldStep avg() { return new AvgAggregationFieldStepImpl<>( dslContext ); } diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricNumericFieldsAggregationsIT.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricNumericFieldsAggregationsIT.java index 994c6e7eae2..7b0d00d3b8e 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricNumericFieldsAggregationsIT.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricNumericFieldsAggregationsIT.java @@ -70,6 +70,7 @@ class MetricNumericFieldsAggregationsIT { private final AggregationKey avgFloats = AggregationKey.of( "avgFloats" ); private final AggregationKey avgBigIntegers = AggregationKey.of( "avgBigIntegers" ); private final AggregationKey avgBigDecimals = AggregationKey.of( "avgBigDecimals" ); + private final AggregationKey countDocuments = AggregationKey.of( "countDocuments" ); @BeforeEach void setup() { @@ -117,6 +118,7 @@ void test_filteringResults() { assertThat( result.aggregation( avgBigIntegers ) ).isEqualTo( BigInteger.valueOf( 5 ) ); assertThat( result.aggregation( avgBigDecimals ).setScale( 2, RoundingMode.CEILING ) ) .isEqualTo( BigDecimal.valueOf( 580, 2 ) ); + assertThat( result.aggregation( countDocuments ) ).isEqualTo( result.total().hitCount() ); } @Test @@ -159,6 +161,7 @@ void test_allResults() { assertThat( result.aggregation( avgBigIntegers ) ).isEqualTo( BigInteger.valueOf( 5 ) ); assertThat( result.aggregation( avgBigDecimals ).setScale( 2, RoundingMode.CEILING ) ) .isEqualTo( BigDecimal.valueOf( 550, 2 ) ); + assertThat( result.aggregation( countDocuments ) ).isEqualTo( result.total().hitCount() ); } private SearchQuery defineAggregations( @@ -180,10 +183,10 @@ private SearchQuery defineAggregations( .aggregation( maxIntegers, f -> f.max().field( "integer", Integer.class ) ) .aggregation( maxIntegersAsString, f -> f.max().field( "integer", String.class, ValueModel.STRING ) ) .aggregation( maxConverted, f -> f.max().field( "converted", String.class ) ) - .aggregation( countIntegers, f -> f.count().field( "integer" ) ) - .aggregation( countConverted, f -> f.count().field( "converted" ) ) - .aggregation( countDistinctIntegers, f -> f.countDistinct().field( "integer" ) ) - .aggregation( countDistinctConverted, f -> f.countDistinct().field( "converted" ) ) + .aggregation( countIntegers, f -> f.countValues().field( "integer" ) ) + .aggregation( countConverted, f -> f.countValues().field( "converted" ) ) + .aggregation( countDistinctIntegers, f -> f.countDistinctValues().field( "integer" ) ) + .aggregation( countDistinctConverted, f -> f.countDistinctValues().field( "converted" ) ) .aggregation( avgIntegers, f -> f.avg().field( "integer", 
Integer.class ) ) .aggregation( avgIntegersAsString, f -> f.avg().field( "integer", String.class, ValueModel.STRING ) ) .aggregation( avgConverted, f -> f.avg().field( "converted", String.class ) ) @@ -201,6 +204,7 @@ private SearchQuery defineAggregations( .aggregation( avgFloats, f -> f.avg().field( "floatF", Float.class ) ) .aggregation( avgBigIntegers, f -> f.avg().field( "bigInteger", BigInteger.class ) ) .aggregation( avgBigDecimals, f -> f.avg().field( "bigDecimal", BigDecimal.class ) ) + .aggregation( countDocuments, f -> f.countDocuments() ) .toQuery(); } diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricTemporalFieldsAggregationsIT.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricTemporalFieldsAggregationsIT.java index 090edb93a82..0175d77251f 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricTemporalFieldsAggregationsIT.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricTemporalFieldsAggregationsIT.java @@ -121,10 +121,10 @@ private SearchQuery defineAggregations( .aggregation( minConverted, f -> f.min().field( "converted", String.class ) ) .aggregation( maxDates, f -> f.max().field( "date", LocalDate.class ) ) .aggregation( maxConverted, f -> f.max().field( "converted", String.class ) ) - .aggregation( countDates, f -> f.count().field( "date" ) ) - .aggregation( countConverted, f -> f.count().field( "converted" ) ) - .aggregation( countDistinctDates, f -> f.countDistinct().field( "date" ) ) - .aggregation( countDistinctConverted, f -> f.countDistinct().field( "converted" ) ) + .aggregation( countDates, f -> f.countValues().field( "date" ) ) + .aggregation( countConverted, f -> f.countValues().field( "converted" ) ) + .aggregation( countDistinctDates, f -> f.countDistinctValues().field( "date" ) ) + .aggregation( countDistinctConverted, f -> f.countDistinctValues().field( "converted" ) ) .aggregation( avgDates, f -> f.avg().field( "date", LocalDate.class ) ) .aggregation( avgConverted, f -> f.avg().field( "converted", String.class ) ) .toQuery(); diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/operations/MetricAggregationsTestCase.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/operations/MetricAggregationsTestCase.java index 58dcf4ee8a6..0e273c4d8ff 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/operations/MetricAggregationsTestCase.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/operations/MetricAggregationsTestCase.java @@ -79,8 +79,8 @@ public Result testMetricsAggregation(StubMappingScope scope, SingleFieldI .where( SearchPredicateFactory::matchAll ) .aggregation( result.minKey, f -> f.min().field( fieldPath, javaClass, valueModel ) ) .aggregation( result.maxKey, f -> f.max().field( fieldPath, javaClass, valueModel ) ) - .aggregation( result.countKey, f -> f.count().field( fieldPath ) ) - .aggregation( result.countDistinctKey, f -> f.countDistinct().field( fieldPath ) ) + .aggregation( result.countKey, f -> f.countValues().field( fieldPath ) ) + .aggregation( result.countDistinctKey, f -> 
f.countDistinctValues().field( fieldPath ) ) .aggregation( result.avgKey, f -> f.avg().field( fieldPath, javaClass, valueModel ) ); if ( metricAggregationsValues.sum() != null ) { diff --git a/integrationtest/metamodel/orm-elasticsearch/src/test/java/org/hibernate/search/integrationtest/metamodel/orm/elasticsearch/AggregationTypesIT.java b/integrationtest/metamodel/orm-elasticsearch/src/test/java/org/hibernate/search/integrationtest/metamodel/orm/elasticsearch/AggregationTypesIT.java index 51cd1aff3e4..cde1ecf1b4f 100644 --- a/integrationtest/metamodel/orm-elasticsearch/src/test/java/org/hibernate/search/integrationtest/metamodel/orm/elasticsearch/AggregationTypesIT.java +++ b/integrationtest/metamodel/orm-elasticsearch/src/test/java/org/hibernate/search/integrationtest/metamodel/orm/elasticsearch/AggregationTypesIT.java @@ -65,9 +65,9 @@ void smoke() { .aggregation( AggregationKey.of( "sum" ), f -> f.sum().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) .aggregation( AggregationKey.of( "count" ), - f -> f.count().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) + f -> f.countValues().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) .aggregation( AggregationKey.of( "countDistinct" ), - f -> f.countDistinct().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) + f -> f.countDistinctValues().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) .aggregation( AggregationKey.of( "min" ), f -> f.min().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) .aggregation( AggregationKey.of( "max" ), diff --git a/integrationtest/metamodel/orm-lucene/src/test/java/org/hibernate/search/integrationtest/metamodel/orm/lucene/AggregationTypesIT.java b/integrationtest/metamodel/orm-lucene/src/test/java/org/hibernate/search/integrationtest/metamodel/orm/lucene/AggregationTypesIT.java index 19a94f8ef69..080ece0bef1 100644 --- a/integrationtest/metamodel/orm-lucene/src/test/java/org/hibernate/search/integrationtest/metamodel/orm/lucene/AggregationTypesIT.java +++ b/integrationtest/metamodel/orm-lucene/src/test/java/org/hibernate/search/integrationtest/metamodel/orm/lucene/AggregationTypesIT.java @@ -65,9 +65,9 @@ void smoke() { .aggregation( AggregationKey.of( "sum" ), f -> f.sum().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) .aggregation( AggregationKey.of( "count" ), - f -> f.count().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) + f -> f.countValues().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) .aggregation( AggregationKey.of( "countDistinct" ), - f -> f.countDistinct().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) + f -> f.countDistinctValues().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) .aggregation( AggregationKey.of( "min" ), f -> f.min().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) .aggregation( AggregationKey.of( "max" ), diff --git a/integrationtest/metamodel/standalone-elasticsearch/src/test/java/org/hibernate/search/integrationtest/metamodel/standalone/elasticsearch/AggregationTypesIT.java b/integrationtest/metamodel/standalone-elasticsearch/src/test/java/org/hibernate/search/integrationtest/metamodel/standalone/elasticsearch/AggregationTypesIT.java index 46f5b9078dc..f169ab32954 100644 --- a/integrationtest/metamodel/standalone-elasticsearch/src/test/java/org/hibernate/search/integrationtest/metamodel/standalone/elasticsearch/AggregationTypesIT.java +++ 
b/integrationtest/metamodel/standalone-elasticsearch/src/test/java/org/hibernate/search/integrationtest/metamodel/standalone/elasticsearch/AggregationTypesIT.java @@ -86,9 +86,9 @@ void smoke() { .aggregation( AggregationKey.of( "sum" ), f -> f.sum().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) .aggregation( AggregationKey.of( "count" ), - f -> f.count().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) + f -> f.countValues().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) .aggregation( AggregationKey.of( "countDistinct" ), - f -> f.countDistinct().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) + f -> f.countDistinctValues().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) .aggregation( AggregationKey.of( "min" ), f -> f.min().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) .aggregation( AggregationKey.of( "max" ), diff --git a/integrationtest/metamodel/standalone-lucene/src/test/java/org/hibernate/search/integrationtest/metamodel/standalone/lucene/AggregationTypesIT.java b/integrationtest/metamodel/standalone-lucene/src/test/java/org/hibernate/search/integrationtest/metamodel/standalone/lucene/AggregationTypesIT.java index 6312704dcc1..1ea4c7c53ea 100644 --- a/integrationtest/metamodel/standalone-lucene/src/test/java/org/hibernate/search/integrationtest/metamodel/standalone/lucene/AggregationTypesIT.java +++ b/integrationtest/metamodel/standalone-lucene/src/test/java/org/hibernate/search/integrationtest/metamodel/standalone/lucene/AggregationTypesIT.java @@ -86,9 +86,9 @@ void smoke() { .aggregation( AggregationKey.of( "sum" ), f -> f.sum().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) .aggregation( AggregationKey.of( "count" ), - f -> f.count().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) + f -> f.countValues().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) .aggregation( AggregationKey.of( "countDistinct" ), - f -> f.countDistinct().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) + f -> f.countDistinctValues().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) .aggregation( AggregationKey.of( "min" ), f -> f.min().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) .aggregation( AggregationKey.of( "max" ), diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java index 5b4099e7b14..fcdbc7238a3 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java @@ -19,6 +19,7 @@ import org.hibernate.search.backend.lucene.document.model.impl.LuceneIndexRoot; import org.hibernate.search.backend.lucene.document.model.impl.LuceneIndexValueField; import org.hibernate.search.backend.lucene.document.model.impl.LuceneIndexValueFieldTemplate; +import org.hibernate.search.backend.lucene.types.aggregation.impl.LuceneCountDocumentAggregation; import org.hibernate.search.backend.lucene.types.dsl.LuceneIndexFieldTypeFactory; import org.hibernate.search.backend.lucene.types.dsl.impl.LuceneIndexFieldTypeFactoryImpl; import org.hibernate.search.backend.lucene.types.impl.LuceneIndexCompositeNodeType; @@ -33,6 +34,7 @@ import 
org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; import org.hibernate.search.engine.mapper.mapping.building.spi.IndexFieldTypeDefaultsProvider; import org.hibernate.search.engine.reporting.spi.EventContexts; +import org.hibernate.search.engine.search.aggregation.spi.AggregationTypeKeys; import org.hibernate.search.util.common.reporting.EventContext; public class LuceneIndexRootBuilder extends AbstractLuceneIndexCompositeNodeBuilder @@ -55,6 +57,8 @@ public LuceneIndexRootBuilder(EventContext indexEventContext, this.backendMapperContext = backendMapperContext; this.mappedTypeName = mappedTypeName; this.analysisDefinitionRegistry = analysisDefinitionRegistry; + + typeBuilder.queryElementFactory( AggregationTypeKeys.COUNT_DOCUMENTS, LuceneCountDocumentAggregation.factory() ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunction.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunction.java index 843a4717f93..bdac2557c0f 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunction.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunction.java @@ -14,4 +14,8 @@ public interface AggregationFunction> { R implementation(); + default boolean acceptMultipleValues() { + return true; + } + } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java index e9d0ddf3bdc..fb577604054 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java @@ -56,6 +56,9 @@ public void collect(int doc) throws IOException { while ( values.hasNextValue() ) { long value = values.nextValue(); aggregationFunction.apply( value ); + if ( !aggregationFunction.acceptMultipleValues() ) { + break; + } } } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValues.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValues.java new file mode 100644 index 00000000000..daa0852e4ae --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValues.java @@ -0,0 +1,47 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +import java.util.BitSet; + +import com.carrotsearch.hppc.LongHashSet; + +/** + *
<p>
+ * The algorithm to collect distinct elements is inspired by {@code org.apache.lucene.facet.LongValueFacetCounts} + * of Apache Lucene project. + */ +public class CountDistinctValues implements AggregationFunction { + + private final BitSet counts = new BitSet( 1024 ); + private final LongHashSet hashCounts = new LongHashSet(); + + @Override + public void apply(long value) { + if ( value >= 0 && value < counts.size() ) { + counts.set( (int) value ); + } + else { + hashCounts.add( value ); + } + } + + @Override + public void merge(AggregationFunction sibling) { + CountDistinctValues other = sibling.implementation(); + counts.or( other.counts ); + hashCounts.addAll( other.hashCounts ); + } + + @Override + public Long result() { + return (long) counts.cardinality() + hashCounts.size(); + } + + @Override + public CountDistinctValues implementation() { + return this; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValuesCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValuesCollectorFactory.java new file mode 100644 index 00000000000..e1300ef9fcf --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValuesCollectorFactory.java @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorExecutionContext; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; + +public class CountDistinctValuesCollectorFactory + implements + CollectorFactory, + Long, + AggregationFunctionCollectorManager> { + + private final JoiningLongMultiValuesSource source; + private final CollectorKey, Long> key = CollectorKey.create(); + + public CountDistinctValuesCollectorFactory(JoiningLongMultiValuesSource source) { + this.source = source; + } + + @Override + public AggregationFunctionCollectorManager createCollectorManager(CollectorExecutionContext context) { + return new AggregationFunctionCollectorManager<>( source, CountDistinctValues::new ); + } + + @Override + public CollectorKey, Long> getCollectorKey() { + return key; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocuemntsCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocuemntsCollectorFactory.java new file mode 100644 index 00000000000..6e3c0e419d1 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocuemntsCollectorFactory.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +import java.io.IOException; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorExecutionContext; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; + +public class CountDocuemntsCollectorFactory + implements CollectorFactory { + + private final CollectorKey key = CollectorKey.create(); + + public static CountDocuemntsCollectorFactory instance() { + return new CountDocuemntsCollectorFactory(); + } + + @Override + public CountDocumentsCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { + return new CountDocumentsCollectorManager(); + } + + @Override + public CollectorKey getCollectorKey() { + return key; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollector.java new file mode 100644 index 00000000000..b494f0e45c8 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollector.java @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +import java.io.IOException; + +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; + +public class CountDocumentsCollector extends SimpleCollector { + + private long count = 0L; + + @Override + public void collect(int doc) throws IOException { + count++; + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE; + } + + public long count() { + return count; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollectorManager.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollectorManager.java new file mode 100644 index 00000000000..5f0c875d08c --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollectorManager.java @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +import java.io.IOException; +import java.util.Collection; + +import org.apache.lucene.search.CollectorManager; + +public class CountDocumentsCollectorManager implements CollectorManager { + + @Override + public CountDocumentsCollector newCollector() throws IOException { + return new CountDocumentsCollector(); + } + + @Override + public Long reduce(Collection collectors) throws IOException { + long count = 0L; + for ( CountDocumentsCollector collector : collectors ) { + count += collector.count(); + } + return count; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValues.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValues.java new file mode 100644 index 00000000000..fb4b7dedce0 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValues.java @@ -0,0 +1,30 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +public class CountValues implements AggregationFunction { + + private long count = 0L; + + @Override + public void apply(long value) { + count++; + } + + @Override + public void merge(AggregationFunction sibling) { + count += sibling.implementation().count; + } + + @Override + public Long result() { + return count; + } + + @Override + public CountValues implementation() { + return this; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValuesCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValuesCollectorFactory.java new file mode 100644 index 00000000000..337615e81d2 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValuesCollectorFactory.java @@ -0,0 +1,32 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorExecutionContext; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; + +public class CountValuesCollectorFactory + implements + CollectorFactory, Long, AggregationFunctionCollectorManager> { + + private final JoiningLongMultiValuesSource source; + private final CollectorKey, Long> key = CollectorKey.create(); + + public CountValuesCollectorFactory(JoiningLongMultiValuesSource source) { + this.source = source; + } + + @Override + public AggregationFunctionCollectorManager createCollectorManager(CollectorExecutionContext context) { + return new AggregationFunctionCollectorManager<>( source, CountValues::new ); + } + + @Override + public CollectorKey, Long> getCollectorKey() { + return key; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java index 641ba3265d0..2977d48b37a 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java @@ -65,7 +65,7 @@ public List counts(BucketOrder order, int topN, int minDocCount) { List buckets = new LinkedList<>(); while ( pq.size() != 0 ) { LongBucket popped = pq.pop(); - buckets.addFirst( popped ); + buckets.add( 0, popped ); } return buckets; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java index 7f4a6627eda..93cf5ccda06 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java @@ -21,6 +21,9 @@ import com.carrotsearch.hppc.cursors.IntCursor; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.SimpleCollector; @@ -31,11 +34,19 @@ public class RangeCollector extends SimpleCollector { private final IntArrayList[] countsPerBoundaries; private final long[] counts; + private final Collector[][] collectors; + private final CollectorKey[] keys; + private final LeafCollector[][] leafCollectors; + private final CollectorManager[] managers; private LongMultiValues values; - public RangeCollector(LongMultiValuesSource valuesSource, EffectiveRange[] ranges) { + public RangeCollector(LongMultiValuesSource valuesSource, EffectiveRange[] ranges, Collector[][] collectors, + CollectorKey[] keys, CollectorManager[] managers) { this.valuesSource = valuesSource; + this.collectors = collectors; + this.keys = keys; + this.managers = 
managers; // Maps all range inclusive endpoints to int flags; 1 // = start of interval, 2 = end of interval. We need to @@ -128,11 +139,18 @@ else if ( flags == 1 ) { } counts = new long[ranges.length]; + leafCollectors = new LeafCollector[keys.length][]; + for ( int i = 0; i < leafCollectors.length; i++ ) { + leafCollectors[i] = new LeafCollector[ranges.length]; + } } - private void incrementCountForLeafWithIndex(int index) { + private void processLeafWithIndex(int index, int doc) throws IOException { for ( IntCursor cursor : countsPerBoundaries[index] ) { counts[cursor.value]++; + for ( int i = 0; i < keys.length; i++ ) { + leafCollectors[i][cursor.value].collect( doc ); + } } } @@ -169,7 +187,7 @@ public void collect(int doc) throws IOException { // Each document must be counted only once per range. int leafIndex = findLeafIndex( values.nextValue() ); if ( uniqueLeafIndicesForDocument.add( leafIndex ) ) { - incrementCountForLeafWithIndex( leafIndex ); + processLeafWithIndex( leafIndex, doc ); } } } @@ -179,6 +197,18 @@ public long[] counts() { return counts; } + public Collector[][] collectors() { + return collectors; + } + + public CollectorKey[] collectorKeys() { + return keys; + } + + public CollectorManager[] managers() { + return managers; + } + @Override public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; @@ -186,6 +216,11 @@ public ScoreMode scoreMode() { protected void doSetNextReader(LeafReaderContext context) throws IOException { values = valuesSource.getValues( context ); + for ( int i = 0; i < collectors.length; i++ ) { + for ( int j = 0; j < collectors[i].length; j++ ) { + leafCollectors[i][j] = collectors[i][j].getLeafCollector( context ); + } + } } public void finish() throws IOException { diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java index cdaba62f538..f3a6c606b90 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java @@ -4,29 +4,54 @@ */ package org.hibernate.search.backend.lucene.lowlevel.collector.impl; +import java.io.IOException; +import java.util.List; + import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; + public class RangeCollectorFactory implements CollectorFactory> { public static CollectorFactory> instance( - LongMultiValuesSource valuesSource, EffectiveRange[] ranges) { - return new RangeCollectorFactory<>( valuesSource, ranges ); + LongMultiValuesSource valuesSource, EffectiveRange[] ranges, List> collectorFactories) { + return new RangeCollectorFactory<>( valuesSource, ranges, collectorFactories ); } public final CollectorKey key = CollectorKey.create(); private final LongMultiValuesSource valuesSource; private final EffectiveRange[] ranges; + private final List> collectorFactories; - public RangeCollectorFactory(LongMultiValuesSource valuesSource, EffectiveRange[] ranges) { + public RangeCollectorFactory(LongMultiValuesSource valuesSource, EffectiveRange[] ranges, + List> collectorFactories) { 
this.valuesSource = valuesSource; this.ranges = ranges; + this.collectorFactories = collectorFactories; } + @SuppressWarnings({ "rawtypes", "unchecked" }) @Override - public RangeCollectorManager createCollectorManager(CollectorExecutionContext context) { - return new RangeCollectorManager<>( valuesSource, ranges ); + public RangeCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { + Collector[][] collectors = new Collector[collectorFactories.size()][]; + CollectorKey[] keys = new CollectorKey[collectorFactories.size()]; + var managers = new CollectorManager[collectorFactories.size()]; + int index = 0; + for ( CollectorFactory collectorFactory : collectorFactories ) { + CollectorManager collectorManager = collectorFactory.createCollectorManager( context ); + keys[index] = collectorFactory.getCollectorKey(); + managers[index] = collectorManager; + Collector[] c = new Collector[ranges.length]; + collectors[index] = c; + for ( int i = 0; i < c.length; i++ ) { + c[i] = collectorManager.newCollector(); + } + index++; + } + return new RangeCollectorManager<>( valuesSource, ranges, collectors, keys, managers ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java index 92f5e854504..d1056204096 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java @@ -9,21 +9,29 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; +import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; public class RangeCollectorManager implements CollectorManager { private final LongMultiValuesSource valuesSource; private final EffectiveRange[] ranges; + private final Collector[][] collectors; + private final CollectorKey[] keys; + private final CollectorManager[] managers; - public RangeCollectorManager(LongMultiValuesSource valuesSource, EffectiveRange[] ranges) { + public RangeCollectorManager(LongMultiValuesSource valuesSource, EffectiveRange[] ranges, Collector[][] collectors, + CollectorKey[] keys, CollectorManager[] managers) { this.valuesSource = valuesSource; this.ranges = ranges; + this.collectors = collectors; + this.keys = keys; + this.managers = managers; } @Override public RangeCollector newCollector() { - return new RangeCollector( valuesSource, ranges ); + return new RangeCollector( valuesSource, ranges, collectors, keys, managers ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java index e72f44900be..c6efffdd032 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java @@ -74,7 +74,7 @@ public List counts(BucketOrder order, int topN, int 
minDocCount) { List buckets = new LinkedList<>(); while ( pq.size() != 0 ) { LongBucket popped = pq.pop(); - buckets.addFirst( popped ); + buckets.add( 0, popped ); } return buckets; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java index ba8fb98b0b6..edb3f90d4f3 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java @@ -4,63 +4,23 @@ */ package org.hibernate.search.backend.lucene.search.aggregation.impl; -import java.util.Set; - import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; -import org.hibernate.search.backend.lucene.search.extraction.impl.HibernateSearchMultiCollectorManager; import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; -import org.hibernate.search.backend.lucene.search.query.impl.LuceneSearchQueryIndexScope; -import org.hibernate.search.engine.backend.session.spi.BackendSessionContext; import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; -import org.hibernate.search.engine.search.query.spi.QueryParameters; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Collector; import org.apache.lucene.search.Query; -public class AggregationExtractContext { - - private final LuceneSearchQueryIndexScope queryIndexScope; - private final BackendSessionContext sessionContext; - private final IndexReader indexReader; - private final FromDocumentValueConvertContext fromDocumentValueConvertContext; - private final HibernateSearchMultiCollectorManager.MultiCollectedResults multiCollectedResults; - private final Set routingKeys; - private final QueryParameters parameters; - - public AggregationExtractContext(LuceneSearchQueryIndexScope queryIndexScope, BackendSessionContext sessionContext, - IndexReader indexReader, - FromDocumentValueConvertContext fromDocumentValueConvertContext, - HibernateSearchMultiCollectorManager.MultiCollectedResults multiCollectedResults, Set routingKeys, - QueryParameters parameters) { - this.queryIndexScope = queryIndexScope; - this.sessionContext = sessionContext; - this.indexReader = indexReader; - this.fromDocumentValueConvertContext = fromDocumentValueConvertContext; - this.multiCollectedResults = multiCollectedResults; - this.routingKeys = routingKeys; - this.parameters = parameters; - } - - public PredicateRequestContext toPredicateRequestContext(String absolutePath) { - return PredicateRequestContext.withSession( queryIndexScope, sessionContext, routingKeys, parameters ) - .withNestedPath( absolutePath ); - } +public interface AggregationExtractContext { + PredicateRequestContext toPredicateRequestContext(String absolutePath); - public IndexReader getIndexReader() { - return indexReader; - } + IndexReader getIndexReader(); - public FromDocumentValueConvertContext fromDocumentValueConvertContext() { - return fromDocumentValueConvertContext; - } + FromDocumentValueConvertContext fromDocumentValueConvertContext(); - public T getCollectorResults(CollectorKey key) { - return 
multiCollectedResults.get( key ); - } + T getCollectorResults(CollectorKey key); - public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { - return new NestedDocsProvider( nestedDocumentPath, nestedFilter ); - } + NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContext.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContext.java index b7b87fc573a..fbd865f17b7 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContext.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContext.java @@ -4,54 +4,23 @@ */ package org.hibernate.search.backend.lucene.search.aggregation.impl; -import java.util.Set; - import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; -import org.hibernate.search.backend.lucene.search.extraction.impl.ExtractionRequirements; import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; -import org.hibernate.search.backend.lucene.search.query.impl.LuceneSearchQueryIndexScope; -import org.hibernate.search.engine.backend.session.spi.BackendSessionContext; import org.hibernate.search.engine.search.common.NamedValues; -import org.hibernate.search.engine.search.query.spi.QueryParameters; import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.Query; -public final class AggregationRequestContext { - - private final LuceneSearchQueryIndexScope queryIndexScope; - private final BackendSessionContext sessionContext; - private final Set routingKeys; - private final ExtractionRequirements.Builder extractionRequirementsBuilder; - private final QueryParameters parameters; - - public AggregationRequestContext(LuceneSearchQueryIndexScope queryIndexScope, BackendSessionContext sessionContext, - Set routingKeys, ExtractionRequirements.Builder extractionRequirementsBuilder, - QueryParameters parameters) { - this.queryIndexScope = queryIndexScope; - this.sessionContext = sessionContext; - this.routingKeys = routingKeys; - this.extractionRequirementsBuilder = extractionRequirementsBuilder; - this.parameters = parameters; - } - - public > void requireCollector( - CollectorFactory collectorFactory) { - extractionRequirementsBuilder.requireCollectorForAllMatchingDocs( collectorFactory ); - } +public interface AggregationRequestContext { + > void requireCollector( + CollectorFactory collectorFactory + ); - public NamedValues queryParameters() { - return parameters; - } + NamedValues queryParameters(); - public PredicateRequestContext toPredicateRequestContext(String absolutePath) { - return PredicateRequestContext.withSession( queryIndexScope, sessionContext, routingKeys, parameters ) - .withNestedPath( absolutePath ); - } + PredicateRequestContext toPredicateRequestContext(String absolutePath); - public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { - return new NestedDocsProvider( nestedDocumentPath, nestedFilter ); - } + NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query 
nestedFilter); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationExtractContext.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationExtractContext.java new file mode 100644 index 00000000000..c285af570d4 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationExtractContext.java @@ -0,0 +1,72 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.search.aggregation.impl; + +import java.util.Set; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.extraction.impl.HibernateSearchMultiCollectorManager; +import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; +import org.hibernate.search.backend.lucene.search.query.impl.LuceneSearchQueryIndexScope; +import org.hibernate.search.engine.backend.session.spi.BackendSessionContext; +import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; +import org.hibernate.search.engine.search.query.spi.QueryParameters; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Query; + +public class RootAggregationExtractContext implements AggregationExtractContext { + + private final LuceneSearchQueryIndexScope queryIndexScope; + private final BackendSessionContext sessionContext; + private final IndexReader indexReader; + private final FromDocumentValueConvertContext fromDocumentValueConvertContext; + private final HibernateSearchMultiCollectorManager.MultiCollectedResults multiCollectedResults; + private final Set routingKeys; + private final QueryParameters parameters; + + public RootAggregationExtractContext(LuceneSearchQueryIndexScope queryIndexScope, + BackendSessionContext sessionContext, + IndexReader indexReader, + FromDocumentValueConvertContext fromDocumentValueConvertContext, + HibernateSearchMultiCollectorManager.MultiCollectedResults multiCollectedResults, Set routingKeys, + QueryParameters parameters) { + this.queryIndexScope = queryIndexScope; + this.sessionContext = sessionContext; + this.indexReader = indexReader; + this.fromDocumentValueConvertContext = fromDocumentValueConvertContext; + this.multiCollectedResults = multiCollectedResults; + this.routingKeys = routingKeys; + this.parameters = parameters; + } + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return PredicateRequestContext.withSession( queryIndexScope, sessionContext, routingKeys, parameters ) + .withNestedPath( absolutePath ); + } + + @Override + public IndexReader getIndexReader() { + return indexReader; + } + + @Override + public FromDocumentValueConvertContext fromDocumentValueConvertContext() { + return fromDocumentValueConvertContext; + } + + @Override + public T getCollectorResults(CollectorKey key) { + return multiCollectedResults.get( key ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return new NestedDocsProvider( nestedDocumentPath, nestedFilter ); + } +} diff --git 
a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationRequestContext.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationRequestContext.java new file mode 100644 index 00000000000..39e8fcd3bb6 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationRequestContext.java @@ -0,0 +1,63 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.search.aggregation.impl; + +import java.util.Set; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.extraction.impl.ExtractionRequirements; +import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; +import org.hibernate.search.backend.lucene.search.query.impl.LuceneSearchQueryIndexScope; +import org.hibernate.search.engine.backend.session.spi.BackendSessionContext; +import org.hibernate.search.engine.search.common.NamedValues; +import org.hibernate.search.engine.search.query.spi.QueryParameters; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.Query; + +public final class RootAggregationRequestContext implements AggregationRequestContext { + + private final LuceneSearchQueryIndexScope queryIndexScope; + private final BackendSessionContext sessionContext; + private final Set routingKeys; + private final ExtractionRequirements.Builder extractionRequirementsBuilder; + private final QueryParameters parameters; + + public RootAggregationRequestContext(LuceneSearchQueryIndexScope queryIndexScope, + BackendSessionContext sessionContext, + Set routingKeys, ExtractionRequirements.Builder extractionRequirementsBuilder, + QueryParameters parameters) { + this.queryIndexScope = queryIndexScope; + this.sessionContext = sessionContext; + this.routingKeys = routingKeys; + this.extractionRequirementsBuilder = extractionRequirementsBuilder; + this.parameters = parameters; + } + + @Override + public > void requireCollector( + CollectorFactory collectorFactory + ) { + extractionRequirementsBuilder.requireCollectorForAllMatchingDocs( collectorFactory ); + } + + @Override + public NamedValues queryParameters() { + return parameters; + } + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return PredicateRequestContext.withSession( queryIndexScope, sessionContext, routingKeys, parameters ) + .withNestedPath( absolutePath ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return new NestedDocsProvider( nestedDocumentPath, nestedFilter ); + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java index 9b84c33d833..9a90f34eaba 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java +++ 
b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java @@ -15,6 +15,7 @@ import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TopDocsDataCollectorExecutionContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; +import org.hibernate.search.backend.lucene.search.aggregation.impl.RootAggregationExtractContext; import org.hibernate.search.backend.lucene.search.extraction.impl.LuceneCollectors; import org.hibernate.search.backend.lucene.search.projection.impl.LuceneSearchProjection; import org.hibernate.search.backend.lucene.search.projection.impl.ProjectionExtractContext; @@ -112,7 +113,7 @@ private List extractHits(ProjectionHitMapper projectionHitMapper, int } private Map, ?> extractAggregations() throws IOException { - AggregationExtractContext aggregationExtractContext = new AggregationExtractContext( + AggregationExtractContext aggregationExtractContext = new RootAggregationExtractContext( requestContext.getQueryIndexScope(), requestContext.getSessionContext(), indexSearcher.getIndexReader(), diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java index 2d1fb77dd7b..c5eaaee583f 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java @@ -20,6 +20,7 @@ import org.hibernate.search.backend.lucene.orchestration.impl.LuceneSyncWorkOrchestrator; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; +import org.hibernate.search.backend.lucene.search.aggregation.impl.RootAggregationRequestContext; import org.hibernate.search.backend.lucene.search.extraction.impl.ExtractionRequirements; import org.hibernate.search.backend.lucene.search.highlighter.impl.LuceneAbstractSearchHighlighter; import org.hibernate.search.backend.lucene.search.predicate.impl.LuceneSearchPredicate; @@ -265,7 +266,7 @@ public LuceneSearchQuery build() { if ( aggregations != null ) { aggregationExtractors = new LinkedHashMap<>(); AggregationRequestContext aggregationRequestContext = - new AggregationRequestContext( scope, sessionContext, routingKeys, extractionRequirementsBuilder, + new RootAggregationRequestContext( scope, sessionContext, routingKeys, extractionRequirementsBuilder, parameters ); for ( Map.Entry, LuceneSearchAggregation> entry : aggregations.entrySet() ) { aggregationExtractors.put( entry.getKey(), entry.getValue().request( aggregationRequestContext ) ); diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java index 22aa85ff9e8..964419b735e 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java +++ 
b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java @@ -5,7 +5,7 @@ package org.hibernate.search.backend.lucene.types.aggregation.impl; import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CompensatedSumCollectorFactory; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountValuesCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; @@ -34,9 +34,9 @@ void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestConte compensatedSumCollectorKey = sumCollectorFactory.getCollectorKey(); context.requireCollector( sumCollectorFactory ); - CountCollectorFactory countCollectorFactory = new CountCollectorFactory( source ); - collectorKey = countCollectorFactory.getCollectorKey(); - context.requireCollector( countCollectorFactory ); + CountValuesCollectorFactory countValuesCollectorFactory = new CountValuesCollectorFactory( source ); + collectorKey = countValuesCollectorFactory.getCollectorKey(); + context.requireCollector( countValuesCollectorFactory ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java index 589e89ea9a8..597f8d57f9f 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java @@ -5,8 +5,8 @@ package org.hibernate.search.backend.lucene.types.aggregation.impl; import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.AggregationFunctionCollector; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.Count; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountValues; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountValuesCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.SumCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; @@ -28,7 +28,7 @@ public static Factory factory(AbstractLuceneNumericFieldCodec codec } // Supplementary collector used by the avg function - protected CollectorKey, Long> countCollectorKey; + protected CollectorKey, Long> countCollectorKey; LuceneAvgNumericFieldAggregation(Builder builder) { super( builder ); @@ -37,11 +37,11 @@ public static Factory factory(AbstractLuceneNumericFieldCodec codec @Override void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestContext context) { SumCollectorFactory sumCollectorFactory = new SumCollectorFactory( source ); - 
CountCollectorFactory countCollectorFactory = new CountCollectorFactory( source ); + CountValuesCollectorFactory countValuesCollectorFactory = new CountValuesCollectorFactory( source ); collectorKey = sumCollectorFactory.getCollectorKey(); - countCollectorKey = countCollectorFactory.getCollectorKey(); + countCollectorKey = countValuesCollectorFactory.getCollectorKey(); context.requireCollector( sumCollectorFactory ); - context.requireCollector( countCollectorFactory ); + context.requireCollector( countValuesCollectorFactory ); } private static class LuceneNumericMetricFieldAggregationExtraction implements Extractor { diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDistinctNumericLongAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDistinctNumericLongAggregation.java index 7e055d18150..f83e3d09acc 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDistinctNumericLongAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDistinctNumericLongAggregation.java @@ -4,7 +4,7 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountDistinctCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountDistinctValuesCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; @@ -25,7 +25,7 @@ public static Factory factory(AbstractLuceneNumericFieldCodec codec @Override void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestContext context) { - CountDistinctCollectorFactory collectorFactory = new CountDistinctCollectorFactory( source ); + CountDistinctValuesCollectorFactory collectorFactory = new CountDistinctValuesCollectorFactory( source ); collectorKey = collectorFactory.getCollectorKey(); context.requireCollector( collectorFactory ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java new file mode 100644 index 00000000000..657a23bf319 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java @@ -0,0 +1,100 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +import java.util.Set; + +import org.hibernate.search.backend.lucene.logging.impl.QueryLog; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountDocuemntsCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; +import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexCompositeNodeContext; +import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; +import org.hibernate.search.engine.search.aggregation.spi.CountDocumentAggregationBuilder; +import org.hibernate.search.engine.search.common.spi.SearchQueryElementFactory; + +public class LuceneCountDocumentAggregation implements LuceneSearchAggregation { + + @SuppressWarnings("unchecked") + public static Factory factory() { + return Factory.INSTANCE; + } + + private final Set indexNames; + + LuceneCountDocumentAggregation(Builder builder) { + this.indexNames = builder.scope.hibernateSearchIndexNames(); + } + + @Override + public Extractor request(AggregationRequestContext context) { + CountDocuemntsCollectorFactory collectorFactory = CountDocuemntsCollectorFactory.instance(); + var collectorKey = collectorFactory.getCollectorKey(); + + context.requireCollector( collectorFactory ); + return new CountDocumentsExtractor( collectorKey ); + } + + private record CountDocumentsExtractor(CollectorKey collectorKey) implements Extractor { + + @Override + public Long extract(AggregationExtractContext context) { + return context.getCollectorResults( collectorKey ); + } + } + + @Override + public Set indexNames() { + return indexNames; + } + + protected static class Factory + implements + SearchQueryElementFactory, + LuceneSearchIndexCompositeNodeContext> { + + private static final Factory INSTANCE = new Factory(); + + private Factory() { + } + + @Override + public CountDocumentAggregationBuilder.TypeSelector create(LuceneSearchIndexScope scope, + LuceneSearchIndexCompositeNodeContext node) { + return new TypeSelector( scope ); + } + + @Override + public void checkCompatibleWith(SearchQueryElementFactory other) { + if ( !getClass().equals( other.getClass() ) ) { + throw QueryLog.INSTANCE.differentImplementationClassForQueryElement( getClass(), other.getClass() ); + } + } + } + + protected record TypeSelector(LuceneSearchIndexScope scope) implements CountDocumentAggregationBuilder.TypeSelector { + @Override + public CountDocumentAggregationBuilder type() { + return new Builder( scope ); + } + } + + public static class Builder implements CountDocumentAggregationBuilder { + + protected final LuceneSearchIndexScope scope; + + public Builder(LuceneSearchIndexScope scope) { + this.scope = scope; + } + + @Override + public LuceneCountDocumentAggregation build() { + return new LuceneCountDocumentAggregation( this ); + } + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountNumericLongAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountNumericLongAggregation.java index 7369a8fd21c..b9128f0eae4 100644 --- 
a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountNumericLongAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountNumericLongAggregation.java @@ -4,7 +4,7 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountValuesCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; @@ -25,7 +25,7 @@ public static Factory factory(AbstractLuceneNumericFieldCodec codec @Override void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestContext context) { - CountCollectorFactory collectorFactory = new CountCollectorFactory( source ); + CountValuesCollectorFactory collectorFactory = new CountValuesCollectorFactory( source ); collectorKey = collectorFactory.getCollectorKey(); context.requireCollector( collectorFactory ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java index 4d639ecf19c..845b29480a9 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java @@ -4,12 +4,17 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; +import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.function.Function; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollectorFactory; @@ -17,23 +22,35 @@ import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; +import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; import org.hibernate.search.backend.lucene.types.codec.impl.AbstractLuceneNumericFieldCodec; +import 
org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.spi.RangeAggregationBuilder; +import org.hibernate.search.engine.search.common.NamedValues; import org.hibernate.search.engine.search.common.ValueModel; import org.hibernate.search.util.common.data.Range; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.Query; + /** * @param The type of field values. * @param The type of encoded field values. * @param The type of keys in the returned map. It can be {@code F} + * @param The type of aggregated values. * or a different type if value converters are used. */ -public class LuceneNumericRangeAggregation - extends AbstractLuceneBucketAggregation, Long> { +public class LuceneNumericRangeAggregation + extends AbstractLuceneBucketAggregation, V> { + private final LuceneSearchAggregation aggregation; private final AbstractLuceneNumericFieldCodec codec; private final List> rangesInOrder; @@ -41,26 +58,32 @@ public class LuceneNumericRangeAggregation private CollectorKey collectorKey; - private LuceneNumericRangeAggregation(Builder builder) { + private LuceneNumericRangeAggregation(Builder builder) { super( builder ); + this.aggregation = builder.aggregation; this.codec = builder.codec; this.rangesInOrder = builder.rangesInOrder; this.encodedRangesInOrder = builder.encodedRangesInOrder; } @Override - public Extractor, Long>> request(AggregationRequestContext context) { + public Extractor, V>> request(AggregationRequestContext context) { NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( absoluteFieldPath, nestedDocsProvider ); + LocalAggregationRequestContext localAggregationContext = new LocalAggregationRequestContext( context ); + Extractor extractor = aggregation.request( localAggregationContext ); + var rangeFactory = RangeCollectorFactory.instance( source, - codec.getDomain().createEffectiveRanges( encodedRangesInOrder ) ); + codec.getDomain().createEffectiveRanges( encodedRangesInOrder ), + localAggregationContext.localCollectorFactories() ); + collectorKey = rangeFactory.getCollectorKey(); context.requireCollector( rangeFactory ); - return new LuceneNumericRangeAggregationExtractor(); + return new LuceneNumericRangeAggregationExtractor( extractor ); } public static class Factory @@ -78,20 +101,38 @@ public Factory(AbstractLuceneNumericFieldCodec codec) { } } - private class LuceneNumericRangeAggregationExtractor implements Extractor, Long>> { + private class LuceneNumericRangeAggregationExtractor implements Extractor, V>> { + private final Extractor extractor; + + public LuceneNumericRangeAggregationExtractor(Extractor extractor) { + this.extractor = extractor; + } @Override - public Map, Long> extract(AggregationExtractContext context) { + public Map, V> extract(AggregationExtractContext context) throws IOException { RangeCollector rangeCollector = context.getCollectorResults( collectorKey ); - long[] counts = rangeCollector.counts(); - Map, Long> result = new LinkedHashMap<>(); + LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); + + Map, V> result = new LinkedHashMap<>(); for ( int i = 0; i < rangesInOrder.size(); i++ ) { - result.put( rangesInOrder.get( i 
), counts[i] ); + localContext.setResults( prepareResults( i, rangeCollector ) ); + result.put( rangesInOrder.get( i ), extractor.extract( localContext ) ); } return result; } + + private Map, Object> prepareResults(int index, RangeCollector rangeCollector) throws IOException { + Map, Object> result = new HashMap<>(); + Collector[][] collectors = rangeCollector.collectors(); + CollectorKey[] collectorKeys = rangeCollector.collectorKeys(); + CollectorManager[] managers = rangeCollector.managers(); + for ( int i = 0; i < collectorKeys.length; i++ ) { + result.put( collectorKeys[i], managers[i].reduce( List.of( collectors[i][index] ) ) ); + } + return result; + } } public static class TypeSelector implements RangeAggregationBuilder.TypeSelector { @@ -107,28 +148,34 @@ private TypeSelector(AbstractLuceneNumericFieldCodec codec, } @Override - public Builder type(Class expectedType, ValueModel valueModel) { - return new Builder<>( codec, - field.encodingContext().encoder( scope, field, codec, expectedType, valueModel ), - scope, field ); + public Builder type(Class expectedType, ValueModel valueModel) { + return new CountBuilder<>( + codec, field.encodingContext().encoder( scope, field, codec, expectedType, valueModel ), + scope, field + ); } } - public static class Builder - extends AbstractBuilder, Long> - implements RangeAggregationBuilder { + public static class Builder + extends AbstractBuilder, V> + implements RangeAggregationBuilder { private final AbstractLuceneNumericFieldCodec codec; private final Function convertAndEncode; - private final List> rangesInOrder = new ArrayList<>(); - private final List> encodedRangesInOrder = new ArrayList<>(); + private final LuceneSearchAggregation aggregation; + private final List> rangesInOrder; + private final List> encodedRangesInOrder; - public Builder(AbstractLuceneNumericFieldCodec codec, Function convertAndEncode, - LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field) { + protected Builder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + AbstractLuceneNumericFieldCodec codec, Function convertAndEncode, + LuceneSearchAggregation aggregation, List> rangesInOrder, List> encodedRangesInOrder) { super( scope, field ); this.codec = codec; this.convertAndEncode = convertAndEncode; + this.aggregation = aggregation; + this.rangesInOrder = rangesInOrder; + this.encodedRangesInOrder = encodedRangesInOrder; } @Override @@ -138,8 +185,101 @@ public void range(Range range) { } @Override - public LuceneNumericRangeAggregation build() { + public RangeAggregationBuilder withValue(SearchAggregation aggregation) { + return new Builder<>( scope, field, codec, convertAndEncode, LuceneSearchAggregation.from( scope, aggregation ), + new ArrayList<>( rangesInOrder ), new ArrayList<>( encodedRangesInOrder ) ); + } + + @Override + public LuceneNumericRangeAggregation build() { return new LuceneNumericRangeAggregation<>( this ); } } + + public static class CountBuilder extends Builder { + + protected CountBuilder(AbstractLuceneNumericFieldCodec codec, Function convertAndEncode, + LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field) { + super( scope, field, codec, convertAndEncode, + LuceneSearchAggregation.from( scope, + LuceneCountNumericLongAggregation.factory( codec ).create( scope, field ).build() ), + new ArrayList<>(), new ArrayList<>() ); + } + } + + private static class LocalAggregationRequestContext implements AggregationRequestContext { + + private final AggregationRequestContext delegate; + private 
final Set> localCollectorFactories = new LinkedHashSet<>(); + + private LocalAggregationRequestContext(AggregationRequestContext delegate) { + this.delegate = delegate; + } + + @Override + public > void requireCollector( + CollectorFactory collectorFactory) { + localCollectorFactories.add( collectorFactory ); + } + + @Override + public NamedValues queryParameters() { + return delegate.queryParameters(); + } + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return delegate.toPredicateRequestContext( absolutePath ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return delegate.createNestedDocsProvider( nestedDocumentPath, nestedFilter ); + } + + public List> localCollectorFactories() { + return new ArrayList<>( localCollectorFactories ); + } + } + + private static class LocalAggregationExtractContext implements AggregationExtractContext { + + private final AggregationExtractContext delegate; + + private Map, Object> results; + + private LocalAggregationExtractContext(AggregationExtractContext delegate) { + this.delegate = delegate; + } + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return delegate.toPredicateRequestContext( absolutePath ); + } + + @Override + public IndexReader getIndexReader() { + return delegate.getIndexReader(); + } + + @Override + public FromDocumentValueConvertContext fromDocumentValueConvertContext() { + return delegate.fromDocumentValueConvertContext(); + } + + @SuppressWarnings("unchecked") + @Override + public T getCollectorResults(CollectorKey key) { + return (T) results.get( key ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return delegate.createNestedDocsProvider( nestedDocumentPath, nestedFilter ); + } + + public void setResults(Map, Object> results) { + this.results = results; + } + } } diff --git a/metamodel/metamodel-processor/src/main/java/org/hibernate/search/processor/writer/impl/TraitReferenceMapping.java b/metamodel/metamodel-processor/src/main/java/org/hibernate/search/processor/writer/impl/TraitReferenceMapping.java index 97c50ce6942..474e09af463 100644 --- a/metamodel/metamodel-processor/src/main/java/org/hibernate/search/processor/writer/impl/TraitReferenceMapping.java +++ b/metamodel/metamodel-processor/src/main/java/org/hibernate/search/processor/writer/impl/TraitReferenceMapping.java @@ -10,8 +10,8 @@ import org.hibernate.search.engine.backend.types.IndexFieldTraits; import org.hibernate.search.engine.search.reference.aggregation.AvgAggregationFieldReference; -import org.hibernate.search.engine.search.reference.aggregation.CountValuesAggregationFieldReference; import org.hibernate.search.engine.search.reference.aggregation.CountDistinctValuesAggregationFieldReference; +import org.hibernate.search.engine.search.reference.aggregation.CountValuesAggregationFieldReference; import org.hibernate.search.engine.search.reference.aggregation.MaxAggregationFieldReference; import org.hibernate.search.engine.search.reference.aggregation.MinAggregationFieldReference; import org.hibernate.search.engine.search.reference.aggregation.RangeAggregationFieldReference; From 44c71882b333c4e626b8af17fe7362f127d81d2e Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Wed, 2 Jul 2025 19:28:56 +0200 Subject: [PATCH 10/23] HSEARCH-3661 Test countValues aggregations on multivalued fields --- 
.../MetricNumericFieldsAggregationsIT.java | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricNumericFieldsAggregationsIT.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricNumericFieldsAggregationsIT.java index 7b0d00d3b8e..a8c1a9860f7 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricNumericFieldsAggregationsIT.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricNumericFieldsAggregationsIT.java @@ -71,6 +71,8 @@ class MetricNumericFieldsAggregationsIT { private final AggregationKey avgBigIntegers = AggregationKey.of( "avgBigIntegers" ); private final AggregationKey avgBigDecimals = AggregationKey.of( "avgBigDecimals" ); private final AggregationKey countDocuments = AggregationKey.of( "countDocuments" ); + private final AggregationKey countValuesIntegerMultiValued = AggregationKey.of( "countValuesIntegerMultiValued" ); + private final AggregationKey countDistinctValuesIntegerMultiValued = AggregationKey.of( "countDistinctValuesIntegerMultiValued" ); @BeforeEach void setup() { @@ -119,6 +121,8 @@ void test_filteringResults() { assertThat( result.aggregation( avgBigDecimals ).setScale( 2, RoundingMode.CEILING ) ) .isEqualTo( BigDecimal.valueOf( 580, 2 ) ); assertThat( result.aggregation( countDocuments ) ).isEqualTo( result.total().hitCount() ); + assertThat( result.aggregation( countValuesIntegerMultiValued ) ).isEqualTo( 25 ); + assertThat( result.aggregation( countDistinctValuesIntegerMultiValued ) ).isEqualTo( 3 ); } @Test @@ -162,6 +166,8 @@ void test_allResults() { assertThat( result.aggregation( avgBigDecimals ).setScale( 2, RoundingMode.CEILING ) ) .isEqualTo( BigDecimal.valueOf( 550, 2 ) ); assertThat( result.aggregation( countDocuments ) ).isEqualTo( result.total().hitCount() ); + assertThat( result.aggregation( countValuesIntegerMultiValued ) ).isEqualTo( 50 ); + assertThat( result.aggregation( countDistinctValuesIntegerMultiValued ) ).isEqualTo( 6 ); } private SearchQuery defineAggregations( @@ -205,6 +211,8 @@ private SearchQuery defineAggregations( .aggregation( avgBigIntegers, f -> f.avg().field( "bigInteger", BigInteger.class ) ) .aggregation( avgBigDecimals, f -> f.avg().field( "bigDecimal", BigDecimal.class ) ) .aggregation( countDocuments, f -> f.countDocuments() ) + .aggregation( countDistinctValuesIntegerMultiValued, f -> f.countDistinctValues().field( "integerMultiValued" ) ) + .aggregation( countValuesIntegerMultiValued, f -> f.countValues().field( "integerMultiValued" ) ) .toQuery(); } @@ -227,6 +235,10 @@ private void initData() { document.addValue( mainIndex.binding().bigDecimal, BigDecimal.valueOf( value ) ); document.addValue( mainIndex.binding().style, style ); + for ( int j = 0; j < 5; j++ ) { + document.addValue( mainIndex.binding().integerMultiValued, value ); + } + DocumentElement object = document.addObject( mainIndex.binding().object ); object.addValue( mainIndex.binding().nestedInteger, value ); } ); @@ -238,6 +250,7 @@ private void initData() { @SuppressWarnings("unused") private static class IndexBinding { final IndexFieldReference integer; + final IndexFieldReference integerMultiValued; final IndexFieldReference converted; final IndexFieldReference doubleF; final IndexFieldReference floatF; @@ -249,6 
+262,7 @@ private static class IndexBinding { IndexBinding(IndexSchemaElement root) { integer = root.field( "integer", f -> f.asInteger().aggregable( Aggregable.YES ) ).toReference(); + integerMultiValued = root.field( "integerMultiValued", f -> f.asInteger().aggregable( Aggregable.YES ) ).multiValued().toReference(); converted = root.field( "converted", f -> f.asInteger().aggregable( Aggregable.YES ) .projectionConverter( String.class, (value, context) -> value.toString() ) ).toReference(); doubleF = root.field( "doubleF", f -> f.asDouble().aggregable( Aggregable.YES ) ).toReference(); From 0b703471fc911ed5eaf146f158e77be4f5377ace Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Thu, 3 Jul 2025 11:08:15 +0200 Subject: [PATCH 11/23] HSEARCH-3661 Update the Elasticsearch backend with "new aggregations" --- .../impl/ElasticsearchIndexRootBuilder.java | 3 + ...bstractElasticsearchBucketAggregation.java | 4 +- ...ElasticsearchCountDocumentAggregation.java | 121 ++++++++++++++++++ .../impl/ElasticsearchRangeAggregation.java | 85 ++++++++---- .../impl/ElasticsearchTermsAggregation.java | 2 +- ...lasticsearchSearchQueryExtractContext.java | 4 +- .../dsl/impl/LuceneIndexRootBuilder.java | 2 +- .../impl/LuceneCountDocumentAggregation.java | 1 - .../impl/LuceneNumericRangeAggregation.java | 2 +- .../search/aggregation/AggregationDslIT.java | 71 +++++++++- .../dsl/impl/LuceneIndexRootBuilder.java | 2 +- .../impl/LuceneCountDocumentAggregation.java | 1 - .../impl/LuceneNumericRangeAggregation.java | 2 +- 13 files changed, 265 insertions(+), 35 deletions(-) create mode 100644 backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchCountDocumentAggregation.java diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/document/model/dsl/impl/ElasticsearchIndexRootBuilder.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/document/model/dsl/impl/ElasticsearchIndexRootBuilder.java index b5f0f8b0250..d1f06b9ac14 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/document/model/dsl/impl/ElasticsearchIndexRootBuilder.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/document/model/dsl/impl/ElasticsearchIndexRootBuilder.java @@ -26,6 +26,7 @@ import org.hibernate.search.backend.elasticsearch.lowlevel.index.mapping.impl.RoutingType; import org.hibernate.search.backend.elasticsearch.lowlevel.index.settings.impl.IndexSettings; import org.hibernate.search.backend.elasticsearch.lowlevel.index.settings.impl.PropertyMappingIndexSettingsContributor; +import org.hibernate.search.backend.elasticsearch.search.aggregation.impl.ElasticsearchCountDocumentAggregation; import org.hibernate.search.backend.elasticsearch.types.dsl.ElasticsearchIndexFieldTypeFactory; import org.hibernate.search.backend.elasticsearch.types.dsl.provider.impl.ElasticsearchIndexFieldTypeFactoryProvider; import org.hibernate.search.backend.elasticsearch.types.impl.ElasticsearchIndexCompositeNodeType; @@ -45,6 +46,7 @@ import org.hibernate.search.engine.common.tree.spi.TreeNodeInclusion; import org.hibernate.search.engine.mapper.mapping.building.spi.IndexFieldTypeDefaultsProvider; import org.hibernate.search.engine.reporting.spi.EventContexts; +import org.hibernate.search.engine.search.aggregation.spi.AggregationTypeKeys; import org.hibernate.search.util.common.reporting.EventContext; public class ElasticsearchIndexRootBuilder extends 
AbstractElasticsearchIndexCompositeNodeBuilder @@ -84,6 +86,7 @@ public ElasticsearchIndexRootBuilder(ElasticsearchIndexFieldTypeFactoryProvider this.customIndexMapping = customIndexMapping; this.defaultDynamicType = DynamicType.create( dynamicMapping ); + this.typeBuilder.queryElementFactory( AggregationTypeKeys.COUNT_DOCUMENTS, ElasticsearchCountDocumentAggregation.factory( false ) ); this.addDefaultImplicitFields(); } diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchBucketAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchBucketAggregation.java index 3331b0838ac..9042a92d2b2 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchBucketAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchBucketAggregation.java @@ -43,7 +43,7 @@ protected final JsonObject doRequest(AggregationRequestContext context) { JsonObject outerObject = new JsonObject(); JsonObject innerObject = new JsonObject(); - doRequest( outerObject, innerObject ); + doRequest( outerObject, innerObject, context ); if ( isNested() ) { JsonObject rootDocCountSubAggregationOuterObject = new JsonObject(); @@ -56,7 +56,7 @@ protected final JsonObject doRequest(AggregationRequestContext context) { return outerObject; } - protected abstract void doRequest(JsonObject outerObject, JsonObject innerObject); + protected abstract void doRequest(JsonObject outerObject, JsonObject innerObject, AggregationRequestContext context); protected final long getBucketDocCount(JsonObject bucket) { if ( isNested() ) { diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchCountDocumentAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchCountDocumentAggregation.java new file mode 100644 index 00000000000..af8966b4ff7 --- /dev/null +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchCountDocumentAggregation.java @@ -0,0 +1,121 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.elasticsearch.search.aggregation.impl; + +import org.hibernate.search.backend.elasticsearch.gson.impl.JsonAccessor; +import org.hibernate.search.backend.elasticsearch.logging.impl.ElasticsearchClientLog; +import org.hibernate.search.backend.elasticsearch.logging.impl.QueryLog; +import org.hibernate.search.backend.elasticsearch.search.common.impl.ElasticsearchSearchIndexCompositeNodeContext; +import org.hibernate.search.backend.elasticsearch.search.common.impl.ElasticsearchSearchIndexScope; +import org.hibernate.search.backend.elasticsearch.search.query.impl.ElasticsearchSearchQueryExtractContext; +import org.hibernate.search.engine.search.aggregation.AggregationKey; +import org.hibernate.search.engine.search.aggregation.spi.CountDocumentAggregationBuilder; +import org.hibernate.search.engine.search.common.spi.SearchQueryElementFactory; + +import com.google.gson.JsonObject; + +public class ElasticsearchCountDocumentAggregation extends AbstractElasticsearchAggregation { + + private static final JsonAccessor TOTAL_HITS_VALUE_PROPERTY_ACCESSOR = + JsonAccessor.root().property( "hits" ) + .property( "total" ) + .property( "value" ).asLong(); + + private static final JsonAccessor RESPONSE_DOC_COUNT_ACCESSOR = + JsonAccessor.root().property( "doc_count" ).asLong(); + private static final JsonAccessor RESPONSE_ROOT_DOC_COUNT_ACCESSOR = + JsonAccessor.root().property( "root_doc_count" ).property( "doc_count" ).asLong(); + + public static SearchQueryElementFactory, + ElasticsearchSearchIndexCompositeNodeContext> factory(boolean isNested) { + return new ElasticsearchCountDocumentAggregation.Factory( isNested ); + } + + private final boolean isNested; + + private ElasticsearchCountDocumentAggregation(Builder builder) { + super( builder ); + this.isNested = builder.isNested; + } + + @Override + public Extractor request(AggregationRequestContext context, AggregationKey key, JsonObject jsonAggregations) { + return new CountDocumentsExtractor(isNested); + } + + private record CountDocumentsExtractor(boolean isNested) implements Extractor { + + @Override + public Long extract(JsonObject aggregationResult, AggregationExtractContext context) { + if ( aggregationResult != null ) { + if ( isNested ) { + // We must return the number of root documents, + // not the number of leaf documents that Elasticsearch returns by default. 
+ return RESPONSE_ROOT_DOC_COUNT_ACCESSOR.get( aggregationResult ) + .orElseThrow( ElasticsearchClientLog.INSTANCE::elasticsearchResponseMissingData ); + } + else { + return RESPONSE_DOC_COUNT_ACCESSOR.get( aggregationResult ) + .orElseThrow( ElasticsearchClientLog.INSTANCE::elasticsearchResponseMissingData ); + } + } + else if ( context instanceof ElasticsearchSearchQueryExtractContext c ) { + return TOTAL_HITS_VALUE_PROPERTY_ACCESSOR.get( c.getResponseBody() ) + .orElseThrow( ElasticsearchClientLog.INSTANCE::elasticsearchResponseMissingData ); + } + throw ElasticsearchClientLog.INSTANCE.elasticsearchResponseMissingData(); + } + } + + private static class Factory + implements + SearchQueryElementFactory, + ElasticsearchSearchIndexCompositeNodeContext> { + private final boolean isNested; + + public Factory(boolean isNested) { + this.isNested = isNested; + } + + @Override + public CountDocumentAggregationBuilder.TypeSelector create(ElasticsearchSearchIndexScope scope, + ElasticsearchSearchIndexCompositeNodeContext node) { + return new ElasticsearchCountDocumentAggregation.TypeSelector( scope, isNested ); + } + + @Override + public void checkCompatibleWith(SearchQueryElementFactory other) { + if ( !getClass().equals( other.getClass() ) ) { + throw QueryLog.INSTANCE.differentImplementationClassForQueryElement( getClass(), other.getClass() ); + } + } + } + + private record TypeSelector(ElasticsearchSearchIndexScope scope, boolean isNested) + implements CountDocumentAggregationBuilder.TypeSelector { + + @Override + public CountDocumentAggregationBuilder type() { + return new Builder( scope, isNested ); + } + } + + private static class Builder extends AbstractBuilder + implements CountDocumentAggregationBuilder { + private final boolean isNested; + + private Builder(ElasticsearchSearchIndexScope scope, boolean isNested) { + super( scope ); + this.isNested = isNested; + } + + @Override + public ElasticsearchCountDocumentAggregation build() { + return new ElasticsearchCountDocumentAggregation( this ); + } + } +} diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java index 985309b8831..e1c2bfb840e 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java @@ -16,6 +16,7 @@ import org.hibernate.search.backend.elasticsearch.search.common.impl.ElasticsearchSearchIndexValueFieldContext; import org.hibernate.search.backend.elasticsearch.search.predicate.impl.ElasticsearchSearchPredicate; import org.hibernate.search.backend.elasticsearch.types.codec.impl.ElasticsearchFieldCodec; +import org.hibernate.search.engine.search.aggregation.AggregationKey; import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.spi.RangeAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; @@ -30,33 +31,50 @@ /** * @param The type of field values. * @param The type of keys in the returned map. It can be {@code F} + * @param The type of aggregated values. * or a different type if value converters are used. 
*/ -public class ElasticsearchRangeAggregation - extends AbstractElasticsearchBucketAggregation, Long> { +public class ElasticsearchRangeAggregation + extends AbstractElasticsearchBucketAggregation, V> { private final String absoluteFieldPath; private final List> rangesInOrder; private final JsonArray rangesJson; - private ElasticsearchRangeAggregation(Builder builder) { + private final ElasticsearchSearchAggregation aggregation; + + private Extractor innerExtractor; + private AggregationKey innerExtractorKey; + + private ElasticsearchRangeAggregation(Builder builder) { super( builder ); this.absoluteFieldPath = builder.field.absolutePath(); this.rangesInOrder = builder.rangesInOrder; this.rangesJson = builder.rangesJson; + this.aggregation = builder.aggregation; } @Override - protected void doRequest(JsonObject outerObject, JsonObject innerObject) { + protected void doRequest(JsonObject outerObject, JsonObject innerObject, AggregationRequestContext context) { outerObject.add( "range", innerObject ); innerObject.addProperty( "field", absoluteFieldPath ); innerObject.addProperty( "keyed", true ); innerObject.add( "ranges", rangesJson ); + + JsonObject subOuterObject = new JsonObject(); + // this is just a "random name" so we can get the aggregation back from the response. + // once we switch to the "composite aggregation" where we compute multiple aggregations for a range, + // this should be moved into a new "aggregation" that would handle all the logic for adding and then extracting 0-n aggregations. + innerExtractorKey = AggregationKey.of( "agg" ); + innerExtractor = aggregation.request( context, innerExtractorKey, subOuterObject ); + if ( !subOuterObject.isEmpty() ) { + outerObject.add( "aggs", subOuterObject ); + } } @Override - protected Extractor, Long>> extractor(AggregationRequestContext context) { + protected Extractor, V>> extractor(AggregationRequestContext context) { return new RangeBucketExtractor( nestedPathHierarchy, filter, rangesInOrder ); } @@ -84,12 +102,12 @@ private TypeSelector(ElasticsearchSearchIndexScope scope, } @Override - public Builder type(Class expectedType, ValueModel valueModel) { - return new Builder<>( scope, field, field.encodingContext().encoder( scope, field, expectedType, valueModel ) ); + public Builder type(Class expectedType, ValueModel valueModel) { + return new CountBuilder<>( scope, field, field.encodingContext().encoder( scope, field, expectedType, valueModel ) ); } } - protected class RangeBucketExtractor extends AbstractBucketExtractor, Long> { + protected class RangeBucketExtractor extends AbstractBucketExtractor, V> { private final List> rangesInOrder; protected RangeBucketExtractor(List nestedPathHierarchy, ElasticsearchSearchPredicate filter, @@ -100,31 +118,52 @@ protected RangeBucketExtractor(List nestedPathHierarchy, ElasticsearchSe @Override - protected Map, Long> doExtract(AggregationExtractContext context, JsonElement buckets) { + protected Map, V> doExtract(AggregationExtractContext context, JsonElement buckets) { JsonObject bucketMap = buckets.getAsJsonObject(); - Map, Long> result = CollectionHelper.newLinkedHashMap( rangesInOrder.size() ); + Map, V> result = CollectionHelper.newLinkedHashMap( rangesInOrder.size() ); for ( int i = 0; i < rangesInOrder.size(); i++ ) { JsonObject bucket = bucketMap.get( String.valueOf( i ) ).getAsJsonObject(); Range range = rangesInOrder.get( i ); - long documentCount = getBucketDocCount( bucket ); - result.put( range, documentCount ); + if ( bucket.has( innerExtractorKey.name() ) ) { + bucket = 
bucket.getAsJsonObject( innerExtractorKey.name() ); + } + result.put( range, innerExtractor.extract( bucket, context ) ); } return result; } } - private static class Builder extends AbstractBuilder, Long> - implements RangeAggregationBuilder { + public static class CountBuilder extends Builder { - private final Function encoder; + protected CountBuilder(ElasticsearchSearchIndexScope scope, + ElasticsearchSearchIndexValueFieldContext field, + Function encoder) { + super( scope, field, encoder, new ArrayList<>(), new JsonArray(), + ElasticsearchSearchAggregation.from( scope, + ElasticsearchCountDocumentAggregation.factory(field.nestedPathHierarchy().isEmpty()).create( scope, null ).type().build() ) ); + } + } - private final List> rangesInOrder = new ArrayList<>(); - private final JsonArray rangesJson = new JsonArray(); + private static class Builder extends AbstractBuilder, T> + implements RangeAggregationBuilder { - private Builder(ElasticsearchSearchIndexScope scope, ElasticsearchSearchIndexValueFieldContext field, - Function encoder) { + private final Function encoder; + + private final List> rangesInOrder; + private final JsonArray rangesJson; + private final ElasticsearchSearchAggregation aggregation; + + protected Builder(ElasticsearchSearchIndexScope scope, + ElasticsearchSearchIndexValueFieldContext field, + Function encoder, + List> rangesInOrder, + JsonArray rangesJson, + ElasticsearchSearchAggregation aggregation) { super( scope, field ); this.encoder = encoder; + this.rangesInOrder = rangesInOrder; + this.rangesJson = rangesJson; + this.aggregation = aggregation; } @Override @@ -152,14 +191,14 @@ public void range(Range range) { } @Override - public RangeAggregationBuilder withValue(SearchAggregation aggregation) { - throw new UnsupportedOperationException(); + public Builder withValue(SearchAggregation aggregation) { + return new Builder<>( scope, field, encoder, rangesInOrder, rangesJson, + ElasticsearchSearchAggregation.from( scope, aggregation ) ); } @Override - public ElasticsearchRangeAggregation build() { + public ElasticsearchRangeAggregation build() { return new ElasticsearchRangeAggregation<>( this ); } - } } diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchTermsAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchTermsAggregation.java index eb19184b5fa..903a8ccdef6 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchTermsAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchTermsAggregation.java @@ -51,7 +51,7 @@ private ElasticsearchTermsAggregation(Builder builder) { } @Override - protected void doRequest(JsonObject outerObject, JsonObject innerObject) { + protected void doRequest(JsonObject outerObject, JsonObject innerObject, AggregationRequestContext context) { outerObject.add( "terms", innerObject ); innerObject.addProperty( "field", absoluteFieldPath ); if ( order != null ) { diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/query/impl/ElasticsearchSearchQueryExtractContext.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/query/impl/ElasticsearchSearchQueryExtractContext.java index 76ca12c01ca..62a91a01e4f 100644 --- 
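For orientation while reading the range-aggregation changes above: with a sub-aggregation attached through .value(...), the request body built by doRequest should come out roughly as sketched below. This is an illustrative approximation only — the field name, the single range and the sum sub-aggregation are invented for the example, and the "agg" key is just the placeholder name registered in doRequest.

import com.google.gson.JsonArray;
import com.google.gson.JsonObject;

public class RangeWithSubAggregationRequestSketch {
    public static void main(String[] args) {
        JsonObject rangeBody = new JsonObject();
        rangeBody.addProperty( "field", "price" ); // invented field name
        rangeBody.addProperty( "keyed", true );
        JsonArray ranges = new JsonArray();
        JsonObject firstRange = new JsonObject();
        firstRange.addProperty( "key", "0" ); // buckets are later looked up by position, see doExtract
        firstRange.addProperty( "to", 10.0 );
        ranges.add( firstRange );
        rangeBody.add( "ranges", ranges );

        JsonObject sum = new JsonObject();
        sum.addProperty( "field", "price" );
        JsonObject subAggregation = new JsonObject();
        subAggregation.add( "sum", sum );
        JsonObject aggs = new JsonObject();
        aggs.add( "agg", subAggregation ); // placeholder key registered in doRequest

        JsonObject outer = new JsonObject();
        outer.add( "range", rangeBody );
        outer.add( "aggs", aggs );
        System.out.println( outer ); // {"range":{...},"aggs":{"agg":{"sum":{"field":"price"}}}}
    }
}

On extraction, each keyed bucket then carries the sub-aggregation result under that same "agg" key, which is why doExtract descends into bucket.getAsJsonObject( innerExtractorKey.name() ) before delegating to the inner extractor.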
a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/query/impl/ElasticsearchSearchQueryExtractContext.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/query/impl/ElasticsearchSearchQueryExtractContext.java @@ -18,7 +18,7 @@ * The context holding all the useful information pertaining to the extraction of data from * the response to the Elasticsearch search query. */ -class ElasticsearchSearchQueryExtractContext implements AggregationExtractContext { +public class ElasticsearchSearchQueryExtractContext implements AggregationExtractContext { private final ElasticsearchSearchQueryRequestContext requestContext; private final ProjectionHitMapper projectionHitMapper; @@ -41,7 +41,7 @@ public FromDocumentValueConvertContext fromDocumentValueConvertContext() { return fromDocumentValueConvertContext; } - JsonObject getResponseBody() { + public JsonObject getResponseBody() { return responseBody; } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java index fcdbc7238a3..574815f8fdd 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java @@ -58,7 +58,7 @@ public LuceneIndexRootBuilder(EventContext indexEventContext, this.mappedTypeName = mappedTypeName; this.analysisDefinitionRegistry = analysisDefinitionRegistry; - typeBuilder.queryElementFactory( AggregationTypeKeys.COUNT_DOCUMENTS, LuceneCountDocumentAggregation.factory() ); + this.typeBuilder.queryElementFactory( AggregationTypeKeys.COUNT_DOCUMENTS, LuceneCountDocumentAggregation.factory() ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java index 657a23bf319..b35fd6dc169 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java @@ -19,7 +19,6 @@ public class LuceneCountDocumentAggregation implements LuceneSearchAggregation { - @SuppressWarnings("unchecked") public static Factory factory() { return Factory.INSTANCE; } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java index 1558c51efb8..1b1b8964f92 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java @@ -202,7 +202,7 @@ protected CountBuilder(AbstractLuceneNumericFieldCodec codec, Function scope, LuceneSearchIndexValueFieldContext field) { super( scope, field, codec, convertAndEncode, LuceneSearchAggregation.from( scope, - LuceneCountNumericLongAggregation.factory( codec ).create( scope, 
field ).build() ), + LuceneCountDocumentAggregation.factory().create( scope, null ).type().build() ), new ArrayList<>(), new ArrayList<>() ); } } diff --git a/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java b/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java index e32a076b5cf..1c1effe61a9 100644 --- a/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java +++ b/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java @@ -268,6 +268,51 @@ void terms() { } ); } + @Test + void terms_value() { + withinSearchSession( searchSession -> { + // tag::terms-sum[] + AggregationKey> sumByPriceKey = AggregationKey.of( "sumByPrice" ); + SearchResult result = searchSession.search( Book.class ) + .where( f -> f.matchAll() ) + .aggregation( + sumByPriceKey, f -> f.terms() + .field( "price", Double.class ) // <1> + .value( f.sum().field( "price", Double.class ) ) + ) + .fetch( 20 ); + Map sumByPrice = result.aggregation( sumByPriceKey ); + // end::terms-sum[] + assertThat( sumByPrice ) + .containsExactly( + entry( 10.0, 7.99 ), + entry( 20.0 , 35.98 ), + entry( null , 24.99 ) + ); + } ); + + withinSearchSession( searchSession -> { + // tag::terms-count[] + AggregationKey> countsByPriceKey = AggregationKey.of( "countsByPrice" ); + SearchResult result = searchSession.search( Book.class ) + .where( f -> f.matchAll() ) + .aggregation( + countsByPriceKey, f -> f.terms() + .field( "price", Double.class ) // <1> + .value( f.countDocuments() ) // <4> + ) + .fetch( 20 ); + Map countsByPrice = result.aggregation( countsByPriceKey ); + // end::terms-count[] + assertThat( countsByPrice ) + .containsExactly( + entry( Range.canonical( 0.0, 10.0 ), 1L ), + entry( Range.canonical( 10.0, 20.0 ), 2L ), + entry( Range.canonical( 20.0, null ), 1L ) + ); + } ); + } + @Test void range_value() { withinSearchSession( searchSession -> { @@ -281,7 +326,7 @@ void range_value() { .range( 0.0, 10.0 ) // <2> .range( 10.0, 20.0 ) .range( 20.0, null ) // <3> - .value( f.sum().field( "price", Double.class ).toAggregation() ) + .value( f.sum().field( "price", Double.class ) ) ) .fetch( 20 ); Map, Double> countsByPrice = result.aggregation( countsByPriceKey ); @@ -293,6 +338,30 @@ void range_value() { entry( Range.canonical( 20.0, null ), 24.99 ) ); } ); + + withinSearchSession( searchSession -> { + // tag::range-count[] + AggregationKey, Long>> countsByPriceKey = AggregationKey.of( "countsByPrice" ); + SearchResult result = searchSession.search( Book.class ) + .where( f -> f.matchAll() ) + .aggregation( + countsByPriceKey, f -> f.range() + .field( "price", Double.class ) // <1> + .range( 0.0, 10.0 ) // <2> + .range( 10.0, 20.0 ) + .range( 20.0, null ) // <3> + .value( f.countDocuments() ) // <4> + ) + .fetch( 20 ); + Map, Long> countsByPrice = result.aggregation( countsByPriceKey ); + // end::range-count[] + assertThat( countsByPrice ) + .containsExactly( + entry( Range.canonical( 0.0, 10.0 ), 1L ), + entry( Range.canonical( 10.0, 20.0 ), 2L ), + entry( Range.canonical( 20.0, null ), 1L ) + ); + } ); } @Test diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java index fcdbc7238a3..574815f8fdd 100644 --- 
a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java @@ -58,7 +58,7 @@ public LuceneIndexRootBuilder(EventContext indexEventContext, this.mappedTypeName = mappedTypeName; this.analysisDefinitionRegistry = analysisDefinitionRegistry; - typeBuilder.queryElementFactory( AggregationTypeKeys.COUNT_DOCUMENTS, LuceneCountDocumentAggregation.factory() ); + this.typeBuilder.queryElementFactory( AggregationTypeKeys.COUNT_DOCUMENTS, LuceneCountDocumentAggregation.factory() ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java index 657a23bf319..b35fd6dc169 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java @@ -19,7 +19,6 @@ public class LuceneCountDocumentAggregation implements LuceneSearchAggregation { - @SuppressWarnings("unchecked") public static Factory factory() { return Factory.INSTANCE; } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java index 845b29480a9..6825b163611 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java @@ -202,7 +202,7 @@ protected CountBuilder(AbstractLuceneNumericFieldCodec codec, Function scope, LuceneSearchIndexValueFieldContext field) { super( scope, field, codec, convertAndEncode, LuceneSearchAggregation.from( scope, - LuceneCountNumericLongAggregation.factory( codec ).create( scope, field ).build() ), + LuceneCountDocumentAggregation.factory().create( scope, null ).type().build() ), new ArrayList<>(), new ArrayList<>() ); } } From 876fbf30f42497351e72ef8133cf5ed3006a6272 Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Fri, 4 Jul 2025 00:07:32 +0200 Subject: [PATCH 12/23] HSEARCH-3661 Make necessary adjustments to the terms aggregations to support values in the Lucene backends --- .../collector/impl/BaseTermsCollector.java | 16 +++ .../collector/impl/NumericTermsCollector.java | 82 ++++++++++++-- .../impl/NumericTermsCollectorFactory.java | 30 ++++- .../impl/NumericTermsCollectorManager.java | 10 +- .../collector/impl/TextTermsCollector.java | 92 ++++++++++++--- .../impl/TextTermsCollectorFactory.java | 29 ++++- .../impl/TextTermsCollectorManager.java | 10 +- ...ractLuceneMultivaluedTermsAggregation.java | 103 ++++++++++++----- .../lucene/types/aggregation/impl/Bucket.java | 2 +- .../types/aggregation/impl/BucketOrder.java | 20 ++-- .../impl/LocalAggregationExtractContext.java | 58 ++++++++++ .../impl/LocalAggregationRequestContext.java | 60 ++++++++++ .../types/aggregation/impl/LongBucket.java | 39 ++++++- 
.../impl/LuceneNumericRangeAggregation.java | 83 -------------- .../impl/LuceneNumericTermsAggregation.java | 106 ++++++++++++------ .../impl/LuceneTextTermsAggregation.java | 93 ++++++++++----- .../dsl/RangeAggregationRangeValueStep.java | 3 +- .../dsl/TermsAggregationFieldStep.java | 8 +- .../dsl/TermsAggregationRangeValueStep.java | 32 ++++++ .../impl/TermsAggregationFieldStepImpl.java | 6 +- .../impl/TermsAggregationOptionsStepImpl.java | 32 +++--- .../spi/TermsAggregationBuilder.java | 7 +- .../MetricNumericFieldsAggregationsIT.java | 9 +- .../collector/impl/BaseTermsCollector.java | 16 +++ .../collector/impl/NumericTermsCollector.java | 82 ++++++++++++-- .../impl/NumericTermsCollectorFactory.java | 30 ++++- .../impl/NumericTermsCollectorManager.java | 10 +- .../collector/impl/TextTermsCollector.java | 92 ++++++++++++--- .../impl/TextTermsCollectorFactory.java | 29 ++++- .../impl/TextTermsCollectorManager.java | 10 +- ...ractLuceneMultivaluedTermsAggregation.java | 103 ++++++++++++----- .../lucene/types/aggregation/impl/Bucket.java | 2 +- .../types/aggregation/impl/BucketOrder.java | 20 ++-- .../impl/LocalAggregationExtractContext.java | 58 ++++++++++ .../impl/LocalAggregationRequestContext.java | 60 ++++++++++ .../types/aggregation/impl/LongBucket.java | 39 ++++++- .../impl/LuceneNumericRangeAggregation.java | 83 -------------- .../impl/LuceneNumericTermsAggregation.java | 106 ++++++++++++------ .../impl/LuceneTextTermsAggregation.java | 93 ++++++++++----- .../impl/StubSearchAggregation.java | 11 +- 40 files changed, 1286 insertions(+), 488 deletions(-) create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/BaseTermsCollector.java create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationExtractContext.java create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationRequestContext.java create mode 100644 engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationRangeValueStep.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/BaseTermsCollector.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationExtractContext.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationRequestContext.java diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/BaseTermsCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/BaseTermsCollector.java new file mode 100644 index 00000000000..c139c430de6 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/BaseTermsCollector.java @@ -0,0 +1,16 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; + +public interface BaseTermsCollector { + + CollectorKey[] keys(); + + CollectorManager[] managers(); + +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java index 2977d48b37a..02094d1966e 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java @@ -15,25 +15,36 @@ import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket; import com.carrotsearch.hppc.LongHashSet; -import com.carrotsearch.hppc.LongIntHashMap; -import com.carrotsearch.hppc.LongIntMap; -import com.carrotsearch.hppc.procedures.LongIntProcedure; +import com.carrotsearch.hppc.LongObjectHashMap; +import com.carrotsearch.hppc.cursors.LongObjectCursor; +import com.carrotsearch.hppc.procedures.LongObjectProcedure; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.SimpleCollector; import org.apache.lucene.util.PriorityQueue; -public class NumericTermsCollector extends SimpleCollector { +public class NumericTermsCollector extends SimpleCollector implements BaseTermsCollector { private final LongHashSet uniqueLeafIndicesForDocument = new LongHashSet(); private final LongMultiValuesSource valuesSource; - private final LongIntMap hashCounts = new LongIntHashMap(); + private final LongObjectHashMap segmentValues = new LongObjectHashMap<>(); + + private final CollectorKey[] keys; + private final CollectorManager[] managers; + private LongMultiValues values; + private LeafReaderContext leafReaderContext; - public NumericTermsCollector(LongMultiValuesSource valuesSource) { + public NumericTermsCollector(LongMultiValuesSource valuesSource, CollectorKey[] keys, + CollectorManager[] managers) { this.valuesSource = valuesSource; + this.keys = keys; + this.managers = managers; } @Override @@ -46,19 +57,24 @@ public void collect(int doc) throws IOException { // Each document must be counted only once per range. 
long value = values.nextValue(); if ( uniqueLeafIndicesForDocument.add( value ) ) { - hashCounts.addTo( value, 1 ); + SegmentValue segmentValue = segmentValues.get( value ); + if ( segmentValue == null ) { + segmentValue = new SegmentValue( managers ); + segmentValues.put( value, segmentValue ); + } + segmentValue.collect( doc ); } } } } public List counts(BucketOrder order, int topN, int minDocCount) { - int size = Math.min( topN, hashCounts.size() ); + int size = Math.min( topN, segmentValues.size() ); PriorityQueue pq = new HibernateSearchBucketOrderQueue( order, size ); - hashCounts.forEach( (LongIntProcedure) (key, value) -> { - if ( value >= minDocCount ) { - pq.insertWithOverflow( new LongBucket( key, value ) ); + segmentValues.forEach( (LongObjectProcedure) (key, value) -> { + if ( value.count >= minDocCount ) { + pq.insertWithOverflow( new LongBucket( key, value.collectors, value.count ) ); } } ); @@ -77,13 +93,27 @@ public ScoreMode scoreMode() { } protected void doSetNextReader(LeafReaderContext context) throws IOException { - values = valuesSource.getValues( context ); + this.values = valuesSource.getValues( context ); + this.leafReaderContext = context; + for ( LongObjectCursor value : segmentValues ) { + value.value.resetLeafCollectors( context ); + } } public void finish() { values = null; } + @Override + public CollectorKey[] keys() { + return keys; + } + + @Override + public CollectorManager[] managers() { + return managers; + } + private static class HibernateSearchBucketOrderQueue extends PriorityQueue { private final Comparator comparator; @@ -98,4 +128,32 @@ protected boolean lessThan(LongBucket t1, LongBucket t2) { } } + private class SegmentValue { + final Collector[] collectors; + final LeafCollector[] leafCollectors; + long count = 0L; + + SegmentValue(CollectorManager[] managers) throws IOException { + this.collectors = new Collector[managers.length]; + this.leafCollectors = new LeafCollector[managers.length]; + for ( int i = 0; i < managers.length; i++ ) { + collectors[i] = managers[i].newCollector(); + leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); + } + } + + void collect(int doc) throws IOException { + count++; + for ( LeafCollector collector : leafCollectors ) { + collector.collect( doc ); + } + } + + void resetLeafCollectors(LeafReaderContext leafReaderContext) throws IOException { + for ( int i = 0; i < leafCollectors.length; i++ ) { + leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); + } + } + } + } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java index a99503ef62a..7c2cc459e44 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java @@ -4,26 +4,44 @@ */ package org.hibernate.search.backend.lucene.lowlevel.collector.impl; +import java.io.IOException; +import java.util.List; + import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; +import org.apache.lucene.search.CollectorManager; + public class NumericTermsCollectorFactory implements CollectorFactory { public static CollectorFactory instance( - LongMultiValuesSource valuesSource) { - return new 
NumericTermsCollectorFactory( valuesSource ); + LongMultiValuesSource valuesSource, List> collectorFactories) { + return new NumericTermsCollectorFactory( valuesSource, collectorFactories ); } - public final CollectorKey key = CollectorKey.create(); + private final CollectorKey key = CollectorKey.create(); private final LongMultiValuesSource valuesSource; + private final List> collectorFactories; - public NumericTermsCollectorFactory(LongMultiValuesSource valuesSource) { + public NumericTermsCollectorFactory(LongMultiValuesSource valuesSource, + List> collectorFactories) { this.valuesSource = valuesSource; + this.collectorFactories = collectorFactories; } + @SuppressWarnings({ "unchecked" }) @Override - public NumericTermsCollectorManager createCollectorManager(CollectorExecutionContext context) { - return new NumericTermsCollectorManager( valuesSource ); + public NumericTermsCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { + CollectorKey[] keys = new CollectorKey[collectorFactories.size()]; + var managers = new CollectorManager[collectorFactories.size()]; + int index = 0; + for ( CollectorFactory factory : collectorFactories ) { + keys[index] = factory.getCollectorKey(); + CollectorManager collectorManager = factory.createCollectorManager( context ); + managers[index] = collectorManager; + index++; + } + return new NumericTermsCollectorManager( valuesSource, keys, managers ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java index afdeab71207..affb41d99ed 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java @@ -8,20 +8,26 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; +import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; public class NumericTermsCollectorManager implements CollectorManager { private final LongMultiValuesSource valuesSource; + private final CollectorKey[] keys; + private final CollectorManager[] managers; - public NumericTermsCollectorManager(LongMultiValuesSource valuesSource) { + public NumericTermsCollectorManager(LongMultiValuesSource valuesSource, + CollectorKey[] keys, CollectorManager[] managers) { this.valuesSource = valuesSource; + this.keys = keys; + this.managers = managers; } @Override public NumericTermsCollector newCollector() { - return new NumericTermsCollector( valuesSource ); + return new NumericTermsCollector( valuesSource, keys, managers ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java index c6efffdd032..bfcd753303a 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java @@ -15,34 +15,43 @@ import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket; import com.carrotsearch.hppc.LongHashSet; 
-import com.carrotsearch.hppc.LongIntHashMap; -import com.carrotsearch.hppc.LongIntMap; -import com.carrotsearch.hppc.cursors.LongIntCursor; -import com.carrotsearch.hppc.procedures.LongIntProcedure; +import com.carrotsearch.hppc.LongObjectHashMap; +import com.carrotsearch.hppc.cursors.LongObjectCursor; +import com.carrotsearch.hppc.procedures.LongObjectProcedure; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.SimpleCollector; import org.apache.lucene.util.PriorityQueue; -public class TextTermsCollector extends SimpleCollector { +public class TextTermsCollector extends SimpleCollector implements BaseTermsCollector { private final LongHashSet uniqueLeafIndicesForDocument = new LongHashSet(); private final TextMultiValuesSource valuesSource; - private final LongIntMap hashCounts = new LongIntHashMap(); - private final LongIntMap segmentCounts = new LongIntHashMap(); + private final LongObjectHashMap hashValues = new LongObjectHashMap<>(); + private final LongObjectHashMap segmentValues = new LongObjectHashMap<>(); private final String field; private SortedSetDocValues sortedSetValues; + private final CollectorKey[] keys; + private final CollectorManager[] managers; + private TextMultiValues values; + private LeafReaderContext leafReaderContext; - public TextTermsCollector(String field, TextMultiValuesSource valuesSource) { + public TextTermsCollector(String field, TextMultiValuesSource valuesSource, + CollectorKey[] keys, CollectorManager[] managers) { this.field = field; this.valuesSource = valuesSource; + this.keys = keys; + this.managers = managers; } @Override @@ -55,19 +64,24 @@ public void collect(int doc) throws IOException { // Each document must be counted only once per range. 
long value = values.nextOrd(); if ( uniqueLeafIndicesForDocument.add( value ) ) { - segmentCounts.addTo( value, 1 ); + SegmentValue segmentValue = segmentValues.get( value ); + if ( segmentValue == null ) { + segmentValue = new SegmentValue( managers ); + segmentValues.put( value, segmentValue ); + } + segmentValue.collect( doc ); } } } } - public List counts(BucketOrder order, int topN, int minDocCount) { - int size = Math.min( topN, hashCounts.size() ); + public List results(BucketOrder order, int topN, int minDocCount) { + int size = Math.min( topN, hashValues.size() ); PriorityQueue pq = new HibernateSearchBucketOrderQueue( order, size ); - hashCounts.forEach( (LongIntProcedure) (key, value) -> { - if ( value >= minDocCount ) { - pq.insertWithOverflow( new LongBucket( key, value ) ); + hashValues.forEach( (LongObjectProcedure) (key, value) -> { + if ( value.count >= minDocCount ) { + pq.insertWithOverflow( value ); } } ); @@ -80,6 +94,16 @@ public List counts(BucketOrder order, int topN, int minDocCount) { return buckets; } + @Override + public CollectorKey[] keys() { + return keys; + } + + @Override + public CollectorManager[] managers() { + return managers; + } + @Override public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; @@ -88,17 +112,28 @@ public ScoreMode scoreMode() { protected void doSetNextReader(LeafReaderContext context) throws IOException { initRootSortedSetDocValues( context ); this.values = valuesSource.getValues( context ); + leafReaderContext = context; } public void finish() throws IOException { - for ( LongIntCursor hashCount : segmentCounts ) { - hashCounts.addTo( sortedSetValues.lookupTerm( values.lookupOrd( hashCount.key ) ), hashCount.value ); + for ( LongObjectCursor value : segmentValues ) { + long globalOrd = sortedSetValues.lookupTerm( values.lookupOrd( value.key ) ); + LongBucket bucket = hashValues.get( globalOrd ); + if ( bucket == null ) { + bucket = new LongBucket( globalOrd, value.value.collectors, value.value.count ); + hashValues.put( globalOrd, bucket ); + } + else { + bucket.count += value.value.count; + for ( int i = 0; i < bucket.collectors.length; i++ ) { + bucket.collectors[i].add( value.value.collectors[i] ); + } + } } this.values = null; - this.segmentCounts.clear(); + this.segmentValues.clear(); } - private void initRootSortedSetDocValues(IndexReaderContext ctx) throws IOException { if ( sortedSetValues != null || ctx == null ) { return; @@ -123,4 +158,25 @@ protected boolean lessThan(LongBucket t1, LongBucket t2) { } } + private class SegmentValue { + final Collector[] collectors; + final LeafCollector[] leafCollectors; + long count = 0L; + + public SegmentValue(CollectorManager[] managers) throws IOException { + this.collectors = new Collector[managers.length]; + this.leafCollectors = new LeafCollector[managers.length]; + for ( int i = 0; i < managers.length; i++ ) { + collectors[i] = managers[i].newCollector(); + leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); + } + } + + public void collect(int doc) throws IOException { + count++; + for ( LeafCollector collector : leafCollectors ) { + collector.collect( doc ); + } + } + } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java index 11489538470..65d2aded89f 100644 --- 
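Stepping back from the two terms collectors above: the pattern they both introduce is that each term bucket owns one sub-collector per requested sub-aggregation, created from the corresponding CollectorManager, and every document that increments the bucket's count is also forwarded to those sub-collectors. The helper below is a schematic restatement of that idea — invented names, raw collector types as in the patch itself — not code taken from the patch.

import java.io.IOException;
import java.util.List;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.LeafCollector;

// One instance per term bucket: owns one collector per sub-aggregation manager
// and forwards matching docs to all of them, alongside a plain document count.
@SuppressWarnings({ "rawtypes", "unchecked" })
final class BucketSubCollectorsSketch {
    final Collector[] collectors;
    final LeafCollector[] leafCollectors;
    long count;

    BucketSubCollectorsSketch(CollectorManager[] managers, LeafReaderContext context) throws IOException {
        collectors = new Collector[managers.length];
        leafCollectors = new LeafCollector[managers.length];
        for ( int i = 0; i < managers.length; i++ ) {
            collectors[i] = managers[i].newCollector();
            leafCollectors[i] = collectors[i].getLeafCollector( context );
        }
    }

    // Called for each doc that falls into this bucket within the current segment.
    void collect(int doc) throws IOException {
        count++;
        for ( LeafCollector leaf : leafCollectors ) {
            leaf.collect( doc );
        }
    }

    // When moving to a new segment, re-acquire leaf collectors for the same collectors.
    void nextSegment(LeafReaderContext context) throws IOException {
        for ( int i = 0; i < collectors.length; i++ ) {
            leafCollectors[i] = collectors[i].getLeafCollector( context );
        }
    }

    // At the end, each sub-aggregation's manager reduces the collectors gathered for this bucket.
    static Object reduce(CollectorManager manager, List<Collector> perBucketCollectors) throws IOException {
        return manager.reduce( perBucketCollectors );
    }
}

For the text variant, buckets are first accumulated per segment against segment ordinals and only merged into the global map in finish(), after remapping each segment ordinal through sortedSetValues.lookupTerm( values.lookupOrd( segmentOrd ) ).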
a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java @@ -4,29 +4,46 @@ */ package org.hibernate.search.backend.lucene.lowlevel.collector.impl; +import java.io.IOException; +import java.util.List; + import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; +import org.apache.lucene.search.CollectorManager; + public class TextTermsCollectorFactory implements CollectorFactory { public static CollectorFactory instance( - String field, TextMultiValuesSource valuesSource) { - return new TextTermsCollectorFactory( field, valuesSource ); + String field, TextMultiValuesSource valuesSource, List> collectorFactories) { + return new TextTermsCollectorFactory( field, valuesSource, collectorFactories ); } - public final CollectorKey key = CollectorKey.create(); private final TextMultiValuesSource valuesSource; private final String field; + private final List> collectorFactories; - public TextTermsCollectorFactory(String field, TextMultiValuesSource valuesSource) { + public TextTermsCollectorFactory(String field, TextMultiValuesSource valuesSource, + List> collectorFactories) { this.field = field; this.valuesSource = valuesSource; + this.collectorFactories = collectorFactories; } + @SuppressWarnings({ "unchecked" }) @Override - public TextTermsCollectorManager createCollectorManager(CollectorExecutionContext context) { - return new TextTermsCollectorManager( field, valuesSource ); + public TextTermsCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { + CollectorKey[] keys = new CollectorKey[collectorFactories.size()]; + var managers = new CollectorManager[collectorFactories.size()]; + int index = 0; + for ( CollectorFactory factory : collectorFactories ) { + keys[index] = factory.getCollectorKey(); + CollectorManager collectorManager = factory.createCollectorManager( context ); + managers[index] = collectorManager; + index++; + } + return new TextTermsCollectorManager( field, valuesSource, keys, managers ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java index b309f509b05..9609e7aca82 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java @@ -8,6 +8,7 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; +import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; public class TextTermsCollectorManager @@ -15,15 +16,20 @@ public class TextTermsCollectorManager private final TextMultiValuesSource valuesSource; private final String field; + private final CollectorKey[] keys; + private final CollectorManager[] managers; - public TextTermsCollectorManager(String field, TextMultiValuesSource valuesSource) { + public TextTermsCollectorManager(String field, TextMultiValuesSource valuesSource, + CollectorKey[] keys, CollectorManager[] managers) { this.field = field; this.valuesSource = valuesSource; + this.keys = keys; + this.managers = managers; } @Override 
public TextTermsCollector newCollector() { - return new TextTermsCollector( field, valuesSource ); + return new TextTermsCollector( field, valuesSource, keys, managers ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java index b80563a010d..60ef17c2109 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java @@ -6,13 +6,16 @@ import java.io.IOException; import java.util.Comparator; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.BaseTermsCollector; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; -import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; @@ -21,6 +24,8 @@ import org.hibernate.search.engine.search.common.ValueModel; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; /** * @param The type of field values exposed to the mapper. @@ -28,42 +33,64 @@ * @param The type of keys in the returned map. It can be {@code F} * or a different type if value converters are used. 
*/ -public abstract class AbstractLuceneMultivaluedTermsAggregation - extends AbstractLuceneBucketAggregation { +public abstract class AbstractLuceneMultivaluedTermsAggregation + extends AbstractLuceneBucketAggregation { protected final ProjectionConverter fromFieldValueConverter; protected final BucketOrder order; protected final int maxTermCount; protected final int minDocCount; + protected final LuceneSearchAggregation aggregation; - AbstractLuceneMultivaluedTermsAggregation(AbstractBuilder builder) { + AbstractLuceneMultivaluedTermsAggregation(AbstractBuilder builder) { super( builder ); this.fromFieldValueConverter = builder.fromFieldValueConverter; this.order = builder.order; this.maxTermCount = builder.maxTermCount; this.minDocCount = builder.minDocCount; + this.aggregation = builder.aggregation; } - protected abstract Extractor> extractor(AggregationRequestContext context); + protected abstract class AbstractExtractor implements Extractor> { + protected final Extractor extractor; + + protected AbstractExtractor(Extractor extractor) { + this.extractor = extractor; + } - protected abstract class AbstractExtractor implements Extractor> { @Override - public final Map extract(AggregationExtractContext context) throws IOException { - List> buckets = getTopBuckets( context ); + public final Map extract(AggregationExtractContext context) throws IOException { + List> buckets = getTopBuckets( context ); if ( minDocCount == 0 && buckets.size() < maxTermCount ) { Set firstTerms = collectFirstTerms( context.getIndexReader(), order.isTermOrderDescending(), maxTermCount ); - for ( Bucket bucket : buckets ) { + for ( Bucket bucket : buckets ) { firstTerms.remove( bucket.term() ); } - firstTerms.forEach( term -> buckets.add( new Bucket<>( term, 0 ) ) ); + R zeroValue = createZeroValue( context ); + firstTerms.forEach( term -> buckets.add( new Bucket<>( term, 0, zeroValue ) ) ); buckets.sort( order.toBucketComparator( getAscendingTermComparator() ) ); } return toMap( context.fromDocumentValueConvertContext(), buckets ); } + protected abstract BaseTermsCollector termsCollector(AggregationExtractContext context) throws IOException; + + protected R createZeroValue(AggregationExtractContext context) throws IOException { + LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); + var termsCollector = termsCollector( context ); + CollectorManager[] managers = termsCollector.managers(); + CollectorKey[] keys = termsCollector.keys(); + HashMap, Object> results = new HashMap<>(); + for ( int i = 0; i < keys.length; i++ ) { + results.put( keys[i], managers[i].reduce( List.of( managers[i].newCollector() ) ) ); + } + localContext.setResults( results ); + return extractor.extract( localContext ); + } + abstract Set collectFirstTerms(IndexReader reader, boolean descending, int limit) throws IOException; @@ -71,20 +98,32 @@ abstract Set collectFirstTerms(IndexReader reader, boolean descending, int li abstract V termToFieldValue(T key); - abstract List> getTopBuckets(AggregationExtractContext context) throws IOException; + abstract List> getTopBuckets(AggregationExtractContext context) throws IOException; - private Map toMap(FromDocumentValueConvertContext convertContext, List> buckets) { - Map result = new LinkedHashMap<>(); // LinkedHashMap to preserve ordering - for ( Bucket bucket : buckets ) { + private Map toMap(FromDocumentValueConvertContext convertContext, List> buckets) { + Map result = new LinkedHashMap<>(); // LinkedHashMap to preserve ordering + for ( Bucket bucket 
: buckets ) { V decoded = termToFieldValue( bucket.term() ); K key = fromFieldValueConverter.fromDocumentValue( decoded, convertContext ); - result.put( key, bucket.count() ); + result.put( key, bucket.value() ); + } + return result; + } + + protected Map, Object> prepareResults(LongBucket bucket, BaseTermsCollector termsCollector) + throws IOException { + Map, Object> result = new HashMap<>(); + List[] collectors = bucket.collectors; + CollectorKey[] collectorKeys = termsCollector.keys(); + CollectorManager[] managers = termsCollector.managers(); + for ( int i = 0; i < collectorKeys.length; i++ ) { + result.put( collectorKeys[i], managers[i].reduce( collectors[i] ) ); } return result; } } - abstract static class AbstractTypeSelector implements TermsAggregationBuilder.TypeSelector { + abstract static class AbstractTypeSelector implements TermsAggregationBuilder.TypeSelector { protected final LuceneSearchIndexScope scope; protected final LuceneSearchIndexValueFieldContext field; @@ -94,23 +133,33 @@ protected AbstractTypeSelector(LuceneSearchIndexScope scope, LuceneSearchInde } @Override - public abstract AbstractBuilder type(Class expectedType, ValueModel valueModel); + public abstract AbstractBuilder type(Class expectedType, ValueModel valueModel); } - abstract static class AbstractBuilder - extends AbstractLuceneBucketAggregation.AbstractBuilder - implements TermsAggregationBuilder { + abstract static class AbstractBuilder + extends AbstractLuceneBucketAggregation.AbstractBuilder + implements TermsAggregationBuilder { - private final ProjectionConverter fromFieldValueConverter; - - private BucketOrder order = BucketOrder.COUNT_DESC; - private int minDocCount = 1; - private int maxTermCount = 100; + protected final LuceneSearchAggregation aggregation; + protected final ProjectionConverter fromFieldValueConverter; + protected BucketOrder order; + protected int minDocCount; + protected int maxTermCount; AbstractBuilder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, - ProjectionConverter fromFieldValueConverter) { + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter) { + this( scope, field, aggregation, fromFieldValueConverter, BucketOrder.COUNT_DESC, 1, 100 ); + } + + AbstractBuilder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter, + BucketOrder order, int minDocCount, int maxTermCount) { super( scope, field ); + this.aggregation = aggregation; this.fromFieldValueConverter = fromFieldValueConverter; + this.order = order; + this.minDocCount = minDocCount; + this.maxTermCount = maxTermCount; } @Override @@ -144,7 +193,7 @@ public void maxTermCount(int maxTermCount) { } @Override - public abstract AbstractLuceneMultivaluedTermsAggregation build(); + public abstract AbstractLuceneMultivaluedTermsAggregation build(); protected final void order(BucketOrder order) { this.order = order; diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java index ce9b0a13ce3..7f01fc781ba 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java @@ -4,5 +4,5 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; -public 
record Bucket(F term, long count) { +public record Bucket(F term, long count, R value) { } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java index 31233e5ad39..320fba0f177 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java @@ -9,7 +9,7 @@ public enum BucketOrder { COUNT_ASC { @Override - Comparator> toBucketComparatorInternal(Comparator termComparator) { + Comparator> toBucketComparatorInternal(Comparator termComparator) { return (left, right) -> { int order = Long.compare( left.count(), right.count() ); if ( order != 0 ) { @@ -22,12 +22,12 @@ Comparator> toBucketComparatorInternal(Comparator termComparato @Override Comparator toLongBucketComparatorInternal() { - return Comparator.comparingLong( LongBucket::count ).thenComparingLong( LongBucket::term ); + return Comparator.comparingLong( LongBucket::count ).thenComparingLong( LongBucket::termOrd ); } }, COUNT_DESC { @Override - Comparator> toBucketComparatorInternal(Comparator termComparator) { + Comparator> toBucketComparatorInternal(Comparator termComparator) { return (left, right) -> { int order = Long.compare( right.count(), left.count() ); // reversed, because desc if ( order != 0 ) { @@ -45,20 +45,20 @@ Comparator toLongBucketComparatorInternal() { if ( order != 0 ) { return order; } - order = Long.compare( left.term(), right.term() ); + order = Long.compare( left.termOrd(), right.termOrd() ); return order; }; } }, TERM_ASC { @Override - Comparator> toBucketComparatorInternal(Comparator termComparator) { + Comparator> toBucketComparatorInternal(Comparator termComparator) { return (left, right) -> termComparator.compare( left.term(), right.term() ); } @Override Comparator toLongBucketComparatorInternal() { - return Comparator.comparingLong( LongBucket::term ); + return Comparator.comparingLong( LongBucket::termOrd ); } }, TERM_DESC { @@ -68,17 +68,17 @@ boolean isTermOrderDescending() { } @Override - Comparator> toBucketComparatorInternal(Comparator termComparator) { + Comparator> toBucketComparatorInternal(Comparator termComparator) { return (left, right) -> termComparator.compare( left.term(), right.term() ); } @Override Comparator toLongBucketComparatorInternal() { - return Comparator.comparingLong( LongBucket::term ).reversed(); + return Comparator.comparingLong( LongBucket::termOrd ).reversed(); } }; - public Comparator> toBucketComparator(Comparator termAscendingComparator) { + public Comparator> toBucketComparator(Comparator termAscendingComparator) { return toBucketComparatorInternal( isTermOrderDescending() ? 
termAscendingComparator.reversed() : termAscendingComparator ); } @@ -87,7 +87,7 @@ public Comparator toLongBucketComparator() { return toLongBucketComparatorInternal(); } - abstract Comparator> toBucketComparatorInternal(Comparator termComparator); + abstract Comparator> toBucketComparatorInternal(Comparator termComparator); abstract Comparator toLongBucketComparatorInternal(); diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationExtractContext.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationExtractContext.java new file mode 100644 index 00000000000..2882036b99c --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationExtractContext.java @@ -0,0 +1,58 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +import java.util.Map; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; +import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; +import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Query; + +class LocalAggregationExtractContext implements AggregationExtractContext { + + private final AggregationExtractContext delegate; + + private Map, Object> results; + + LocalAggregationExtractContext(AggregationExtractContext delegate) { + this.delegate = delegate; + } + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return delegate.toPredicateRequestContext( absolutePath ); + } + + @Override + public IndexReader getIndexReader() { + return delegate.getIndexReader(); + } + + @Override + public FromDocumentValueConvertContext fromDocumentValueConvertContext() { + return delegate.fromDocumentValueConvertContext(); + } + + @SuppressWarnings("unchecked") + @Override + public T getCollectorResults(CollectorKey key) { + return (T) results.get( key ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return delegate.createNestedDocsProvider( nestedDocumentPath, nestedFilter ); + } + + public void setResults(Map, Object> results) { + this.results = results; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationRequestContext.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationRequestContext.java new file mode 100644 index 00000000000..97dc028f22a --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationRequestContext.java @@ -0,0 +1,60 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +import java.util.ArrayList; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; +import org.hibernate.search.engine.search.common.NamedValues; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.Query; + +class LocalAggregationRequestContext implements AggregationRequestContext { + + private final AggregationRequestContext delegate; + private final Set> localCollectorFactories = new LinkedHashSet<>(); + + LocalAggregationRequestContext(AggregationRequestContext delegate) { + this.delegate = delegate; + } + + @Override + public > void requireCollector( + CollectorFactory collectorFactory + ) { + localCollectorFactories.add( collectorFactory ); + } + + @Override + public NamedValues queryParameters() { + return delegate.queryParameters(); + } + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return delegate.toPredicateRequestContext( absolutePath ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return delegate.createNestedDocsProvider( nestedDocumentPath, nestedFilter ); + } + + public List> localCollectorFactories() { + return new ArrayList<>( localCollectorFactories ); + } + + public AggregationRequestContext actualContext() { + return delegate; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java index 4d980d2c5ab..b76319c716f 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java @@ -4,5 +4,42 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; -public record LongBucket(long term, long count) { +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.lucene.search.Collector; + +public class LongBucket { + public final long termOrd; + public final List[] collectors; + public long count; + + @SuppressWarnings("unchecked") + public LongBucket(long termOrd, Collector[] collectors, long count) { + this.termOrd = termOrd; + this.collectors = new List[collectors.length]; + for ( int i = 0; i < collectors.length; i++ ) { + this.collectors[i] = new ArrayList<>(); + this.collectors[i].add( collectors[i] ); + } + this.count = count; + } + + public long count() { + return count; + } + + public long termOrd() { + return termOrd; + } + + @Override + public String toString() { + return "LongBucket{" + + "termOrd=" + termOrd + + ", count=" + count + + ", collectors=" + Arrays.toString( collectors ) + + '}'; + } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java 
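Since LongBucket now carries one list of collectors per sub-aggregation rather than a bare count, the final step for each bucket is to reduce those lists through the matching CollectorManagers and hand the results, keyed by CollectorKey, to the nested extractor via the local extract context. The sketch below mirrors the prepareResults helper added to AbstractLuceneMultivaluedTermsAggregation above; it is illustrative only, and the keys, managers and bucket are assumed to come from the surrounding collector.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey;
import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket;

import org.apache.lucene.search.CollectorManager;

final class BucketReductionSketch {

    // Reduce the collectors accumulated for one bucket into one result per sub-aggregation.
    // The resulting map is what LocalAggregationExtractContext.setResults(...) receives
    // before the nested extractor runs against it.
    @SuppressWarnings({ "rawtypes", "unchecked" })
    static Map<CollectorKey, Object> reduceBucket(LongBucket bucket, CollectorKey[] keys,
            CollectorManager[] managers) throws IOException {
        Map<CollectorKey, Object> results = new HashMap<>();
        for ( int i = 0; i < keys.length; i++ ) {
            results.put( keys[i], managers[i].reduce( bucket.collectors[i] ) );
        }
        return results;
    }
}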
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java index 1b1b8964f92..e9ebf2f5a85 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java @@ -8,13 +8,10 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.LinkedHashMap; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.function.Function; -import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollectorFactory; @@ -26,19 +23,14 @@ import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; -import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; import org.hibernate.search.backend.lucene.types.codec.impl.AbstractLuceneNumericFieldCodec; -import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.spi.RangeAggregationBuilder; -import org.hibernate.search.engine.search.common.NamedValues; import org.hibernate.search.engine.search.common.ValueModel; import org.hibernate.search.util.common.data.Range; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; -import org.apache.lucene.search.Query; /** * @param The type of field values. 
@@ -207,79 +199,4 @@ protected CountBuilder(AbstractLuceneNumericFieldCodec codec, Function> localCollectorFactories = new LinkedHashSet<>(); - - private LocalAggregationRequestContext(AggregationRequestContext delegate) { - this.delegate = delegate; - } - - @Override - public > void requireCollector( - CollectorFactory collectorFactory) { - localCollectorFactories.add( collectorFactory ); - } - - @Override - public NamedValues queryParameters() { - return delegate.queryParameters(); - } - - @Override - public PredicateRequestContext toPredicateRequestContext(String absolutePath) { - return delegate.toPredicateRequestContext( absolutePath ); - } - - @Override - public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { - return delegate.createNestedDocsProvider( nestedDocumentPath, nestedFilter ); - } - - public List> localCollectorFactories() { - return new ArrayList<>( localCollectorFactories ); - } - } - - private static class LocalAggregationExtractContext implements AggregationExtractContext { - - private final AggregationExtractContext delegate; - - private Map, Object> results; - - private LocalAggregationExtractContext(AggregationExtractContext delegate) { - this.delegate = delegate; - } - - @Override - public PredicateRequestContext toPredicateRequestContext(String absolutePath) { - return delegate.toPredicateRequestContext( absolutePath ); - } - - @Override - public IndexReader getIndexReader() { - return delegate.getIndexReader(); - } - - @Override - public FromDocumentValueConvertContext fromDocumentValueConvertContext() { - return delegate.fromDocumentValueConvertContext(); - } - - @SuppressWarnings("unchecked") - @Override - public T getCollectorResults(CollectorKey key) { - return (T) results.get( key ); - } - - @Override - public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { - return delegate.createNestedDocsProvider( nestedDocumentPath, nestedFilter ); - } - - public void setResults(Map, Object> results) { - this.results = results; - } - } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java index 5ec21a7054f..96b7a0ff2c2 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java @@ -14,6 +14,7 @@ import java.util.TreeSet; import java.util.function.Function; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.BaseTermsCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.NumericTermsCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.NumericTermsCollectorFactory; @@ -21,12 +22,14 @@ import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; import 
org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; import org.hibernate.search.backend.lucene.types.codec.impl.AbstractLuceneNumericFieldCodec; import org.hibernate.search.backend.lucene.types.lowlevel.impl.LuceneNumericDomain; import org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; @@ -35,15 +38,14 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.PriorityQueue; /** * @param The type of field values. * @param The type of keys in the returned map. It can be {@code F} * or a different type if value converters are used. */ -public class LuceneNumericTermsAggregation - extends AbstractLuceneMultivaluedTermsAggregation { +public class LuceneNumericTermsAggregation + extends AbstractLuceneMultivaluedTermsAggregation { private final LuceneNumericDomain numericDomain; @@ -51,7 +53,7 @@ public class LuceneNumericTermsAggregation private final Function decoder; private CollectorKey collectorKey; - private LuceneNumericTermsAggregation(Builder builder) { + private LuceneNumericTermsAggregation(Builder builder) { super( builder ); this.numericDomain = builder.codec.getDomain(); this.termComparator = numericDomain.createComparator(); @@ -59,21 +61,20 @@ private LuceneNumericTermsAggregation(Builder builder) { } @Override - public Extractor> request(AggregationRequestContext context) { + public Extractor> request(AggregationRequestContext context) { NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( absoluteFieldPath, nestedDocsProvider ); - var termsCollectorFactory = NumericTermsCollectorFactory.instance( source ); + LocalAggregationRequestContext localAggregationContext = new LocalAggregationRequestContext( context ); + Extractor extractor = aggregation.request( localAggregationContext ); + + var termsCollectorFactory = + NumericTermsCollectorFactory.instance( source, localAggregationContext.localCollectorFactories() ); context.requireCollector( termsCollectorFactory ); collectorKey = termsCollectorFactory.getCollectorKey(); - return extractor( context ); - } - - @Override - protected Extractor> extractor(AggregationRequestContext context) { - return new LuceneNumericTermsAggregationExtractor(); + return new LuceneNumericTermsAggregationExtractor( extractor ); } public static class Factory @@ -94,6 +95,15 @@ public TermsAggregationBuilder.TypeSelector create(LuceneSearchIndexScope sco private class LuceneNumericTermsAggregationExtractor extends AbstractExtractor { + private LuceneNumericTermsAggregationExtractor(Extractor extractor) { + super( extractor ); + } + + @Override + protected BaseTermsCollector termsCollector(AggregationExtractContext context) throws IOException { + return context.getCollectorResults( collectorKey ); + } + @Override Comparator getAscendingTermComparator() { return termComparator; @@ -105,13 +115,22 @@ V termToFieldValue(E key) { } @Override - List> 
getTopBuckets(AggregationExtractContext context) { + List> getTopBuckets(AggregationExtractContext context) throws IOException { var termsCollector = context.getCollectorResults( collectorKey ); + LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); + List counts = termsCollector.counts( order, maxTermCount, minDocCount ); - List> buckets = new ArrayList<>(); + List> buckets = new ArrayList<>(); for ( LongBucket bucket : counts ) { - buckets.add( new Bucket<>( numericDomain.sortedDocValueToTerm( bucket.term() ), bucket.count() ) ); + localContext.setResults( prepareResults( bucket, termsCollector ) ); + buckets.add( + new Bucket<>( + numericDomain.sortedDocValueToTerm( bucket.termOrd() ), + bucket.count(), + extractor.extract( localContext ) + ) + ); } return buckets; } @@ -142,7 +161,7 @@ Set collectFirstTerms(IndexReader reader, boolean descending, int limit) } - private static class TypeSelector extends AbstractTypeSelector { + private static class TypeSelector extends AbstractTypeSelector { private final AbstractLuceneNumericFieldCodec codec; private TypeSelector(AbstractLuceneNumericFieldCodec codec, @@ -153,16 +172,16 @@ private TypeSelector(AbstractLuceneNumericFieldCodec codec, @SuppressWarnings("unchecked") @Override - public Builder type(Class expectedType, ValueModel valueModel) { + public Builder type(Class expectedType, ValueModel valueModel) { if ( ValueModel.RAW.equals( valueModel ) ) { - return new Builder<>( codec, scope, field, + return new CountBuilder<>( codec, scope, field, ( (ProjectionConverter) field.type().rawProjectionConverter() ) .withConvertedType( expectedType, field ), Function.identity() ); } else { - return new Builder<>( codec, scope, field, + return new CountBuilder<>( codec, scope, field, field.type().projectionConverter( valueModel ).withConvertedType( expectedType, field ), codec::decode ); @@ -170,37 +189,52 @@ private TypeSelector(AbstractLuceneNumericFieldCodec codec, } } - private static class Builder - extends AbstractBuilder { + private static class CountBuilder extends Builder { + + private CountBuilder(AbstractLuceneNumericFieldCodec codec, LuceneSearchIndexScope scope, + LuceneSearchIndexValueFieldContext field, + ProjectionConverter fromFieldValueConverter, + Function decoder) { + super( codec, scope, field, LuceneSearchAggregation.from( scope, + LuceneCountDocumentAggregation.factory().create( scope, null ).type().build() ), fromFieldValueConverter, + decoder ); + } + } + + private static class Builder + extends AbstractBuilder { private final AbstractLuceneNumericFieldCodec codec; private final Function decoder; - public Builder(AbstractLuceneNumericFieldCodec codec, LuceneSearchIndexScope scope, - LuceneSearchIndexValueFieldContext field, ProjectionConverter fromFieldValueConverter, + private Builder(AbstractLuceneNumericFieldCodec codec, LuceneSearchIndexScope scope, + LuceneSearchIndexValueFieldContext field, LuceneSearchAggregation aggregation, + ProjectionConverter fromFieldValueConverter, Function decoder) { - super( scope, field, fromFieldValueConverter ); + super( scope, field, aggregation, fromFieldValueConverter ); this.codec = codec; this.decoder = decoder; } - @Override - public LuceneNumericTermsAggregation build() { - return new LuceneNumericTermsAggregation<>( this ); + private Builder(AbstractLuceneNumericFieldCodec codec, LuceneSearchIndexScope scope, + LuceneSearchIndexValueFieldContext field, + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter, + 
Function decoder, + BucketOrder order, int minDocCount, int maxTermCount) { + super( scope, field, aggregation, fromFieldValueConverter, order, minDocCount, maxTermCount ); + this.codec = codec; + this.decoder = decoder; } - } - private static class HibernateSearchTermsQueue extends PriorityQueue { - private final Comparator comparator; - - public HibernateSearchTermsQueue(Comparator comparator, int maxSize) { - super( maxSize ); - this.comparator = comparator; + @Override + public LuceneNumericTermsAggregation build() { + return new LuceneNumericTermsAggregation<>( this ); } @Override - protected boolean lessThan(E t1, E t2) { - return comparator.compare( t1, t2 ) > 0; + public TermsAggregationBuilder withValue(SearchAggregation aggregation) { + return new Builder<>( codec, scope, field, LuceneSearchAggregation.from( scope, aggregation ), + fromFieldValueConverter, decoder, order, minDocCount, maxTermCount ); } } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java index bf20eff0559..8d305759230 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java @@ -12,6 +12,7 @@ import java.util.Set; import java.util.TreeSet; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.BaseTermsCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TextTermsCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TextTermsCollectorFactory; @@ -19,10 +20,12 @@ import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneValueFieldSearchQueryElementFactory; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; import org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; @@ -36,37 +39,46 @@ * @param The type of keys in the returned map. It can be {@code String} * or a different type if value converters are used. 
*/ -public class LuceneTextTermsAggregation - extends AbstractLuceneMultivaluedTermsAggregation { +public class LuceneTextTermsAggregation + extends AbstractLuceneMultivaluedTermsAggregation { private static final Comparator STRING_COMPARATOR = Comparator.naturalOrder(); private CollectorKey collectorKey; - private LuceneTextTermsAggregation(Builder builder) { + private LuceneTextTermsAggregation(Builder builder) { super( builder ); } @Override - public Extractor> request(AggregationRequestContext context) { + public Extractor> request(AggregationRequestContext context) { NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); JoiningTextMultiValuesSource source = JoiningTextMultiValuesSource.fromField( absoluteFieldPath, nestedDocsProvider ); - var termsCollectorFactory = TextTermsCollectorFactory.instance( absoluteFieldPath, source ); + + LocalAggregationRequestContext localAggregationContext = new LocalAggregationRequestContext( context ); + Extractor extractor = aggregation.request( localAggregationContext ); + + var termsCollectorFactory = TextTermsCollectorFactory.instance( absoluteFieldPath, source, + localAggregationContext.localCollectorFactories() ); context.requireCollector( termsCollectorFactory ); collectorKey = termsCollectorFactory.getCollectorKey(); - return extractor( context ); - } - - @Override - protected Extractor> extractor(AggregationRequestContext context) { - return new LuceneTextTermsAggregationExtractor(); + return new LuceneTextTermsAggregationExtractor( extractor ); } private class LuceneTextTermsAggregationExtractor extends AbstractExtractor { + private LuceneTextTermsAggregationExtractor(Extractor extractor) { + super( extractor ); + } + + @Override + protected BaseTermsCollector termsCollector(AggregationExtractContext context) throws IOException { + return context.getCollectorResults( collectorKey ); + } + @Override Set collectFirstTerms(IndexReader reader, boolean descending, int limit) throws IOException { @@ -106,15 +118,24 @@ String termToFieldValue(String key) { } @Override - List> getTopBuckets(AggregationExtractContext context) throws IOException { + List> getTopBuckets(AggregationExtractContext context) throws IOException { var termsCollector = context.getCollectorResults( collectorKey ); - List counts = termsCollector.counts( order, maxTermCount, minDocCount ); + LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); + + List results = termsCollector.results( order, maxTermCount, minDocCount ); var dv = MultiDocValues.getSortedSetValues( context.getIndexReader(), absoluteFieldPath ); - List> buckets = new ArrayList<>(); - for ( LongBucket bucket : counts ) { - buckets.add( new Bucket<>( dv.lookupOrd( bucket.term() ).utf8ToString(), bucket.count() ) ); + List> buckets = new ArrayList<>(); + for ( LongBucket bucket : results ) { + localContext.setResults( prepareResults( bucket, termsCollector ) ); + buckets.add( + new Bucket<>( + dv.lookupOrd( bucket.termOrd() ).utf8ToString(), + bucket.count(), + extractor.extract( localContext ) + ) + ); } return buckets; } @@ -128,40 +149,62 @@ public TypeSelector create(LuceneSearchIndexScope scope, LuceneSearchIndexVal } } - private static class TypeSelector extends AbstractTypeSelector { + private static class TypeSelector extends AbstractTypeSelector { private TypeSelector(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field) { super( scope, field ); } @SuppressWarnings("unchecked") @Override - public Builder type(Class expectedType, 
ValueModel valueModel) { + public Builder type(Class expectedType, ValueModel valueModel) { if ( ValueModel.RAW.equals( valueModel ) ) { - return new Builder<>( scope, field, + return new CountBuilder<>( scope, field, ( (ProjectionConverter) field.type().rawProjectionConverter() ) .withConvertedType( expectedType, field ) ); } else { - return new Builder<>( scope, field, + return new CountBuilder<>( scope, field, field.type().projectionConverter( valueModel ).withConvertedType( expectedType, field ) ); } } } - private static class Builder - extends AbstractBuilder { + private static class CountBuilder extends Builder { - private Builder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + private CountBuilder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, ProjectionConverter fromFieldValueConverter) { - super( scope, field, fromFieldValueConverter ); + super( scope, field, + LuceneSearchAggregation.from( scope, + LuceneCountDocumentAggregation.factory().create( scope, null ).type().build() ), + fromFieldValueConverter ); + } + } + + private static class Builder + extends AbstractBuilder { + + private Builder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter) { + super( scope, field, aggregation, fromFieldValueConverter ); + } + + private Builder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter, + BucketOrder order, int minDocCount, int maxTermCount) { + super( scope, field, aggregation, fromFieldValueConverter, order, minDocCount, maxTermCount ); } @Override - public LuceneTextTermsAggregation build() { + public LuceneTextTermsAggregation build() { return new LuceneTextTermsAggregation<>( this ); } + @Override + public TermsAggregationBuilder withValue(SearchAggregation aggregation) { + return new Builder<>( scope, field, LuceneSearchAggregation.from( scope, aggregation ), fromFieldValueConverter, + order, minDocCount, maxTermCount ); + } } } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeValueStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeValueStep.java index 503c3a090db..963796503f4 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeValueStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeValueStep.java @@ -15,8 +15,7 @@ * The step in a "range" aggregation definition where the aggregation value for the range can be set. * * @param Scope root type. - * @param The "self" type (the actual exposed type of this step). - * @param The type of factory used to create predicates in {@link #filter(Function)}. + * @param The type of factory used to create predicates in {@link RangeAggregationOptionsStep#filter(Function)}. * @param The type of the targeted field. 
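+ * <p>
+ * For illustration only, with hypothetical field names ({@code "price"} being an aggregable numeric field),
+ * the value computed for each range can be switched from the default document count to another aggregation:
+ * <pre>{@code
+ * f.range().field( "price", Double.class )
+ *         .range( 0.0, 100.0 )
+ *         .range( 100.0, null )
+ *         .value( f.avg().field( "price", Double.class ) )
+ * }</pre>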
*/ public interface RangeAggregationRangeValueStep< diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationFieldStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationFieldStep.java index 6d4ae9fd595..249f885b62c 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationFieldStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationFieldStep.java @@ -27,7 +27,7 @@ public interface TermsAggregationFieldStep The type of field values. * @return The next step. */ - default TermsAggregationOptionsStep> field(String fieldPath, Class type) { + default TermsAggregationRangeValueStep> field(String fieldPath, Class type) { return field( fieldPath, type, ValueModel.MAPPING ); } @@ -43,7 +43,7 @@ public interface TermsAggregationFieldStep TermsAggregationOptionsStep> field(String fieldPath, Class type, + default TermsAggregationRangeValueStep> field(String fieldPath, Class type, org.hibernate.search.engine.search.common.ValueConvert convert) { return field( fieldPath, type, org.hibernate.search.engine.search.common.ValueConvert.toValueModel( convert ) ); @@ -59,7 +59,7 @@ public interface TermsAggregationFieldStep TermsAggregationOptionsStep> field(String fieldPath, Class type, + TermsAggregationRangeValueStep> field(String fieldPath, Class type, ValueModel valueModel); /** @@ -69,7 +69,7 @@ public interface TermsAggregationFieldStep The type of field values. * @return The next step. */ - default TermsAggregationOptionsStep> field( + default TermsAggregationRangeValueStep> field( TermsAggregationFieldReference fieldReference) { return field( fieldReference.absolutePath(), fieldReference.aggregationType(), fieldReference.valueModel() ); } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationRangeValueStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationRangeValueStep.java new file mode 100644 index 00000000000..316cad75133 --- /dev/null +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationRangeValueStep.java @@ -0,0 +1,32 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.engine.search.aggregation.dsl; + +import java.util.Map; +import java.util.function.Function; + +import org.hibernate.search.engine.search.aggregation.SearchAggregation; +import org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; + +/** + * The step in a "terms" aggregation definition where the aggregation value for the term can be set. + * + * @param Scope root type. + * @param The type of factory used to create predicates in {@link TermsAggregationOptionsStep#filter(Function)}. + * @param The type of the targeted field. 
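+ *
+ * <p>
+ * For illustration only, with hypothetical field names ({@code "genre"} a keyword field, {@code "price"}
+ * an aggregable numeric field), the per-term value can be switched from the default document count to
+ * another aggregation, e.g. yielding a {@code Map<String, Double>} of average prices per genre:
+ * <pre>{@code
+ * f.terms().field( "genre", String.class )
+ *         .value( f.avg().field( "price", Double.class ) )
+ * }</pre>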
+ */ +public interface TermsAggregationRangeValueStep< + SR, + S extends TermsAggregationOptionsStep, + PDF extends TypedSearchPredicateFactory, + F, + A> extends TermsAggregationOptionsStep { + + TermsAggregationOptionsStep> value(SearchAggregation aggregation); + + default TermsAggregationOptionsStep> value(AggregationFinalStep aggregation) { + return value( aggregation.toAggregation() ); + } +} diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationFieldStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationFieldStepImpl.java index 6f4d7ac29af..d3f5fd9f342 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationFieldStepImpl.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationFieldStepImpl.java @@ -7,7 +7,7 @@ import java.util.Map; import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationFieldStep; -import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationOptionsStep; +import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationRangeValueStep; import org.hibernate.search.engine.search.aggregation.dsl.spi.SearchAggregationDslContext; import org.hibernate.search.engine.search.aggregation.spi.AggregationTypeKeys; import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; @@ -24,11 +24,11 @@ public TermsAggregationFieldStepImpl(SearchAggregationDslContext TermsAggregationOptionsStep> field(String fieldPath, Class type, + public TermsAggregationRangeValueStep> field(String fieldPath, Class type, ValueModel valueModel) { Contracts.assertNotNull( fieldPath, "fieldPath" ); Contracts.assertNotNull( type, "type" ); - TermsAggregationBuilder builder = dslContext.scope() + TermsAggregationBuilder builder = dslContext.scope() .fieldQueryElement( fieldPath, AggregationTypeKeys.TERMS ).type( type, valueModel ); return new TermsAggregationOptionsStepImpl<>( builder, dslContext ); } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationOptionsStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationOptionsStepImpl.java index 687a58cb583..634417581d7 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationOptionsStepImpl.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationOptionsStepImpl.java @@ -9,6 +9,7 @@ import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationOptionsStep; +import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationRangeValueStep; import org.hibernate.search.engine.search.aggregation.dsl.spi.SearchAggregationDslContext; import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; import org.hibernate.search.engine.search.predicate.SearchPredicate; @@ -16,57 +17,57 @@ import org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; import org.hibernate.search.util.common.impl.Contracts; -class TermsAggregationOptionsStepImpl, F> - implements TermsAggregationOptionsStep, PDF, F, Map> { - private final TermsAggregationBuilder builder; +class TermsAggregationOptionsStepImpl, F, V> + implements TermsAggregationRangeValueStep, PDF, F, Map> { + private final 
TermsAggregationBuilder builder; private final SearchAggregationDslContext dslContext; - TermsAggregationOptionsStepImpl(TermsAggregationBuilder builder, + TermsAggregationOptionsStepImpl(TermsAggregationBuilder builder, SearchAggregationDslContext dslContext) { this.builder = builder; this.dslContext = dslContext; } @Override - public TermsAggregationOptionsStepImpl orderByCountDescending() { + public TermsAggregationOptionsStepImpl orderByCountDescending() { builder.orderByCountDescending(); return this; } @Override - public TermsAggregationOptionsStepImpl orderByCountAscending() { + public TermsAggregationOptionsStepImpl orderByCountAscending() { builder.orderByCountAscending(); return this; } @Override - public TermsAggregationOptionsStepImpl orderByTermAscending() { + public TermsAggregationOptionsStepImpl orderByTermAscending() { builder.orderByTermAscending(); return this; } @Override - public TermsAggregationOptionsStepImpl orderByTermDescending() { + public TermsAggregationOptionsStepImpl orderByTermDescending() { builder.orderByTermDescending(); return this; } @Override - public TermsAggregationOptionsStepImpl minDocumentCount(int minDocumentCount) { + public TermsAggregationOptionsStepImpl minDocumentCount(int minDocumentCount) { Contracts.assertPositiveOrZero( minDocumentCount, "minDocumentCount" ); builder.minDocumentCount( minDocumentCount ); return this; } @Override - public TermsAggregationOptionsStepImpl maxTermCount(int maxTermCount) { + public TermsAggregationOptionsStepImpl maxTermCount(int maxTermCount) { Contracts.assertStrictlyPositive( maxTermCount, "maxTermCount" ); builder.maxTermCount( maxTermCount ); return this; } @Override - public TermsAggregationOptionsStepImpl filter( + public TermsAggregationOptionsStepImpl filter( Function clauseContributor) { SearchPredicate predicate = clauseContributor.apply( dslContext.predicateFactory() ).toPredicate(); @@ -74,13 +75,18 @@ public TermsAggregationOptionsStepImpl filter( } @Override - public TermsAggregationOptionsStepImpl filter(SearchPredicate searchPredicate) { + public TermsAggregationOptionsStepImpl filter(SearchPredicate searchPredicate) { builder.filter( searchPredicate ); return this; } @Override - public SearchAggregation> toAggregation() { + public SearchAggregation> toAggregation() { return builder.build(); } + + @Override + public TermsAggregationOptionsStep> value(SearchAggregation aggregation) { + return new TermsAggregationOptionsStepImpl<>( builder.withValue( aggregation ), dslContext ); + } } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/TermsAggregationBuilder.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/TermsAggregationBuilder.java index 40414b5cd92..f6ca70c19ba 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/TermsAggregationBuilder.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/TermsAggregationBuilder.java @@ -6,13 +6,14 @@ import java.util.Map; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.common.ValueModel; import org.hibernate.search.engine.search.predicate.SearchPredicate; -public interface TermsAggregationBuilder extends SearchAggregationBuilder> { +public interface TermsAggregationBuilder extends SearchAggregationBuilder> { interface TypeSelector { - TermsAggregationBuilder type(Class expectedType, ValueModel valueModel); + TermsAggregationBuilder type(Class expectedType, 
ValueModel valueModel); } void filter(SearchPredicate filter); @@ -29,4 +30,6 @@ interface TypeSelector { void maxTermCount(int maxTermCount); + TermsAggregationBuilder withValue(SearchAggregation aggregation); + } diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricNumericFieldsAggregationsIT.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricNumericFieldsAggregationsIT.java index a8c1a9860f7..0d6c297aefe 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricNumericFieldsAggregationsIT.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricNumericFieldsAggregationsIT.java @@ -72,7 +72,8 @@ class MetricNumericFieldsAggregationsIT { private final AggregationKey avgBigDecimals = AggregationKey.of( "avgBigDecimals" ); private final AggregationKey countDocuments = AggregationKey.of( "countDocuments" ); private final AggregationKey countValuesIntegerMultiValued = AggregationKey.of( "countValuesIntegerMultiValued" ); - private final AggregationKey countDistinctValuesIntegerMultiValued = AggregationKey.of( "countDistinctValuesIntegerMultiValued" ); + private final AggregationKey countDistinctValuesIntegerMultiValued = + AggregationKey.of( "countDistinctValuesIntegerMultiValued" ); @BeforeEach void setup() { @@ -211,7 +212,8 @@ private SearchQuery defineAggregations( .aggregation( avgBigIntegers, f -> f.avg().field( "bigInteger", BigInteger.class ) ) .aggregation( avgBigDecimals, f -> f.avg().field( "bigDecimal", BigDecimal.class ) ) .aggregation( countDocuments, f -> f.countDocuments() ) - .aggregation( countDistinctValuesIntegerMultiValued, f -> f.countDistinctValues().field( "integerMultiValued" ) ) + .aggregation( countDistinctValuesIntegerMultiValued, + f -> f.countDistinctValues().field( "integerMultiValued" ) ) .aggregation( countValuesIntegerMultiValued, f -> f.countValues().field( "integerMultiValued" ) ) .toQuery(); } @@ -262,7 +264,8 @@ private static class IndexBinding { IndexBinding(IndexSchemaElement root) { integer = root.field( "integer", f -> f.asInteger().aggregable( Aggregable.YES ) ).toReference(); - integerMultiValued = root.field( "integerMultiValued", f -> f.asInteger().aggregable( Aggregable.YES ) ).multiValued().toReference(); + integerMultiValued = root.field( "integerMultiValued", f -> f.asInteger().aggregable( Aggregable.YES ) ) + .multiValued().toReference(); converted = root.field( "converted", f -> f.asInteger().aggregable( Aggregable.YES ) .projectionConverter( String.class, (value, context) -> value.toString() ) ).toReference(); doubleF = root.field( "doubleF", f -> f.asDouble().aggregable( Aggregable.YES ) ).toReference(); diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/BaseTermsCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/BaseTermsCollector.java new file mode 100644 index 00000000000..c139c430de6 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/BaseTermsCollector.java @@ -0,0 +1,16 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; + +public interface BaseTermsCollector { + + CollectorKey[] keys(); + + CollectorManager[] managers(); + +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java index 2977d48b37a..02094d1966e 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java @@ -15,25 +15,36 @@ import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket; import com.carrotsearch.hppc.LongHashSet; -import com.carrotsearch.hppc.LongIntHashMap; -import com.carrotsearch.hppc.LongIntMap; -import com.carrotsearch.hppc.procedures.LongIntProcedure; +import com.carrotsearch.hppc.LongObjectHashMap; +import com.carrotsearch.hppc.cursors.LongObjectCursor; +import com.carrotsearch.hppc.procedures.LongObjectProcedure; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.SimpleCollector; import org.apache.lucene.util.PriorityQueue; -public class NumericTermsCollector extends SimpleCollector { +public class NumericTermsCollector extends SimpleCollector implements BaseTermsCollector { private final LongHashSet uniqueLeafIndicesForDocument = new LongHashSet(); private final LongMultiValuesSource valuesSource; - private final LongIntMap hashCounts = new LongIntHashMap(); + private final LongObjectHashMap segmentValues = new LongObjectHashMap<>(); + + private final CollectorKey[] keys; + private final CollectorManager[] managers; + private LongMultiValues values; + private LeafReaderContext leafReaderContext; - public NumericTermsCollector(LongMultiValuesSource valuesSource) { + public NumericTermsCollector(LongMultiValuesSource valuesSource, CollectorKey[] keys, + CollectorManager[] managers) { this.valuesSource = valuesSource; + this.keys = keys; + this.managers = managers; } @Override @@ -46,19 +57,24 @@ public void collect(int doc) throws IOException { // Each document must be counted only once per range. 
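+ // Besides being counted, each matching doc is forwarded to the bucket's nested leaf collectors:
+ // SegmentValue lazily creates one collector per nested aggregation (from the CollectorManagers
+ // passed to this collector) for the current term value.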
long value = values.nextValue(); if ( uniqueLeafIndicesForDocument.add( value ) ) { - hashCounts.addTo( value, 1 ); + SegmentValue segmentValue = segmentValues.get( value ); + if ( segmentValue == null ) { + segmentValue = new SegmentValue( managers ); + segmentValues.put( value, segmentValue ); + } + segmentValue.collect( doc ); } } } } public List counts(BucketOrder order, int topN, int minDocCount) { - int size = Math.min( topN, hashCounts.size() ); + int size = Math.min( topN, segmentValues.size() ); PriorityQueue pq = new HibernateSearchBucketOrderQueue( order, size ); - hashCounts.forEach( (LongIntProcedure) (key, value) -> { - if ( value >= minDocCount ) { - pq.insertWithOverflow( new LongBucket( key, value ) ); + segmentValues.forEach( (LongObjectProcedure) (key, value) -> { + if ( value.count >= minDocCount ) { + pq.insertWithOverflow( new LongBucket( key, value.collectors, value.count ) ); } } ); @@ -77,13 +93,27 @@ public ScoreMode scoreMode() { } protected void doSetNextReader(LeafReaderContext context) throws IOException { - values = valuesSource.getValues( context ); + this.values = valuesSource.getValues( context ); + this.leafReaderContext = context; + for ( LongObjectCursor value : segmentValues ) { + value.value.resetLeafCollectors( context ); + } } public void finish() { values = null; } + @Override + public CollectorKey[] keys() { + return keys; + } + + @Override + public CollectorManager[] managers() { + return managers; + } + private static class HibernateSearchBucketOrderQueue extends PriorityQueue { private final Comparator comparator; @@ -98,4 +128,32 @@ protected boolean lessThan(LongBucket t1, LongBucket t2) { } } + private class SegmentValue { + final Collector[] collectors; + final LeafCollector[] leafCollectors; + long count = 0L; + + SegmentValue(CollectorManager[] managers) throws IOException { + this.collectors = new Collector[managers.length]; + this.leafCollectors = new LeafCollector[managers.length]; + for ( int i = 0; i < managers.length; i++ ) { + collectors[i] = managers[i].newCollector(); + leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); + } + } + + void collect(int doc) throws IOException { + count++; + for ( LeafCollector collector : leafCollectors ) { + collector.collect( doc ); + } + } + + void resetLeafCollectors(LeafReaderContext leafReaderContext) throws IOException { + for ( int i = 0; i < leafCollectors.length; i++ ) { + leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); + } + } + } + } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java index a99503ef62a..7c2cc459e44 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java @@ -4,26 +4,44 @@ */ package org.hibernate.search.backend.lucene.lowlevel.collector.impl; +import java.io.IOException; +import java.util.List; + import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; +import org.apache.lucene.search.CollectorManager; + public class NumericTermsCollectorFactory implements CollectorFactory { public static CollectorFactory instance( - 
LongMultiValuesSource valuesSource) { - return new NumericTermsCollectorFactory( valuesSource ); + LongMultiValuesSource valuesSource, List> collectorFactories) { + return new NumericTermsCollectorFactory( valuesSource, collectorFactories ); } - public final CollectorKey key = CollectorKey.create(); + private final CollectorKey key = CollectorKey.create(); private final LongMultiValuesSource valuesSource; + private final List> collectorFactories; - public NumericTermsCollectorFactory(LongMultiValuesSource valuesSource) { + public NumericTermsCollectorFactory(LongMultiValuesSource valuesSource, + List> collectorFactories) { this.valuesSource = valuesSource; + this.collectorFactories = collectorFactories; } + @SuppressWarnings({ "unchecked" }) @Override - public NumericTermsCollectorManager createCollectorManager(CollectorExecutionContext context) { - return new NumericTermsCollectorManager( valuesSource ); + public NumericTermsCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { + CollectorKey[] keys = new CollectorKey[collectorFactories.size()]; + var managers = new CollectorManager[collectorFactories.size()]; + int index = 0; + for ( CollectorFactory factory : collectorFactories ) { + keys[index] = factory.getCollectorKey(); + CollectorManager collectorManager = factory.createCollectorManager( context ); + managers[index] = collectorManager; + index++; + } + return new NumericTermsCollectorManager( valuesSource, keys, managers ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java index afdeab71207..affb41d99ed 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java @@ -8,20 +8,26 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; +import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; public class NumericTermsCollectorManager implements CollectorManager { private final LongMultiValuesSource valuesSource; + private final CollectorKey[] keys; + private final CollectorManager[] managers; - public NumericTermsCollectorManager(LongMultiValuesSource valuesSource) { + public NumericTermsCollectorManager(LongMultiValuesSource valuesSource, + CollectorKey[] keys, CollectorManager[] managers) { this.valuesSource = valuesSource; + this.keys = keys; + this.managers = managers; } @Override public NumericTermsCollector newCollector() { - return new NumericTermsCollector( valuesSource ); + return new NumericTermsCollector( valuesSource, keys, managers ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java index c6efffdd032..bfcd753303a 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java +++ 
b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java @@ -15,34 +15,43 @@ import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket; import com.carrotsearch.hppc.LongHashSet; -import com.carrotsearch.hppc.LongIntHashMap; -import com.carrotsearch.hppc.LongIntMap; -import com.carrotsearch.hppc.cursors.LongIntCursor; -import com.carrotsearch.hppc.procedures.LongIntProcedure; +import com.carrotsearch.hppc.LongObjectHashMap; +import com.carrotsearch.hppc.cursors.LongObjectCursor; +import com.carrotsearch.hppc.procedures.LongObjectProcedure; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.SimpleCollector; import org.apache.lucene.util.PriorityQueue; -public class TextTermsCollector extends SimpleCollector { +public class TextTermsCollector extends SimpleCollector implements BaseTermsCollector { private final LongHashSet uniqueLeafIndicesForDocument = new LongHashSet(); private final TextMultiValuesSource valuesSource; - private final LongIntMap hashCounts = new LongIntHashMap(); - private final LongIntMap segmentCounts = new LongIntHashMap(); + private final LongObjectHashMap hashValues = new LongObjectHashMap<>(); + private final LongObjectHashMap segmentValues = new LongObjectHashMap<>(); private final String field; private SortedSetDocValues sortedSetValues; + private final CollectorKey[] keys; + private final CollectorManager[] managers; + private TextMultiValues values; + private LeafReaderContext leafReaderContext; - public TextTermsCollector(String field, TextMultiValuesSource valuesSource) { + public TextTermsCollector(String field, TextMultiValuesSource valuesSource, + CollectorKey[] keys, CollectorManager[] managers) { this.field = field; this.valuesSource = valuesSource; + this.keys = keys; + this.managers = managers; } @Override @@ -55,19 +64,24 @@ public void collect(int doc) throws IOException { // Each document must be counted only once per range. 
long value = values.nextOrd(); if ( uniqueLeafIndicesForDocument.add( value ) ) { - segmentCounts.addTo( value, 1 ); + SegmentValue segmentValue = segmentValues.get( value ); + if ( segmentValue == null ) { + segmentValue = new SegmentValue( managers ); + segmentValues.put( value, segmentValue ); + } + segmentValue.collect( doc ); } } } } - public List counts(BucketOrder order, int topN, int minDocCount) { - int size = Math.min( topN, hashCounts.size() ); + public List results(BucketOrder order, int topN, int minDocCount) { + int size = Math.min( topN, hashValues.size() ); PriorityQueue pq = new HibernateSearchBucketOrderQueue( order, size ); - hashCounts.forEach( (LongIntProcedure) (key, value) -> { - if ( value >= minDocCount ) { - pq.insertWithOverflow( new LongBucket( key, value ) ); + hashValues.forEach( (LongObjectProcedure) (key, value) -> { + if ( value.count >= minDocCount ) { + pq.insertWithOverflow( value ); } } ); @@ -80,6 +94,16 @@ public List counts(BucketOrder order, int topN, int minDocCount) { return buckets; } + @Override + public CollectorKey[] keys() { + return keys; + } + + @Override + public CollectorManager[] managers() { + return managers; + } + @Override public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; @@ -88,17 +112,28 @@ public ScoreMode scoreMode() { protected void doSetNextReader(LeafReaderContext context) throws IOException { initRootSortedSetDocValues( context ); this.values = valuesSource.getValues( context ); + leafReaderContext = context; } public void finish() throws IOException { - for ( LongIntCursor hashCount : segmentCounts ) { - hashCounts.addTo( sortedSetValues.lookupTerm( values.lookupOrd( hashCount.key ) ), hashCount.value ); + for ( LongObjectCursor value : segmentValues ) { + long globalOrd = sortedSetValues.lookupTerm( values.lookupOrd( value.key ) ); + LongBucket bucket = hashValues.get( globalOrd ); + if ( bucket == null ) { + bucket = new LongBucket( globalOrd, value.value.collectors, value.value.count ); + hashValues.put( globalOrd, bucket ); + } + else { + bucket.count += value.value.count; + for ( int i = 0; i < bucket.collectors.length; i++ ) { + bucket.collectors[i].add( value.value.collectors[i] ); + } + } } this.values = null; - this.segmentCounts.clear(); + this.segmentValues.clear(); } - private void initRootSortedSetDocValues(IndexReaderContext ctx) throws IOException { if ( sortedSetValues != null || ctx == null ) { return; @@ -123,4 +158,25 @@ protected boolean lessThan(LongBucket t1, LongBucket t2) { } } + private class SegmentValue { + final Collector[] collectors; + final LeafCollector[] leafCollectors; + long count = 0L; + + public SegmentValue(CollectorManager[] managers) throws IOException { + this.collectors = new Collector[managers.length]; + this.leafCollectors = new LeafCollector[managers.length]; + for ( int i = 0; i < managers.length; i++ ) { + collectors[i] = managers[i].newCollector(); + leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); + } + } + + public void collect(int doc) throws IOException { + count++; + for ( LeafCollector collector : leafCollectors ) { + collector.collect( doc ); + } + } + } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java index 11489538470..65d2aded89f 100644 --- 
a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java @@ -4,29 +4,46 @@ */ package org.hibernate.search.backend.lucene.lowlevel.collector.impl; +import java.io.IOException; +import java.util.List; + import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; +import org.apache.lucene.search.CollectorManager; + public class TextTermsCollectorFactory implements CollectorFactory { public static CollectorFactory instance( - String field, TextMultiValuesSource valuesSource) { - return new TextTermsCollectorFactory( field, valuesSource ); + String field, TextMultiValuesSource valuesSource, List> collectorFactories) { + return new TextTermsCollectorFactory( field, valuesSource, collectorFactories ); } - public final CollectorKey key = CollectorKey.create(); private final TextMultiValuesSource valuesSource; private final String field; + private final List> collectorFactories; - public TextTermsCollectorFactory(String field, TextMultiValuesSource valuesSource) { + public TextTermsCollectorFactory(String field, TextMultiValuesSource valuesSource, + List> collectorFactories) { this.field = field; this.valuesSource = valuesSource; + this.collectorFactories = collectorFactories; } + @SuppressWarnings({ "unchecked" }) @Override - public TextTermsCollectorManager createCollectorManager(CollectorExecutionContext context) { - return new TextTermsCollectorManager( field, valuesSource ); + public TextTermsCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { + CollectorKey[] keys = new CollectorKey[collectorFactories.size()]; + var managers = new CollectorManager[collectorFactories.size()]; + int index = 0; + for ( CollectorFactory factory : collectorFactories ) { + keys[index] = factory.getCollectorKey(); + CollectorManager collectorManager = factory.createCollectorManager( context ); + managers[index] = collectorManager; + index++; + } + return new TextTermsCollectorManager( field, valuesSource, keys, managers ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java index b309f509b05..9609e7aca82 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java @@ -8,6 +8,7 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; +import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; public class TextTermsCollectorManager @@ -15,15 +16,20 @@ public class TextTermsCollectorManager private final TextMultiValuesSource valuesSource; private final String field; + private final CollectorKey[] keys; + private final CollectorManager[] managers; - public TextTermsCollectorManager(String field, TextMultiValuesSource valuesSource) { + public TextTermsCollectorManager(String field, TextMultiValuesSource valuesSource, + CollectorKey[] keys, CollectorManager[] managers) { this.field = field; this.valuesSource = 
valuesSource; + this.keys = keys; + this.managers = managers; } @Override public TextTermsCollector newCollector() { - return new TextTermsCollector( field, valuesSource ); + return new TextTermsCollector( field, valuesSource, keys, managers ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java index b80563a010d..60ef17c2109 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java @@ -6,13 +6,16 @@ import java.io.IOException; import java.util.Comparator; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.BaseTermsCollector; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; -import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; @@ -21,6 +24,8 @@ import org.hibernate.search.engine.search.common.ValueModel; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; /** * @param The type of field values exposed to the mapper. @@ -28,42 +33,64 @@ * @param The type of keys in the returned map. It can be {@code F} * or a different type if value converters are used. 
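+ * @param <R> The type of the values in the returned map,
+ * as produced for each term by the nested aggregation (the document count by default).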
*/ -public abstract class AbstractLuceneMultivaluedTermsAggregation - extends AbstractLuceneBucketAggregation { +public abstract class AbstractLuceneMultivaluedTermsAggregation + extends AbstractLuceneBucketAggregation { protected final ProjectionConverter fromFieldValueConverter; protected final BucketOrder order; protected final int maxTermCount; protected final int minDocCount; + protected final LuceneSearchAggregation aggregation; - AbstractLuceneMultivaluedTermsAggregation(AbstractBuilder builder) { + AbstractLuceneMultivaluedTermsAggregation(AbstractBuilder builder) { super( builder ); this.fromFieldValueConverter = builder.fromFieldValueConverter; this.order = builder.order; this.maxTermCount = builder.maxTermCount; this.minDocCount = builder.minDocCount; + this.aggregation = builder.aggregation; } - protected abstract Extractor> extractor(AggregationRequestContext context); + protected abstract class AbstractExtractor implements Extractor> { + protected final Extractor extractor; + + protected AbstractExtractor(Extractor extractor) { + this.extractor = extractor; + } - protected abstract class AbstractExtractor implements Extractor> { @Override - public final Map extract(AggregationExtractContext context) throws IOException { - List> buckets = getTopBuckets( context ); + public final Map extract(AggregationExtractContext context) throws IOException { + List> buckets = getTopBuckets( context ); if ( minDocCount == 0 && buckets.size() < maxTermCount ) { Set firstTerms = collectFirstTerms( context.getIndexReader(), order.isTermOrderDescending(), maxTermCount ); - for ( Bucket bucket : buckets ) { + for ( Bucket bucket : buckets ) { firstTerms.remove( bucket.term() ); } - firstTerms.forEach( term -> buckets.add( new Bucket<>( term, 0 ) ) ); + R zeroValue = createZeroValue( context ); + firstTerms.forEach( term -> buckets.add( new Bucket<>( term, 0, zeroValue ) ) ); buckets.sort( order.toBucketComparator( getAscendingTermComparator() ) ); } return toMap( context.fromDocumentValueConvertContext(), buckets ); } + protected abstract BaseTermsCollector termsCollector(AggregationExtractContext context) throws IOException; + + protected R createZeroValue(AggregationExtractContext context) throws IOException { + LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); + var termsCollector = termsCollector( context ); + CollectorManager[] managers = termsCollector.managers(); + CollectorKey[] keys = termsCollector.keys(); + HashMap, Object> results = new HashMap<>(); + for ( int i = 0; i < keys.length; i++ ) { + results.put( keys[i], managers[i].reduce( List.of( managers[i].newCollector() ) ) ); + } + localContext.setResults( results ); + return extractor.extract( localContext ); + } + abstract Set collectFirstTerms(IndexReader reader, boolean descending, int limit) throws IOException; @@ -71,20 +98,32 @@ abstract Set collectFirstTerms(IndexReader reader, boolean descending, int li abstract V termToFieldValue(T key); - abstract List> getTopBuckets(AggregationExtractContext context) throws IOException; + abstract List> getTopBuckets(AggregationExtractContext context) throws IOException; - private Map toMap(FromDocumentValueConvertContext convertContext, List> buckets) { - Map result = new LinkedHashMap<>(); // LinkedHashMap to preserve ordering - for ( Bucket bucket : buckets ) { + private Map toMap(FromDocumentValueConvertContext convertContext, List> buckets) { + Map result = new LinkedHashMap<>(); // LinkedHashMap to preserve ordering + for ( Bucket bucket 
: buckets ) { V decoded = termToFieldValue( bucket.term() ); K key = fromFieldValueConverter.fromDocumentValue( decoded, convertContext ); - result.put( key, bucket.count() ); + result.put( key, bucket.value() ); + } + return result; + } + + protected Map, Object> prepareResults(LongBucket bucket, BaseTermsCollector termsCollector) + throws IOException { + Map, Object> result = new HashMap<>(); + List[] collectors = bucket.collectors; + CollectorKey[] collectorKeys = termsCollector.keys(); + CollectorManager[] managers = termsCollector.managers(); + for ( int i = 0; i < collectorKeys.length; i++ ) { + result.put( collectorKeys[i], managers[i].reduce( collectors[i] ) ); } return result; } } - abstract static class AbstractTypeSelector implements TermsAggregationBuilder.TypeSelector { + abstract static class AbstractTypeSelector implements TermsAggregationBuilder.TypeSelector { protected final LuceneSearchIndexScope scope; protected final LuceneSearchIndexValueFieldContext field; @@ -94,23 +133,33 @@ protected AbstractTypeSelector(LuceneSearchIndexScope scope, LuceneSearchInde } @Override - public abstract AbstractBuilder type(Class expectedType, ValueModel valueModel); + public abstract AbstractBuilder type(Class expectedType, ValueModel valueModel); } - abstract static class AbstractBuilder - extends AbstractLuceneBucketAggregation.AbstractBuilder - implements TermsAggregationBuilder { + abstract static class AbstractBuilder + extends AbstractLuceneBucketAggregation.AbstractBuilder + implements TermsAggregationBuilder { - private final ProjectionConverter fromFieldValueConverter; - - private BucketOrder order = BucketOrder.COUNT_DESC; - private int minDocCount = 1; - private int maxTermCount = 100; + protected final LuceneSearchAggregation aggregation; + protected final ProjectionConverter fromFieldValueConverter; + protected BucketOrder order; + protected int minDocCount; + protected int maxTermCount; AbstractBuilder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, - ProjectionConverter fromFieldValueConverter) { + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter) { + this( scope, field, aggregation, fromFieldValueConverter, BucketOrder.COUNT_DESC, 1, 100 ); + } + + AbstractBuilder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter, + BucketOrder order, int minDocCount, int maxTermCount) { super( scope, field ); + this.aggregation = aggregation; this.fromFieldValueConverter = fromFieldValueConverter; + this.order = order; + this.minDocCount = minDocCount; + this.maxTermCount = maxTermCount; } @Override @@ -144,7 +193,7 @@ public void maxTermCount(int maxTermCount) { } @Override - public abstract AbstractLuceneMultivaluedTermsAggregation build(); + public abstract AbstractLuceneMultivaluedTermsAggregation build(); protected final void order(BucketOrder order) { this.order = order; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java index ce9b0a13ce3..7f01fc781ba 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java @@ -4,5 +4,5 @@ */ package 
org.hibernate.search.backend.lucene.types.aggregation.impl; -public record Bucket(F term, long count) { +public record Bucket(F term, long count, R value) { } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java index 31233e5ad39..320fba0f177 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java @@ -9,7 +9,7 @@ public enum BucketOrder { COUNT_ASC { @Override - Comparator> toBucketComparatorInternal(Comparator termComparator) { + Comparator> toBucketComparatorInternal(Comparator termComparator) { return (left, right) -> { int order = Long.compare( left.count(), right.count() ); if ( order != 0 ) { @@ -22,12 +22,12 @@ Comparator> toBucketComparatorInternal(Comparator termComparato @Override Comparator toLongBucketComparatorInternal() { - return Comparator.comparingLong( LongBucket::count ).thenComparingLong( LongBucket::term ); + return Comparator.comparingLong( LongBucket::count ).thenComparingLong( LongBucket::termOrd ); } }, COUNT_DESC { @Override - Comparator> toBucketComparatorInternal(Comparator termComparator) { + Comparator> toBucketComparatorInternal(Comparator termComparator) { return (left, right) -> { int order = Long.compare( right.count(), left.count() ); // reversed, because desc if ( order != 0 ) { @@ -45,20 +45,20 @@ Comparator toLongBucketComparatorInternal() { if ( order != 0 ) { return order; } - order = Long.compare( left.term(), right.term() ); + order = Long.compare( left.termOrd(), right.termOrd() ); return order; }; } }, TERM_ASC { @Override - Comparator> toBucketComparatorInternal(Comparator termComparator) { + Comparator> toBucketComparatorInternal(Comparator termComparator) { return (left, right) -> termComparator.compare( left.term(), right.term() ); } @Override Comparator toLongBucketComparatorInternal() { - return Comparator.comparingLong( LongBucket::term ); + return Comparator.comparingLong( LongBucket::termOrd ); } }, TERM_DESC { @@ -68,17 +68,17 @@ boolean isTermOrderDescending() { } @Override - Comparator> toBucketComparatorInternal(Comparator termComparator) { + Comparator> toBucketComparatorInternal(Comparator termComparator) { return (left, right) -> termComparator.compare( left.term(), right.term() ); } @Override Comparator toLongBucketComparatorInternal() { - return Comparator.comparingLong( LongBucket::term ).reversed(); + return Comparator.comparingLong( LongBucket::termOrd ).reversed(); } }; - public Comparator> toBucketComparator(Comparator termAscendingComparator) { + public Comparator> toBucketComparator(Comparator termAscendingComparator) { return toBucketComparatorInternal( isTermOrderDescending() ? 
termAscendingComparator.reversed() : termAscendingComparator ); } @@ -87,7 +87,7 @@ public Comparator toLongBucketComparator() { return toLongBucketComparatorInternal(); } - abstract Comparator> toBucketComparatorInternal(Comparator termComparator); + abstract Comparator> toBucketComparatorInternal(Comparator termComparator); abstract Comparator toLongBucketComparatorInternal(); diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationExtractContext.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationExtractContext.java new file mode 100644 index 00000000000..2882036b99c --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationExtractContext.java @@ -0,0 +1,58 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +import java.util.Map; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; +import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; +import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Query; + +class LocalAggregationExtractContext implements AggregationExtractContext { + + private final AggregationExtractContext delegate; + + private Map, Object> results; + + LocalAggregationExtractContext(AggregationExtractContext delegate) { + this.delegate = delegate; + } + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return delegate.toPredicateRequestContext( absolutePath ); + } + + @Override + public IndexReader getIndexReader() { + return delegate.getIndexReader(); + } + + @Override + public FromDocumentValueConvertContext fromDocumentValueConvertContext() { + return delegate.fromDocumentValueConvertContext(); + } + + @SuppressWarnings("unchecked") + @Override + public T getCollectorResults(CollectorKey key) { + return (T) results.get( key ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return delegate.createNestedDocsProvider( nestedDocumentPath, nestedFilter ); + } + + public void setResults(Map, Object> results) { + this.results = results; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationRequestContext.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationRequestContext.java new file mode 100644 index 00000000000..97dc028f22a --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationRequestContext.java @@ -0,0 +1,60 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +import java.util.ArrayList; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; +import org.hibernate.search.engine.search.common.NamedValues; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.Query; + +class LocalAggregationRequestContext implements AggregationRequestContext { + + private final AggregationRequestContext delegate; + private final Set> localCollectorFactories = new LinkedHashSet<>(); + + LocalAggregationRequestContext(AggregationRequestContext delegate) { + this.delegate = delegate; + } + + @Override + public > void requireCollector( + CollectorFactory collectorFactory + ) { + localCollectorFactories.add( collectorFactory ); + } + + @Override + public NamedValues queryParameters() { + return delegate.queryParameters(); + } + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return delegate.toPredicateRequestContext( absolutePath ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return delegate.createNestedDocsProvider( nestedDocumentPath, nestedFilter ); + } + + public List> localCollectorFactories() { + return new ArrayList<>( localCollectorFactories ); + } + + public AggregationRequestContext actualContext() { + return delegate; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java index 4d980d2c5ab..b76319c716f 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java @@ -4,5 +4,42 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; -public record LongBucket(long term, long count) { +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.lucene.search.Collector; + +public class LongBucket { + public final long termOrd; + public final List[] collectors; + public long count; + + @SuppressWarnings("unchecked") + public LongBucket(long termOrd, Collector[] collectors, long count) { + this.termOrd = termOrd; + this.collectors = new List[collectors.length]; + for ( int i = 0; i < collectors.length; i++ ) { + this.collectors[i] = new ArrayList<>(); + this.collectors[i].add( collectors[i] ); + } + this.count = count; + } + + public long count() { + return count; + } + + public long termOrd() { + return termOrd; + } + + @Override + public String toString() { + return "LongBucket{" + + "termOrd=" + termOrd + + ", count=" + count + + ", collectors=" + Arrays.toString( collectors ) + + '}'; + } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java 
b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java
index 6825b163611..35ea9bff9ff 100644
--- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java
+++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java
@@ -8,13 +8,10 @@
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.function.Function;
 
-import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory;
 import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey;
 import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollector;
 import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollectorFactory;
@@ -26,19 +23,14 @@
 import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory;
 import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope;
 import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext;
-import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext;
 import org.hibernate.search.backend.lucene.types.codec.impl.AbstractLuceneNumericFieldCodec;
-import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext;
 import org.hibernate.search.engine.search.aggregation.SearchAggregation;
 import org.hibernate.search.engine.search.aggregation.spi.RangeAggregationBuilder;
-import org.hibernate.search.engine.search.common.NamedValues;
 import org.hibernate.search.engine.search.common.ValueModel;
 import org.hibernate.search.util.common.data.Range;
 
-import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.CollectorManager;
-import org.apache.lucene.search.Query;
 
 /**
  * @param <F> The type of field values.
@@ -207,79 +199,4 @@ protected CountBuilder(AbstractLuceneNumericFieldCodec codec, Function> localCollectorFactories = new LinkedHashSet<>(); - - private LocalAggregationRequestContext(AggregationRequestContext delegate) { - this.delegate = delegate; - } - - @Override - public > void requireCollector( - CollectorFactory collectorFactory) { - localCollectorFactories.add( collectorFactory ); - } - - @Override - public NamedValues queryParameters() { - return delegate.queryParameters(); - } - - @Override - public PredicateRequestContext toPredicateRequestContext(String absolutePath) { - return delegate.toPredicateRequestContext( absolutePath ); - } - - @Override - public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { - return delegate.createNestedDocsProvider( nestedDocumentPath, nestedFilter ); - } - - public List> localCollectorFactories() { - return new ArrayList<>( localCollectorFactories ); - } - } - - private static class LocalAggregationExtractContext implements AggregationExtractContext { - - private final AggregationExtractContext delegate; - - private Map, Object> results; - - private LocalAggregationExtractContext(AggregationExtractContext delegate) { - this.delegate = delegate; - } - - @Override - public PredicateRequestContext toPredicateRequestContext(String absolutePath) { - return delegate.toPredicateRequestContext( absolutePath ); - } - - @Override - public IndexReader getIndexReader() { - return delegate.getIndexReader(); - } - - @Override - public FromDocumentValueConvertContext fromDocumentValueConvertContext() { - return delegate.fromDocumentValueConvertContext(); - } - - @SuppressWarnings("unchecked") - @Override - public T getCollectorResults(CollectorKey key) { - return (T) results.get( key ); - } - - @Override - public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { - return delegate.createNestedDocsProvider( nestedDocumentPath, nestedFilter ); - } - - public void setResults(Map, Object> results) { - this.results = results; - } - } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java index 5ec21a7054f..96b7a0ff2c2 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java @@ -14,6 +14,7 @@ import java.util.TreeSet; import java.util.function.Function; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.BaseTermsCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.NumericTermsCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.NumericTermsCollectorFactory; @@ -21,12 +22,14 @@ import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; import 
org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; import org.hibernate.search.backend.lucene.types.codec.impl.AbstractLuceneNumericFieldCodec; import org.hibernate.search.backend.lucene.types.lowlevel.impl.LuceneNumericDomain; import org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; @@ -35,15 +38,14 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.PriorityQueue; /** * @param The type of field values. * @param The type of keys in the returned map. It can be {@code F} * or a different type if value converters are used. */ -public class LuceneNumericTermsAggregation - extends AbstractLuceneMultivaluedTermsAggregation { +public class LuceneNumericTermsAggregation + extends AbstractLuceneMultivaluedTermsAggregation { private final LuceneNumericDomain numericDomain; @@ -51,7 +53,7 @@ public class LuceneNumericTermsAggregation private final Function decoder; private CollectorKey collectorKey; - private LuceneNumericTermsAggregation(Builder builder) { + private LuceneNumericTermsAggregation(Builder builder) { super( builder ); this.numericDomain = builder.codec.getDomain(); this.termComparator = numericDomain.createComparator(); @@ -59,21 +61,20 @@ private LuceneNumericTermsAggregation(Builder builder) { } @Override - public Extractor> request(AggregationRequestContext context) { + public Extractor> request(AggregationRequestContext context) { NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( absoluteFieldPath, nestedDocsProvider ); - var termsCollectorFactory = NumericTermsCollectorFactory.instance( source ); + LocalAggregationRequestContext localAggregationContext = new LocalAggregationRequestContext( context ); + Extractor extractor = aggregation.request( localAggregationContext ); + + var termsCollectorFactory = + NumericTermsCollectorFactory.instance( source, localAggregationContext.localCollectorFactories() ); context.requireCollector( termsCollectorFactory ); collectorKey = termsCollectorFactory.getCollectorKey(); - return extractor( context ); - } - - @Override - protected Extractor> extractor(AggregationRequestContext context) { - return new LuceneNumericTermsAggregationExtractor(); + return new LuceneNumericTermsAggregationExtractor( extractor ); } public static class Factory @@ -94,6 +95,15 @@ public TermsAggregationBuilder.TypeSelector create(LuceneSearchIndexScope sco private class LuceneNumericTermsAggregationExtractor extends AbstractExtractor { + private LuceneNumericTermsAggregationExtractor(Extractor extractor) { + super( extractor ); + } + + @Override + protected BaseTermsCollector termsCollector(AggregationExtractContext context) throws IOException { + return context.getCollectorResults( collectorKey ); + } + @Override Comparator getAscendingTermComparator() { return termComparator; @@ -105,13 +115,22 @@ V termToFieldValue(E key) { } @Override - List> 
getTopBuckets(AggregationExtractContext context) { + List> getTopBuckets(AggregationExtractContext context) throws IOException { var termsCollector = context.getCollectorResults( collectorKey ); + LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); + List counts = termsCollector.counts( order, maxTermCount, minDocCount ); - List> buckets = new ArrayList<>(); + List> buckets = new ArrayList<>(); for ( LongBucket bucket : counts ) { - buckets.add( new Bucket<>( numericDomain.sortedDocValueToTerm( bucket.term() ), bucket.count() ) ); + localContext.setResults( prepareResults( bucket, termsCollector ) ); + buckets.add( + new Bucket<>( + numericDomain.sortedDocValueToTerm( bucket.termOrd() ), + bucket.count(), + extractor.extract( localContext ) + ) + ); } return buckets; } @@ -142,7 +161,7 @@ Set collectFirstTerms(IndexReader reader, boolean descending, int limit) } - private static class TypeSelector extends AbstractTypeSelector { + private static class TypeSelector extends AbstractTypeSelector { private final AbstractLuceneNumericFieldCodec codec; private TypeSelector(AbstractLuceneNumericFieldCodec codec, @@ -153,16 +172,16 @@ private TypeSelector(AbstractLuceneNumericFieldCodec codec, @SuppressWarnings("unchecked") @Override - public Builder type(Class expectedType, ValueModel valueModel) { + public Builder type(Class expectedType, ValueModel valueModel) { if ( ValueModel.RAW.equals( valueModel ) ) { - return new Builder<>( codec, scope, field, + return new CountBuilder<>( codec, scope, field, ( (ProjectionConverter) field.type().rawProjectionConverter() ) .withConvertedType( expectedType, field ), Function.identity() ); } else { - return new Builder<>( codec, scope, field, + return new CountBuilder<>( codec, scope, field, field.type().projectionConverter( valueModel ).withConvertedType( expectedType, field ), codec::decode ); @@ -170,37 +189,52 @@ private TypeSelector(AbstractLuceneNumericFieldCodec codec, } } - private static class Builder - extends AbstractBuilder { + private static class CountBuilder extends Builder { + + private CountBuilder(AbstractLuceneNumericFieldCodec codec, LuceneSearchIndexScope scope, + LuceneSearchIndexValueFieldContext field, + ProjectionConverter fromFieldValueConverter, + Function decoder) { + super( codec, scope, field, LuceneSearchAggregation.from( scope, + LuceneCountDocumentAggregation.factory().create( scope, null ).type().build() ), fromFieldValueConverter, + decoder ); + } + } + + private static class Builder + extends AbstractBuilder { private final AbstractLuceneNumericFieldCodec codec; private final Function decoder; - public Builder(AbstractLuceneNumericFieldCodec codec, LuceneSearchIndexScope scope, - LuceneSearchIndexValueFieldContext field, ProjectionConverter fromFieldValueConverter, + private Builder(AbstractLuceneNumericFieldCodec codec, LuceneSearchIndexScope scope, + LuceneSearchIndexValueFieldContext field, LuceneSearchAggregation aggregation, + ProjectionConverter fromFieldValueConverter, Function decoder) { - super( scope, field, fromFieldValueConverter ); + super( scope, field, aggregation, fromFieldValueConverter ); this.codec = codec; this.decoder = decoder; } - @Override - public LuceneNumericTermsAggregation build() { - return new LuceneNumericTermsAggregation<>( this ); + private Builder(AbstractLuceneNumericFieldCodec codec, LuceneSearchIndexScope scope, + LuceneSearchIndexValueFieldContext field, + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter, + 
Function decoder, + BucketOrder order, int minDocCount, int maxTermCount) { + super( scope, field, aggregation, fromFieldValueConverter, order, minDocCount, maxTermCount ); + this.codec = codec; + this.decoder = decoder; } - } - private static class HibernateSearchTermsQueue extends PriorityQueue { - private final Comparator comparator; - - public HibernateSearchTermsQueue(Comparator comparator, int maxSize) { - super( maxSize ); - this.comparator = comparator; + @Override + public LuceneNumericTermsAggregation build() { + return new LuceneNumericTermsAggregation<>( this ); } @Override - protected boolean lessThan(E t1, E t2) { - return comparator.compare( t1, t2 ) > 0; + public TermsAggregationBuilder withValue(SearchAggregation aggregation) { + return new Builder<>( codec, scope, field, LuceneSearchAggregation.from( scope, aggregation ), + fromFieldValueConverter, decoder, order, minDocCount, maxTermCount ); } } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java index bf20eff0559..8d305759230 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java @@ -12,6 +12,7 @@ import java.util.Set; import java.util.TreeSet; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.BaseTermsCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TextTermsCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TextTermsCollectorFactory; @@ -19,10 +20,12 @@ import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneValueFieldSearchQueryElementFactory; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; import org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; @@ -36,37 +39,46 @@ * @param The type of keys in the returned map. It can be {@code String} * or a different type if value converters are used. 
*/ -public class LuceneTextTermsAggregation - extends AbstractLuceneMultivaluedTermsAggregation { +public class LuceneTextTermsAggregation + extends AbstractLuceneMultivaluedTermsAggregation { private static final Comparator STRING_COMPARATOR = Comparator.naturalOrder(); private CollectorKey collectorKey; - private LuceneTextTermsAggregation(Builder builder) { + private LuceneTextTermsAggregation(Builder builder) { super( builder ); } @Override - public Extractor> request(AggregationRequestContext context) { + public Extractor> request(AggregationRequestContext context) { NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); JoiningTextMultiValuesSource source = JoiningTextMultiValuesSource.fromField( absoluteFieldPath, nestedDocsProvider ); - var termsCollectorFactory = TextTermsCollectorFactory.instance( absoluteFieldPath, source ); + + LocalAggregationRequestContext localAggregationContext = new LocalAggregationRequestContext( context ); + Extractor extractor = aggregation.request( localAggregationContext ); + + var termsCollectorFactory = TextTermsCollectorFactory.instance( absoluteFieldPath, source, + localAggregationContext.localCollectorFactories() ); context.requireCollector( termsCollectorFactory ); collectorKey = termsCollectorFactory.getCollectorKey(); - return extractor( context ); - } - - @Override - protected Extractor> extractor(AggregationRequestContext context) { - return new LuceneTextTermsAggregationExtractor(); + return new LuceneTextTermsAggregationExtractor( extractor ); } private class LuceneTextTermsAggregationExtractor extends AbstractExtractor { + private LuceneTextTermsAggregationExtractor(Extractor extractor) { + super( extractor ); + } + + @Override + protected BaseTermsCollector termsCollector(AggregationExtractContext context) throws IOException { + return context.getCollectorResults( collectorKey ); + } + @Override Set collectFirstTerms(IndexReader reader, boolean descending, int limit) throws IOException { @@ -106,15 +118,24 @@ String termToFieldValue(String key) { } @Override - List> getTopBuckets(AggregationExtractContext context) throws IOException { + List> getTopBuckets(AggregationExtractContext context) throws IOException { var termsCollector = context.getCollectorResults( collectorKey ); - List counts = termsCollector.counts( order, maxTermCount, minDocCount ); + LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); + + List results = termsCollector.results( order, maxTermCount, minDocCount ); var dv = MultiDocValues.getSortedSetValues( context.getIndexReader(), absoluteFieldPath ); - List> buckets = new ArrayList<>(); - for ( LongBucket bucket : counts ) { - buckets.add( new Bucket<>( dv.lookupOrd( bucket.term() ).utf8ToString(), bucket.count() ) ); + List> buckets = new ArrayList<>(); + for ( LongBucket bucket : results ) { + localContext.setResults( prepareResults( bucket, termsCollector ) ); + buckets.add( + new Bucket<>( + dv.lookupOrd( bucket.termOrd() ).utf8ToString(), + bucket.count(), + extractor.extract( localContext ) + ) + ); } return buckets; } @@ -128,40 +149,62 @@ public TypeSelector create(LuceneSearchIndexScope scope, LuceneSearchIndexVal } } - private static class TypeSelector extends AbstractTypeSelector { + private static class TypeSelector extends AbstractTypeSelector { private TypeSelector(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field) { super( scope, field ); } @SuppressWarnings("unchecked") @Override - public Builder type(Class expectedType, 
ValueModel valueModel) { + public Builder type(Class expectedType, ValueModel valueModel) { if ( ValueModel.RAW.equals( valueModel ) ) { - return new Builder<>( scope, field, + return new CountBuilder<>( scope, field, ( (ProjectionConverter) field.type().rawProjectionConverter() ) .withConvertedType( expectedType, field ) ); } else { - return new Builder<>( scope, field, + return new CountBuilder<>( scope, field, field.type().projectionConverter( valueModel ).withConvertedType( expectedType, field ) ); } } } - private static class Builder - extends AbstractBuilder { + private static class CountBuilder extends Builder { - private Builder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + private CountBuilder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, ProjectionConverter fromFieldValueConverter) { - super( scope, field, fromFieldValueConverter ); + super( scope, field, + LuceneSearchAggregation.from( scope, + LuceneCountDocumentAggregation.factory().create( scope, null ).type().build() ), + fromFieldValueConverter ); + } + } + + private static class Builder + extends AbstractBuilder { + + private Builder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter) { + super( scope, field, aggregation, fromFieldValueConverter ); + } + + private Builder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter, + BucketOrder order, int minDocCount, int maxTermCount) { + super( scope, field, aggregation, fromFieldValueConverter, order, minDocCount, maxTermCount ); } @Override - public LuceneTextTermsAggregation build() { + public LuceneTextTermsAggregation build() { return new LuceneTextTermsAggregation<>( this ); } + @Override + public TermsAggregationBuilder withValue(SearchAggregation aggregation) { + return new Builder<>( scope, field, LuceneSearchAggregation.from( scope, aggregation ), fromFieldValueConverter, + order, minDocCount, maxTermCount ); + } } } diff --git a/util/internal/integrationtest/common/src/main/java/org/hibernate/search/util/impl/integrationtest/common/stub/backend/search/aggregation/impl/StubSearchAggregation.java b/util/internal/integrationtest/common/src/main/java/org/hibernate/search/util/impl/integrationtest/common/stub/backend/search/aggregation/impl/StubSearchAggregation.java index 68e34db27af..9666fdd1453 100644 --- a/util/internal/integrationtest/common/src/main/java/org/hibernate/search/util/impl/integrationtest/common/stub/backend/search/aggregation/impl/StubSearchAggregation.java +++ b/util/internal/integrationtest/common/src/main/java/org/hibernate/search/util/impl/integrationtest/common/stub/backend/search/aggregation/impl/StubSearchAggregation.java @@ -47,7 +47,7 @@ public RangeAggregationBuilder.TypeSelector create(StubSearchIndexScope scope public static class TermsTypeSelector implements TermsAggregationBuilder.TypeSelector { @Override - public TermsBuilder type(Class expectedType, ValueModel valueModel) { + public TermsBuilder type(Class expectedType, ValueModel valueModel) { return new TermsBuilder<>(); } } @@ -59,7 +59,7 @@ public RangeBuilder type(Class expectedType, ValueModel valueMod } } - static class TermsBuilder implements TermsAggregationBuilder { + static class TermsBuilder implements TermsAggregationBuilder { @Override public void orderByCountDescending() { @@ -91,13 +91,18 @@ public void 
maxTermCount(int maxTermCount) { // No-op } + @Override + public TermsAggregationBuilder withValue(SearchAggregation aggregation) { + return new TermsBuilder<>(); + } + @Override public void filter(SearchPredicate filter) { // No-op } @Override - public SearchAggregation> build() { + public SearchAggregation> build() { return new StubSearchAggregation<>(); } } From 916fd415aa5c7b3e0db5e73501c0cd38d89adfc7 Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Fri, 4 Jul 2025 12:04:47 +0200 Subject: [PATCH 13/23] HSEARCH-3661 Make Elasticsearch's terms aggregation accept "value" --- .../impl/ElasticsearchIndexRootBuilder.java | 3 +- ...bstractElasticsearchBucketAggregation.java | 18 --- ...ElasticsearchCountDocumentAggregation.java | 2 +- .../impl/ElasticsearchRangeAggregation.java | 12 +- .../impl/ElasticsearchTermsAggregation.java | 104 ++++++++++++++---- .../search/aggregation/AggregationDslIT.java | 35 +++++- 6 files changed, 122 insertions(+), 52 deletions(-) diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/document/model/dsl/impl/ElasticsearchIndexRootBuilder.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/document/model/dsl/impl/ElasticsearchIndexRootBuilder.java index d1f06b9ac14..20a55aeeffc 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/document/model/dsl/impl/ElasticsearchIndexRootBuilder.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/document/model/dsl/impl/ElasticsearchIndexRootBuilder.java @@ -86,7 +86,8 @@ public ElasticsearchIndexRootBuilder(ElasticsearchIndexFieldTypeFactoryProvider this.customIndexMapping = customIndexMapping; this.defaultDynamicType = DynamicType.create( dynamicMapping ); - this.typeBuilder.queryElementFactory( AggregationTypeKeys.COUNT_DOCUMENTS, ElasticsearchCountDocumentAggregation.factory( false ) ); + this.typeBuilder.queryElementFactory( AggregationTypeKeys.COUNT_DOCUMENTS, + ElasticsearchCountDocumentAggregation.factory( false ) ); this.addDefaultImplicitFields(); } diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchBucketAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchBucketAggregation.java index 9042a92d2b2..965ca64107d 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchBucketAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchBucketAggregation.java @@ -8,7 +8,6 @@ import java.util.Map; import org.hibernate.search.backend.elasticsearch.gson.impl.JsonAccessor; -import org.hibernate.search.backend.elasticsearch.logging.impl.ElasticsearchClientLog; import org.hibernate.search.backend.elasticsearch.search.common.impl.ElasticsearchSearchIndexScope; import org.hibernate.search.backend.elasticsearch.search.common.impl.ElasticsearchSearchIndexValueFieldContext; import org.hibernate.search.backend.elasticsearch.search.predicate.impl.ElasticsearchSearchPredicate; @@ -29,10 +28,6 @@ public abstract class AbstractElasticsearchBucketAggregation private static final String ROOT_DOC_COUNT_NAME = "root_doc_count"; private static final JsonAccessor REQUEST_AGGREGATIONS_ROOT_DOC_COUNT_ACCESSOR = JsonAccessor.root().property( "aggregations" 
).property( ROOT_DOC_COUNT_NAME ).asObject(); - private static final JsonAccessor RESPONSE_DOC_COUNT_ACCESSOR = - JsonAccessor.root().property( "doc_count" ).asLong(); - private static final JsonAccessor RESPONSE_ROOT_DOC_COUNT_ACCESSOR = - JsonAccessor.root().property( ROOT_DOC_COUNT_NAME ).property( "doc_count" ).asLong(); AbstractElasticsearchBucketAggregation(AbstractBuilder builder) { super( builder ); @@ -58,19 +53,6 @@ protected final JsonObject doRequest(AggregationRequestContext context) { protected abstract void doRequest(JsonObject outerObject, JsonObject innerObject, AggregationRequestContext context); - protected final long getBucketDocCount(JsonObject bucket) { - if ( isNested() ) { - // We must return the number of root documents, - // not the number of leaf documents that Elasticsearch returns by default. - return RESPONSE_ROOT_DOC_COUNT_ACCESSOR.get( bucket ) - .orElseThrow( ElasticsearchClientLog.INSTANCE::elasticsearchResponseMissingData ); - } - else { - return RESPONSE_DOC_COUNT_ACCESSOR.get( bucket ) - .orElseThrow( ElasticsearchClientLog.INSTANCE::elasticsearchResponseMissingData ); - } - } - protected abstract class AbstractBucketExtractor extends AbstractExtractor> { protected AbstractBucketExtractor(List nestedPathHierarchy, diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchCountDocumentAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchCountDocumentAggregation.java index af8966b4ff7..70bb8406883 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchCountDocumentAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchCountDocumentAggregation.java @@ -43,7 +43,7 @@ private ElasticsearchCountDocumentAggregation(Builder builder) { @Override public Extractor request(AggregationRequestContext context, AggregationKey key, JsonObject jsonAggregations) { - return new CountDocumentsExtractor(isNested); + return new CountDocumentsExtractor( isNested ); } private record CountDocumentsExtractor(boolean isNested) implements Extractor { diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java index e1c2bfb840e..83b2e322297 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java @@ -44,6 +44,7 @@ public class ElasticsearchRangeAggregation private final ElasticsearchSearchAggregation aggregation; + // TODO: do not store these two here: private Extractor innerExtractor; private AggregationKey innerExtractorKey; @@ -66,6 +67,7 @@ protected void doRequest(JsonObject outerObject, JsonObject innerObject, Aggrega // this is just a "random name" so we can get the aggregation back from the response. 
// once we switch to the "composite aggregation" where we compute multiple aggregations for a range, // this should be moved into a new "aggregation" that would handle all the logic for adding and then extracting 0-n aggregations. + // TODO: not really good that we have state saved into aggregation within the request, we should pass it up instead innerExtractorKey = AggregationKey.of( "agg" ); innerExtractor = aggregation.request( context, innerExtractorKey, subOuterObject ); if ( !subOuterObject.isEmpty() ) { @@ -103,7 +105,8 @@ private TypeSelector(ElasticsearchSearchIndexScope scope, @Override public Builder type(Class expectedType, ValueModel valueModel) { - return new CountBuilder<>( scope, field, field.encodingContext().encoder( scope, field, expectedType, valueModel ) ); + return new CountBuilder<>( scope, field, + field.encodingContext().encoder( scope, field, expectedType, valueModel ) ); } } @@ -125,7 +128,7 @@ protected Map, V> doExtract(AggregationExtractContext context, JsonElem JsonObject bucket = bucketMap.get( String.valueOf( i ) ).getAsJsonObject(); Range range = rangesInOrder.get( i ); if ( bucket.has( innerExtractorKey.name() ) ) { - bucket = bucket.getAsJsonObject( innerExtractorKey.name() ); + bucket = bucket.getAsJsonObject( innerExtractorKey.name() ); } result.put( range, innerExtractor.extract( bucket, context ) ); } @@ -133,14 +136,15 @@ protected Map, V> doExtract(AggregationExtractContext context, JsonElem } } - public static class CountBuilder extends Builder { + private static class CountBuilder extends Builder { protected CountBuilder(ElasticsearchSearchIndexScope scope, ElasticsearchSearchIndexValueFieldContext field, Function encoder) { super( scope, field, encoder, new ArrayList<>(), new JsonArray(), ElasticsearchSearchAggregation.from( scope, - ElasticsearchCountDocumentAggregation.factory(field.nestedPathHierarchy().isEmpty()).create( scope, null ).type().build() ) ); + ElasticsearchCountDocumentAggregation.factory( !field.nestedPathHierarchy().isEmpty() ) + .create( scope, null ).type().build() ) ); } } diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchTermsAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchTermsAggregation.java index 903a8ccdef6..70421859cb4 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchTermsAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchTermsAggregation.java @@ -15,6 +15,8 @@ import org.hibernate.search.backend.elasticsearch.types.codec.impl.ElasticsearchFieldCodec; import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; import org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; +import org.hibernate.search.engine.search.aggregation.AggregationKey; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; import org.hibernate.search.util.common.impl.CollectionHelper; @@ -28,19 +30,25 @@ * @param The type of keys in the returned map. It can be {@code F} * or a different type if value converters are used. 
*/ -public class ElasticsearchTermsAggregation - extends AbstractElasticsearchBucketAggregation { +public class ElasticsearchTermsAggregation + extends AbstractElasticsearchBucketAggregation { private final String absoluteFieldPath; private final ProjectionConverter fromFieldValueConverter; private final BiFunction decodeFunction; + private final ElasticsearchSearchAggregation aggregation; + private final JsonObject order; private final int size; private final int minDocCount; - private ElasticsearchTermsAggregation(Builder builder) { + // TODO: do not store these two here: + private Extractor innerExtractor; + private AggregationKey innerExtractorKey; + + private ElasticsearchTermsAggregation(Builder builder) { super( builder ); this.absoluteFieldPath = builder.field.absolutePath(); this.fromFieldValueConverter = builder.fromFieldValueConverter; @@ -48,6 +56,7 @@ private ElasticsearchTermsAggregation(Builder builder) { this.order = builder.order; this.size = builder.size; this.minDocCount = builder.minDocCount; + this.aggregation = builder.aggregation; } @Override @@ -59,11 +68,19 @@ protected void doRequest(JsonObject outerObject, JsonObject innerObject, Aggrega } innerObject.addProperty( "size", size ); innerObject.addProperty( "min_doc_count", minDocCount ); + + // TODO: not really good that we have state saved into aggregation within the request, we should pass it up instead + JsonObject subOuterObject = new JsonObject(); + innerExtractorKey = AggregationKey.of( "agg" ); + innerExtractor = aggregation.request( context, innerExtractorKey, subOuterObject ); + if ( !subOuterObject.isEmpty() ) { + outerObject.add( "aggs", subOuterObject ); + } } @Override - protected Extractor> extractor(AggregationRequestContext context) { - return new TermsBucketExtractor( nestedPathHierarchy, filter ); + protected Extractor> extractor(AggregationRequestContext context) { + return new TermsBucketExtractor( nestedPathHierarchy, filter, innerExtractorKey, innerExtractor ); } public static class Factory @@ -93,11 +110,11 @@ private TypeSelector(ElasticsearchFieldCodec codec, @SuppressWarnings("unchecked") @Override - public Builder type(Class expectedType, ValueModel valueModel) { + public Builder type(Class expectedType, ValueModel valueModel) { if ( ValueModel.RAW.equals( valueModel ) ) { - return new Builder<>( - (key, string) -> string != null && !string.isJsonNull() ? string : key, + return new CountBuilder<>( scope, field, + (key, string) -> string != null && !string.isJsonNull() ? 
string : key, // unchecked cast to make eclipse-compiler happy // we know that Elasticsearch projection converters work with the String ( (ProjectionConverter) field.type().rawProjectionConverter() ) @@ -105,22 +122,28 @@ private TypeSelector(ElasticsearchFieldCodec codec, ); } else { - return new Builder<>( codec::decodeAggregationKey, scope, field, + return new CountBuilder<>( scope, field, codec::decodeAggregationKey, field.type().projectionConverter( valueModel ).withConvertedType( expectedType, field ) ); } } } - protected class TermsBucketExtractor extends AbstractBucketExtractor { + protected class TermsBucketExtractor extends AbstractBucketExtractor { + private final AggregationKey innerExtractorKey; + private final Extractor innerExtractor; + protected TermsBucketExtractor(List nestedPathHierarchy, - ElasticsearchSearchPredicate filter) { + ElasticsearchSearchPredicate filter, AggregationKey innerExtractorKey, Extractor innerExtractor + ) { super( nestedPathHierarchy, filter ); + this.innerExtractorKey = innerExtractorKey; + this.innerExtractor = innerExtractor; } @Override - protected Map doExtract(AggregationExtractContext context, JsonElement buckets) { + protected Map doExtract(AggregationExtractContext context, JsonElement buckets) { JsonArray bucketArray = buckets.getAsJsonArray(); - Map result = CollectionHelper.newLinkedHashMap( bucketArray.size() ); + Map result = CollectionHelper.newLinkedHashMap( bucketArray.size() ); FromDocumentValueConvertContext convertContext = context.fromDocumentValueConvertContext(); for ( JsonElement bucketElement : bucketArray ) { JsonObject bucket = bucketElement.getAsJsonObject(); @@ -130,29 +153,60 @@ protected Map doExtract(AggregationExtractContext context, JsonElement decodeFunction.apply( keyJson, keyAsStringJson ), convertContext ); - long documentCount = getBucketDocCount( bucket ); - result.put( key, documentCount ); + + if ( bucket.has( innerExtractorKey.name() ) ) { + bucket = bucket.getAsJsonObject( innerExtractorKey.name() ); + } + result.put( key, innerExtractor.extract( bucket, context ) ); } return result; } } - private static class Builder extends AbstractBuilder - implements TermsAggregationBuilder { + private static class CountBuilder extends Builder { + + protected CountBuilder(ElasticsearchSearchIndexScope scope, + ElasticsearchSearchIndexValueFieldContext field, + BiFunction decodeFunction, + ProjectionConverter fromFieldValueConverter) { + super( scope, field, decodeFunction, fromFieldValueConverter, + ElasticsearchSearchAggregation.from( scope, + ElasticsearchCountDocumentAggregation.factory( !field.nestedPathHierarchy().isEmpty() ) + .create( scope, null ).type().build() ) ); + } + } + + private static class Builder extends AbstractBuilder + implements TermsAggregationBuilder { private final BiFunction decodeFunction; private final ProjectionConverter fromFieldValueConverter; + private final ElasticsearchSearchAggregation aggregation; private JsonObject order; - private int minDocCount = 1; - private int size = 100; + private int minDocCount; + private int size; - private Builder(BiFunction decodeFunction, ElasticsearchSearchIndexScope scope, + private Builder(ElasticsearchSearchIndexScope scope, ElasticsearchSearchIndexValueFieldContext field, - ProjectionConverter fromFieldValueConverter) { + BiFunction decodeFunction, + ProjectionConverter fromFieldValueConverter, + ElasticsearchSearchAggregation aggregation) { + this( scope, field, decodeFunction, fromFieldValueConverter, aggregation, null, 1, 100 ); + } + + 
public Builder(ElasticsearchSearchIndexScope scope, ElasticsearchSearchIndexValueFieldContext field, + BiFunction decodeFunction, + ProjectionConverter fromFieldValueConverter, + ElasticsearchSearchAggregation aggregation, + JsonObject order, int minDocCount, int size) { super( scope, field ); + this.order = order; this.decodeFunction = decodeFunction; this.fromFieldValueConverter = fromFieldValueConverter; + this.aggregation = aggregation; + this.minDocCount = minDocCount; + this.size = size; } @Override @@ -186,7 +240,13 @@ public void maxTermCount(int maxTermCount) { } @Override - public ElasticsearchTermsAggregation build() { + public TermsAggregationBuilder withValue(SearchAggregation aggregation) { + return new Builder<>( scope, field, decodeFunction, fromFieldValueConverter, + ElasticsearchSearchAggregation.from( scope, aggregation ), order, minDocCount, size ); + } + + @Override + public ElasticsearchTermsAggregation build() { return new ElasticsearchTermsAggregation<>( this ); } diff --git a/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java b/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java index 1c1effe61a9..75c482ff8ca 100644 --- a/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java +++ b/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java @@ -285,9 +285,10 @@ void terms_value() { // end::terms-sum[] assertThat( sumByPrice ) .containsExactly( - entry( 10.0, 7.99 ), - entry( 20.0 , 35.98 ), - entry( null , 24.99 ) + entry( 7.99, 7.99 ), + entry( 15.99, 15.99 ), + entry( 19.99, 19.99 ), + entry( 24.99, 24.99 ) ); } ); @@ -306,9 +307,31 @@ void terms_value() { // end::terms-count[] assertThat( countsByPrice ) .containsExactly( - entry( Range.canonical( 0.0, 10.0 ), 1L ), - entry( Range.canonical( 10.0, 20.0 ), 2L ), - entry( Range.canonical( 20.0, null ), 1L ) + entry( 7.99, 1L ), + entry( 15.99, 1L ), + entry( 19.99, 1L ), + entry( 24.99, 1L ) + ); + } ); + + withinSearchSession( searchSession -> { + // tag::terms-count-implicit[] + AggregationKey> countsByPriceKey = AggregationKey.of( "countsByPrice" ); + SearchResult result = searchSession.search( Book.class ) + .where( f -> f.matchAll() ) + .aggregation( + countsByPriceKey, f -> f.terms() + .field( "price", Double.class ) // <1> + ) + .fetch( 20 ); + Map countsByPrice = result.aggregation( countsByPriceKey ); + // end::terms-count-implicit[] + assertThat( countsByPrice ) + .containsExactly( + entry( 7.99, 1L ), + entry( 15.99, 1L ), + entry( 19.99, 1L ), + entry( 24.99, 1L ) ); } ); } From 9291b9b72f5ed9a9af8ed3603fcffaf47ce402c6 Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Fri, 4 Jul 2025 12:15:21 +0200 Subject: [PATCH 14/23] HSEARCH-3661 Do not collect counts in Lucene's range collectors if needed that would get collected by the collectors/managers etc --- .../lucene/lowlevel/collector/impl/RangeCollector.java | 7 ------- .../lucene/lowlevel/collector/impl/RangeCollector.java | 7 ------- 2 files changed, 14 deletions(-) diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java index 93cf5ccda06..db9386bc17e 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java +++ 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java @@ -33,7 +33,6 @@ public class RangeCollector extends SimpleCollector { private final long[] boundaries; private final IntArrayList[] countsPerBoundaries; - private final long[] counts; private final Collector[][] collectors; private final CollectorKey[] keys; private final LeafCollector[][] leafCollectors; @@ -138,7 +137,6 @@ else if ( flags == 1 ) { } } - counts = new long[ranges.length]; leafCollectors = new LeafCollector[keys.length][]; for ( int i = 0; i < leafCollectors.length; i++ ) { leafCollectors[i] = new LeafCollector[ranges.length]; @@ -147,7 +145,6 @@ else if ( flags == 1 ) { private void processLeafWithIndex(int index, int doc) throws IOException { for ( IntCursor cursor : countsPerBoundaries[index] ) { - counts[cursor.value]++; for ( int i = 0; i < keys.length; i++ ) { leafCollectors[i][cursor.value].collect( doc ); } @@ -193,10 +190,6 @@ public void collect(int doc) throws IOException { } } - public long[] counts() { - return counts; - } - public Collector[][] collectors() { return collectors; } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java index 93cf5ccda06..db9386bc17e 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java @@ -33,7 +33,6 @@ public class RangeCollector extends SimpleCollector { private final long[] boundaries; private final IntArrayList[] countsPerBoundaries; - private final long[] counts; private final Collector[][] collectors; private final CollectorKey[] keys; private final LeafCollector[][] leafCollectors; @@ -138,7 +137,6 @@ else if ( flags == 1 ) { } } - counts = new long[ranges.length]; leafCollectors = new LeafCollector[keys.length][]; for ( int i = 0; i < leafCollectors.length; i++ ) { leafCollectors[i] = new LeafCollector[ranges.length]; @@ -147,7 +145,6 @@ else if ( flags == 1 ) { private void processLeafWithIndex(int index, int doc) throws IOException { for ( IntCursor cursor : countsPerBoundaries[index] ) { - counts[cursor.value]++; for ( int i = 0; i < keys.length; i++ ) { leafCollectors[i][cursor.value].collect( doc ); } @@ -193,10 +190,6 @@ public void collect(int doc) throws IOException { } } - public long[] counts() { - return counts; - } - public Collector[][] collectors() { return collectors; } From 3a0dacd1e89b7788ffbe5a20c5851cac2f9c9d90 Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Fri, 4 Jul 2025 16:00:38 +0200 Subject: [PATCH 15/23] HSEARCH-3661 Use SimpleCollectors to not recreate leaf collectors --- .../impl/AggregationFunctionCollector.java | 51 ++++++++----------- .../DoubleAggregationFunctionCollector.java | 45 +++++++--------- .../collector/impl/NumericTermsCollector.java | 2 + .../collector/impl/RangeCollector.java | 2 + .../collector/impl/RangeCollectorFactory.java | 12 ++--- .../collector/impl/RangeCollectorManager.java | 2 +- .../collector/impl/TextTermsCollector.java | 2 + .../types/aggregation/impl/BucketOrder.java | 2 +- .../impl/AggregationFunctionCollector.java | 50 ++++++++---------- .../DoubleAggregationFunctionCollector.java | 44 +++++++--------- .../collector/impl/NumericTermsCollector.java 
| 2 + .../collector/impl/RangeCollector.java | 2 + .../collector/impl/RangeCollectorFactory.java | 12 ++--- .../collector/impl/RangeCollectorManager.java | 2 +- .../collector/impl/TextTermsCollector.java | 2 + .../types/aggregation/impl/BucketOrder.java | 2 +- 16 files changed, 108 insertions(+), 126 deletions(-) diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java index fb577604054..cb515632b8e 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java @@ -10,16 +10,16 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.LeafCollector; -import org.apache.lucene.search.Scorable; import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; -public class AggregationFunctionCollector> implements Collector { +public class AggregationFunctionCollector> extends SimpleCollector { private final LongMultiValuesSource valueSource; private final AggregationFunction aggregationFunction; + private LongMultiValues values; + public AggregationFunctionCollector(LongMultiValuesSource valueSource, AggregationFunction aggregationFunction) { this.valueSource = valueSource; this.aggregationFunction = aggregationFunction; @@ -34,8 +34,16 @@ public Long result() { } @Override - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { - return new AggregationFunctionLeafCollector( valueSource.getValues( context ) ); + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) { + while ( values.hasNextValue() ) { + long value = values.nextValue(); + aggregationFunction.apply( value ); + if ( !aggregationFunction.acceptMultipleValues() ) { + break; + } + } + } } @Override @@ -43,29 +51,14 @@ public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; } - public class AggregationFunctionLeafCollector implements LeafCollector { - private final LongMultiValues values; - - public AggregationFunctionLeafCollector(LongMultiValues values) { - this.values = values; - } - - @Override - public void collect(int doc) throws IOException { - if ( values.advanceExact( doc ) ) { - while ( values.hasNextValue() ) { - long value = values.nextValue(); - aggregationFunction.apply( value ); - if ( !aggregationFunction.acceptMultipleValues() ) { - break; - } - } - } - } + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + values = valueSource.getValues( context ); + } - @Override - public void setScorer(Scorable scorer) { - // no-op by default - } + @Override + public void finish() throws IOException { + values = null; } + } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/DoubleAggregationFunctionCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/DoubleAggregationFunctionCollector.java index 3a441653d58..e5d121d27c7 100644 --- 
a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/DoubleAggregationFunctionCollector.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/DoubleAggregationFunctionCollector.java @@ -11,17 +11,17 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.LeafCollector; -import org.apache.lucene.search.Scorable; import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; -public class DoubleAggregationFunctionCollector> implements Collector { +public class DoubleAggregationFunctionCollector> extends SimpleCollector { private final LongMultiValuesSource valueSource; private final DoubleAggregationFunction aggregationFunction; private final Function longToDouble; + private LongMultiValues values; + public DoubleAggregationFunctionCollector(LongMultiValuesSource valueSource, DoubleAggregationFunction aggregationFunction, Function longToDouble) { this.valueSource = valueSource; @@ -38,8 +38,13 @@ public Double result() { } @Override - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { - return new AggregationFunctionLeafCollector( valueSource.getValues( context ) ); + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) { + while ( values.hasNextValue() ) { + long value = values.nextValue(); + aggregationFunction.apply( longToDouble.apply( value ) ); + } + } } @Override @@ -47,26 +52,14 @@ public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; } - public class AggregationFunctionLeafCollector implements LeafCollector { - private final LongMultiValues values; - - public AggregationFunctionLeafCollector(LongMultiValues values) { - this.values = values; - } - - @Override - public void collect(int doc) throws IOException { - if ( values.advanceExact( doc ) ) { - while ( values.hasNextValue() ) { - long value = values.nextValue(); - aggregationFunction.apply( longToDouble.apply( value ) ); - } - } - } + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + values = valueSource.getValues( context ); + } - @Override - public void setScorer(Scorable scorer) { - // no-op by default - } + @Override + public void finish() throws IOException { + values = null; } + } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java index 02094d1966e..ef0d14abe17 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java @@ -92,6 +92,7 @@ public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; } + @Override protected void doSetNextReader(LeafReaderContext context) throws IOException { this.values = valuesSource.getValues( context ); this.leafReaderContext = context; @@ -100,6 +101,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { } } + @Override public void finish() { values = null; } diff --git 
a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java index db9386bc17e..57395708b4a 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java @@ -207,6 +207,7 @@ public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; } + @Override protected void doSetNextReader(LeafReaderContext context) throws IOException { values = valuesSource.getValues( context ); for ( int i = 0; i < collectors.length; i++ ) { @@ -216,6 +217,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { } } + @Override public void finish() throws IOException { values = null; } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java index f3a6c606b90..91a398b7b3c 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java @@ -13,12 +13,12 @@ import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; -public class RangeCollectorFactory - implements CollectorFactory> { +public class RangeCollectorFactory + implements CollectorFactory { - public static CollectorFactory> instance( + public static CollectorFactory instance( LongMultiValuesSource valuesSource, EffectiveRange[] ranges, List> collectorFactories) { - return new RangeCollectorFactory<>( valuesSource, ranges, collectorFactories ); + return new RangeCollectorFactory( valuesSource, ranges, collectorFactories ); } public final CollectorKey key = CollectorKey.create(); @@ -35,7 +35,7 @@ public RangeCollectorFactory(LongMultiValuesSource valuesSource, EffectiveRange[ @SuppressWarnings({ "rawtypes", "unchecked" }) @Override - public RangeCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { + public RangeCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { Collector[][] collectors = new Collector[collectorFactories.size()][]; CollectorKey[] keys = new CollectorKey[collectorFactories.size()]; var managers = new CollectorManager[collectorFactories.size()]; @@ -51,7 +51,7 @@ public RangeCollectorManager createCollectorManager(CollectorExecutionContext } index++; } - return new RangeCollectorManager<>( valuesSource, ranges, collectors, keys, managers ); + return new RangeCollectorManager( valuesSource, ranges, collectors, keys, managers ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java index d1056204096..a842782c9ba 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java @@ -12,7 +12,7 @@ import 
org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; -public class RangeCollectorManager implements CollectorManager { +public class RangeCollectorManager implements CollectorManager { private final LongMultiValuesSource valuesSource; private final EffectiveRange[] ranges; diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java index bfcd753303a..b06a3948d96 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java @@ -109,12 +109,14 @@ public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; } + @Override protected void doSetNextReader(LeafReaderContext context) throws IOException { initRootSortedSetDocValues( context ); this.values = valuesSource.getValues( context ); leafReaderContext = context; } + @Override public void finish() throws IOException { for ( LongObjectCursor value : segmentValues ) { long globalOrd = sortedSetValues.lookupTerm( values.lookupOrd( value.key ) ); diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java index 320fba0f177..2f9ab4292ad 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java @@ -83,7 +83,7 @@ public Comparator> toBucketComparator(Comparator termAsce isTermOrderDescending() ? 
termAscendingComparator.reversed() : termAscendingComparator ); } - public Comparator toLongBucketComparator() { + public Comparator toLongBucketComparator() { return toLongBucketComparatorInternal(); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java index fb577604054..28988a10f8a 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java @@ -10,16 +10,16 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.LeafCollector; -import org.apache.lucene.search.Scorable; import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; -public class AggregationFunctionCollector> implements Collector { +public class AggregationFunctionCollector> extends SimpleCollector { private final LongMultiValuesSource valueSource; private final AggregationFunction aggregationFunction; + private LongMultiValues values; + public AggregationFunctionCollector(LongMultiValuesSource valueSource, AggregationFunction aggregationFunction) { this.valueSource = valueSource; this.aggregationFunction = aggregationFunction; @@ -34,8 +34,16 @@ public Long result() { } @Override - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { - return new AggregationFunctionLeafCollector( valueSource.getValues( context ) ); + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) { + while ( values.hasNextValue() ) { + long value = values.nextValue(); + aggregationFunction.apply( value ); + if ( !aggregationFunction.acceptMultipleValues() ) { + break; + } + } + } } @Override @@ -43,29 +51,13 @@ public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; } - public class AggregationFunctionLeafCollector implements LeafCollector { - private final LongMultiValues values; - - public AggregationFunctionLeafCollector(LongMultiValues values) { - this.values = values; - } - - @Override - public void collect(int doc) throws IOException { - if ( values.advanceExact( doc ) ) { - while ( values.hasNextValue() ) { - long value = values.nextValue(); - aggregationFunction.apply( value ); - if ( !aggregationFunction.acceptMultipleValues() ) { - break; - } - } - } - } + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + values = valueSource.getValues( context ); + } - @Override - public void setScorer(Scorable scorer) { - // no-op by default - } + @Override + public void finish() throws IOException { + values = null; } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/DoubleAggregationFunctionCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/DoubleAggregationFunctionCollector.java index 3a441653d58..aae6609a773 100644 --- 
a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/DoubleAggregationFunctionCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/DoubleAggregationFunctionCollector.java @@ -11,17 +11,17 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.LeafCollector; -import org.apache.lucene.search.Scorable; import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; -public class DoubleAggregationFunctionCollector> implements Collector { +public class DoubleAggregationFunctionCollector> extends SimpleCollector { private final LongMultiValuesSource valueSource; private final DoubleAggregationFunction aggregationFunction; private final Function longToDouble; + private LongMultiValues values; + public DoubleAggregationFunctionCollector(LongMultiValuesSource valueSource, DoubleAggregationFunction aggregationFunction, Function longToDouble) { this.valueSource = valueSource; @@ -38,8 +38,13 @@ public Double result() { } @Override - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { - return new AggregationFunctionLeafCollector( valueSource.getValues( context ) ); + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) { + while ( values.hasNextValue() ) { + long value = values.nextValue(); + aggregationFunction.apply( longToDouble.apply( value ) ); + } + } } @Override @@ -47,26 +52,13 @@ public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; } - public class AggregationFunctionLeafCollector implements LeafCollector { - private final LongMultiValues values; - - public AggregationFunctionLeafCollector(LongMultiValues values) { - this.values = values; - } - - @Override - public void collect(int doc) throws IOException { - if ( values.advanceExact( doc ) ) { - while ( values.hasNextValue() ) { - long value = values.nextValue(); - aggregationFunction.apply( longToDouble.apply( value ) ); - } - } - } + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + values = valueSource.getValues( context ); + } - @Override - public void setScorer(Scorable scorer) { - // no-op by default - } + @Override + public void finish() throws IOException { + values = null; } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java index 02094d1966e..ef0d14abe17 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java @@ -92,6 +92,7 @@ public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; } + @Override protected void doSetNextReader(LeafReaderContext context) throws IOException { this.values = valuesSource.getValues( context ); this.leafReaderContext = context; @@ -100,6 +101,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { } } + @Override public void finish() { values = null; } diff --git 
a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java index db9386bc17e..57395708b4a 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java @@ -207,6 +207,7 @@ public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; } + @Override protected void doSetNextReader(LeafReaderContext context) throws IOException { values = valuesSource.getValues( context ); for ( int i = 0; i < collectors.length; i++ ) { @@ -216,6 +217,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { } } + @Override public void finish() throws IOException { values = null; } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java index f3a6c606b90..91a398b7b3c 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java @@ -13,12 +13,12 @@ import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; -public class RangeCollectorFactory - implements CollectorFactory> { +public class RangeCollectorFactory + implements CollectorFactory { - public static CollectorFactory> instance( + public static CollectorFactory instance( LongMultiValuesSource valuesSource, EffectiveRange[] ranges, List> collectorFactories) { - return new RangeCollectorFactory<>( valuesSource, ranges, collectorFactories ); + return new RangeCollectorFactory( valuesSource, ranges, collectorFactories ); } public final CollectorKey key = CollectorKey.create(); @@ -35,7 +35,7 @@ public RangeCollectorFactory(LongMultiValuesSource valuesSource, EffectiveRange[ @SuppressWarnings({ "rawtypes", "unchecked" }) @Override - public RangeCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { + public RangeCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { Collector[][] collectors = new Collector[collectorFactories.size()][]; CollectorKey[] keys = new CollectorKey[collectorFactories.size()]; var managers = new CollectorManager[collectorFactories.size()]; @@ -51,7 +51,7 @@ public RangeCollectorManager createCollectorManager(CollectorExecutionContext } index++; } - return new RangeCollectorManager<>( valuesSource, ranges, collectors, keys, managers ); + return new RangeCollectorManager( valuesSource, ranges, collectors, keys, managers ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java index d1056204096..a842782c9ba 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java +++ 
b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java @@ -12,7 +12,7 @@ import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; -public class RangeCollectorManager implements CollectorManager { +public class RangeCollectorManager implements CollectorManager { private final LongMultiValuesSource valuesSource; private final EffectiveRange[] ranges; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java index bfcd753303a..b06a3948d96 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java @@ -109,12 +109,14 @@ public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; } + @Override protected void doSetNextReader(LeafReaderContext context) throws IOException { initRootSortedSetDocValues( context ); this.values = valuesSource.getValues( context ); leafReaderContext = context; } + @Override public void finish() throws IOException { for ( LongObjectCursor value : segmentValues ) { long globalOrd = sortedSetValues.lookupTerm( values.lookupOrd( value.key ) ); diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java index 320fba0f177..2f9ab4292ad 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java @@ -83,7 +83,7 @@ public Comparator> toBucketComparator(Comparator termAsce isTermOrderDescending() ? termAscendingComparator.reversed() : termAscendingComparator ); } - public Comparator toLongBucketComparator() { + public Comparator toLongBucketComparator() { return toLongBucketComparatorInternal(); } From 87c5f210776393f3b2f336a9c9d24a04100325e1 Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Fri, 4 Jul 2025 19:28:13 +0200 Subject: [PATCH 16/23] HSEARCH-3661 Adjust how extractors are built for bucket Elasticsearch aggregations so that the state is not "stored" in the aggregation but passed around through a context. 
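
Roughly: anything created while writing the request JSON (for bucket aggregations, the
inner aggregation's key and extractor) is recorded in a small per-request building context
under a typed key, and looked up again when the extractor is created, instead of being kept
as mutable fields on the aggregation object (see the removed TODO comments below).

A minimal sketch of the pattern, for illustration only (the names below are hypothetical
and simplified compared to the AggregationRequestBuildingContextContext added in this patch):

    import java.util.HashMap;
    import java.util.Map;

    final class BuildingContext {
        // Typed key: get(key) returns a value with the static type the key was created with.
        // The real implementation also bases equals()/hashCode() on the name.
        static final class Key<V> {
            private final String name;
            Key(String name) { this.name = name; }
        }

        private final Map<Key<?>, Object> values = new HashMap<>();

        <V> void add(Key<V> key, V value) {
            values.put( key, value );
        }

        @SuppressWarnings("unchecked")
        <V> V get(Key<V> key) {
            return (V) values.get( key );
        }
    }

    // While building the request:
    //     context.add( INNER_EXTRACTOR, innerAggregation.request( context, innerKey, json ) );
    // Later, while building the extractor:
    //     Extractor<?> innerExtractor = context.get( INNER_EXTRACTOR );

This keeps aggregation instances free of request-scoped state.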
--- ...bstractElasticsearchBucketAggregation.java | 8 +- ...tractElasticsearchNestableAggregation.java | 11 +-- ...regationRequestBuildingContextContext.java | 76 +++++++++++++++++++ .../ElasticsearchMetricFieldAggregation.java | 4 +- .../ElasticsearchMetricLongAggregation.java | 4 +- .../impl/ElasticsearchRangeAggregation.java | 30 ++++---- .../impl/ElasticsearchTermsAggregation.java | 19 ++--- 7 files changed, 118 insertions(+), 34 deletions(-) create mode 100644 backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AggregationRequestBuildingContextContext.java diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchBucketAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchBucketAggregation.java index 965ca64107d..3808d343d20 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchBucketAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchBucketAggregation.java @@ -29,12 +29,15 @@ public abstract class AbstractElasticsearchBucketAggregation private static final JsonAccessor REQUEST_AGGREGATIONS_ROOT_DOC_COUNT_ACCESSOR = JsonAccessor.root().property( "aggregations" ).property( ROOT_DOC_COUNT_NAME ).asObject(); + protected static final String INNER_EXTRACTOR_KEY = "innerExtractorKey"; + protected static final String INNER_EXTRACTOR = "innerExtractor"; + AbstractElasticsearchBucketAggregation(AbstractBuilder builder) { super( builder ); } @Override - protected final JsonObject doRequest(AggregationRequestContext context) { + protected final JsonObject doRequest(AggregationRequestBuildingContextContext context) { JsonObject outerObject = new JsonObject(); JsonObject innerObject = new JsonObject(); @@ -51,7 +54,8 @@ protected final JsonObject doRequest(AggregationRequestContext context) { return outerObject; } - protected abstract void doRequest(JsonObject outerObject, JsonObject innerObject, AggregationRequestContext context); + protected abstract void doRequest(JsonObject outerObject, JsonObject innerObject, + AggregationRequestBuildingContextContext context); protected abstract class AbstractBucketExtractor extends AbstractExtractor> { diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchNestableAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchNestableAggregation.java index 54de0a2d3c6..94ce1f2aeda 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchNestableAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchNestableAggregation.java @@ -50,11 +50,12 @@ public abstract class AbstractElasticsearchNestableAggregation extends Abstra @Override public final Extractor request(AggregationRequestContext context, AggregationKey key, JsonObject jsonAggregations) { - jsonAggregations.add( key.name(), request( context ) ); - return extractor( context ); + AggregationRequestBuildingContextContext buildingContext = new AggregationRequestBuildingContextContext( 
context ); + jsonAggregations.add( key.name(), request( buildingContext ) ); + return extractor( buildingContext ); } - private JsonObject request(AggregationRequestContext context) { + private JsonObject request(AggregationRequestBuildingContextContext context) { JsonObject result = doRequest( context ); if ( nestedPathHierarchy.isEmpty() ) { @@ -90,9 +91,9 @@ private JsonObject request(AggregationRequestContext context) { return result; } - protected abstract JsonObject doRequest(AggregationRequestContext context); + protected abstract JsonObject doRequest(AggregationRequestBuildingContextContext context); - protected abstract Extractor extractor(AggregationRequestContext context); + protected abstract Extractor extractor(AggregationRequestBuildingContextContext context); protected abstract static class AbstractExtractor implements Extractor { diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AggregationRequestBuildingContextContext.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AggregationRequestBuildingContextContext.java new file mode 100644 index 00000000000..35999aadeaa --- /dev/null +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AggregationRequestBuildingContextContext.java @@ -0,0 +1,76 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.elasticsearch.search.aggregation.impl; + +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +import org.hibernate.search.backend.elasticsearch.search.predicate.impl.PredicateRequestContext; +import org.hibernate.search.util.common.annotation.Incubating; + +/** + * Sometimes we need to pass something we created while building up the json in one of the "doRequest" methods + * in the aggregation build up to the "later" steps e.g. to when we create the extractor. 
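+ * <p>
+ * For example, the range and terms bucket aggregations store the inner aggregation's
+ * extractor (and its key) here while writing the request JSON and read them back when
+ * creating their own extractor, so that no request-scoped state is kept in the
+ * aggregation instance itself.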
+ */ +@Incubating +public final class AggregationRequestBuildingContextContext implements AggregationRequestContext { + private final AggregationRequestContext aggregationRequestContext; + private final Map, Object> buildingContext = new HashMap<>(); + + public AggregationRequestBuildingContextContext(AggregationRequestContext aggregationRequestContext) { + this.aggregationRequestContext = aggregationRequestContext; + } + + public T get(Key key) { + Object value = buildingContext.get( key ); + return key.cast( value ); + } + + public void add(Key key, Object value) { + buildingContext.put( key, value ); + } + + public AggregationRequestContext rootAggregationRequestContext() { + return aggregationRequestContext; + } + + @Override + public PredicateRequestContext getRootPredicateContext() { + return aggregationRequestContext.getRootPredicateContext(); + } + + public static Key buildingContextKey(String name) { + return new Key<>( name ); + } + + public static class Key { + + private final String name; + + private Key(String name) { + this.name = name; + } + + @SuppressWarnings("unchecked") + private V cast(Object value) { + return (V) value; + } + + @Override + public boolean equals(Object o) { + if ( o == null || getClass() != o.getClass() ) { + return false; + } + Key key = (Key) o; + return Objects.equals( name, key.name ); + } + + @Override + public int hashCode() { + return Objects.hashCode( name ); + } + } +} diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchMetricFieldAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchMetricFieldAggregation.java index 108ca7d15e8..f424efdbcde 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchMetricFieldAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchMetricFieldAggregation.java @@ -74,7 +74,7 @@ private ElasticsearchMetricFieldAggregation(Builder builder) { } @Override - protected final JsonObject doRequest(AggregationRequestContext context) { + protected final JsonObject doRequest(AggregationRequestBuildingContextContext context) { JsonObject outerObject = new JsonObject(); JsonObject innerObject = new JsonObject(); @@ -84,7 +84,7 @@ protected final JsonObject doRequest(AggregationRequestContext context) { } @Override - protected Extractor extractor(AggregationRequestContext context) { + protected Extractor extractor(AggregationRequestBuildingContextContext context) { return metricFieldExtractorCreator.extractor( filter ); } diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchMetricLongAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchMetricLongAggregation.java index a5ab9fcc4c3..a0801179b70 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchMetricLongAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchMetricLongAggregation.java @@ -47,7 +47,7 @@ private ElasticsearchMetricLongAggregation(Builder builder) { } @Override - protected final JsonObject doRequest(AggregationRequestContext context) { + 
protected final JsonObject doRequest(AggregationRequestBuildingContextContext context) { JsonObject outerObject = new JsonObject(); JsonObject innerObject = new JsonObject(); @@ -57,7 +57,7 @@ protected final JsonObject doRequest(AggregationRequestContext context) { } @Override - protected Extractor extractor(AggregationRequestContext context) { + protected Extractor extractor(AggregationRequestBuildingContextContext context) { return new MetricLongExtractor( nestedPathHierarchy, filter ); } diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java index 83b2e322297..e8d7eaaffbf 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java @@ -4,6 +4,8 @@ */ package org.hibernate.search.backend.elasticsearch.search.aggregation.impl; +import static org.hibernate.search.backend.elasticsearch.search.aggregation.impl.AggregationRequestBuildingContextContext.buildingContextKey; + import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -44,10 +46,6 @@ public class ElasticsearchRangeAggregation private final ElasticsearchSearchAggregation aggregation; - // TODO: do not store these two here: - private Extractor innerExtractor; - private AggregationKey innerExtractorKey; - private ElasticsearchRangeAggregation(Builder builder) { super( builder ); this.absoluteFieldPath = builder.field.absolutePath(); @@ -57,27 +55,27 @@ private ElasticsearchRangeAggregation(Builder builder) { } @Override - protected void doRequest(JsonObject outerObject, JsonObject innerObject, AggregationRequestContext context) { + protected void doRequest(JsonObject outerObject, JsonObject innerObject, AggregationRequestBuildingContextContext context) { outerObject.add( "range", innerObject ); innerObject.addProperty( "field", absoluteFieldPath ); innerObject.addProperty( "keyed", true ); innerObject.add( "ranges", rangesJson ); JsonObject subOuterObject = new JsonObject(); - // this is just a "random name" so we can get the aggregation back from the response. - // once we switch to the "composite aggregation" where we compute multiple aggregations for a range, - // this should be moved into a new "aggregation" that would handle all the logic for adding and then extracting 0-n aggregations. 
- // TODO: not really good that we have state saved into aggregation within the request, we should pass it up instead - innerExtractorKey = AggregationKey.of( "agg" ); - innerExtractor = aggregation.request( context, innerExtractorKey, subOuterObject ); + AggregationKey innerExtractorKey = AggregationKey.of( "agg" ); + context.add( buildingContextKey( INNER_EXTRACTOR_KEY ), innerExtractorKey ); + context.add( buildingContextKey( INNER_EXTRACTOR ), aggregation.request( context, innerExtractorKey, subOuterObject ) ); + if ( !subOuterObject.isEmpty() ) { outerObject.add( "aggs", subOuterObject ); } } @Override - protected Extractor, V>> extractor(AggregationRequestContext context) { - return new RangeBucketExtractor( nestedPathHierarchy, filter, rangesInOrder ); + protected Extractor, V>> extractor(AggregationRequestBuildingContextContext context) { + AggregationKey innerExtractorKey = context.get( buildingContextKey( INNER_EXTRACTOR_KEY ) ); + Extractor innerExtractor = context.get( buildingContextKey( INNER_EXTRACTOR ) ); + return new RangeBucketExtractor( nestedPathHierarchy, filter, rangesInOrder, innerExtractorKey, innerExtractor ); } public static class Factory @@ -112,11 +110,15 @@ public Builder type(Class expectedType, ValueModel valueModel protected class RangeBucketExtractor extends AbstractBucketExtractor, V> { private final List> rangesInOrder; + private final Extractor innerExtractor; + private final AggregationKey innerExtractorKey; protected RangeBucketExtractor(List nestedPathHierarchy, ElasticsearchSearchPredicate filter, - List> rangesInOrder) { + List> rangesInOrder, AggregationKey innerExtractorKey, Extractor innerExtractor) { super( nestedPathHierarchy, filter ); this.rangesInOrder = rangesInOrder; + this.innerExtractorKey = innerExtractorKey; + this.innerExtractor = innerExtractor; } diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchTermsAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchTermsAggregation.java index 70421859cb4..cb7d8c416a8 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchTermsAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchTermsAggregation.java @@ -4,6 +4,8 @@ */ package org.hibernate.search.backend.elasticsearch.search.aggregation.impl; +import static org.hibernate.search.backend.elasticsearch.search.aggregation.impl.AggregationRequestBuildingContextContext.buildingContextKey; + import java.util.List; import java.util.Map; import java.util.function.BiFunction; @@ -44,10 +46,6 @@ public class ElasticsearchTermsAggregation private final int size; private final int minDocCount; - // TODO: do not store these two here: - private Extractor innerExtractor; - private AggregationKey innerExtractorKey; - private ElasticsearchTermsAggregation(Builder builder) { super( builder ); this.absoluteFieldPath = builder.field.absolutePath(); @@ -60,7 +58,7 @@ private ElasticsearchTermsAggregation(Builder builder) { } @Override - protected void doRequest(JsonObject outerObject, JsonObject innerObject, AggregationRequestContext context) { + protected void doRequest(JsonObject outerObject, JsonObject innerObject, AggregationRequestBuildingContextContext context) { outerObject.add( "terms", innerObject ); innerObject.addProperty( "field", 
absoluteFieldPath ); if ( order != null ) { @@ -69,17 +67,20 @@ protected void doRequest(JsonObject outerObject, JsonObject innerObject, Aggrega innerObject.addProperty( "size", size ); innerObject.addProperty( "min_doc_count", minDocCount ); - // TODO: not really good that we have state saved into aggregation within the request, we should pass it up instead JsonObject subOuterObject = new JsonObject(); - innerExtractorKey = AggregationKey.of( "agg" ); - innerExtractor = aggregation.request( context, innerExtractorKey, subOuterObject ); + AggregationKey innerExtractorKey = AggregationKey.of( "agg" ); + context.add( buildingContextKey( INNER_EXTRACTOR_KEY ), innerExtractorKey ); + context.add( buildingContextKey( INNER_EXTRACTOR ), aggregation.request( context, innerExtractorKey, subOuterObject ) ); + if ( !subOuterObject.isEmpty() ) { outerObject.add( "aggs", subOuterObject ); } } @Override - protected Extractor> extractor(AggregationRequestContext context) { + protected Extractor> extractor(AggregationRequestBuildingContextContext context) { + AggregationKey innerExtractorKey = context.get( buildingContextKey( INNER_EXTRACTOR_KEY ) ); + Extractor innerExtractor = context.get( buildingContextKey( INNER_EXTRACTOR ) ); return new TermsBucketExtractor( nestedPathHierarchy, filter, innerExtractorKey, innerExtractor ); } From 9eed71f3cc997cea0e9ec41c86dd647a583b4542 Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Mon, 7 Jul 2025 17:41:43 +0200 Subject: [PATCH 17/23] HSEARCH-3661 Use reducable results in terms collectors --- .../collector/impl/NumericTermsCollector.java | 77 ++----------- .../impl/NumericTermsCollectorFactory.java | 8 +- .../impl/NumericTermsCollectorManager.java | 14 ++- .../impl/TermCollectorSegmentValue.java | 41 +++++++ .../lowlevel/collector/impl/TermResults.java | 102 ++++++++++++++++++ .../collector/impl/TextTermsCollector.java | 74 ++----------- .../impl/TextTermsCollectorFactory.java | 8 +- .../impl/TextTermsCollectorManager.java | 14 ++- ...ractLuceneMultivaluedTermsAggregation.java | 16 +-- .../types/aggregation/impl/LongBucket.java | 20 ++++ .../impl/LuceneNumericTermsAggregation.java | 12 +-- .../impl/LuceneTextTermsAggregation.java | 12 +-- .../collector/impl/NumericTermsCollector.java | 77 ++----------- .../impl/NumericTermsCollectorFactory.java | 8 +- .../impl/NumericTermsCollectorManager.java | 14 ++- .../impl/TermCollectorSegmentValue.java | 41 +++++++ .../lowlevel/collector/impl/TermResults.java | 102 ++++++++++++++++++ .../collector/impl/TextTermsCollector.java | 74 ++----------- .../impl/TextTermsCollectorFactory.java | 8 +- .../impl/TextTermsCollectorManager.java | 14 ++- ...ractLuceneMultivaluedTermsAggregation.java | 16 +-- .../types/aggregation/impl/LongBucket.java | 20 ++++ .../impl/LuceneNumericTermsAggregation.java | 12 +-- .../impl/LuceneTextTermsAggregation.java | 12 +-- 24 files changed, 450 insertions(+), 346 deletions(-) create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermCollectorSegmentValue.java create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermResults.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermCollectorSegmentValue.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermResults.java diff --git 
a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java index ef0d14abe17..9e3875f0757 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java @@ -5,34 +5,26 @@ package org.hibernate.search.backend.lucene.lowlevel.collector.impl; import java.io.IOException; -import java.util.Comparator; -import java.util.LinkedList; -import java.util.List; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; -import org.hibernate.search.backend.lucene.types.aggregation.impl.BucketOrder; -import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket; import com.carrotsearch.hppc.LongHashSet; import com.carrotsearch.hppc.LongObjectHashMap; import com.carrotsearch.hppc.cursors.LongObjectCursor; -import com.carrotsearch.hppc.procedures.LongObjectProcedure; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; -import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.SimpleCollector; -import org.apache.lucene.util.PriorityQueue; public class NumericTermsCollector extends SimpleCollector implements BaseTermsCollector { private final LongHashSet uniqueLeafIndicesForDocument = new LongHashSet(); private final LongMultiValuesSource valuesSource; - private final LongObjectHashMap segmentValues = new LongObjectHashMap<>(); + private final LongObjectHashMap segmentValues = new LongObjectHashMap<>(); private final CollectorKey[] keys; private final CollectorManager[] managers; @@ -57,9 +49,9 @@ public void collect(int doc) throws IOException { // Each document must be counted only once per range. 
long value = values.nextValue(); if ( uniqueLeafIndicesForDocument.add( value ) ) { - SegmentValue segmentValue = segmentValues.get( value ); + TermCollectorSegmentValue segmentValue = segmentValues.get( value ); if ( segmentValue == null ) { - segmentValue = new SegmentValue( managers ); + segmentValue = new TermCollectorSegmentValue( managers, leafReaderContext ); segmentValues.put( value, segmentValue ); } segmentValue.collect( doc ); @@ -68,25 +60,6 @@ public void collect(int doc) throws IOException { } } - public List counts(BucketOrder order, int topN, int minDocCount) { - int size = Math.min( topN, segmentValues.size() ); - PriorityQueue pq = new HibernateSearchBucketOrderQueue( order, size ); - - segmentValues.forEach( (LongObjectProcedure) (key, value) -> { - if ( value.count >= minDocCount ) { - pq.insertWithOverflow( new LongBucket( key, value.collectors, value.count ) ); - } - } ); - - List buckets = new LinkedList<>(); - while ( pq.size() != 0 ) { - LongBucket popped = pq.pop(); - buckets.add( 0, popped ); - } - - return buckets; - } - @Override public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; @@ -96,7 +69,7 @@ public ScoreMode scoreMode() { protected void doSetNextReader(LeafReaderContext context) throws IOException { this.values = valuesSource.getValues( context ); this.leafReaderContext = context; - for ( LongObjectCursor value : segmentValues ) { + for ( LongObjectCursor value : segmentValues ) { value.value.resetLeafCollectors( context ); } } @@ -116,46 +89,8 @@ public void finish() { return managers; } - private static class HibernateSearchBucketOrderQueue extends PriorityQueue { - private final Comparator comparator; - - public HibernateSearchBucketOrderQueue(BucketOrder order, int maxSize) { - super( maxSize ); - this.comparator = order.toLongBucketComparator(); - } - - @Override - protected boolean lessThan(LongBucket t1, LongBucket t2) { - return comparator.compare( t1, t2 ) > 0; - } - } - - private class SegmentValue { - final Collector[] collectors; - final LeafCollector[] leafCollectors; - long count = 0L; - - SegmentValue(CollectorManager[] managers) throws IOException { - this.collectors = new Collector[managers.length]; - this.leafCollectors = new LeafCollector[managers.length]; - for ( int i = 0; i < managers.length; i++ ) { - collectors[i] = managers[i].newCollector(); - leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); - } - } - - void collect(int doc) throws IOException { - count++; - for ( LeafCollector collector : leafCollectors ) { - collector.collect( doc ); - } - } - - void resetLeafCollectors(LeafReaderContext leafReaderContext) throws IOException { - for ( int i = 0; i < leafCollectors.length; i++ ) { - leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); - } - } + LongObjectHashMap segmentValues() { + return segmentValues; } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java index 7c2cc459e44..81e0e9a9e92 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java @@ -12,14 +12,14 @@ import org.apache.lucene.search.CollectorManager; public class NumericTermsCollectorFactory - 
implements CollectorFactory { + implements CollectorFactory { - public static CollectorFactory instance( + public static CollectorFactory instance( LongMultiValuesSource valuesSource, List> collectorFactories) { return new NumericTermsCollectorFactory( valuesSource, collectorFactories ); } - private final CollectorKey key = CollectorKey.create(); + private final CollectorKey key = CollectorKey.create(); private final LongMultiValuesSource valuesSource; private final List> collectorFactories; @@ -45,7 +45,7 @@ public NumericTermsCollectorManager createCollectorManager(CollectorExecutionCon } @Override - public CollectorKey getCollectorKey() { + public CollectorKey getCollectorKey() { return key; } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java index affb41d99ed..be8ec930b8d 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java @@ -12,7 +12,7 @@ import org.apache.lucene.search.CollectorManager; public class NumericTermsCollectorManager - implements CollectorManager { + implements CollectorManager { private final LongMultiValuesSource valuesSource; private final CollectorKey[] keys; @@ -31,8 +31,14 @@ public NumericTermsCollector newCollector() { } @Override - public NumericTermsCollector reduce(Collection collection) { - // TODO: actually reduce: - return collection.iterator().next(); + public TermResults reduce(Collection collection) { + if ( collection.isEmpty() ) { + return TermResults.EMPTY; + } + TermResults results = new TermResults( keys, managers ); + for ( NumericTermsCollector collector : collection ) { + results.add( collector.segmentValues() ); + } + return results; } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermCollectorSegmentValue.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermCollectorSegmentValue.java new file mode 100644 index 00000000000..24cce336229 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermCollectorSegmentValue.java @@ -0,0 +1,41 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.LeafCollector; + +class TermCollectorSegmentValue { + final Collector[] collectors; + final LeafCollector[] leafCollectors; + long count = 0L; + + TermCollectorSegmentValue(CollectorManager[] managers, LeafReaderContext leafReaderContext) + throws IOException { + this.collectors = new Collector[managers.length]; + this.leafCollectors = new LeafCollector[managers.length]; + for ( int i = 0; i < managers.length; i++ ) { + collectors[i] = managers[i].newCollector(); + leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); + } + } + + void collect(int doc) throws IOException { + count++; + for ( LeafCollector collector : leafCollectors ) { + collector.collect( doc ); + } + } + + void resetLeafCollectors(LeafReaderContext leafReaderContext) throws IOException { + for ( int i = 0; i < leafCollectors.length; i++ ) { + leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); + } + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermResults.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermResults.java new file mode 100644 index 00000000000..2f8e517a1ef --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermResults.java @@ -0,0 +1,102 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; + +import org.hibernate.search.backend.lucene.types.aggregation.impl.BucketOrder; +import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket; + +import com.carrotsearch.hppc.LongObjectHashMap; +import com.carrotsearch.hppc.procedures.LongObjectProcedure; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.util.PriorityQueue; + +public class TermResults { + + @SuppressWarnings("unchecked") + static final TermResults EMPTY = new TermResults( new CollectorKey[0], new CollectorManager[0] ); + + private final CollectorKey[] collectorKeys; + private final CollectorManager[] managers; + + private final LongObjectHashMap buckets = new LongObjectHashMap<>(); + + TermResults(CollectorKey[] collectorKeys, CollectorManager[] managers) { + this.collectorKeys = collectorKeys; + this.managers = managers; + } + + public List counts(BucketOrder order, int topN, int minDocCount) { + int size = Math.min( topN, buckets.size() ); + PriorityQueue pq = new HibernateSearchBucketOrderQueue( order, size ); + + buckets.forEach( (LongObjectProcedure) (key, value) -> { + if ( value.count >= minDocCount ) { + pq.insertWithOverflow( value ); + } + } ); + + List results = new LinkedList<>(); + while ( pq.size() != 0 ) { + LongBucket popped = pq.pop(); + results.add( 0, popped ); + } + + return results; + } + + void add(LongObjectHashMap segmentValues) { + for ( var segment : segmentValues ) { + LongBucket bucket = buckets.get( segment.key ); + if ( bucket == null ) { + bucket = new LongBucket( segment.key, segment.value.collectors, segment.value.count ); + 
buckets.put( segment.key, bucket ); + } + else { + bucket.add( segment.value.collectors, segment.value.count ); + } + } + } + + public void merge(LongObjectHashMap values) { + for ( var toadd : values ) { + LongBucket bucket = buckets.get( toadd.key ); + if ( bucket == null ) { + bucket = new LongBucket( toadd.key, toadd.value.collectors, toadd.value.count ); + buckets.put( toadd.key, bucket ); + } + else { + bucket.add( toadd.value ); + } + } + } + + public CollectorKey[] collectorKeys() { + return collectorKeys; + } + + public CollectorManager[] collectorManagers() { + return managers; + } + + private static class HibernateSearchBucketOrderQueue extends PriorityQueue { + private final Comparator comparator; + + public HibernateSearchBucketOrderQueue(BucketOrder order, int maxSize) { + super( maxSize ); + this.comparator = order.toLongBucketComparator(); + } + + @Override + protected boolean lessThan(LongBucket t1, LongBucket t2) { + return comparator.compare( t1, t2 ) > 0; + } + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java index b06a3948d96..de1dd651aad 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java @@ -5,19 +5,14 @@ package org.hibernate.search.backend.lucene.lowlevel.collector.impl; import java.io.IOException; -import java.util.Comparator; -import java.util.LinkedList; -import java.util.List; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValues; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; -import org.hibernate.search.backend.lucene.types.aggregation.impl.BucketOrder; import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket; import com.carrotsearch.hppc.LongHashSet; import com.carrotsearch.hppc.LongObjectHashMap; import com.carrotsearch.hppc.cursors.LongObjectCursor; -import com.carrotsearch.hppc.procedures.LongObjectProcedure; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; @@ -25,10 +20,8 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; -import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.SimpleCollector; -import org.apache.lucene.util.PriorityQueue; public class TextTermsCollector extends SimpleCollector implements BaseTermsCollector { @@ -36,7 +29,7 @@ public class TextTermsCollector extends SimpleCollector implements BaseTermsColl private final TextMultiValuesSource valuesSource; private final LongObjectHashMap hashValues = new LongObjectHashMap<>(); - private final LongObjectHashMap segmentValues = new LongObjectHashMap<>(); + private final LongObjectHashMap segmentValues = new LongObjectHashMap<>(); private final String field; private SortedSetDocValues sortedSetValues; @@ -64,9 +57,9 @@ public void collect(int doc) throws IOException { // Each document must be counted only once per range. 
long value = values.nextOrd(); if ( uniqueLeafIndicesForDocument.add( value ) ) { - SegmentValue segmentValue = segmentValues.get( value ); + TermCollectorSegmentValue segmentValue = segmentValues.get( value ); if ( segmentValue == null ) { - segmentValue = new SegmentValue( managers ); + segmentValue = new TermCollectorSegmentValue( managers, leafReaderContext ); segmentValues.put( value, segmentValue ); } segmentValue.collect( doc ); @@ -75,25 +68,6 @@ public void collect(int doc) throws IOException { } } - public List results(BucketOrder order, int topN, int minDocCount) { - int size = Math.min( topN, hashValues.size() ); - PriorityQueue pq = new HibernateSearchBucketOrderQueue( order, size ); - - hashValues.forEach( (LongObjectProcedure) (key, value) -> { - if ( value.count >= minDocCount ) { - pq.insertWithOverflow( value ); - } - } ); - - List buckets = new LinkedList<>(); - while ( pq.size() != 0 ) { - LongBucket popped = pq.pop(); - buckets.add( 0, popped ); - } - - return buckets; - } - @Override public CollectorKey[] keys() { return keys; @@ -104,6 +78,10 @@ public List results(BucketOrder order, int topN, int minDocCount) { return managers; } + LongObjectHashMap segmentValues() { + return hashValues; + } + @Override public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; @@ -118,7 +96,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { @Override public void finish() throws IOException { - for ( LongObjectCursor value : segmentValues ) { + for ( LongObjectCursor value : segmentValues ) { long globalOrd = sortedSetValues.lookupTerm( values.lookupOrd( value.key ) ); LongBucket bucket = hashValues.get( globalOrd ); if ( bucket == null ) { @@ -145,40 +123,4 @@ private void initRootSortedSetDocValues(IndexReaderContext ctx) throws IOExcepti } initRootSortedSetDocValues( ctx.parent ); } - - private static class HibernateSearchBucketOrderQueue extends PriorityQueue { - private final Comparator comparator; - - public HibernateSearchBucketOrderQueue(BucketOrder order, int maxSize) { - super( maxSize ); - this.comparator = order.toLongBucketComparator(); - } - - @Override - protected boolean lessThan(LongBucket t1, LongBucket t2) { - return comparator.compare( t1, t2 ) > 0; - } - } - - private class SegmentValue { - final Collector[] collectors; - final LeafCollector[] leafCollectors; - long count = 0L; - - public SegmentValue(CollectorManager[] managers) throws IOException { - this.collectors = new Collector[managers.length]; - this.leafCollectors = new LeafCollector[managers.length]; - for ( int i = 0; i < managers.length; i++ ) { - collectors[i] = managers[i].newCollector(); - leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); - } - } - - public void collect(int doc) throws IOException { - count++; - for ( LeafCollector collector : leafCollectors ) { - collector.collect( doc ); - } - } - } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java index 65d2aded89f..7e95c8e270b 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java @@ -12,14 +12,14 @@ import org.apache.lucene.search.CollectorManager; public class 
TextTermsCollectorFactory - implements CollectorFactory { + implements CollectorFactory { - public static CollectorFactory instance( + public static CollectorFactory instance( String field, TextMultiValuesSource valuesSource, List> collectorFactories) { return new TextTermsCollectorFactory( field, valuesSource, collectorFactories ); } - public final CollectorKey key = CollectorKey.create(); + public final CollectorKey key = CollectorKey.create(); private final TextMultiValuesSource valuesSource; private final String field; private final List> collectorFactories; @@ -47,7 +47,7 @@ public TextTermsCollectorManager createCollectorManager(CollectorExecutionContex } @Override - public CollectorKey getCollectorKey() { + public CollectorKey getCollectorKey() { return key; } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java index 9609e7aca82..37115c1d145 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java @@ -12,7 +12,7 @@ import org.apache.lucene.search.CollectorManager; public class TextTermsCollectorManager - implements CollectorManager { + implements CollectorManager { private final TextMultiValuesSource valuesSource; private final String field; @@ -33,8 +33,14 @@ public TextTermsCollector newCollector() { } @Override - public TextTermsCollector reduce(Collection collection) { - // TODO: actually reduce: - return collection.iterator().next(); + public TermResults reduce(Collection collection) { + if ( collection.isEmpty() ) { + return TermResults.EMPTY; + } + TermResults results = new TermResults( keys, managers ); + for ( TextTermsCollector collector : collection ) { + results.merge( collector.segmentValues() ); + } + return results; } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java index 60ef17c2109..306f6c64bbd 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java @@ -12,8 +12,8 @@ import java.util.Map; import java.util.Set; -import org.hibernate.search.backend.lucene.lowlevel.collector.impl.BaseTermsCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TermResults; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; @@ -76,13 +76,13 @@ public final Map extract(AggregationExtractContext context) throws IOExcep return toMap( context.fromDocumentValueConvertContext(), buckets ); } - protected abstract BaseTermsCollector termsCollector(AggregationExtractContext context) throws IOException; + protected 
abstract TermResults termResults(AggregationExtractContext context) throws IOException; protected R createZeroValue(AggregationExtractContext context) throws IOException { LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); - var termsCollector = termsCollector( context ); - CollectorManager[] managers = termsCollector.managers(); - CollectorKey[] keys = termsCollector.keys(); + var termResults = termResults( context ); + CollectorManager[] managers = termResults.collectorManagers(); + CollectorKey[] keys = termResults.collectorKeys(); HashMap, Object> results = new HashMap<>(); for ( int i = 0; i < keys.length; i++ ) { results.put( keys[i], managers[i].reduce( List.of( managers[i].newCollector() ) ) ); @@ -110,12 +110,12 @@ private Map toMap(FromDocumentValueConvertContext convertContext, List, Object> prepareResults(LongBucket bucket, BaseTermsCollector termsCollector) + protected Map, Object> prepareResults(LongBucket bucket, TermResults termResults) throws IOException { Map, Object> result = new HashMap<>(); List[] collectors = bucket.collectors; - CollectorKey[] collectorKeys = termsCollector.keys(); - CollectorManager[] managers = termsCollector.managers(); + CollectorKey[] collectorKeys = termResults.collectorKeys(); + CollectorManager[] managers = termResults.collectorManagers(); for ( int i = 0; i < collectorKeys.length; i++ ) { result.put( collectorKeys[i], managers[i].reduce( collectors[i] ) ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java index b76319c716f..aad664b24b4 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java @@ -26,6 +26,26 @@ public LongBucket(long termOrd, Collector[] collectors, long count) { this.count = count; } + public LongBucket(long termOrd, List[] collectors, long count) { + this.termOrd = termOrd; + this.collectors = collectors; + this.count = count; + } + + public void add(Collector[] collectors, long count) { + this.count += count; + for ( int i = 0; i < collectors.length; i++ ) { + this.collectors[i].add( collectors[i] ); + } + } + + public void add(LongBucket bucket) { + this.count += bucket.count; + for ( int i = 0; i < collectors.length; i++ ) { + this.collectors[i].addAll( bucket.collectors[i] ); + } + } + public long count() { return count; } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java index 96b7a0ff2c2..b070e7b7977 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java @@ -14,10 +14,10 @@ import java.util.TreeSet; import java.util.function.Function; -import org.hibernate.search.backend.lucene.lowlevel.collector.impl.BaseTermsCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.NumericTermsCollector; import 
org.hibernate.search.backend.lucene.lowlevel.collector.impl.NumericTermsCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TermResults; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; @@ -51,7 +51,7 @@ public class LuceneNumericTermsAggregation private final Comparator termComparator; private final Function decoder; - private CollectorKey collectorKey; + private CollectorKey collectorKey; private LuceneNumericTermsAggregation(Builder builder) { super( builder ); @@ -100,7 +100,7 @@ private LuceneNumericTermsAggregationExtractor(Extractor extractor) { } @Override - protected BaseTermsCollector termsCollector(AggregationExtractContext context) throws IOException { + protected TermResults termResults(AggregationExtractContext context) throws IOException { return context.getCollectorResults( collectorKey ); } @@ -116,14 +116,14 @@ V termToFieldValue(E key) { @Override List> getTopBuckets(AggregationExtractContext context) throws IOException { - var termsCollector = context.getCollectorResults( collectorKey ); + var termResults = context.getCollectorResults( collectorKey ); LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); - List counts = termsCollector.counts( order, maxTermCount, minDocCount ); + List counts = termResults.counts( order, maxTermCount, minDocCount ); List> buckets = new ArrayList<>(); for ( LongBucket bucket : counts ) { - localContext.setResults( prepareResults( bucket, termsCollector ) ); + localContext.setResults( prepareResults( bucket, termResults ) ); buckets.add( new Bucket<>( numericDomain.sortedDocValueToTerm( bucket.termOrd() ), diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java index 8d305759230..d0980d10457 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java @@ -12,8 +12,8 @@ import java.util.Set; import java.util.TreeSet; -import org.hibernate.search.backend.lucene.lowlevel.collector.impl.BaseTermsCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TermResults; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TextTermsCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TextTermsCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningTextMultiValuesSource; @@ -44,7 +44,7 @@ public class LuceneTextTermsAggregation private static final Comparator STRING_COMPARATOR = Comparator.naturalOrder(); - private CollectorKey collectorKey; + private CollectorKey collectorKey; private LuceneTextTermsAggregation(Builder builder) { super( builder ); @@ -75,7 +75,7 @@ private LuceneTextTermsAggregationExtractor(Extractor extractor) { } @Override - protected BaseTermsCollector termsCollector(AggregationExtractContext context) throws IOException { + protected TermResults 
termResults(AggregationExtractContext context) throws IOException { return context.getCollectorResults( collectorKey ); } @@ -119,16 +119,16 @@ String termToFieldValue(String key) { @Override List> getTopBuckets(AggregationExtractContext context) throws IOException { - var termsCollector = context.getCollectorResults( collectorKey ); + var termResults = context.getCollectorResults( collectorKey ); LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); - List results = termsCollector.results( order, maxTermCount, minDocCount ); + List results = termResults.counts( order, maxTermCount, minDocCount ); var dv = MultiDocValues.getSortedSetValues( context.getIndexReader(), absoluteFieldPath ); List> buckets = new ArrayList<>(); for ( LongBucket bucket : results ) { - localContext.setResults( prepareResults( bucket, termsCollector ) ); + localContext.setResults( prepareResults( bucket, termResults ) ); buckets.add( new Bucket<>( dv.lookupOrd( bucket.termOrd() ).utf8ToString(), diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java index ef0d14abe17..9e3875f0757 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java @@ -5,34 +5,26 @@ package org.hibernate.search.backend.lucene.lowlevel.collector.impl; import java.io.IOException; -import java.util.Comparator; -import java.util.LinkedList; -import java.util.List; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; -import org.hibernate.search.backend.lucene.types.aggregation.impl.BucketOrder; -import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket; import com.carrotsearch.hppc.LongHashSet; import com.carrotsearch.hppc.LongObjectHashMap; import com.carrotsearch.hppc.cursors.LongObjectCursor; -import com.carrotsearch.hppc.procedures.LongObjectProcedure; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; -import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.SimpleCollector; -import org.apache.lucene.util.PriorityQueue; public class NumericTermsCollector extends SimpleCollector implements BaseTermsCollector { private final LongHashSet uniqueLeafIndicesForDocument = new LongHashSet(); private final LongMultiValuesSource valuesSource; - private final LongObjectHashMap segmentValues = new LongObjectHashMap<>(); + private final LongObjectHashMap segmentValues = new LongObjectHashMap<>(); private final CollectorKey[] keys; private final CollectorManager[] managers; @@ -57,9 +49,9 @@ public void collect(int doc) throws IOException { // Each document must be counted only once per range. 
long value = values.nextValue(); if ( uniqueLeafIndicesForDocument.add( value ) ) { - SegmentValue segmentValue = segmentValues.get( value ); + TermCollectorSegmentValue segmentValue = segmentValues.get( value ); if ( segmentValue == null ) { - segmentValue = new SegmentValue( managers ); + segmentValue = new TermCollectorSegmentValue( managers, leafReaderContext ); segmentValues.put( value, segmentValue ); } segmentValue.collect( doc ); @@ -68,25 +60,6 @@ public void collect(int doc) throws IOException { } } - public List counts(BucketOrder order, int topN, int minDocCount) { - int size = Math.min( topN, segmentValues.size() ); - PriorityQueue pq = new HibernateSearchBucketOrderQueue( order, size ); - - segmentValues.forEach( (LongObjectProcedure) (key, value) -> { - if ( value.count >= minDocCount ) { - pq.insertWithOverflow( new LongBucket( key, value.collectors, value.count ) ); - } - } ); - - List buckets = new LinkedList<>(); - while ( pq.size() != 0 ) { - LongBucket popped = pq.pop(); - buckets.add( 0, popped ); - } - - return buckets; - } - @Override public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; @@ -96,7 +69,7 @@ public ScoreMode scoreMode() { protected void doSetNextReader(LeafReaderContext context) throws IOException { this.values = valuesSource.getValues( context ); this.leafReaderContext = context; - for ( LongObjectCursor value : segmentValues ) { + for ( LongObjectCursor value : segmentValues ) { value.value.resetLeafCollectors( context ); } } @@ -116,46 +89,8 @@ public void finish() { return managers; } - private static class HibernateSearchBucketOrderQueue extends PriorityQueue { - private final Comparator comparator; - - public HibernateSearchBucketOrderQueue(BucketOrder order, int maxSize) { - super( maxSize ); - this.comparator = order.toLongBucketComparator(); - } - - @Override - protected boolean lessThan(LongBucket t1, LongBucket t2) { - return comparator.compare( t1, t2 ) > 0; - } - } - - private class SegmentValue { - final Collector[] collectors; - final LeafCollector[] leafCollectors; - long count = 0L; - - SegmentValue(CollectorManager[] managers) throws IOException { - this.collectors = new Collector[managers.length]; - this.leafCollectors = new LeafCollector[managers.length]; - for ( int i = 0; i < managers.length; i++ ) { - collectors[i] = managers[i].newCollector(); - leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); - } - } - - void collect(int doc) throws IOException { - count++; - for ( LeafCollector collector : leafCollectors ) { - collector.collect( doc ); - } - } - - void resetLeafCollectors(LeafReaderContext leafReaderContext) throws IOException { - for ( int i = 0; i < leafCollectors.length; i++ ) { - leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); - } - } + LongObjectHashMap segmentValues() { + return segmentValues; } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java index 7c2cc459e44..81e0e9a9e92 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java @@ -12,14 +12,14 @@ import org.apache.lucene.search.CollectorManager; 
public class NumericTermsCollectorFactory - implements CollectorFactory { + implements CollectorFactory { - public static CollectorFactory instance( + public static CollectorFactory instance( LongMultiValuesSource valuesSource, List> collectorFactories) { return new NumericTermsCollectorFactory( valuesSource, collectorFactories ); } - private final CollectorKey key = CollectorKey.create(); + private final CollectorKey key = CollectorKey.create(); private final LongMultiValuesSource valuesSource; private final List> collectorFactories; @@ -45,7 +45,7 @@ public NumericTermsCollectorManager createCollectorManager(CollectorExecutionCon } @Override - public CollectorKey getCollectorKey() { + public CollectorKey getCollectorKey() { return key; } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java index affb41d99ed..be8ec930b8d 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java @@ -12,7 +12,7 @@ import org.apache.lucene.search.CollectorManager; public class NumericTermsCollectorManager - implements CollectorManager { + implements CollectorManager { private final LongMultiValuesSource valuesSource; private final CollectorKey[] keys; @@ -31,8 +31,14 @@ public NumericTermsCollector newCollector() { } @Override - public NumericTermsCollector reduce(Collection collection) { - // TODO: actually reduce: - return collection.iterator().next(); + public TermResults reduce(Collection collection) { + if ( collection.isEmpty() ) { + return TermResults.EMPTY; + } + TermResults results = new TermResults( keys, managers ); + for ( NumericTermsCollector collector : collection ) { + results.add( collector.segmentValues() ); + } + return results; } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermCollectorSegmentValue.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermCollectorSegmentValue.java new file mode 100644 index 00000000000..24cce336229 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermCollectorSegmentValue.java @@ -0,0 +1,41 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.LeafCollector; + +class TermCollectorSegmentValue { + final Collector[] collectors; + final LeafCollector[] leafCollectors; + long count = 0L; + + TermCollectorSegmentValue(CollectorManager[] managers, LeafReaderContext leafReaderContext) + throws IOException { + this.collectors = new Collector[managers.length]; + this.leafCollectors = new LeafCollector[managers.length]; + for ( int i = 0; i < managers.length; i++ ) { + collectors[i] = managers[i].newCollector(); + leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); + } + } + + void collect(int doc) throws IOException { + count++; + for ( LeafCollector collector : leafCollectors ) { + collector.collect( doc ); + } + } + + void resetLeafCollectors(LeafReaderContext leafReaderContext) throws IOException { + for ( int i = 0; i < leafCollectors.length; i++ ) { + leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); + } + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermResults.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermResults.java new file mode 100644 index 00000000000..2f8e517a1ef --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermResults.java @@ -0,0 +1,102 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; + +import org.hibernate.search.backend.lucene.types.aggregation.impl.BucketOrder; +import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket; + +import com.carrotsearch.hppc.LongObjectHashMap; +import com.carrotsearch.hppc.procedures.LongObjectProcedure; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.util.PriorityQueue; + +public class TermResults { + + @SuppressWarnings("unchecked") + static final TermResults EMPTY = new TermResults( new CollectorKey[0], new CollectorManager[0] ); + + private final CollectorKey[] collectorKeys; + private final CollectorManager[] managers; + + private final LongObjectHashMap buckets = new LongObjectHashMap<>(); + + TermResults(CollectorKey[] collectorKeys, CollectorManager[] managers) { + this.collectorKeys = collectorKeys; + this.managers = managers; + } + + public List counts(BucketOrder order, int topN, int minDocCount) { + int size = Math.min( topN, buckets.size() ); + PriorityQueue pq = new HibernateSearchBucketOrderQueue( order, size ); + + buckets.forEach( (LongObjectProcedure) (key, value) -> { + if ( value.count >= minDocCount ) { + pq.insertWithOverflow( value ); + } + } ); + + List results = new LinkedList<>(); + while ( pq.size() != 0 ) { + LongBucket popped = pq.pop(); + results.add( 0, popped ); + } + + return results; + } + + void add(LongObjectHashMap segmentValues) { + for ( var segment : segmentValues ) { + LongBucket bucket = buckets.get( segment.key ); + if ( bucket == null ) { + bucket = new LongBucket( segment.key, 
segment.value.collectors, segment.value.count ); + buckets.put( segment.key, bucket ); + } + else { + bucket.add( segment.value.collectors, segment.value.count ); + } + } + } + + public void merge(LongObjectHashMap values) { + for ( var toadd : values ) { + LongBucket bucket = buckets.get( toadd.key ); + if ( bucket == null ) { + bucket = new LongBucket( toadd.key, toadd.value.collectors, toadd.value.count ); + buckets.put( toadd.key, bucket ); + } + else { + bucket.add( toadd.value ); + } + } + } + + public CollectorKey[] collectorKeys() { + return collectorKeys; + } + + public CollectorManager[] collectorManagers() { + return managers; + } + + private static class HibernateSearchBucketOrderQueue extends PriorityQueue { + private final Comparator comparator; + + public HibernateSearchBucketOrderQueue(BucketOrder order, int maxSize) { + super( maxSize ); + this.comparator = order.toLongBucketComparator(); + } + + @Override + protected boolean lessThan(LongBucket t1, LongBucket t2) { + return comparator.compare( t1, t2 ) > 0; + } + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java index b06a3948d96..de1dd651aad 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java @@ -5,19 +5,14 @@ package org.hibernate.search.backend.lucene.lowlevel.collector.impl; import java.io.IOException; -import java.util.Comparator; -import java.util.LinkedList; -import java.util.List; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValues; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; -import org.hibernate.search.backend.lucene.types.aggregation.impl.BucketOrder; import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket; import com.carrotsearch.hppc.LongHashSet; import com.carrotsearch.hppc.LongObjectHashMap; import com.carrotsearch.hppc.cursors.LongObjectCursor; -import com.carrotsearch.hppc.procedures.LongObjectProcedure; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.LeafReaderContext; @@ -25,10 +20,8 @@ import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; -import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.SimpleCollector; -import org.apache.lucene.util.PriorityQueue; public class TextTermsCollector extends SimpleCollector implements BaseTermsCollector { @@ -36,7 +29,7 @@ public class TextTermsCollector extends SimpleCollector implements BaseTermsColl private final TextMultiValuesSource valuesSource; private final LongObjectHashMap hashValues = new LongObjectHashMap<>(); - private final LongObjectHashMap segmentValues = new LongObjectHashMap<>(); + private final LongObjectHashMap segmentValues = new LongObjectHashMap<>(); private final String field; private SortedSetDocValues sortedSetValues; @@ -64,9 +57,9 @@ public void collect(int doc) throws IOException { // Each document must be counted only once per range. 
long value = values.nextOrd(); if ( uniqueLeafIndicesForDocument.add( value ) ) { - SegmentValue segmentValue = segmentValues.get( value ); + TermCollectorSegmentValue segmentValue = segmentValues.get( value ); if ( segmentValue == null ) { - segmentValue = new SegmentValue( managers ); + segmentValue = new TermCollectorSegmentValue( managers, leafReaderContext ); segmentValues.put( value, segmentValue ); } segmentValue.collect( doc ); @@ -75,25 +68,6 @@ public void collect(int doc) throws IOException { } } - public List results(BucketOrder order, int topN, int minDocCount) { - int size = Math.min( topN, hashValues.size() ); - PriorityQueue pq = new HibernateSearchBucketOrderQueue( order, size ); - - hashValues.forEach( (LongObjectProcedure) (key, value) -> { - if ( value.count >= minDocCount ) { - pq.insertWithOverflow( value ); - } - } ); - - List buckets = new LinkedList<>(); - while ( pq.size() != 0 ) { - LongBucket popped = pq.pop(); - buckets.add( 0, popped ); - } - - return buckets; - } - @Override public CollectorKey[] keys() { return keys; @@ -104,6 +78,10 @@ public List results(BucketOrder order, int topN, int minDocCount) { return managers; } + LongObjectHashMap segmentValues() { + return hashValues; + } + @Override public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; @@ -118,7 +96,7 @@ protected void doSetNextReader(LeafReaderContext context) throws IOException { @Override public void finish() throws IOException { - for ( LongObjectCursor value : segmentValues ) { + for ( LongObjectCursor value : segmentValues ) { long globalOrd = sortedSetValues.lookupTerm( values.lookupOrd( value.key ) ); LongBucket bucket = hashValues.get( globalOrd ); if ( bucket == null ) { @@ -145,40 +123,4 @@ private void initRootSortedSetDocValues(IndexReaderContext ctx) throws IOExcepti } initRootSortedSetDocValues( ctx.parent ); } - - private static class HibernateSearchBucketOrderQueue extends PriorityQueue { - private final Comparator comparator; - - public HibernateSearchBucketOrderQueue(BucketOrder order, int maxSize) { - super( maxSize ); - this.comparator = order.toLongBucketComparator(); - } - - @Override - protected boolean lessThan(LongBucket t1, LongBucket t2) { - return comparator.compare( t1, t2 ) > 0; - } - } - - private class SegmentValue { - final Collector[] collectors; - final LeafCollector[] leafCollectors; - long count = 0L; - - public SegmentValue(CollectorManager[] managers) throws IOException { - this.collectors = new Collector[managers.length]; - this.leafCollectors = new LeafCollector[managers.length]; - for ( int i = 0; i < managers.length; i++ ) { - collectors[i] = managers[i].newCollector(); - leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); - } - } - - public void collect(int doc) throws IOException { - count++; - for ( LeafCollector collector : leafCollectors ) { - collector.collect( doc ); - } - } - } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java index 65d2aded89f..7e95c8e270b 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java @@ -12,14 +12,14 @@ import 
org.apache.lucene.search.CollectorManager; public class TextTermsCollectorFactory - implements CollectorFactory { + implements CollectorFactory { - public static CollectorFactory instance( + public static CollectorFactory instance( String field, TextMultiValuesSource valuesSource, List> collectorFactories) { return new TextTermsCollectorFactory( field, valuesSource, collectorFactories ); } - public final CollectorKey key = CollectorKey.create(); + public final CollectorKey key = CollectorKey.create(); private final TextMultiValuesSource valuesSource; private final String field; private final List> collectorFactories; @@ -47,7 +47,7 @@ public TextTermsCollectorManager createCollectorManager(CollectorExecutionContex } @Override - public CollectorKey getCollectorKey() { + public CollectorKey getCollectorKey() { return key; } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java index 9609e7aca82..37115c1d145 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java @@ -12,7 +12,7 @@ import org.apache.lucene.search.CollectorManager; public class TextTermsCollectorManager - implements CollectorManager { + implements CollectorManager { private final TextMultiValuesSource valuesSource; private final String field; @@ -33,8 +33,14 @@ public TextTermsCollector newCollector() { } @Override - public TextTermsCollector reduce(Collection collection) { - // TODO: actually reduce: - return collection.iterator().next(); + public TermResults reduce(Collection collection) { + if ( collection.isEmpty() ) { + return TermResults.EMPTY; + } + TermResults results = new TermResults( keys, managers ); + for ( TextTermsCollector collector : collection ) { + results.merge( collector.segmentValues() ); + } + return results; } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java index 60ef17c2109..306f6c64bbd 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java @@ -12,8 +12,8 @@ import java.util.Map; import java.util.Set; -import org.hibernate.search.backend.lucene.lowlevel.collector.impl.BaseTermsCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TermResults; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; @@ -76,13 +76,13 @@ public final Map extract(AggregationExtractContext context) throws IOExcep return toMap( 
context.fromDocumentValueConvertContext(), buckets ); } - protected abstract BaseTermsCollector termsCollector(AggregationExtractContext context) throws IOException; + protected abstract TermResults termResults(AggregationExtractContext context) throws IOException; protected R createZeroValue(AggregationExtractContext context) throws IOException { LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); - var termsCollector = termsCollector( context ); - CollectorManager[] managers = termsCollector.managers(); - CollectorKey[] keys = termsCollector.keys(); + var termResults = termResults( context ); + CollectorManager[] managers = termResults.collectorManagers(); + CollectorKey[] keys = termResults.collectorKeys(); HashMap, Object> results = new HashMap<>(); for ( int i = 0; i < keys.length; i++ ) { results.put( keys[i], managers[i].reduce( List.of( managers[i].newCollector() ) ) ); @@ -110,12 +110,12 @@ private Map toMap(FromDocumentValueConvertContext convertContext, List, Object> prepareResults(LongBucket bucket, BaseTermsCollector termsCollector) + protected Map, Object> prepareResults(LongBucket bucket, TermResults termResults) throws IOException { Map, Object> result = new HashMap<>(); List[] collectors = bucket.collectors; - CollectorKey[] collectorKeys = termsCollector.keys(); - CollectorManager[] managers = termsCollector.managers(); + CollectorKey[] collectorKeys = termResults.collectorKeys(); + CollectorManager[] managers = termResults.collectorManagers(); for ( int i = 0; i < collectorKeys.length; i++ ) { result.put( collectorKeys[i], managers[i].reduce( collectors[i] ) ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java index b76319c716f..aad664b24b4 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java @@ -26,6 +26,26 @@ public LongBucket(long termOrd, Collector[] collectors, long count) { this.count = count; } + public LongBucket(long termOrd, List[] collectors, long count) { + this.termOrd = termOrd; + this.collectors = collectors; + this.count = count; + } + + public void add(Collector[] collectors, long count) { + this.count += count; + for ( int i = 0; i < collectors.length; i++ ) { + this.collectors[i].add( collectors[i] ); + } + } + + public void add(LongBucket bucket) { + this.count += bucket.count; + for ( int i = 0; i < collectors.length; i++ ) { + this.collectors[i].addAll( bucket.collectors[i] ); + } + } + public long count() { return count; } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java index 96b7a0ff2c2..b070e7b7977 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java @@ -14,10 +14,10 @@ import java.util.TreeSet; import java.util.function.Function; -import 
org.hibernate.search.backend.lucene.lowlevel.collector.impl.BaseTermsCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.NumericTermsCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.NumericTermsCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TermResults; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; @@ -51,7 +51,7 @@ public class LuceneNumericTermsAggregation private final Comparator termComparator; private final Function decoder; - private CollectorKey collectorKey; + private CollectorKey collectorKey; private LuceneNumericTermsAggregation(Builder builder) { super( builder ); @@ -100,7 +100,7 @@ private LuceneNumericTermsAggregationExtractor(Extractor extractor) { } @Override - protected BaseTermsCollector termsCollector(AggregationExtractContext context) throws IOException { + protected TermResults termResults(AggregationExtractContext context) throws IOException { return context.getCollectorResults( collectorKey ); } @@ -116,14 +116,14 @@ V termToFieldValue(E key) { @Override List> getTopBuckets(AggregationExtractContext context) throws IOException { - var termsCollector = context.getCollectorResults( collectorKey ); + var termResults = context.getCollectorResults( collectorKey ); LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); - List counts = termsCollector.counts( order, maxTermCount, minDocCount ); + List counts = termResults.counts( order, maxTermCount, minDocCount ); List> buckets = new ArrayList<>(); for ( LongBucket bucket : counts ) { - localContext.setResults( prepareResults( bucket, termsCollector ) ); + localContext.setResults( prepareResults( bucket, termResults ) ); buckets.add( new Bucket<>( numericDomain.sortedDocValueToTerm( bucket.termOrd() ), diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java index 8d305759230..d0980d10457 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java @@ -12,8 +12,8 @@ import java.util.Set; import java.util.TreeSet; -import org.hibernate.search.backend.lucene.lowlevel.collector.impl.BaseTermsCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TermResults; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TextTermsCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TextTermsCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningTextMultiValuesSource; @@ -44,7 +44,7 @@ public class LuceneTextTermsAggregation private static final Comparator STRING_COMPARATOR = Comparator.naturalOrder(); - private CollectorKey collectorKey; + private CollectorKey collectorKey; private 
LuceneTextTermsAggregation(Builder builder) { super( builder ); @@ -75,7 +75,7 @@ private LuceneTextTermsAggregationExtractor(Extractor extractor) { } @Override - protected BaseTermsCollector termsCollector(AggregationExtractContext context) throws IOException { + protected TermResults termResults(AggregationExtractContext context) throws IOException { return context.getCollectorResults( collectorKey ); } @@ -119,16 +119,16 @@ String termToFieldValue(String key) { @Override List> getTopBuckets(AggregationExtractContext context) throws IOException { - var termsCollector = context.getCollectorResults( collectorKey ); + var termResults = context.getCollectorResults( collectorKey ); LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); - List results = termsCollector.results( order, maxTermCount, minDocCount ); + List results = termResults.counts( order, maxTermCount, minDocCount ); var dv = MultiDocValues.getSortedSetValues( context.getIndexReader(), absoluteFieldPath ); List> buckets = new ArrayList<>(); for ( LongBucket bucket : results ) { - localContext.setResults( prepareResults( bucket, termsCollector ) ); + localContext.setResults( prepareResults( bucket, termResults ) ); buckets.add( new Bucket<>( dv.lookupOrd( bucket.termOrd() ).utf8ToString(), From 36f4ea8cd76d6e622f979d8692a8e36ad8d60025 Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Mon, 7 Jul 2025 19:20:05 +0200 Subject: [PATCH 18/23] HSEARCH-3661 Use reducible results in range collectors --- .../collector/impl/RangeCollector.java | 13 +---- .../collector/impl/RangeCollectorFactory.java | 19 ++----- .../collector/impl/RangeCollectorManager.java | 33 ++++++++--- .../lowlevel/collector/impl/RangeResults.java | 55 +++++++++++++++++++ .../impl/LuceneNumericRangeAggregation.java | 17 +++--- .../collector/impl/RangeCollector.java | 13 +---- .../collector/impl/RangeCollectorFactory.java | 19 ++----- .../collector/impl/RangeCollectorManager.java | 33 ++++++++--- .../lowlevel/collector/impl/RangeResults.java | 55 +++++++++++++++++++ .../impl/LuceneNumericRangeAggregation.java | 19 ++++--- 10 files changed, 191 insertions(+), 85 deletions(-) create mode 100644 backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeResults.java create mode 100644 lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeResults.java diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java index 57395708b4a..95ba455a320 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java @@ -22,7 +22,6 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Collector; -import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.SimpleCollector; @@ -36,16 +35,14 @@ public class RangeCollector extends SimpleCollector { private final Collector[][] collectors; private final CollectorKey[] keys; private final LeafCollector[][] leafCollectors; - private final CollectorManager[] managers; private LongMultiValues values; public RangeCollector(LongMultiValuesSource 
valuesSource, EffectiveRange[] ranges, Collector[][] collectors, - CollectorKey[] keys, CollectorManager[] managers) { + CollectorKey[] keys) { this.valuesSource = valuesSource; this.collectors = collectors; this.keys = keys; - this.managers = managers; // Maps all range inclusive endpoints to int flags; 1 // = start of interval, 2 = end of interval. We need to @@ -194,14 +191,6 @@ public Collector[][] collectors() { return collectors; } - public CollectorKey[] collectorKeys() { - return keys; - } - - public CollectorManager[] managers() { - return managers; - } - @Override public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java index 91a398b7b3c..f5cfb3ff06e 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java @@ -10,18 +10,17 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; -import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; public class RangeCollectorFactory - implements CollectorFactory { + implements CollectorFactory { - public static CollectorFactory instance( + public static CollectorFactory instance( LongMultiValuesSource valuesSource, EffectiveRange[] ranges, List> collectorFactories) { return new RangeCollectorFactory( valuesSource, ranges, collectorFactories ); } - public final CollectorKey key = CollectorKey.create(); + public final CollectorKey key = CollectorKey.create(); private final LongMultiValuesSource valuesSource; private final EffectiveRange[] ranges; private final List> collectorFactories; @@ -33,10 +32,9 @@ public RangeCollectorFactory(LongMultiValuesSource valuesSource, EffectiveRange[ this.collectorFactories = collectorFactories; } - @SuppressWarnings({ "rawtypes", "unchecked" }) + @SuppressWarnings({ "unchecked" }) @Override public RangeCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { - Collector[][] collectors = new Collector[collectorFactories.size()][]; CollectorKey[] keys = new CollectorKey[collectorFactories.size()]; var managers = new CollectorManager[collectorFactories.size()]; int index = 0; @@ -44,18 +42,13 @@ public RangeCollectorManager createCollectorManager(CollectorExecutionContext co CollectorManager collectorManager = collectorFactory.createCollectorManager( context ); keys[index] = collectorFactory.getCollectorKey(); managers[index] = collectorManager; - Collector[] c = new Collector[ranges.length]; - collectors[index] = c; - for ( int i = 0; i < c.length; i++ ) { - c[i] = collectorManager.newCollector(); - } index++; } - return new RangeCollectorManager( valuesSource, ranges, collectors, keys, managers ); + return new RangeCollectorManager( valuesSource, ranges, keys, managers ); } @Override - public CollectorKey getCollectorKey() { + public CollectorKey getCollectorKey() { return key; } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java index a842782c9ba..8065fc580b1 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java @@ -4,6 +4,7 @@ */ package org.hibernate.search.backend.lucene.lowlevel.collector.impl; +import java.io.IOException; import java.util.Collection; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; @@ -12,31 +13,45 @@ import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; -public class RangeCollectorManager implements CollectorManager { +public class RangeCollectorManager implements CollectorManager { private final LongMultiValuesSource valuesSource; private final EffectiveRange[] ranges; - private final Collector[][] collectors; private final CollectorKey[] keys; private final CollectorManager[] managers; - public RangeCollectorManager(LongMultiValuesSource valuesSource, EffectiveRange[] ranges, Collector[][] collectors, + public RangeCollectorManager(LongMultiValuesSource valuesSource, EffectiveRange[] ranges, CollectorKey[] keys, CollectorManager[] managers) { this.valuesSource = valuesSource; this.ranges = ranges; - this.collectors = collectors; this.keys = keys; this.managers = managers; } @Override - public RangeCollector newCollector() { - return new RangeCollector( valuesSource, ranges, collectors, keys, managers ); + public RangeCollector newCollector() throws IOException { + Collector[][] collectors = new Collector[keys.length][]; + int index = 0; + for ( CollectorManager manager : managers ) { + Collector[] c = new Collector[ranges.length]; + collectors[index] = c; + for ( int j = 0; j < c.length; j++ ) { + c[j] = manager.newCollector(); + } + index++; + } + return new RangeCollector( valuesSource, ranges, collectors, keys ); } @Override - public RangeCollector reduce(Collection collection) { - // TODO: actually reduce: - return collection.iterator().next(); + public RangeResults reduce(Collection collection) { + if ( collection.isEmpty() ) { + return RangeResults.EMPTY; + } + RangeResults results = new RangeResults( keys, managers, ranges.length ); + for ( RangeCollector collector : collection ) { + results.add( collector.collectors() ); + } + return results; } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeResults.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeResults.java new file mode 100644 index 00000000000..c54c5592554 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeResults.java @@ -0,0 +1,55 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; + +public class RangeResults { + + @SuppressWarnings("unchecked") + static final RangeResults EMPTY = new RangeResults( new CollectorKey[0], new CollectorManager[0], 0 ); + + private final CollectorKey[] collectorKeys; + private final CollectorManager[] managers; + + private final List[][] buckets; + + @SuppressWarnings("unchecked") + RangeResults(CollectorKey[] collectorKeys, CollectorManager[] managers, int ranges) { + this.collectorKeys = collectorKeys; + this.managers = managers; + this.buckets = new List[managers.length][]; + for ( int i = 0; i < buckets.length; i++ ) { + buckets[i] = new List[ranges]; + for ( int j = 0; j < buckets[i].length; j++ ) { + buckets[i][j] = new ArrayList<>(); + } + } + } + + void add(Collector[][] collectors) { + for ( int collectorIndex = 0; collectorIndex < collectors.length; collectorIndex++ ) { + for ( int rangeIndex = 0; rangeIndex < collectors[collectorIndex].length; rangeIndex++ ) { + buckets[collectorIndex][rangeIndex].add( collectors[collectorIndex][rangeIndex] ); + } + } + } + + public List[][] buckets() { + return buckets; + } + + public CollectorKey[] collectorKeys() { + return collectorKeys; + } + + public CollectorManager[] collectorManagers() { + return managers; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java index e9ebf2f5a85..19cd2204aab 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java @@ -15,6 +15,7 @@ import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeResults; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; @@ -48,7 +49,7 @@ public class LuceneNumericRangeAggregation private final List> rangesInOrder; private final List> encodedRangesInOrder; - private CollectorKey collectorKey; + private CollectorKey collectorKey; private LuceneNumericRangeAggregation(Builder builder) { super( builder ); @@ -102,26 +103,26 @@ public LuceneNumericRangeAggregationExtractor(Extractor extractor) { @Override public Map, V> extract(AggregationExtractContext context) throws IOException { - RangeCollector rangeCollector = context.getCollectorResults( collectorKey ); + RangeResults rangeResults = context.getCollectorResults( collectorKey ); LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); Map, V> result = new LinkedHashMap<>(); for ( int i = 0; i < rangesInOrder.size(); i++ ) { - localContext.setResults( prepareResults( i, rangeCollector ) ); + 
localContext.setResults( prepareResults( i, rangeResults ) ); result.put( rangesInOrder.get( i ), extractor.extract( localContext ) ); } return result; } - private Map, Object> prepareResults(int index, RangeCollector rangeCollector) throws IOException { + private Map, Object> prepareResults(int index, RangeResults rangeResults) throws IOException { Map, Object> result = new HashMap<>(); - Collector[][] collectors = rangeCollector.collectors(); - CollectorKey[] collectorKeys = rangeCollector.collectorKeys(); - CollectorManager[] managers = rangeCollector.managers(); + List[][] collectors = rangeResults.buckets(); + CollectorKey[] collectorKeys = rangeResults.collectorKeys(); + CollectorManager[] managers = rangeResults.collectorManagers(); for ( int i = 0; i < collectorKeys.length; i++ ) { - result.put( collectorKeys[i], managers[i].reduce( List.of( collectors[i][index] ) ) ); + result.put( collectorKeys[i], managers[i].reduce( collectors[i][index] ) ); } return result; } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java index 57395708b4a..95ba455a320 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java @@ -22,7 +22,6 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Collector; -import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.LeafCollector; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.SimpleCollector; @@ -36,16 +35,14 @@ public class RangeCollector extends SimpleCollector { private final Collector[][] collectors; private final CollectorKey[] keys; private final LeafCollector[][] leafCollectors; - private final CollectorManager[] managers; private LongMultiValues values; public RangeCollector(LongMultiValuesSource valuesSource, EffectiveRange[] ranges, Collector[][] collectors, - CollectorKey[] keys, CollectorManager[] managers) { + CollectorKey[] keys) { this.valuesSource = valuesSource; this.collectors = collectors; this.keys = keys; - this.managers = managers; // Maps all range inclusive endpoints to int flags; 1 // = start of interval, 2 = end of interval. 
We need to @@ -194,14 +191,6 @@ public Collector[][] collectors() { return collectors; } - public CollectorKey[] collectorKeys() { - return keys; - } - - public CollectorManager[] managers() { - return managers; - } - @Override public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java index 91a398b7b3c..f5cfb3ff06e 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java @@ -10,18 +10,17 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; -import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; public class RangeCollectorFactory - implements CollectorFactory { + implements CollectorFactory { - public static CollectorFactory instance( + public static CollectorFactory instance( LongMultiValuesSource valuesSource, EffectiveRange[] ranges, List> collectorFactories) { return new RangeCollectorFactory( valuesSource, ranges, collectorFactories ); } - public final CollectorKey key = CollectorKey.create(); + public final CollectorKey key = CollectorKey.create(); private final LongMultiValuesSource valuesSource; private final EffectiveRange[] ranges; private final List> collectorFactories; @@ -33,10 +32,9 @@ public RangeCollectorFactory(LongMultiValuesSource valuesSource, EffectiveRange[ this.collectorFactories = collectorFactories; } - @SuppressWarnings({ "rawtypes", "unchecked" }) + @SuppressWarnings({ "unchecked" }) @Override public RangeCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { - Collector[][] collectors = new Collector[collectorFactories.size()][]; CollectorKey[] keys = new CollectorKey[collectorFactories.size()]; var managers = new CollectorManager[collectorFactories.size()]; int index = 0; @@ -44,18 +42,13 @@ public RangeCollectorManager createCollectorManager(CollectorExecutionContext co CollectorManager collectorManager = collectorFactory.createCollectorManager( context ); keys[index] = collectorFactory.getCollectorKey(); managers[index] = collectorManager; - Collector[] c = new Collector[ranges.length]; - collectors[index] = c; - for ( int i = 0; i < c.length; i++ ) { - c[i] = collectorManager.newCollector(); - } index++; } - return new RangeCollectorManager( valuesSource, ranges, collectors, keys, managers ); + return new RangeCollectorManager( valuesSource, ranges, keys, managers ); } @Override - public CollectorKey getCollectorKey() { + public CollectorKey getCollectorKey() { return key; } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java index a842782c9ba..8065fc580b1 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java +++ 
b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java @@ -4,6 +4,7 @@ */ package org.hibernate.search.backend.lucene.lowlevel.collector.impl; +import java.io.IOException; import java.util.Collection; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; @@ -12,31 +13,45 @@ import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; -public class RangeCollectorManager implements CollectorManager { +public class RangeCollectorManager implements CollectorManager { private final LongMultiValuesSource valuesSource; private final EffectiveRange[] ranges; - private final Collector[][] collectors; private final CollectorKey[] keys; private final CollectorManager[] managers; - public RangeCollectorManager(LongMultiValuesSource valuesSource, EffectiveRange[] ranges, Collector[][] collectors, + public RangeCollectorManager(LongMultiValuesSource valuesSource, EffectiveRange[] ranges, CollectorKey[] keys, CollectorManager[] managers) { this.valuesSource = valuesSource; this.ranges = ranges; - this.collectors = collectors; this.keys = keys; this.managers = managers; } @Override - public RangeCollector newCollector() { - return new RangeCollector( valuesSource, ranges, collectors, keys, managers ); + public RangeCollector newCollector() throws IOException { + Collector[][] collectors = new Collector[keys.length][]; + int index = 0; + for ( CollectorManager manager : managers ) { + Collector[] c = new Collector[ranges.length]; + collectors[index] = c; + for ( int j = 0; j < c.length; j++ ) { + c[j] = manager.newCollector(); + } + index++; + } + return new RangeCollector( valuesSource, ranges, collectors, keys ); } @Override - public RangeCollector reduce(Collection collection) { - // TODO: actually reduce: - return collection.iterator().next(); + public RangeResults reduce(Collection collection) { + if ( collection.isEmpty() ) { + return RangeResults.EMPTY; + } + RangeResults results = new RangeResults( keys, managers, ranges.length ); + for ( RangeCollector collector : collection ) { + results.add( collector.collectors() ); + } + return results; } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeResults.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeResults.java new file mode 100644 index 00000000000..c54c5592554 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeResults.java @@ -0,0 +1,55 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; + +public class RangeResults { + + @SuppressWarnings("unchecked") + static final RangeResults EMPTY = new RangeResults( new CollectorKey[0], new CollectorManager[0], 0 ); + + private final CollectorKey[] collectorKeys; + private final CollectorManager[] managers; + + private final List[][] buckets; + + @SuppressWarnings("unchecked") + RangeResults(CollectorKey[] collectorKeys, CollectorManager[] managers, int ranges) { + this.collectorKeys = collectorKeys; + this.managers = managers; + this.buckets = new List[managers.length][]; + for ( int i = 0; i < buckets.length; i++ ) { + buckets[i] = new List[ranges]; + for ( int j = 0; j < buckets[i].length; j++ ) { + buckets[i][j] = new ArrayList<>(); + } + } + } + + void add(Collector[][] collectors) { + for ( int collectorIndex = 0; collectorIndex < collectors.length; collectorIndex++ ) { + for ( int rangeIndex = 0; rangeIndex < collectors[collectorIndex].length; rangeIndex++ ) { + buckets[collectorIndex][rangeIndex].add( collectors[collectorIndex][rangeIndex] ); + } + } + } + + public List[][] buckets() { + return buckets; + } + + public CollectorKey[] collectorKeys() { + return collectorKeys; + } + + public CollectorManager[] collectorManagers() { + return managers; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java index 35ea9bff9ff..19cd2204aab 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java @@ -15,6 +15,7 @@ import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollector; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeResults; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; @@ -48,7 +49,7 @@ public class LuceneNumericRangeAggregation private final List> rangesInOrder; private final List> encodedRangesInOrder; - private CollectorKey collectorKey; + private CollectorKey collectorKey; private LuceneNumericRangeAggregation(Builder builder) { super( builder ); @@ -102,26 +103,26 @@ public LuceneNumericRangeAggregationExtractor(Extractor extractor) { @Override public Map, V> extract(AggregationExtractContext context) throws IOException { - RangeCollector rangeCollector = context.getCollectorResults( collectorKey ); + RangeResults rangeResults = context.getCollectorResults( collectorKey ); LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); Map, V> result = new LinkedHashMap<>(); for ( int i = 0; i < rangesInOrder.size(); i++ ) { - localContext.setResults( 
prepareResults( i, rangeCollector ) ); + localContext.setResults( prepareResults( i, rangeResults ) ); result.put( rangesInOrder.get( i ), extractor.extract( localContext ) ); } return result; } - private Map, Object> prepareResults(int index, RangeCollector rangeCollector) throws IOException { + private Map, Object> prepareResults(int index, RangeResults rangeResults) throws IOException { Map, Object> result = new HashMap<>(); - Collector[][] collectors = rangeCollector.collectors(); - CollectorKey[] collectorKeys = rangeCollector.collectorKeys(); - CollectorManager[] managers = rangeCollector.managers(); + List[][] collectors = rangeResults.buckets(); + CollectorKey[] collectorKeys = rangeResults.collectorKeys(); + CollectorManager[] managers = rangeResults.collectorManagers(); for ( int i = 0; i < collectorKeys.length; i++ ) { - result.put( collectorKeys[i], managers[i].reduce( List.of( collectors[i][index] ) ) ); + result.put( collectorKeys[i], managers[i].reduce( collectors[i][index] ) ); } return result; } @@ -149,7 +150,7 @@ private TypeSelector(AbstractLuceneNumericFieldCodec codec, } public static class Builder - extends AbstractBuilder, V> + extends AbstractLuceneBucketAggregation.AbstractBuilder, V> implements RangeAggregationBuilder { private final AbstractLuceneNumericFieldCodec codec; From 170ac56a0dc42894fdd29228fb455674c67d3cff Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Tue, 8 Jul 2025 13:21:15 +0200 Subject: [PATCH 19/23] HSEARCH-3661 Add more tests around aggregations to the backend TCK --- .../RangeAggregationSpecificsIT.java | 340 +++++++++++++++++- .../TermsAggregationSpecificsIT.java | 86 ++++- .../types/FieldTypeDescriptor.java | 4 + .../OffsetDateTimeFieldTypeDescriptor.java | 5 + .../types/OffsetTimeFieldTypeDescriptor.java | 5 + .../ZonedDateTimeFieldTypeDescriptor.java | 5 + 6 files changed, 442 insertions(+), 3 deletions(-) diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/RangeAggregationSpecificsIT.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/RangeAggregationSpecificsIT.java index 67587eeb285..92ac8fe41e9 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/RangeAggregationSpecificsIT.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/RangeAggregationSpecificsIT.java @@ -31,7 +31,9 @@ import org.hibernate.search.integrationtest.backend.tck.testsupport.operations.AggregationDescriptor; import org.hibernate.search.integrationtest.backend.tck.testsupport.operations.RangeAggregationDescriptor; import org.hibernate.search.integrationtest.backend.tck.testsupport.types.FieldTypeDescriptor; +import org.hibernate.search.integrationtest.backend.tck.testsupport.types.IntegerFieldTypeDescriptor; import org.hibernate.search.integrationtest.backend.tck.testsupport.types.StandardFieldTypeDescriptor; +import org.hibernate.search.integrationtest.backend.tck.testsupport.util.SimpleFieldModel; import org.hibernate.search.integrationtest.backend.tck.testsupport.util.SimpleFieldModelsByType; import org.hibernate.search.integrationtest.backend.tck.testsupport.util.TckConfiguration; import org.hibernate.search.integrationtest.backend.tck.testsupport.util.ValueWrapper; @@ -558,6 +560,325 @@ void rangeOverlap_parmeters(FieldTypeDescriptor fieldType, DataSet data ); } + @ParameterizedTest(name = 
"{0}") + @MethodSource("params") + void rangesBucket_countDocuments(FieldTypeDescriptor fieldType, DataSet dataSet) { + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey, Long>> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( + matchAllQuery() + .aggregation( aggregationKey, f -> f.range().field( fieldPath, fieldType.getJavaType() ) + .ranges( Arrays.asList( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), + Range.canonical( dataSet.ascendingValues.get( 5 ), null ) + ) ).value( f.countDocuments() ) + ) + .routing( dataSet.name ) + .toQuery() + ).aggregation( + aggregationKey, + containsExactly( c -> { + c.accept( Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), 3L ); + c.accept( Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), + 2L ); + c.accept( Range.canonical( dataSet.ascendingValues.get( 5 ), null ), 2L ); + } ) + ); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void rangesBucket_min(FieldTypeDescriptor fieldType, DataSet dataSet) { + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey, F>> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( + matchAllQuery() + .aggregation( + aggregationKey, f -> f.range().field( fieldPath, fieldType.getJavaType() ) + .ranges( Arrays.asList( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Range.canonical( dataSet.ascendingValues.get( 3 ), + dataSet.ascendingValues.get( 5 ) ), + Range.canonical( dataSet.ascendingValues.get( 5 ), null ) + ) ).value( f.min().field( fieldPath, fieldType.getJavaType() ) ) + ) + .routing( dataSet.name ) + .toQuery() + ).aggregation( + aggregationKey, + containsExactly( c -> { + c.accept( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + dataSet.fieldType.normalize( dataSet.ascendingValues.get( 0 ) ) + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), + dataSet.fieldType.normalize( dataSet.ascendingValues.get( 3 ) ) + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 5 ), null ), + dataSet.fieldType.normalize( dataSet.ascendingValues.get( 5 ) ) + ); + } ) + ); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void rangesBucket_max(FieldTypeDescriptor fieldType, DataSet dataSet) { + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey, F>> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( + matchAllQuery() + .aggregation( + aggregationKey, f -> f.range().field( fieldPath, fieldType.getJavaType() ) + .ranges( Arrays.asList( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Range.canonical( dataSet.ascendingValues.get( 3 ), + dataSet.ascendingValues.get( 5 ) ), + Range.canonical( dataSet.ascendingValues.get( 5 ), null ) + ) ).value( f.max().field( fieldPath, fieldType.getJavaType() ) ) + ) + .routing( dataSet.name ) + .toQuery() + ).aggregation( + aggregationKey, + containsExactly( c -> { + c.accept( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + dataSet.fieldType.normalize( dataSet.ascendingValues.get( 2 ) ) + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), + dataSet.fieldType.normalize( dataSet.ascendingValues.get( 4 ) ) + ); + 
c.accept( + Range.canonical( dataSet.ascendingValues.get( 5 ), null ), + dataSet.fieldType.normalize( dataSet.ascendingValues.get( 6 ) ) + ); + } ) + ); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void rangesBucket_countValues(FieldTypeDescriptor fieldType, DataSet dataSet) { + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey, Long>> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( + matchAllQuery() + .aggregation( + aggregationKey, f -> f.range().field( fieldPath, fieldType.getJavaType() ) + .ranges( Arrays.asList( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Range.canonical( dataSet.ascendingValues.get( 3 ), + dataSet.ascendingValues.get( 5 ) ), + Range.canonical( dataSet.ascendingValues.get( 5 ), null ) + ) ).value( f.countValues().field( index.binding().bucketMultiValue.relativeFieldName ) ) + ) + .routing( dataSet.name ) + .toQuery() + ).aggregation( + aggregationKey, + containsExactly( c -> { + c.accept( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + 12L + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), + 8L + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 5 ), null ), + 8L + ); + } ) + ); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void rangesBucket_countDistinctValues(FieldTypeDescriptor fieldType, DataSet dataSet) { + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey, Long>> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( + matchAllQuery() + .aggregation( + aggregationKey, f -> f.range().field( fieldPath, fieldType.getJavaType() ) + .ranges( Arrays.asList( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Range.canonical( dataSet.ascendingValues.get( 3 ), + dataSet.ascendingValues.get( 5 ) ), + Range.canonical( dataSet.ascendingValues.get( 5 ), null ) + ) ) + .value( f.countDistinctValues() + .field( index.binding().bucketMultiValue.relativeFieldName ) ) + ) + .routing( dataSet.name ) + .toQuery() + ).aggregation( + aggregationKey, + containsExactly( c -> { + c.accept( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + 5L // 10 * 0 0 0 0 -- hence odd number in this range + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), + 4L + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 5 ), null ), + 4L + ); + } ) + ); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void rangesBucket_terms_countImplicit(FieldTypeDescriptor fieldType, DataSet dataSet) { + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey, Map>> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( + matchAllQuery() + .aggregation( + aggregationKey, f -> f.range().field( fieldPath, fieldType.getJavaType() ) + .ranges( Arrays.asList( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Range.canonical( dataSet.ascendingValues.get( 3 ), + dataSet.ascendingValues.get( 5 ) ), + Range.canonical( dataSet.ascendingValues.get( 5 ), null ) + ) ) + .value( f.terms().field( index.binding().bucketMultiValue.relativeFieldName, + Integer.class ) ) + ) + .routing( dataSet.name ) + .toQuery() + ).aggregation( + aggregationKey, + containsExactly( c -> { + c.accept( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Map.of( 
0, 1L, 1, 1L, 2, 1L, 10, 1L, 20, 1L ) + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), + Map.of( 3, 1L, 4, 1L, 30, 1L, 40, 1L ) + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 5 ), null ), + Map.of( 5, 1L, 6, 1L, 50, 1L, 60, 1L ) + ); + } ) + ); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void rangesBucket_terms_sum(FieldTypeDescriptor fieldType, DataSet dataSet) { + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey, Map>> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( + matchAllQuery() + .aggregation( + aggregationKey, f -> f.range().field( fieldPath, fieldType.getJavaType() ) + .ranges( Arrays.asList( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Range.canonical( dataSet.ascendingValues.get( 3 ), + dataSet.ascendingValues.get( 5 ) ), + Range.canonical( dataSet.ascendingValues.get( 5 ), null ) + ) ) + .value( f.terms() + .field( index.binding().bucketMultiValue.relativeFieldName, Integer.class ) + .value( f.sum().field( index.binding().bucketMultiValue.relativeFieldName, + Integer.class ) ) ) + ) + .routing( dataSet.name ) + .toQuery() + ).aggregation( + aggregationKey, + containsExactly( c -> { + c.accept( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Map.of( 0, 0, 1, 13, 2, 26, 10, 13, 20, 26 ) + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), + Map.of( 3, 39, 4, 52, 30, 39, 40, 52 ) + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 5 ), null ), + Map.of( 5, 65, 6, 78, 50, 65, 60, 78 ) + ); + } ) + ); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void rangesBucket_range_countExplicit(FieldTypeDescriptor fieldType, DataSet dataSet) { + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey, Map, Long>>> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( + matchAllQuery() + .aggregation( + aggregationKey, f -> f.range().field( fieldPath, fieldType.getJavaType() ) + .ranges( Arrays.asList( + Range.canonical( null, dataSet.ascendingValues.get( 5 ) ), + Range.canonical( dataSet.ascendingValues.get( 5 ), null ) + ) ).value( f.range().field( fieldPath, fieldType.getJavaType() ) + .ranges( Arrays.asList( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Range.canonical( dataSet.ascendingValues.get( 3 ), + dataSet.ascendingValues.get( 5 ) ), + Range.canonical( dataSet.ascendingValues.get( 5 ), null ) + ) ) ) + ) + .routing( dataSet.name ) + .toQuery() + ).aggregation( + aggregationKey, + containsExactly( c -> { + c.accept( + Range.canonical( null, dataSet.ascendingValues.get( 5 ) ), + Map.of( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), 3L, + Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), 2L, + Range.canonical( dataSet.ascendingValues.get( 5 ), null ), 0L + ) + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 5 ), null ), + Map.of( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), 0L, + Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), 0L, + Range.canonical( dataSet.ascendingValues.get( 5 ), null ), 2L + ) + ); + } ) + ); + } + + private void assumeNonCanonicalRangesSupported() { assumeTrue( TckConfiguration.get().getBackendFeatures().nonCanonicalRangeInAggregations(), @@ -565,7 +886,7 @@ 
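+	// The expectations in the rangesBucket_* tests above follow from DataSet#init():
+	// each of the seven documents with field values also indexes bucketMultiValue = { i, i, i, i * 10 },
+	// where i is the document index, so the three ranges hold 3/2/2 documents,
+	// 12/8/8 values and 5/4/4 distinct values (10 * 0 collides with 0 in the first range).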
private void assumeNonCanonicalRangesSupported() { ); } - private SearchQueryOptionsStep matchAllQuery() { + private SearchQueryOptionsStep matchAllQuery() { return index.createScope().query().where( f -> f.matchAll() ); } @@ -593,10 +914,18 @@ private DataSet(FieldTypeDescriptor fieldType) { private void init() { BulkIndexer indexer = index.bulkIndexer(); for ( int i = 0; i < documentFieldValues.size(); i++ ) { - F value = documentFieldValues.get( i ); + final F value = documentFieldValues.get( i ); + final int bucketValue = i; indexer.add( name + "_document_" + i, name, document -> { document.addValue( index.binding().fieldModels.get( fieldType ).reference, value ); document.addValue( index.binding().fieldWithConverterModels.get( fieldType ).reference, value ); + + document.addValue( index.binding().bucketValue.reference, bucketValue ); + + document.addValue( index.binding().bucketMultiValue.reference, bucketValue ); + document.addValue( index.binding().bucketMultiValue.reference, bucketValue ); + document.addValue( index.binding().bucketMultiValue.reference, bucketValue ); + document.addValue( index.binding().bucketMultiValue.reference, bucketValue * 10 ); } ); } indexer.add( name + "_document_empty", name, document -> {} ); @@ -608,6 +937,8 @@ private static class IndexBinding { final SimpleFieldModelsByType fieldModels; final SimpleFieldModelsByType fieldWithConverterModels; final SimpleFieldModelsByType fieldWithAggregationDisabledModels; + final SimpleFieldModel bucketValue; + final SimpleFieldModel bucketMultiValue; IndexBinding(IndexSchemaElement root) { fieldModels = SimpleFieldModelsByType.mapAll( supportedFieldTypes, root, @@ -622,6 +953,11 @@ private static class IndexBinding { fieldWithAggregationDisabledModels = SimpleFieldModelsByType.mapAll( supportedFieldTypes, root, "nonAggregable_", c -> c.aggregable( Aggregable.NO ) ); + bucketValue = SimpleFieldModel.mapper( IntegerFieldTypeDescriptor.INSTANCE, c -> c.aggregable( Aggregable.YES ) ) + .map( root, "bucketValue" ); + bucketMultiValue = + SimpleFieldModel.mapper( IntegerFieldTypeDescriptor.INSTANCE, c -> c.aggregable( Aggregable.YES ) ) + .mapMultiValued( root, "bucketMultiValue" ); } } diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/TermsAggregationSpecificsIT.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/TermsAggregationSpecificsIT.java index a3b9fdeaa50..152e7a2996c 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/TermsAggregationSpecificsIT.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/TermsAggregationSpecificsIT.java @@ -622,7 +622,91 @@ void maxTermCount_veryLarge(FieldTypeDescriptor fieldType, DataSet data ); } - private SearchQueryOptionsStep matchAllQuery() { + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void terms_explicitDocCount(FieldTypeDescriptor fieldType, DataSet dataSet) { + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( matchAllQuery() + .aggregation( + aggregationKey, f -> f.terms().field( fieldPath, fieldType.getJavaType() ) + .value( f.countDocuments() ) + ) + .routing( dataSet.name ) ) + .aggregation( + aggregationKey, + // All buckets should be returned. 
+ containsInAnyOrder( + c -> { + for ( F value : dataSet.valuesInDescendingOrder ) { + c.accept( value, (long) dataSet.documentIdPerTerm.get( value ).size() ); + } + }, fieldType + ) + ); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void terms_min(FieldTypeDescriptor fieldType, DataSet dataSet) { + assumeTrue( fieldType.supportsMetricAggregation(), + "Since the value is a metric aggregation on the same field, we want to be sure that only those fields that support it are included." ); + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( matchAllQuery() + .aggregation( + aggregationKey, f -> f.terms().field( fieldPath, fieldType.getJavaType() ) + // while maybe silly as min/max == the same term as the key it is here just to test the nesting and aggregations: + .value( f.min().field( fieldPath, fieldType.getJavaType() ) ) + ) + .routing( dataSet.name ) ) + .aggregation( + aggregationKey, + // All buckets should be returned. + containsInAnyOrder( + c -> { + for ( F value : dataSet.valuesInDescendingOrder ) { + c.accept( value, fieldType.normalize( value ) ); + } + }, fieldType + ) + ); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void terms_max(FieldTypeDescriptor fieldType, DataSet dataSet) { + assumeTrue( fieldType.supportsMetricAggregation(), + "Since the value is a metric aggregation on the same field, we want to be sure that only those fields that support it are included." ); + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( matchAllQuery() + .aggregation( + aggregationKey, f -> f.terms().field( fieldPath, fieldType.getJavaType() ) + // while maybe silly as min/max == the same term as the key it is here just to test the nesting and aggregations: + .value( f.max().field( fieldPath, fieldType.getJavaType() ) ) + ) + .routing( dataSet.name ) ) + .aggregation( + aggregationKey, + // All buckets should be returned. 
+ containsInAnyOrder( + c -> { + for ( F value : dataSet.valuesInDescendingOrder ) { + c.accept( value, fieldType.normalize( value ) ); + } + }, fieldType + ) + ); + } + + private SearchQueryOptionsStep matchAllQuery() { return index.createScope().query().where( f -> f.matchAll() ); } diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/FieldTypeDescriptor.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/FieldTypeDescriptor.java index db7fd706351..78b29bb4d6a 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/FieldTypeDescriptor.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/FieldTypeDescriptor.java @@ -244,4 +244,8 @@ public String format(F value) { return Objects.toString( value, null ); } + public F normalize(F value) { + return value; + } + } diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/OffsetDateTimeFieldTypeDescriptor.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/OffsetDateTimeFieldTypeDescriptor.java index 4f7f5c73ecb..aea17ac062c 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/OffsetDateTimeFieldTypeDescriptor.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/OffsetDateTimeFieldTypeDescriptor.java @@ -171,4 +171,9 @@ public Optional> getIndex public String format(OffsetDateTime value) { return DateTimeFormatter.ISO_OFFSET_DATE_TIME.format( value ); } + + @Override + public OffsetDateTime normalize(OffsetDateTime value) { + return value == null ? null : value.toInstant().atOffset( ZoneOffset.UTC ); + } } diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/OffsetTimeFieldTypeDescriptor.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/OffsetTimeFieldTypeDescriptor.java index 5e01be081e6..5841c7db880 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/OffsetTimeFieldTypeDescriptor.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/OffsetTimeFieldTypeDescriptor.java @@ -134,4 +134,9 @@ public Optional> getIndexNull public String format(OffsetTime value) { return FormatUtils.format( value ); } + + @Override + public OffsetTime normalize(OffsetTime value) { + return value == null ? 
null : value.withOffsetSameInstant( ZoneOffset.UTC );
+ }
 }
diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/ZonedDateTimeFieldTypeDescriptor.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/ZonedDateTimeFieldTypeDescriptor.java
index 2c5f74349c8..ed434d213a8 100644
--- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/ZonedDateTimeFieldTypeDescriptor.java
+++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/ZonedDateTimeFieldTypeDescriptor.java
@@ -193,4 +193,9 @@ public Optional> getIndexN
 public String format(ZonedDateTime value) {
 return DateTimeFormatter.ISO_ZONED_DATE_TIME.format( value );
 }
+
+ @Override
+ public ZonedDateTime normalize(ZonedDateTime value) {
+ return value == null ? null : value.withZoneSameInstant( ZoneOffset.UTC );
+ }
 }

From 3b120f920d96619335c837a50b79bed7b08fb6d0 Mon Sep 17 00:00:00 2001
From: marko-bekhta
Date: Tue, 8 Jul 2025 18:12:23 +0200
Subject: [PATCH 20/23] HSEARCH-3661 Document range/terms .values(..)

---
 .../reference/_search-dsl-aggregation.adoc | 65 +++++++++++++---
 .../search/aggregation/AggregationDslIT.java | 77 +++++++++++--------
 .../search/aggregation/Book.java | 13 ++++
 .../dsl/RangeAggregationRangeValueStep.java | 25 +++++-
 .../dsl/TermsAggregationRangeValueStep.java | 24 ++++++
 5 files changed, 164 insertions(+), 40 deletions(-)

diff --git a/documentation/src/main/asciidoc/public/reference/_search-dsl-aggregation.adoc b/documentation/src/main/asciidoc/public/reference/_search-dsl-aggregation.adoc
index e6156785e12..3aeebf925cf 100644
--- a/documentation/src/main/asciidoc/public/reference/_search-dsl-aggregation.adoc
+++ b/documentation/src/main/asciidoc/public/reference/_search-dsl-aggregation.adoc
@@ -220,6 +220,23 @@ When ordering entries by ascending count in a `terms` aggregation,
 link:{elasticsearchDocUrl}/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-order[hit counts are approximate].
 ====
+[[search-dsl-aggregation-terms-value]]
+=== Aggregated value
+
+By default, the aggregated value represents the number of documents that fall into the group of a particular term.
+With the `.value(..)` step in the aggregation definition, it is possible to set the aggregated value to something other than the document count.
+The `.value(..)` step accepts any other aggregation, which is then applied to the documents within the aggregated group.
+
+.Total price of books per category
+====
+[source, JAVA, indent=0, subs="+callouts"]
+----
+include::{sourcedir}/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java[tags=terms-sum]
+----
+<1> Define the path and type of the field whose values should be considered as terms for the aggregation.
+<2> Define what the aggregated value should represent, e.g. the sum of all book prices within the genre.
+====
+
 [[search-dsl-aggregation-terms-other]]
 === Other options
@@ -318,6 +335,23 @@ include::{sourcedir}/org/hibernate/search/documentation/search/aggregation/Aggre
 See <> for more information.
+[[search-dsl-aggregation-range-value]]
+=== Aggregated value
+
+By default, the aggregated value represents the number of documents that fall into a particular, defined range.
+With the `.value(..)` step in the aggregation definition, it is possible to set the aggregated value to something other than the document count.
+The `.value(..)` step accepts any other aggregation, which is then applied to the documents within each range.
+
+.Average rating of books per price range
+====
+[source, JAVA, indent=0, subs="+callouts"]
+----
+include::{sourcedir}/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java[tags=range-avg]
+----
+<1> Define the path and type of the field whose values will be matched against the ranges.
+<2> Define what the aggregated value should represent, e.g. the average rating of all books within the price range.
+====
+
 [[search-dsl-aggregation-range-other]]
 === Other options
@@ -329,14 +363,8 @@ but that can be
 <1> Define the target field path to which you want to apply the aggregation function and the expected returned type.
 ====
+=== Count documents metric aggregation
+
+The `count documents` aggregation counts the number of documents.
+While using this aggregation at the root level is usually discouraged,
+as the result would be equivalent to the count returned by the search results in `SearchResultTotal`,
+this aggregation can still be useful in defining aggregation values in other, more complex aggregations like
+<> or <>.
+
+.Count the number of the science fiction books
+====
+[source, JAVA, indent=0, subs="+callouts"]
+----
+include::{sourcedir}/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java[tags=count-documents]
+----
+<1> Apply the document count aggregation. It always returns a `Long` value.
+====
+
 === Count values metric aggregation
 The `count values` aggregation counts the number of non-empty field values.
+This aggregation mostly makes sense when the aggregated field is multivalued.
+For single-valued fields it simply counts the documents where the aggregated field is present.
 .Count the number of the science fiction books with prices
 ====
 [source, JAVA, indent=0, subs="+callouts"]
 ----
 include::{sourcedir}/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java[tags=count]
 ----
 <1> Define the target field path to which you want to apply the aggregation function and the expected returned type.
 ====
@@ -407,7 +454,7 @@ include::{sourcedir}/org/hibernate/search/documentation/search/aggregation/Aggre
 The `count distinct values` aggregation counts the number of unique field values.
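+For the example data used in this chapter, every matching science fiction book has a different price,
+so this aggregation returns the same number as the plain value count from the previous section;
+the two only start to differ once several documents share a value for the aggregated field.
+A minimal sketch comparing the two counts on the multi-valued `ratings` field could look like this
+(the aggregation key names are arbitrary):
+
+[source, JAVA, indent=0]
+----
+AggregationKey<Long> allRatingsKey = AggregationKey.of( "allRatings" );
+AggregationKey<Long> distinctRatingsKey = AggregationKey.of( "distinctRatings" );
+SearchResult<Book> result = searchSession.search( Book.class )
+        .where( f -> f.matchAll() )
+        // counts every indexed rating value
+        .aggregation( allRatingsKey, f -> f.countValues().field( "ratings" ) )
+        // counts unique rating values only
+        .aggregation( distinctRatingsKey, f -> f.countDistinctValues().field( "ratings" ) )
+        .fetch( 20 );
+Long allRatings = result.aggregation( allRatingsKey );
+Long distinctRatings = result.aggregation( distinctRatingsKey );
+----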
-.Count anytime the price field has a different value among all the science fiction books +.Count the number of all different price value among all the science fiction books ==== [source, JAVA, indent=0, subs="+callouts"] ---- diff --git a/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java b/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java index 75c482ff8ca..26a6cd4c373 100644 --- a/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java +++ b/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java @@ -272,23 +272,21 @@ void terms() { void terms_value() { withinSearchSession( searchSession -> { // tag::terms-sum[] - AggregationKey> sumByPriceKey = AggregationKey.of( "sumByPrice" ); + AggregationKey> sumByCategoryKey = AggregationKey.of( "sumByCategory" ); SearchResult result = searchSession.search( Book.class ) .where( f -> f.matchAll() ) .aggregation( - sumByPriceKey, f -> f.terms() - .field( "price", Double.class ) // <1> - .value( f.sum().field( "price", Double.class ) ) + sumByCategoryKey, f -> f.terms() + .field( "genre", Genre.class ) // <1> + .value( f.sum().field( "price", Double.class ) ) // <2> ) .fetch( 20 ); - Map sumByPrice = result.aggregation( sumByPriceKey ); + Map sumByPrice = result.aggregation( sumByCategoryKey ); // end::terms-sum[] assertThat( sumByPrice ) .containsExactly( - entry( 7.99, 7.99 ), - entry( 15.99, 15.99 ), - entry( 19.99, 19.99 ), - entry( 24.99, 24.99 ) + entry( Genre.SCIENCE_FICTION, 60.97 ), + entry( Genre.CRIME_FICTION, 7.99 ) ); } ); @@ -339,26 +337,26 @@ void terms_value() { @Test void range_value() { withinSearchSession( searchSession -> { - // tag::range-sum[] - AggregationKey, Double>> countsByPriceKey = AggregationKey.of( "countsByPrice" ); + // tag::range-avg[] + AggregationKey, Double>> avgRatingByPriceKey = AggregationKey.of( "avgRatingByPrice" ); SearchResult result = searchSession.search( Book.class ) .where( f -> f.matchAll() ) .aggregation( - countsByPriceKey, f -> f.range() + avgRatingByPriceKey, f -> f.range() .field( "price", Double.class ) // <1> - .range( 0.0, 10.0 ) // <2> + .range( 0.0, 10.0 ) .range( 10.0, 20.0 ) - .range( 20.0, null ) // <3> - .value( f.sum().field( "price", Double.class ) ) + .range( 20.0, null ) + .value( f.avg().field( "ratings", Double.class, ValueModel.RAW ) ) // <2> ) .fetch( 20 ); - Map, Double> countsByPrice = result.aggregation( countsByPriceKey ); - // end::range-sum[] + Map, Double> countsByPrice = result.aggregation( avgRatingByPriceKey ); + // end::range-avg[] assertThat( countsByPrice ) .containsExactly( - entry( Range.canonical( 0.0, 10.0 ), 7.99 ), - entry( Range.canonical( 10.0, 20.0 ), 35.98 ), - entry( Range.canonical( 20.0, null ), 24.99 ) + entry( Range.canonical( 0.0, 10.0 ), 4.0 ), + entry( Range.canonical( 10.0, 20.0 ), 3.6 ), + entry( Range.canonical( 20.0, null ), 3.2 ) ); } ); @@ -584,8 +582,8 @@ void sum() { .aggregation( sumPricesKey, f -> f.sum().field( "price", Double.class ) ) // <1> .fetch( 20 ); Double sumPrices = result.aggregation( sumPricesKey ); - assertThat( sumPrices ).isEqualTo( 60.97 ); // end::sums[] + assertThat( sumPrices ).isEqualTo( 60.97 ); } ); } @@ -599,8 +597,8 @@ void min() { .aggregation( oldestReleaseKey, f -> f.min().field( "releaseDate", Date.class ) ) // <1> .fetch( 20 ); Date oldestRelease = result.aggregation( oldestReleaseKey ); - assertThat( 
oldestRelease ).isEqualTo( Date.valueOf( "1950-12-02" ) ); // end::min[] + assertThat( oldestRelease ).isEqualTo( Date.valueOf( "1950-12-02" ) ); } ); } @@ -614,28 +612,43 @@ void max() { .aggregation( mostRecentReleaseKey, f -> f.max().field( "releaseDate", Date.class ) ) // <1> .fetch( 20 ); Date mostRecentRelease = result.aggregation( mostRecentReleaseKey ); - // end::max[] + assertThat( mostRecentRelease ).isEqualTo( Date.valueOf( "1983-01-01" ) ); } ); } @Test - void count() { + void countDocuments() { withinSearchSession( searchSession -> { - // tag::count[] - AggregationKey countPricesKey = AggregationKey.of( "countPrices" ); + // tag::count-documents[] + AggregationKey countBooksKey = AggregationKey.of( "countBooks" ); SearchResult result = searchSession.search( Book.class ) .where( f -> f.match().field( "genre" ).matching( Genre.SCIENCE_FICTION ) ) - .aggregation( countPricesKey, f -> f.countValues().field( "price" ) ) // <1> + .aggregation( countBooksKey, f -> f.countDocuments() ) // <1> .fetch( 20 ); - Long countPrices = result.aggregation( countPricesKey ); + Long countPrices = result.aggregation( countBooksKey ); + // end::count-documents[] assertThat( countPrices ).isEqualTo( 3L ); + } ); + } + + @Test + void countValues() { + withinSearchSession( searchSession -> { + // tag::count[] + AggregationKey countRatingsKey = AggregationKey.of( "countRatings" ); + SearchResult result = searchSession.search( Book.class ) + .where( f -> f.match().field( "genre" ).matching( Genre.SCIENCE_FICTION ) ) + .aggregation( countRatingsKey, f -> f.countValues().field( "ratings" ) ) // <1> + .fetch( 20 ); + Long countPrices = result.aggregation( countRatingsKey ); // end::count[] + assertThat( countPrices ).isEqualTo( 15L ); } ); } @Test - void countDistinct() { + void countDistinctValues() { withinSearchSession( searchSession -> { // tag::count-distinct[] AggregationKey countDistinctPricesKey = AggregationKey.of( "countDistinctPrices" ); @@ -644,8 +657,8 @@ void countDistinct() { .aggregation( countDistinctPricesKey, f -> f.countDistinctValues().field( "price" ) ) // <1> .fetch( 20 ); Long countDistinctPrices = result.aggregation( countDistinctPricesKey ); - assertThat( countDistinctPrices ).isEqualTo( 3L ); // end::count-distinct[] + assertThat( countDistinctPrices ).isEqualTo( 3L ); } ); } @@ -679,6 +692,7 @@ private void initData() { book1.setPrice( 24.99 ); book1.setGenre( Genre.SCIENCE_FICTION ); book1.setReleaseDate( Date.valueOf( "1950-12-02" ) ); + book1.setRatings( List.of( 5, 5, 4, 2, 0 ) ); addEdition( book1, "Mass Market Paperback, 1st Edition", 9.99 ); addEdition( book1, "Kindle", 9.99 ); @@ -688,6 +702,7 @@ private void initData() { book2.setPrice( 19.99 ); book2.setGenre( Genre.SCIENCE_FICTION ); book2.setReleaseDate( Date.valueOf( "1953-10-01" ) ); + book2.setRatings( List.of( 5, 5, 3, 3, 5 ) ); addEdition( book2, "Mass Market Paperback, 12th Edition", 4.99 ); addEdition( book2, "Kindle", 19.99 ); @@ -697,6 +712,7 @@ private void initData() { book3.setPrice( 15.99 ); book3.setGenre( Genre.SCIENCE_FICTION ); book3.setReleaseDate( Date.valueOf( "1983-01-01" ) ); + book3.setRatings( List.of( 3, 3, 3, 3, 3 ) ); addEdition( book3, "Mass Market Paperback, 59th Edition", 3.99 ); addEdition( book3, "Kindle", 5.99 ); @@ -706,6 +722,7 @@ private void initData() { book4.setPrice( 7.99 ); book4.setGenre( Genre.CRIME_FICTION ); book4.setReleaseDate( Date.valueOf( "2008-02-05" ) ); + book4.setRatings( List.of( 4, 4, 4, 4, 4 ) ); addEdition( book4, "Mass Market Paperback, 2nd Edition", 10.99 ); 
addEdition( book4, "Kindle", 12.99 ); diff --git a/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/Book.java b/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/Book.java index 6c58737d78c..458b5e41eee 100644 --- a/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/Book.java +++ b/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/Book.java @@ -9,6 +9,7 @@ import java.util.List; import jakarta.persistence.CascadeType; +import jakarta.persistence.ElementCollection; import jakarta.persistence.Entity; import jakarta.persistence.Id; import jakarta.persistence.OneToMany; @@ -42,6 +43,10 @@ public class Book { @GenericField(aggregable = Aggregable.YES) private Date releaseDate; + @GenericField(aggregable = Aggregable.YES) + @ElementCollection + private List ratings; + @OneToMany(mappedBy = "book", cascade = CascadeType.ALL) @OrderColumn @IndexedEmbedded(structure = ObjectStructure.NESTED) @@ -97,4 +102,12 @@ public List getEditions() { public void setEditions(List editions) { this.editions = editions; } + + public List getRatings() { + return ratings; + } + + public void setRatings(List ratings) { + this.ratings = ratings; + } } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeValueStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeValueStep.java index 963796503f4..8e8c29f61fd 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeValueStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeValueStep.java @@ -9,6 +9,7 @@ import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; +import org.hibernate.search.util.common.annotation.Incubating; import org.hibernate.search.util.common.data.Range; /** @@ -18,13 +19,35 @@ * @param The type of factory used to create predicates in {@link RangeAggregationOptionsStep#filter(Function)}. * @param The type of the targeted field. */ +@Incubating public interface RangeAggregationRangeValueStep< SR, PDF extends TypedSearchPredicateFactory, F> { - + /** + * Specify which aggregation to apply to the documents within the range. + *

+ * This allows to "group" the documents by "ranges" and then apply one of the aggregations from {@link SearchAggregationFactory} + * to the documents in that group. + * + * @param aggregation The aggregation to apply to the documents within each range. + * @return The next step in range aggregation definition. + * @param The type of the aggregated results within a range. + */ + @Incubating RangeAggregationOptionsStep, T>> value(SearchAggregation aggregation); + /** + * Specify which aggregation to apply to the documents within the range. + *

+ * This allows to "group" the documents by "ranges" and then apply one of the aggregations from {@link SearchAggregationFactory} + * to the documents in that group. + * + * @param aggregation The aggregation to apply to the documents within each range. + * @return The next step in range aggregation definition. + * @param The type of the aggregated results within a range. + */ + @Incubating default RangeAggregationOptionsStep, T>> value(AggregationFinalStep aggregation) { return value( aggregation.toAggregation() ); } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationRangeValueStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationRangeValueStep.java index 316cad75133..63d71b63602 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationRangeValueStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationRangeValueStep.java @@ -9,6 +9,7 @@ import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; +import org.hibernate.search.util.common.annotation.Incubating; /** * The step in a "terms" aggregation definition where the aggregation value for the term can be set. @@ -17,6 +18,7 @@ * @param The type of factory used to create predicates in {@link TermsAggregationOptionsStep#filter(Function)}. * @param The type of the targeted field. */ +@Incubating public interface TermsAggregationRangeValueStep< SR, S extends TermsAggregationOptionsStep, @@ -24,8 +26,30 @@ public interface TermsAggregationRangeValueStep< F, A> extends TermsAggregationOptionsStep { + /** + * Specify which aggregation to apply to the documents with same terms. + *

+ * This allows to "group" the documents by "terms" and then apply one of the aggregations from {@link SearchAggregationFactory} + * to the documents in that group. + * + * @param aggregation The aggregation to apply to the documents for each term. + * @return The next step in terms aggregation definition. + * @param The type of the aggregated results for a term. + */ + @Incubating TermsAggregationOptionsStep> value(SearchAggregation aggregation); + /** + * Specify which aggregation to apply to the documents with same terms. + *

+ * This allows to "group" the documents by "terms" and then apply one of the aggregations from {@link SearchAggregationFactory} + * to the documents in that group. + * + * @param aggregation The aggregation to apply to the documents for each term. + * @return The next step in terms aggregation definition. + * @param The type of the aggregated results for a term. + */ + @Incubating default TermsAggregationOptionsStep> value(AggregationFinalStep aggregation) { return value( aggregation.toAggregation() ); } From 572e27d49d5296c2e411df5485120101ab300635 Mon Sep 17 00:00:00 2001 From: marko-bekhta Date: Tue, 8 Jul 2025 18:18:46 +0200 Subject: [PATCH 21/23] HSEARCH-3666 "Enable" "non-default" sorts in Lucene's terms aggregation tests --- .../reference/_search-dsl-aggregation.adoc | 8 -------- .../util/LuceneTckBackendFeatures.java | 6 ------ .../TermsAggregationSpecificsIT.java | 20 ------------------- .../testsupport/util/TckBackendFeatures.java | 4 ---- 4 files changed, 38 deletions(-) diff --git a/documentation/src/main/asciidoc/public/reference/_search-dsl-aggregation.adoc b/documentation/src/main/asciidoc/public/reference/_search-dsl-aggregation.adoc index 3aeebf925cf..ad9e9aafff4 100644 --- a/documentation/src/main/asciidoc/public/reference/_search-dsl-aggregation.adoc +++ b/documentation/src/main/asciidoc/public/reference/_search-dsl-aggregation.adoc @@ -174,14 +174,6 @@ i.e. the terms with the most matching documents appear first. Several other orders are available. -[WARNING] -==== -With the Lucene backend, due to limitations of the current implementation, -using any order other than the default one (by descending count) -may lead to incorrect results. -See https://hibernate.atlassian.net/browse/HSEARCH-3666[HSEARCH-3666] for more information. -==== - You can order entries by ascending term value: // Search 5 anchors backward compatibility diff --git a/integrationtest/backend/lucene/src/test/java/org/hibernate/search/integrationtest/backend/lucene/testsupport/util/LuceneTckBackendFeatures.java b/integrationtest/backend/lucene/src/test/java/org/hibernate/search/integrationtest/backend/lucene/testsupport/util/LuceneTckBackendFeatures.java index fb41d9eb9da..69ebd14a0f8 100644 --- a/integrationtest/backend/lucene/src/test/java/org/hibernate/search/integrationtest/backend/lucene/testsupport/util/LuceneTckBackendFeatures.java +++ b/integrationtest/backend/lucene/src/test/java/org/hibernate/search/integrationtest/backend/lucene/testsupport/util/LuceneTckBackendFeatures.java @@ -46,12 +46,6 @@ class LuceneTckBackendFeatures extends TckBackendFeatures { - @Override - public boolean nonDefaultOrderInTermsAggregations() { - // TODO HSEARCH-3666 Lucene terms aggregations (discrete facets) may return wrong results for any sort other than the default one - return false; - } - @Override public boolean projectionPreservesNulls() { return false; diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/TermsAggregationSpecificsIT.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/TermsAggregationSpecificsIT.java index 152e7a2996c..ade12db549d 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/TermsAggregationSpecificsIT.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/TermsAggregationSpecificsIT.java @@ -33,7 +33,6 @@ import 
org.hibernate.search.integrationtest.backend.tck.testsupport.types.FieldTypeDescriptor;
 import org.hibernate.search.integrationtest.backend.tck.testsupport.types.StandardFieldTypeDescriptor;
 import org.hibernate.search.integrationtest.backend.tck.testsupport.util.SimpleFieldModelsByType;
-import org.hibernate.search.integrationtest.backend.tck.testsupport.util.TckConfiguration;
 import org.hibernate.search.integrationtest.backend.tck.testsupport.util.ValueWrapper;
 import org.hibernate.search.integrationtest.backend.tck.testsupport.util.extension.SearchSetupHelper;
 import org.hibernate.search.util.impl.integrationtest.mapper.stub.BulkIndexer;
@@ -230,8 +229,6 @@ void orderByCountDescending(FieldTypeDescriptor fieldType, DataSet data
 	@MethodSource("params")
 	@PortedFromSearch5(original = "org.hibernate.search.test.query.facet.SimpleFacetingTest.testCountSortOrderAsc")
 	void orderByCountAscending(FieldTypeDescriptor fieldType, DataSet dataSet) {
-		assumeNonDefaultOrdersSupported();
-
 		String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName;

 		AggregationKey> aggregationKey = AggregationKey.of( AGGREGATION_NAME );
@@ -258,8 +255,6 @@ void orderByCountAscending(FieldTypeDescriptor fieldType, DataSet dataS
 	@ParameterizedTest(name = "{0}")
 	@MethodSource("params")
 	void orderByTermDescending(FieldTypeDescriptor fieldType, DataSet dataSet) {
-		assumeNonDefaultOrdersSupported();
-
 		String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName;

 		AggregationKey> aggregationKey = AggregationKey.of( AGGREGATION_NAME );
@@ -287,8 +282,6 @@ void orderByTermDescending(FieldTypeDescriptor fieldType, DataSet dataS
 	@MethodSource("params")
 	@PortedFromSearch5(original = "org.hibernate.search.test.query.facet.SimpleFacetingTest.testAlphabeticalSortOrder")
 	void orderByTermAscending(FieldTypeDescriptor fieldType, DataSet dataSet) {
-		assumeNonDefaultOrdersSupported();
-
 		String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName;

 		AggregationKey> aggregationKey = AggregationKey.of( AGGREGATION_NAME );
@@ -412,8 +405,6 @@ void minDocumentCount_zero_noMatch(FieldTypeDescriptor fieldType, DataSet<
 	@ParameterizedTest(name = "{0}")
 	@MethodSource("params")
 	void minDocumentCount_zero_noMatch_orderByTermDescending(FieldTypeDescriptor fieldType, DataSet dataSet) {
-		assumeNonDefaultOrdersSupported();
-
 		String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName;

 		AggregationKey> aggregationKey = AggregationKey.of( AGGREGATION_NAME );
@@ -491,8 +482,6 @@ void maxTermCount_positive(FieldTypeDescriptor fieldType, DataSet dataS
 	@ParameterizedTest(name = "{0}")
 	@MethodSource("params")
 	void maxTermCount_positive_orderByTermAscending(FieldTypeDescriptor fieldType, DataSet dataSet) {
-		assumeNonDefaultOrdersSupported();
-
 		String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName;

 		AggregationKey> aggregationKey = AggregationKey.of( AGGREGATION_NAME );
@@ -521,8 +510,6 @@ void maxTermCount_positive_orderByTermAscending(FieldTypeDescriptor fieldT
 	@ParameterizedTest(name = "{0}")
 	@MethodSource("params")
 	void maxTermCount_positive_orderByCountAscending(FieldTypeDescriptor fieldType, DataSet dataSet) {
-		assumeNonDefaultOrdersSupported();
-
 		String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName;

 		AggregationKey> aggregationKey = AggregationKey.of( AGGREGATION_NAME );
@@ -710,13 +697,6 @@ void terms_max(FieldTypeDescriptor fieldType, DataSet dataSet) {
 		return index.createScope().query().where( f -> f.matchAll() );
 	}

-	private void assumeNonDefaultOrdersSupported() {
-		assumeTrue(
-				TckConfiguration.get().getBackendFeatures().nonDefaultOrderInTermsAggregations(),
-				"Non-default orders are not supported for terms aggregations with this backend"
-		);
-	}
-
 	@SuppressWarnings("unchecked")
 	private Consumer> containsExactly(Consumer> expectationBuilder,
 			FieldTypeDescriptor fieldType) {
diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/util/TckBackendFeatures.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/util/TckBackendFeatures.java
index df62b670195..a563381bf0c 100644
--- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/util/TckBackendFeatures.java
+++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/util/TckBackendFeatures.java
@@ -32,10 +32,6 @@ public boolean nonCanonicalRangeInAggregations() {
 		return true;
 	}

-	public boolean nonDefaultOrderInTermsAggregations() {
-		return true;
-	}
-
 	public boolean projectionPreservesNulls() {
 		return true;
 	}

From bdad81dcf8888ec7ea3497c9e7d803c339808934 Mon Sep 17 00:00:00 2001
From: marko-bekhta
Date: Thu, 10 Jul 2025 19:17:22 +0200
Subject: [PATCH 22/23] HSEARCH-3661 Fix the interface names

---
 .../aggregation/dsl/RangeAggregationRangeMoreStep.java    | 2 +-
 ...RangeValueStep.java => RangeAggregationValueStep.java} | 2 +-
 .../search/aggregation/dsl/TermsAggregationFieldStep.java | 8 ++++----
 ...RangeValueStep.java => TermsAggregationValueStep.java} | 2 +-
 .../dsl/impl/TermsAggregationFieldStepImpl.java           | 4 ++--
 .../dsl/impl/TermsAggregationOptionsStepImpl.java         | 4 ++--
 6 files changed, 11 insertions(+), 11 deletions(-)
 rename engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/{RangeAggregationRangeValueStep.java => RangeAggregationValueStep.java} (97%)
 rename engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/{TermsAggregationRangeValueStep.java => TermsAggregationValueStep.java} (97%)

diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeMoreStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeMoreStep.java
index 7d6d561293a..2094836036c 100644
--- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeMoreStep.java
+++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeMoreStep.java
@@ -31,5 +31,5 @@ public interface RangeAggregationRangeMoreStep<
 		A>
 		extends RangeAggregationOptionsStep, A>>,
 		RangeAggregationRangeStep,
-		RangeAggregationRangeValueStep {
+		RangeAggregationValueStep {
 }
diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeValueStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationValueStep.java
similarity index 97%
rename from engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeValueStep.java
rename to engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationValueStep.java
index 8e8c29f61fd..6b7bce23f0c 100644
--- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeValueStep.java
+++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationValueStep.java
@@ -20,7 +20,7 @@
  * @param  The type of the targeted field.
  */
 @Incubating
-public interface RangeAggregationRangeValueStep<
+public interface RangeAggregationValueStep<
 		SR,
 		PDF extends TypedSearchPredicateFactory,
 		F> {
diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationFieldStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationFieldStep.java
index 249f885b62c..f61a80db2d4 100644
--- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationFieldStep.java
+++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationFieldStep.java
@@ -27,7 +27,7 @@ public interface TermsAggregationFieldStep
	 * @param  The type of field values.
	 * @return The next step.
	 */
-	default TermsAggregationRangeValueStep> field(String fieldPath, Class type) {
+	default TermsAggregationValueStep> field(String fieldPath, Class type) {
 		return field( fieldPath, type, ValueModel.MAPPING );
 	}

@@ -43,7 +43,7 @@ public interface TermsAggregationFieldStep
-	TermsAggregationRangeValueStep> field(String fieldPath, Class type,
+	default TermsAggregationValueStep> field(String fieldPath, Class type,
 			org.hibernate.search.engine.search.common.ValueConvert convert) {
 		return field( fieldPath, type,
 				org.hibernate.search.engine.search.common.ValueConvert.toValueModel( convert ) );
@@ -59,7 +59,7 @@ public interface TermsAggregationFieldStep
-	TermsAggregationRangeValueStep> field(String fieldPath, Class type,
+	TermsAggregationValueStep> field(String fieldPath, Class type,
 			ValueModel valueModel);

	/**
@@ -69,7 +69,7 @@ public interface TermsAggregationFieldStep
	 * @param  The type of field values.
	 * @return The next step.
	 */
-	default TermsAggregationRangeValueStep> field(
+	default TermsAggregationValueStep> field(
 			TermsAggregationFieldReference fieldReference) {
 		return field( fieldReference.absolutePath(), fieldReference.aggregationType(), fieldReference.valueModel() );
 	}
diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationRangeValueStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationValueStep.java
similarity index 97%
rename from engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationRangeValueStep.java
rename to engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationValueStep.java
index 63d71b63602..0e4420e8559 100644
--- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationRangeValueStep.java
+++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationValueStep.java
@@ -19,7 +19,7 @@
  * @param  The type of the targeted field.
  */
 @Incubating
-public interface TermsAggregationRangeValueStep<
+public interface TermsAggregationValueStep<
 		SR,
 		S extends TermsAggregationOptionsStep,
 		PDF extends TypedSearchPredicateFactory,
diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationFieldStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationFieldStepImpl.java
index d3f5fd9f342..8b37ad8bde2 100644
--- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationFieldStepImpl.java
+++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationFieldStepImpl.java
@@ -7,7 +7,7 @@
 import java.util.Map;

 import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationFieldStep;
-import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationRangeValueStep;
+import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationValueStep;
 import org.hibernate.search.engine.search.aggregation.dsl.spi.SearchAggregationDslContext;
 import org.hibernate.search.engine.search.aggregation.spi.AggregationTypeKeys;
 import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder;
@@ -24,7 +24,7 @@ public TermsAggregationFieldStepImpl(SearchAggregationDslContext
-	TermsAggregationRangeValueStep> field(String fieldPath, Class type,
+	public TermsAggregationValueStep> field(String fieldPath, Class type,
 			ValueModel valueModel) {
 		Contracts.assertNotNull( fieldPath, "fieldPath" );
 		Contracts.assertNotNull( type, "type" );
diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationOptionsStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationOptionsStepImpl.java
index 634417581d7..3c7fef57e17 100644
--- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationOptionsStepImpl.java
+++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationOptionsStepImpl.java
@@ -9,7 +9,7 @@

 import org.hibernate.search.engine.search.aggregation.SearchAggregation;
 import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationOptionsStep;
-import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationRangeValueStep;
+import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationValueStep;
 import org.hibernate.search.engine.search.aggregation.dsl.spi.SearchAggregationDslContext;
 import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder;
 import org.hibernate.search.engine.search.predicate.SearchPredicate;
@@ -18,7 +18,7 @@ import org.hibernate.search.util.common.impl.Contracts;

 class TermsAggregationOptionsStepImpl, F, V>
-		implements TermsAggregationRangeValueStep, PDF, F, Map> {
+		implements TermsAggregationValueStep, PDF, F, Map> {

	private final TermsAggregationBuilder builder;
	private final SearchAggregationDslContext dslContext;

From 73371d935a3b7ba41c93d832a358e993b45fd08d Mon Sep 17 00:00:00 2001
From: marko-bekhta
Date: Thu, 10 Jul 2025 18:24:41 +0200
Subject: [PATCH 23/23] HSEARCH-3661 Make eclipse compiler happier with generics in more complex aggregations

---
 .../RangeAggregationSpecificsIT.java | 17 ++++++++++-------
 .../TermsAggregationSpecificsIT.java |  5 +++--
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/RangeAggregationSpecificsIT.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/RangeAggregationSpecificsIT.java
index 92ac8fe41e9..34b45ddaf9a 100644
--- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/RangeAggregationSpecificsIT.java
+++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/RangeAggregationSpecificsIT.java
@@ -26,6 +26,7 @@
 import org.hibernate.search.engine.backend.types.Aggregable;
 import org.hibernate.search.engine.backend.types.Searchable;
 import org.hibernate.search.engine.search.aggregation.AggregationKey;
+import org.hibernate.search.engine.search.aggregation.dsl.AggregationFinalStep;
 import org.hibernate.search.engine.search.aggregation.dsl.SearchAggregationFactory;
 import org.hibernate.search.engine.search.query.dsl.SearchQueryOptionsStep;
 import org.hibernate.search.integrationtest.backend.tck.testsupport.operations.AggregationDescriptor;
@@ -605,7 +606,8 @@ void rangesBucket_min(FieldTypeDescriptor fieldType, DataSet dataSet) {
 								Range.canonical( dataSet.ascendingValues.get( 3 ),
 										dataSet.ascendingValues.get( 5 ) ),
 								Range.canonical( dataSet.ascendingValues.get( 5 ), null )
-						) ).value( f.min().field( fieldPath, fieldType.getJavaType() ) )
+						) )
+								.value( (AggregationFinalStep) f.min().field( fieldPath, fieldType.getJavaType() ) )
 				)
 				.routing( dataSet.name )
 				.toQuery()
@@ -644,7 +646,8 @@ void rangesBucket_max(FieldTypeDescriptor fieldType, DataSet dataSet) {
 								Range.canonical( dataSet.ascendingValues.get( 3 ),
 										dataSet.ascendingValues.get( 5 ) ),
 								Range.canonical( dataSet.ascendingValues.get( 5 ), null )
-						) ).value( f.max().field( fieldPath, fieldType.getJavaType() ) )
+						) )
+								.value( (AggregationFinalStep) f.max().field( fieldPath, fieldType.getJavaType() ) )
 				)
 				.routing( dataSet.name )
 				.toQuery()
@@ -764,8 +767,8 @@ void rangesBucket_terms_countImplicit(FieldTypeDescriptor fieldType, DataS
 										dataSet.ascendingValues.get( 5 ) ),
 								Range.canonical( dataSet.ascendingValues.get( 5 ), null )
 						) )
-								.value( f.terms().field( index.binding().bucketMultiValue.relativeFieldName,
-										Integer.class ) )
+								.value( (AggregationFinalStep>) f.terms()
+										.field( index.binding().bucketMultiValue.relativeFieldName, Integer.class ) )
 				)
 				.routing( dataSet.name )
 				.toQuery()
@@ -805,10 +808,10 @@ void rangesBucket_terms_sum(FieldTypeDescriptor fieldType, DataSet data
 										dataSet.ascendingValues.get( 5 ) ),
 								Range.canonical( dataSet.ascendingValues.get( 5 ), null )
 						) )
-								.value( f.terms()
+								.value( (AggregationFinalStep>) f.terms()
 										.field( index.binding().bucketMultiValue.relativeFieldName, Integer.class )
-										.value( f.sum().field( index.binding().bucketMultiValue.relativeFieldName,
-												Integer.class ) ) )
+										.value( (AggregationFinalStep) f.sum().field(
+												index.binding().bucketMultiValue.relativeFieldName, Integer.class ) ) )
 				)
 				.routing( dataSet.name )
 				.toQuery()
diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/TermsAggregationSpecificsIT.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/TermsAggregationSpecificsIT.java
index ade12db549d..bcf966ffa7e 100644
--- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/TermsAggregationSpecificsIT.java
+++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/TermsAggregationSpecificsIT.java
@@ -27,6 +27,7 @@
 import org.hibernate.search.engine.backend.types.Aggregable;
 import org.hibernate.search.engine.backend.types.Searchable;
 import org.hibernate.search.engine.search.aggregation.AggregationKey;
+import org.hibernate.search.engine.search.aggregation.dsl.AggregationFinalStep;
 import org.hibernate.search.engine.search.query.dsl.SearchQueryOptionsStep;
 import org.hibernate.search.integrationtest.backend.tck.testsupport.operations.AggregationDescriptor;
 import org.hibernate.search.integrationtest.backend.tck.testsupport.operations.TermsAggregationDescriptor;
@@ -648,7 +649,7 @@ void terms_min(FieldTypeDescriptor fieldType, DataSet dataSet) {
 				.aggregation( aggregationKey,
 						f -> f.terms().field( fieldPath, fieldType.getJavaType() )
 								// while maybe silly as min/max == the same term as the key it is here just to test the nesting and aggregations:
-								.value( f.min().field( fieldPath, fieldType.getJavaType() ) )
+								.value( (AggregationFinalStep) f.min().field( fieldPath, fieldType.getJavaType() ) )
 				)
 						.routing( dataSet.name ) )
 				.aggregation(
@@ -677,7 +678,7 @@ void terms_max(FieldTypeDescriptor fieldType, DataSet dataSet) {
 				.aggregation( aggregationKey,
 						f -> f.terms().field( fieldPath, fieldType.getJavaType() )
 								// while maybe silly as min/max == the same term as the key it is here just to test the nesting and aggregations:
-								.value( f.max().field( fieldPath, fieldType.getJavaType() ) )
+								.value( (AggregationFinalStep) f.max().field( fieldPath, fieldType.getJavaType() ) )
 				)
 						.routing( dataSet.name ) )
 				.aggregation(