diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/document/model/dsl/impl/ElasticsearchIndexRootBuilder.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/document/model/dsl/impl/ElasticsearchIndexRootBuilder.java index b5f0f8b0250..20a55aeeffc 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/document/model/dsl/impl/ElasticsearchIndexRootBuilder.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/document/model/dsl/impl/ElasticsearchIndexRootBuilder.java @@ -26,6 +26,7 @@ import org.hibernate.search.backend.elasticsearch.lowlevel.index.mapping.impl.RoutingType; import org.hibernate.search.backend.elasticsearch.lowlevel.index.settings.impl.IndexSettings; import org.hibernate.search.backend.elasticsearch.lowlevel.index.settings.impl.PropertyMappingIndexSettingsContributor; +import org.hibernate.search.backend.elasticsearch.search.aggregation.impl.ElasticsearchCountDocumentAggregation; import org.hibernate.search.backend.elasticsearch.types.dsl.ElasticsearchIndexFieldTypeFactory; import org.hibernate.search.backend.elasticsearch.types.dsl.provider.impl.ElasticsearchIndexFieldTypeFactoryProvider; import org.hibernate.search.backend.elasticsearch.types.impl.ElasticsearchIndexCompositeNodeType; @@ -45,6 +46,7 @@ import org.hibernate.search.engine.common.tree.spi.TreeNodeInclusion; import org.hibernate.search.engine.mapper.mapping.building.spi.IndexFieldTypeDefaultsProvider; import org.hibernate.search.engine.reporting.spi.EventContexts; +import org.hibernate.search.engine.search.aggregation.spi.AggregationTypeKeys; import org.hibernate.search.util.common.reporting.EventContext; public class ElasticsearchIndexRootBuilder extends AbstractElasticsearchIndexCompositeNodeBuilder @@ -84,6 +86,8 @@ public ElasticsearchIndexRootBuilder(ElasticsearchIndexFieldTypeFactoryProvider this.customIndexMapping = customIndexMapping; 
this.defaultDynamicType = DynamicType.create( dynamicMapping ); + this.typeBuilder.queryElementFactory( AggregationTypeKeys.COUNT_DOCUMENTS, + ElasticsearchCountDocumentAggregation.factory( false ) ); this.addDefaultImplicitFields(); } diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchBucketAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchBucketAggregation.java index 3331b0838ac..3808d343d20 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchBucketAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchBucketAggregation.java @@ -8,7 +8,6 @@ import java.util.Map; import org.hibernate.search.backend.elasticsearch.gson.impl.JsonAccessor; -import org.hibernate.search.backend.elasticsearch.logging.impl.ElasticsearchClientLog; import org.hibernate.search.backend.elasticsearch.search.common.impl.ElasticsearchSearchIndexScope; import org.hibernate.search.backend.elasticsearch.search.common.impl.ElasticsearchSearchIndexValueFieldContext; import org.hibernate.search.backend.elasticsearch.search.predicate.impl.ElasticsearchSearchPredicate; @@ -29,21 +28,20 @@ public abstract class AbstractElasticsearchBucketAggregation private static final String ROOT_DOC_COUNT_NAME = "root_doc_count"; private static final JsonAccessor REQUEST_AGGREGATIONS_ROOT_DOC_COUNT_ACCESSOR = JsonAccessor.root().property( "aggregations" ).property( ROOT_DOC_COUNT_NAME ).asObject(); - private static final JsonAccessor RESPONSE_DOC_COUNT_ACCESSOR = - JsonAccessor.root().property( "doc_count" ).asLong(); - private static final JsonAccessor RESPONSE_ROOT_DOC_COUNT_ACCESSOR = - JsonAccessor.root().property( ROOT_DOC_COUNT_NAME ).property( "doc_count" 
).asLong(); + + protected static final String INNER_EXTRACTOR_KEY = "innerExtractorKey"; + protected static final String INNER_EXTRACTOR = "innerExtractor"; AbstractElasticsearchBucketAggregation(AbstractBuilder builder) { super( builder ); } @Override - protected final JsonObject doRequest(AggregationRequestContext context) { + protected final JsonObject doRequest(AggregationRequestBuildingContextContext context) { JsonObject outerObject = new JsonObject(); JsonObject innerObject = new JsonObject(); - doRequest( outerObject, innerObject ); + doRequest( outerObject, innerObject, context ); if ( isNested() ) { JsonObject rootDocCountSubAggregationOuterObject = new JsonObject(); @@ -56,20 +54,8 @@ protected final JsonObject doRequest(AggregationRequestContext context) { return outerObject; } - protected abstract void doRequest(JsonObject outerObject, JsonObject innerObject); - - protected final long getBucketDocCount(JsonObject bucket) { - if ( isNested() ) { - // We must return the number of root documents, - // not the number of leaf documents that Elasticsearch returns by default. 
- return RESPONSE_ROOT_DOC_COUNT_ACCESSOR.get( bucket ) - .orElseThrow( ElasticsearchClientLog.INSTANCE::elasticsearchResponseMissingData ); - } - else { - return RESPONSE_DOC_COUNT_ACCESSOR.get( bucket ) - .orElseThrow( ElasticsearchClientLog.INSTANCE::elasticsearchResponseMissingData ); - } - } + protected abstract void doRequest(JsonObject outerObject, JsonObject innerObject, + AggregationRequestBuildingContextContext context); protected abstract class AbstractBucketExtractor extends AbstractExtractor> { diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchNestableAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchNestableAggregation.java index 54de0a2d3c6..94ce1f2aeda 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchNestableAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AbstractElasticsearchNestableAggregation.java @@ -50,11 +50,12 @@ public abstract class AbstractElasticsearchNestableAggregation extends Abstra @Override public final Extractor request(AggregationRequestContext context, AggregationKey key, JsonObject jsonAggregations) { - jsonAggregations.add( key.name(), request( context ) ); - return extractor( context ); + AggregationRequestBuildingContextContext buildingContext = new AggregationRequestBuildingContextContext( context ); + jsonAggregations.add( key.name(), request( buildingContext ) ); + return extractor( buildingContext ); } - private JsonObject request(AggregationRequestContext context) { + private JsonObject request(AggregationRequestBuildingContextContext context) { JsonObject result = doRequest( context ); if ( nestedPathHierarchy.isEmpty() ) { @@ -90,9 +91,9 @@ private JsonObject 
request(AggregationRequestContext context) { return result; } - protected abstract JsonObject doRequest(AggregationRequestContext context); + protected abstract JsonObject doRequest(AggregationRequestBuildingContextContext context); - protected abstract Extractor extractor(AggregationRequestContext context); + protected abstract Extractor extractor(AggregationRequestBuildingContextContext context); protected abstract static class AbstractExtractor implements Extractor { diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AggregationRequestBuildingContextContext.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AggregationRequestBuildingContextContext.java new file mode 100644 index 00000000000..35999aadeaa --- /dev/null +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/AggregationRequestBuildingContextContext.java @@ -0,0 +1,76 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.elasticsearch.search.aggregation.impl; + +import java.util.HashMap; +import java.util.Map; +import java.util.Objects; + +import org.hibernate.search.backend.elasticsearch.search.predicate.impl.PredicateRequestContext; +import org.hibernate.search.util.common.annotation.Incubating; + +/** + * Sometimes we need to pass something we created while building up the json in one of the "doRequest" methods + * in the aggregation build up to the "later" steps e.g. to when we create the extractor. 
+ */ +@Incubating +public final class AggregationRequestBuildingContextContext implements AggregationRequestContext { + private final AggregationRequestContext aggregationRequestContext; + private final Map, Object> buildingContext = new HashMap<>(); + + public AggregationRequestBuildingContextContext(AggregationRequestContext aggregationRequestContext) { + this.aggregationRequestContext = aggregationRequestContext; + } + + public T get(Key key) { + Object value = buildingContext.get( key ); + return key.cast( value ); + } + + public void add(Key key, Object value) { + buildingContext.put( key, value ); + } + + public AggregationRequestContext rootAggregationRequestContext() { + return aggregationRequestContext; + } + + @Override + public PredicateRequestContext getRootPredicateContext() { + return aggregationRequestContext.getRootPredicateContext(); + } + + public static Key buildingContextKey(String name) { + return new Key<>( name ); + } + + public static class Key { + + private final String name; + + private Key(String name) { + this.name = name; + } + + @SuppressWarnings("unchecked") + private V cast(Object value) { + return (V) value; + } + + @Override + public boolean equals(Object o) { + if ( o == null || getClass() != o.getClass() ) { + return false; + } + Key key = (Key) o; + return Objects.equals( name, key.name ); + } + + @Override + public int hashCode() { + return Objects.hashCode( name ); + } + } +} diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchCountDocumentAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchCountDocumentAggregation.java new file mode 100644 index 00000000000..70bb8406883 --- /dev/null +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchCountDocumentAggregation.java @@ -0,0 +1,121 @@ +/* + * 
SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.elasticsearch.search.aggregation.impl; + +import org.hibernate.search.backend.elasticsearch.gson.impl.JsonAccessor; +import org.hibernate.search.backend.elasticsearch.logging.impl.ElasticsearchClientLog; +import org.hibernate.search.backend.elasticsearch.logging.impl.QueryLog; +import org.hibernate.search.backend.elasticsearch.search.common.impl.ElasticsearchSearchIndexCompositeNodeContext; +import org.hibernate.search.backend.elasticsearch.search.common.impl.ElasticsearchSearchIndexScope; +import org.hibernate.search.backend.elasticsearch.search.query.impl.ElasticsearchSearchQueryExtractContext; +import org.hibernate.search.engine.search.aggregation.AggregationKey; +import org.hibernate.search.engine.search.aggregation.spi.CountDocumentAggregationBuilder; +import org.hibernate.search.engine.search.common.spi.SearchQueryElementFactory; + +import com.google.gson.JsonObject; + +public class ElasticsearchCountDocumentAggregation extends AbstractElasticsearchAggregation { + + private static final JsonAccessor TOTAL_HITS_VALUE_PROPERTY_ACCESSOR = + JsonAccessor.root().property( "hits" ) + .property( "total" ) + .property( "value" ).asLong(); + + private static final JsonAccessor RESPONSE_DOC_COUNT_ACCESSOR = + JsonAccessor.root().property( "doc_count" ).asLong(); + private static final JsonAccessor RESPONSE_ROOT_DOC_COUNT_ACCESSOR = + JsonAccessor.root().property( "root_doc_count" ).property( "doc_count" ).asLong(); + + public static SearchQueryElementFactory, + ElasticsearchSearchIndexCompositeNodeContext> factory(boolean isNested) { + return new ElasticsearchCountDocumentAggregation.Factory( isNested ); + } + + private final boolean isNested; + + private ElasticsearchCountDocumentAggregation(Builder builder) { + super( builder ); + this.isNested = builder.isNested; + } + + @Override + public Extractor request(AggregationRequestContext 
context, AggregationKey key, JsonObject jsonAggregations) { + return new CountDocumentsExtractor( isNested ); + } + + private record CountDocumentsExtractor(boolean isNested) implements Extractor { + + @Override + public Long extract(JsonObject aggregationResult, AggregationExtractContext context) { + if ( aggregationResult != null ) { + if ( isNested ) { + // We must return the number of root documents, + // not the number of leaf documents that Elasticsearch returns by default. + return RESPONSE_ROOT_DOC_COUNT_ACCESSOR.get( aggregationResult ) + .orElseThrow( ElasticsearchClientLog.INSTANCE::elasticsearchResponseMissingData ); + } + else { + return RESPONSE_DOC_COUNT_ACCESSOR.get( aggregationResult ) + .orElseThrow( ElasticsearchClientLog.INSTANCE::elasticsearchResponseMissingData ); + } + } + else if ( context instanceof ElasticsearchSearchQueryExtractContext c ) { + return TOTAL_HITS_VALUE_PROPERTY_ACCESSOR.get( c.getResponseBody() ) + .orElseThrow( ElasticsearchClientLog.INSTANCE::elasticsearchResponseMissingData ); + } + throw ElasticsearchClientLog.INSTANCE.elasticsearchResponseMissingData(); + } + } + + private static class Factory + implements + SearchQueryElementFactory, + ElasticsearchSearchIndexCompositeNodeContext> { + private final boolean isNested; + + public Factory(boolean isNested) { + this.isNested = isNested; + } + + @Override + public CountDocumentAggregationBuilder.TypeSelector create(ElasticsearchSearchIndexScope scope, + ElasticsearchSearchIndexCompositeNodeContext node) { + return new ElasticsearchCountDocumentAggregation.TypeSelector( scope, isNested ); + } + + @Override + public void checkCompatibleWith(SearchQueryElementFactory other) { + if ( !getClass().equals( other.getClass() ) ) { + throw QueryLog.INSTANCE.differentImplementationClassForQueryElement( getClass(), other.getClass() ); + } + } + } + + private record TypeSelector(ElasticsearchSearchIndexScope scope, boolean isNested) + implements 
CountDocumentAggregationBuilder.TypeSelector { + + @Override + public CountDocumentAggregationBuilder type() { + return new Builder( scope, isNested ); + } + } + + private static class Builder extends AbstractBuilder + implements CountDocumentAggregationBuilder { + private final boolean isNested; + + private Builder(ElasticsearchSearchIndexScope scope, boolean isNested) { + super( scope ); + this.isNested = isNested; + } + + @Override + public ElasticsearchCountDocumentAggregation build() { + return new ElasticsearchCountDocumentAggregation( this ); + } + } +} diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchMetricFieldAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchMetricFieldAggregation.java index 108ca7d15e8..f424efdbcde 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchMetricFieldAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchMetricFieldAggregation.java @@ -74,7 +74,7 @@ private ElasticsearchMetricFieldAggregation(Builder builder) { } @Override - protected final JsonObject doRequest(AggregationRequestContext context) { + protected final JsonObject doRequest(AggregationRequestBuildingContextContext context) { JsonObject outerObject = new JsonObject(); JsonObject innerObject = new JsonObject(); @@ -84,7 +84,7 @@ protected final JsonObject doRequest(AggregationRequestContext context) { } @Override - protected Extractor extractor(AggregationRequestContext context) { + protected Extractor extractor(AggregationRequestBuildingContextContext context) { return metricFieldExtractorCreator.extractor( filter ); } diff --git 
a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchMetricLongAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchMetricLongAggregation.java index a5ab9fcc4c3..a0801179b70 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchMetricLongAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchMetricLongAggregation.java @@ -47,7 +47,7 @@ private ElasticsearchMetricLongAggregation(Builder builder) { } @Override - protected final JsonObject doRequest(AggregationRequestContext context) { + protected final JsonObject doRequest(AggregationRequestBuildingContextContext context) { JsonObject outerObject = new JsonObject(); JsonObject innerObject = new JsonObject(); @@ -57,7 +57,7 @@ protected final JsonObject doRequest(AggregationRequestContext context) { } @Override - protected Extractor extractor(AggregationRequestContext context) { + protected Extractor extractor(AggregationRequestBuildingContextContext context) { return new MetricLongExtractor( nestedPathHierarchy, filter ); } diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java index 2ae01c81718..e8d7eaaffbf 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchRangeAggregation.java @@ -4,6 +4,8 @@ */ package 
org.hibernate.search.backend.elasticsearch.search.aggregation.impl; +import static org.hibernate.search.backend.elasticsearch.search.aggregation.impl.AggregationRequestBuildingContextContext.buildingContextKey; + import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -16,6 +18,8 @@ import org.hibernate.search.backend.elasticsearch.search.common.impl.ElasticsearchSearchIndexValueFieldContext; import org.hibernate.search.backend.elasticsearch.search.predicate.impl.ElasticsearchSearchPredicate; import org.hibernate.search.backend.elasticsearch.types.codec.impl.ElasticsearchFieldCodec; +import org.hibernate.search.engine.search.aggregation.AggregationKey; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.spi.RangeAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; import org.hibernate.search.util.common.data.Range; @@ -29,34 +33,49 @@ /** * @param The type of field values. * @param The type of keys in the returned map. It can be {@code F} + * @param The type of aggregated values. * or a different type if value converters are used. 
*/ -public class ElasticsearchRangeAggregation - extends AbstractElasticsearchBucketAggregation, Long> { +public class ElasticsearchRangeAggregation + extends AbstractElasticsearchBucketAggregation, V> { private final String absoluteFieldPath; private final List> rangesInOrder; private final JsonArray rangesJson; - private ElasticsearchRangeAggregation(Builder builder) { + private final ElasticsearchSearchAggregation aggregation; + + private ElasticsearchRangeAggregation(Builder builder) { super( builder ); this.absoluteFieldPath = builder.field.absolutePath(); this.rangesInOrder = builder.rangesInOrder; this.rangesJson = builder.rangesJson; + this.aggregation = builder.aggregation; } @Override - protected void doRequest(JsonObject outerObject, JsonObject innerObject) { + protected void doRequest(JsonObject outerObject, JsonObject innerObject, AggregationRequestBuildingContextContext context) { outerObject.add( "range", innerObject ); innerObject.addProperty( "field", absoluteFieldPath ); innerObject.addProperty( "keyed", true ); innerObject.add( "ranges", rangesJson ); + + JsonObject subOuterObject = new JsonObject(); + AggregationKey innerExtractorKey = AggregationKey.of( "agg" ); + context.add( buildingContextKey( INNER_EXTRACTOR_KEY ), innerExtractorKey ); + context.add( buildingContextKey( INNER_EXTRACTOR ), aggregation.request( context, innerExtractorKey, subOuterObject ) ); + + if ( !subOuterObject.isEmpty() ) { + outerObject.add( "aggs", subOuterObject ); + } } @Override - protected Extractor, Long>> extractor(AggregationRequestContext context) { - return new RangeBucketExtractor( nestedPathHierarchy, filter, rangesInOrder ); + protected Extractor, V>> extractor(AggregationRequestBuildingContextContext context) { + AggregationKey innerExtractorKey = context.get( buildingContextKey( INNER_EXTRACTOR_KEY ) ); + Extractor innerExtractor = context.get( buildingContextKey( INNER_EXTRACTOR ) ); + return new RangeBucketExtractor( nestedPathHierarchy, filter, 
rangesInOrder, innerExtractorKey, innerExtractor ); } public static class Factory @@ -83,47 +102,74 @@ private TypeSelector(ElasticsearchSearchIndexScope scope, } @Override - public Builder type(Class expectedType, ValueModel valueModel) { - return new Builder<>( scope, field, field.encodingContext().encoder( scope, field, expectedType, valueModel ) ); + public Builder type(Class expectedType, ValueModel valueModel) { + return new CountBuilder<>( scope, field, + field.encodingContext().encoder( scope, field, expectedType, valueModel ) ); } } - protected class RangeBucketExtractor extends AbstractBucketExtractor, Long> { + protected class RangeBucketExtractor extends AbstractBucketExtractor, V> { private final List> rangesInOrder; + private final Extractor innerExtractor; + private final AggregationKey innerExtractorKey; protected RangeBucketExtractor(List nestedPathHierarchy, ElasticsearchSearchPredicate filter, - List> rangesInOrder) { + List> rangesInOrder, AggregationKey innerExtractorKey, Extractor innerExtractor) { super( nestedPathHierarchy, filter ); this.rangesInOrder = rangesInOrder; + this.innerExtractorKey = innerExtractorKey; + this.innerExtractor = innerExtractor; } @Override - protected Map, Long> doExtract(AggregationExtractContext context, JsonElement buckets) { + protected Map, V> doExtract(AggregationExtractContext context, JsonElement buckets) { JsonObject bucketMap = buckets.getAsJsonObject(); - Map, Long> result = CollectionHelper.newLinkedHashMap( rangesInOrder.size() ); + Map, V> result = CollectionHelper.newLinkedHashMap( rangesInOrder.size() ); for ( int i = 0; i < rangesInOrder.size(); i++ ) { JsonObject bucket = bucketMap.get( String.valueOf( i ) ).getAsJsonObject(); Range range = rangesInOrder.get( i ); - long documentCount = getBucketDocCount( bucket ); - result.put( range, documentCount ); + if ( bucket.has( innerExtractorKey.name() ) ) { + bucket = bucket.getAsJsonObject( innerExtractorKey.name() ); + } + result.put( range, 
innerExtractor.extract( bucket, context ) ); } return result; } } - private static class Builder extends AbstractBuilder, Long> - implements RangeAggregationBuilder { + private static class CountBuilder extends Builder { - private final Function encoder; + protected CountBuilder(ElasticsearchSearchIndexScope scope, + ElasticsearchSearchIndexValueFieldContext field, + Function encoder) { + super( scope, field, encoder, new ArrayList<>(), new JsonArray(), + ElasticsearchSearchAggregation.from( scope, + ElasticsearchCountDocumentAggregation.factory( !field.nestedPathHierarchy().isEmpty() ) + .create( scope, null ).type().build() ) ); + } + } - private final List> rangesInOrder = new ArrayList<>(); - private final JsonArray rangesJson = new JsonArray(); + private static class Builder extends AbstractBuilder, T> + implements RangeAggregationBuilder { - private Builder(ElasticsearchSearchIndexScope scope, ElasticsearchSearchIndexValueFieldContext field, - Function encoder) { + private final Function encoder; + + private final List> rangesInOrder; + private final JsonArray rangesJson; + private final ElasticsearchSearchAggregation aggregation; + + protected Builder(ElasticsearchSearchIndexScope scope, + ElasticsearchSearchIndexValueFieldContext field, + Function encoder, + List> rangesInOrder, + JsonArray rangesJson, + ElasticsearchSearchAggregation aggregation) { super( scope, field ); this.encoder = encoder; + this.rangesInOrder = rangesInOrder; + this.rangesJson = rangesJson; + this.aggregation = aggregation; } @Override @@ -151,9 +197,14 @@ public void range(Range range) { } @Override - public ElasticsearchRangeAggregation build() { - return new ElasticsearchRangeAggregation<>( this ); + public Builder withValue(SearchAggregation aggregation) { + return new Builder<>( scope, field, encoder, rangesInOrder, rangesJson, + ElasticsearchSearchAggregation.from( scope, aggregation ) ); } + @Override + public ElasticsearchRangeAggregation build() { + return new 
ElasticsearchRangeAggregation<>( this ); + } } } diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchTermsAggregation.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchTermsAggregation.java index eb19184b5fa..cb7d8c416a8 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchTermsAggregation.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/aggregation/impl/ElasticsearchTermsAggregation.java @@ -4,6 +4,8 @@ */ package org.hibernate.search.backend.elasticsearch.search.aggregation.impl; +import static org.hibernate.search.backend.elasticsearch.search.aggregation.impl.AggregationRequestBuildingContextContext.buildingContextKey; + import java.util.List; import java.util.Map; import java.util.function.BiFunction; @@ -15,6 +17,8 @@ import org.hibernate.search.backend.elasticsearch.types.codec.impl.ElasticsearchFieldCodec; import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; import org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; +import org.hibernate.search.engine.search.aggregation.AggregationKey; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; import org.hibernate.search.util.common.impl.CollectionHelper; @@ -28,19 +32,21 @@ * @param The type of keys in the returned map. It can be {@code F} * or a different type if value converters are used. 
*/ -public class ElasticsearchTermsAggregation - extends AbstractElasticsearchBucketAggregation { +public class ElasticsearchTermsAggregation + extends AbstractElasticsearchBucketAggregation { private final String absoluteFieldPath; private final ProjectionConverter fromFieldValueConverter; private final BiFunction decodeFunction; + private final ElasticsearchSearchAggregation aggregation; + private final JsonObject order; private final int size; private final int minDocCount; - private ElasticsearchTermsAggregation(Builder builder) { + private ElasticsearchTermsAggregation(Builder builder) { super( builder ); this.absoluteFieldPath = builder.field.absolutePath(); this.fromFieldValueConverter = builder.fromFieldValueConverter; @@ -48,10 +54,11 @@ private ElasticsearchTermsAggregation(Builder builder) { this.order = builder.order; this.size = builder.size; this.minDocCount = builder.minDocCount; + this.aggregation = builder.aggregation; } @Override - protected void doRequest(JsonObject outerObject, JsonObject innerObject) { + protected void doRequest(JsonObject outerObject, JsonObject innerObject, AggregationRequestBuildingContextContext context) { outerObject.add( "terms", innerObject ); innerObject.addProperty( "field", absoluteFieldPath ); if ( order != null ) { @@ -59,11 +66,22 @@ protected void doRequest(JsonObject outerObject, JsonObject innerObject) { } innerObject.addProperty( "size", size ); innerObject.addProperty( "min_doc_count", minDocCount ); + + JsonObject subOuterObject = new JsonObject(); + AggregationKey innerExtractorKey = AggregationKey.of( "agg" ); + context.add( buildingContextKey( INNER_EXTRACTOR_KEY ), innerExtractorKey ); + context.add( buildingContextKey( INNER_EXTRACTOR ), aggregation.request( context, innerExtractorKey, subOuterObject ) ); + + if ( !subOuterObject.isEmpty() ) { + outerObject.add( "aggs", subOuterObject ); + } } @Override - protected Extractor> extractor(AggregationRequestContext context) { - return new 
TermsBucketExtractor( nestedPathHierarchy, filter ); + protected Extractor> extractor(AggregationRequestBuildingContextContext context) { + AggregationKey innerExtractorKey = context.get( buildingContextKey( INNER_EXTRACTOR_KEY ) ); + Extractor innerExtractor = context.get( buildingContextKey( INNER_EXTRACTOR ) ); + return new TermsBucketExtractor( nestedPathHierarchy, filter, innerExtractorKey, innerExtractor ); } public static class Factory @@ -93,11 +111,11 @@ private TypeSelector(ElasticsearchFieldCodec codec, @SuppressWarnings("unchecked") @Override - public Builder type(Class expectedType, ValueModel valueModel) { + public Builder type(Class expectedType, ValueModel valueModel) { if ( ValueModel.RAW.equals( valueModel ) ) { - return new Builder<>( - (key, string) -> string != null && !string.isJsonNull() ? string : key, + return new CountBuilder<>( scope, field, + (key, string) -> string != null && !string.isJsonNull() ? string : key, // unchecked cast to make eclipse-compiler happy // we know that Elasticsearch projection converters work with the String ( (ProjectionConverter) field.type().rawProjectionConverter() ) @@ -105,22 +123,28 @@ private TypeSelector(ElasticsearchFieldCodec codec, ); } else { - return new Builder<>( codec::decodeAggregationKey, scope, field, + return new CountBuilder<>( scope, field, codec::decodeAggregationKey, field.type().projectionConverter( valueModel ).withConvertedType( expectedType, field ) ); } } } - protected class TermsBucketExtractor extends AbstractBucketExtractor { + protected class TermsBucketExtractor extends AbstractBucketExtractor { + private final AggregationKey innerExtractorKey; + private final Extractor innerExtractor; + protected TermsBucketExtractor(List nestedPathHierarchy, - ElasticsearchSearchPredicate filter) { + ElasticsearchSearchPredicate filter, AggregationKey innerExtractorKey, Extractor innerExtractor + ) { super( nestedPathHierarchy, filter ); + this.innerExtractorKey = innerExtractorKey; + 
this.innerExtractor = innerExtractor; } @Override - protected Map doExtract(AggregationExtractContext context, JsonElement buckets) { + protected Map doExtract(AggregationExtractContext context, JsonElement buckets) { JsonArray bucketArray = buckets.getAsJsonArray(); - Map result = CollectionHelper.newLinkedHashMap( bucketArray.size() ); + Map result = CollectionHelper.newLinkedHashMap( bucketArray.size() ); FromDocumentValueConvertContext convertContext = context.fromDocumentValueConvertContext(); for ( JsonElement bucketElement : bucketArray ) { JsonObject bucket = bucketElement.getAsJsonObject(); @@ -130,29 +154,60 @@ protected Map doExtract(AggregationExtractContext context, JsonElement decodeFunction.apply( keyJson, keyAsStringJson ), convertContext ); - long documentCount = getBucketDocCount( bucket ); - result.put( key, documentCount ); + + if ( bucket.has( innerExtractorKey.name() ) ) { + bucket = bucket.getAsJsonObject( innerExtractorKey.name() ); + } + result.put( key, innerExtractor.extract( bucket, context ) ); } return result; } } - private static class Builder extends AbstractBuilder - implements TermsAggregationBuilder { + private static class CountBuilder extends Builder { + + protected CountBuilder(ElasticsearchSearchIndexScope scope, + ElasticsearchSearchIndexValueFieldContext field, + BiFunction decodeFunction, + ProjectionConverter fromFieldValueConverter) { + super( scope, field, decodeFunction, fromFieldValueConverter, + ElasticsearchSearchAggregation.from( scope, + ElasticsearchCountDocumentAggregation.factory( !field.nestedPathHierarchy().isEmpty() ) + .create( scope, null ).type().build() ) ); + } + } + + private static class Builder extends AbstractBuilder + implements TermsAggregationBuilder { private final BiFunction decodeFunction; private final ProjectionConverter fromFieldValueConverter; + private final ElasticsearchSearchAggregation aggregation; private JsonObject order; - private int minDocCount = 1; - private int size = 100; + 
private int minDocCount; + private int size; - private Builder(BiFunction decodeFunction, ElasticsearchSearchIndexScope scope, + private Builder(ElasticsearchSearchIndexScope scope, ElasticsearchSearchIndexValueFieldContext field, - ProjectionConverter fromFieldValueConverter) { + BiFunction decodeFunction, + ProjectionConverter fromFieldValueConverter, + ElasticsearchSearchAggregation aggregation) { + this( scope, field, decodeFunction, fromFieldValueConverter, aggregation, null, 1, 100 ); + } + + public Builder(ElasticsearchSearchIndexScope scope, ElasticsearchSearchIndexValueFieldContext field, + BiFunction decodeFunction, + ProjectionConverter fromFieldValueConverter, + ElasticsearchSearchAggregation aggregation, + JsonObject order, int minDocCount, int size) { super( scope, field ); + this.order = order; this.decodeFunction = decodeFunction; this.fromFieldValueConverter = fromFieldValueConverter; + this.aggregation = aggregation; + this.minDocCount = minDocCount; + this.size = size; } @Override @@ -186,7 +241,13 @@ public void maxTermCount(int maxTermCount) { } @Override - public ElasticsearchTermsAggregation build() { + public TermsAggregationBuilder withValue(SearchAggregation aggregation) { + return new Builder<>( scope, field, decodeFunction, fromFieldValueConverter, + ElasticsearchSearchAggregation.from( scope, aggregation ), order, minDocCount, size ); + } + + @Override + public ElasticsearchTermsAggregation build() { return new ElasticsearchTermsAggregation<>( this ); } diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/query/impl/ElasticsearchSearchQueryExtractContext.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/query/impl/ElasticsearchSearchQueryExtractContext.java index 76ca12c01ca..62a91a01e4f 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/query/impl/ElasticsearchSearchQueryExtractContext.java +++ 
b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/search/query/impl/ElasticsearchSearchQueryExtractContext.java @@ -18,7 +18,7 @@ * The context holding all the useful information pertaining to the extraction of data from * the response to the Elasticsearch search query. */ -class ElasticsearchSearchQueryExtractContext implements AggregationExtractContext { +public class ElasticsearchSearchQueryExtractContext implements AggregationExtractContext { private final ElasticsearchSearchQueryRequestContext requestContext; private final ProjectionHitMapper projectionHitMapper; @@ -41,7 +41,7 @@ public FromDocumentValueConvertContext fromDocumentValueConvertContext() { return fromDocumentValueConvertContext; } - JsonObject getResponseBody() { + public JsonObject getResponseBody() { return responseBody; } diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/types/dsl/impl/AbstractElasticsearchNumericFieldTypeOptionsStep.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/types/dsl/impl/AbstractElasticsearchNumericFieldTypeOptionsStep.java index 59fe97f552f..4976e0b9ebc 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/types/dsl/impl/AbstractElasticsearchNumericFieldTypeOptionsStep.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/types/dsl/impl/AbstractElasticsearchNumericFieldTypeOptionsStep.java @@ -69,8 +69,8 @@ protected final void complete() { builder.queryElementFactory( AggregationTypeKeys.MIN, ElasticsearchMetricFieldAggregation.min( codec ) ); builder.queryElementFactory( AggregationTypeKeys.MAX, ElasticsearchMetricFieldAggregation.max( codec ) ); builder.queryElementFactory( AggregationTypeKeys.AVG, ElasticsearchMetricFieldAggregation.avg( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT, ElasticsearchMetricLongAggregation.count( codec ) ); - 
builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT, + builder.queryElementFactory( AggregationTypeKeys.COUNT_VALUES, ElasticsearchMetricLongAggregation.count( codec ) ); + builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT_VALUES, ElasticsearchMetricLongAggregation.countDistinct( codec ) ); } } diff --git a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/types/dsl/impl/AbstractElasticsearchTemporalIndexFieldTypeOptionsStep.java b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/types/dsl/impl/AbstractElasticsearchTemporalIndexFieldTypeOptionsStep.java index 7d480ac29d2..a929b11767b 100644 --- a/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/types/dsl/impl/AbstractElasticsearchTemporalIndexFieldTypeOptionsStep.java +++ b/backend/elasticsearch/src/main/java/org/hibernate/search/backend/elasticsearch/types/dsl/impl/AbstractElasticsearchTemporalIndexFieldTypeOptionsStep.java @@ -88,8 +88,8 @@ protected final void complete() { builder.queryElementFactory( AggregationTypeKeys.MIN, ElasticsearchMetricFieldAggregation.min( codec ) ); builder.queryElementFactory( AggregationTypeKeys.MAX, ElasticsearchMetricFieldAggregation.max( codec ) ); builder.queryElementFactory( AggregationTypeKeys.AVG, ElasticsearchMetricFieldAggregation.avg( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT, ElasticsearchMetricLongAggregation.count( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT, + builder.queryElementFactory( AggregationTypeKeys.COUNT_VALUES, ElasticsearchMetricLongAggregation.count( codec ) ); + builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT_VALUES, ElasticsearchMetricLongAggregation.countDistinct( codec ) ); } } diff --git a/backend/lucene/pom.xml b/backend/lucene/pom.xml index 1be33671f77..07551706e0b 100644 --- a/backend/lucene/pom.xml +++ b/backend/lucene/pom.xml @@ -42,10 +42,6 @@ 
org.apache.lucene lucene-join - - org.apache.lucene - lucene-facet - com.carrotsearch hppc diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java index 5b4099e7b14..574815f8fdd 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java @@ -19,6 +19,7 @@ import org.hibernate.search.backend.lucene.document.model.impl.LuceneIndexRoot; import org.hibernate.search.backend.lucene.document.model.impl.LuceneIndexValueField; import org.hibernate.search.backend.lucene.document.model.impl.LuceneIndexValueFieldTemplate; +import org.hibernate.search.backend.lucene.types.aggregation.impl.LuceneCountDocumentAggregation; import org.hibernate.search.backend.lucene.types.dsl.LuceneIndexFieldTypeFactory; import org.hibernate.search.backend.lucene.types.dsl.impl.LuceneIndexFieldTypeFactoryImpl; import org.hibernate.search.backend.lucene.types.impl.LuceneIndexCompositeNodeType; @@ -33,6 +34,7 @@ import org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; import org.hibernate.search.engine.mapper.mapping.building.spi.IndexFieldTypeDefaultsProvider; import org.hibernate.search.engine.reporting.spi.EventContexts; +import org.hibernate.search.engine.search.aggregation.spi.AggregationTypeKeys; import org.hibernate.search.util.common.reporting.EventContext; public class LuceneIndexRootBuilder extends AbstractLuceneIndexCompositeNodeBuilder @@ -55,6 +57,8 @@ public LuceneIndexRootBuilder(EventContext indexEventContext, this.backendMapperContext = backendMapperContext; this.mappedTypeName = mappedTypeName; this.analysisDefinitionRegistry = analysisDefinitionRegistry; + + 
this.typeBuilder.queryElementFactory( AggregationTypeKeys.COUNT_DOCUMENTS, LuceneCountDocumentAggregation.factory() ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunction.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunction.java index 843a4717f93..bdac2557c0f 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunction.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunction.java @@ -14,4 +14,8 @@ public interface AggregationFunction> { R implementation(); + default boolean acceptMultipleValues() { + return true; + } + } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java index e9d0ddf3bdc..cb515632b8e 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java @@ -10,16 +10,16 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.LeafCollector; -import org.apache.lucene.search.Scorable; import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; -public class AggregationFunctionCollector> implements Collector { +public class AggregationFunctionCollector> extends SimpleCollector { private final 
LongMultiValuesSource valueSource; private final AggregationFunction aggregationFunction; + private LongMultiValues values; + public AggregationFunctionCollector(LongMultiValuesSource valueSource, AggregationFunction aggregationFunction) { this.valueSource = valueSource; this.aggregationFunction = aggregationFunction; @@ -34,8 +34,16 @@ public Long result() { } @Override - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { - return new AggregationFunctionLeafCollector( valueSource.getValues( context ) ); + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) { + while ( values.hasNextValue() ) { + long value = values.nextValue(); + aggregationFunction.apply( value ); + if ( !aggregationFunction.acceptMultipleValues() ) { + break; + } + } + } } @Override @@ -43,26 +51,14 @@ public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; } - public class AggregationFunctionLeafCollector implements LeafCollector { - private final LongMultiValues values; - - public AggregationFunctionLeafCollector(LongMultiValues values) { - this.values = values; - } - - @Override - public void collect(int doc) throws IOException { - if ( values.advanceExact( doc ) ) { - while ( values.hasNextValue() ) { - long value = values.nextValue(); - aggregationFunction.apply( value ); - } - } - } + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + values = valueSource.getValues( context ); + } - @Override - public void setScorer(Scorable scorer) { - // no-op by default - } + @Override + public void finish() throws IOException { + values = null; } + } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinct.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValues.java similarity index 78% rename from 
backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinct.java rename to backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValues.java index ff903d7c4b4..daa0852e4ae 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinct.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValues.java @@ -13,7 +13,7 @@ * The algorithm to collect distinct elements is inspired by {@code org.apache.lucene.facet.LongValueFacetCounts} * of Apache Lucene project. */ -public class CountDistinct implements AggregationFunction { +public class CountDistinctValues implements AggregationFunction { private final BitSet counts = new BitSet( 1024 ); private final LongHashSet hashCounts = new LongHashSet(); @@ -29,8 +29,8 @@ public void apply(long value) { } @Override - public void merge(AggregationFunction sibling) { - CountDistinct other = sibling.implementation(); + public void merge(AggregationFunction sibling) { + CountDistinctValues other = sibling.implementation(); counts.or( other.counts ); hashCounts.addAll( other.hashCounts ); } @@ -41,7 +41,7 @@ public Long result() { } @Override - public CountDistinct implementation() { + public CountDistinctValues implementation() { return this; } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValuesCollectorFactory.java similarity index 59% rename from backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctCollectorFactory.java rename to 
backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValuesCollectorFactory.java index 93736311be9..e1300ef9fcf 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctCollectorFactory.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValuesCollectorFactory.java @@ -9,26 +9,26 @@ import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; -public class CountDistinctCollectorFactory +public class CountDistinctValuesCollectorFactory implements - CollectorFactory, + CollectorFactory, Long, - AggregationFunctionCollectorManager> { + AggregationFunctionCollectorManager> { private final JoiningLongMultiValuesSource source; - private final CollectorKey, Long> key = CollectorKey.create(); + private final CollectorKey, Long> key = CollectorKey.create(); - public CountDistinctCollectorFactory(JoiningLongMultiValuesSource source) { + public CountDistinctValuesCollectorFactory(JoiningLongMultiValuesSource source) { this.source = source; } @Override - public AggregationFunctionCollectorManager createCollectorManager(CollectorExecutionContext context) { - return new AggregationFunctionCollectorManager<>( source, CountDistinct::new ); + public AggregationFunctionCollectorManager createCollectorManager(CollectorExecutionContext context) { + return new AggregationFunctionCollectorManager<>( source, CountDistinctValues::new ); } @Override - public CollectorKey, Long> getCollectorKey() { + public CollectorKey, Long> getCollectorKey() { return key; } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocuemntsCollectorFactory.java 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocuemntsCollectorFactory.java new file mode 100644 index 00000000000..6e3c0e419d1 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocuemntsCollectorFactory.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +import java.io.IOException; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorExecutionContext; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; + +public class CountDocuemntsCollectorFactory + implements CollectorFactory { + + private final CollectorKey key = CollectorKey.create(); + + public static CountDocuemntsCollectorFactory instance() { + return new CountDocuemntsCollectorFactory(); + } + + @Override + public CountDocumentsCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { + return new CountDocumentsCollectorManager(); + } + + @Override + public CollectorKey getCollectorKey() { + return key; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollector.java new file mode 100644 index 00000000000..b494f0e45c8 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollector.java @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +import java.io.IOException; + +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; + +public class CountDocumentsCollector extends SimpleCollector { + + private long count = 0L; + + @Override + public void collect(int doc) throws IOException { + count++; + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE; + } + + public long count() { + return count; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollectorManager.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollectorManager.java new file mode 100644 index 00000000000..5f0c875d08c --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollectorManager.java @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +import java.io.IOException; +import java.util.Collection; + +import org.apache.lucene.search.CollectorManager; + +public class CountDocumentsCollectorManager implements CollectorManager { + + @Override + public CountDocumentsCollector newCollector() throws IOException { + return new CountDocumentsCollector(); + } + + @Override + public Long reduce(Collection collectors) throws IOException { + long count = 0L; + for ( CountDocumentsCollector collector : collectors ) { + count += collector.count(); + } + return count; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/Count.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValues.java similarity index 70% rename from backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/Count.java rename to backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValues.java index 5afcf3c6a1f..fb4b7dedce0 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/Count.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValues.java @@ -4,7 +4,7 @@ */ package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; -public class Count implements AggregationFunction { +public class CountValues implements AggregationFunction { private long count = 0L; @@ -14,7 +14,7 @@ public void apply(long value) { } @Override - public void merge(AggregationFunction sibling) { + public void merge(AggregationFunction sibling) { count += sibling.implementation().count; } @@ -24,7 +24,7 @@ public Long result() { } @Override - public Count implementation() { + public CountValues 
implementation() { return this; } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValuesCollectorFactory.java similarity index 56% rename from backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountCollectorFactory.java rename to backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValuesCollectorFactory.java index 0180c571b58..337615e81d2 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountCollectorFactory.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValuesCollectorFactory.java @@ -9,24 +9,24 @@ import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; -public class CountCollectorFactory +public class CountValuesCollectorFactory implements - CollectorFactory, Long, AggregationFunctionCollectorManager> { + CollectorFactory, Long, AggregationFunctionCollectorManager> { private final JoiningLongMultiValuesSource source; - private final CollectorKey, Long> key = CollectorKey.create(); + private final CollectorKey, Long> key = CollectorKey.create(); - public CountCollectorFactory(JoiningLongMultiValuesSource source) { + public CountValuesCollectorFactory(JoiningLongMultiValuesSource source) { this.source = source; } @Override - public AggregationFunctionCollectorManager createCollectorManager(CollectorExecutionContext context) { - return new AggregationFunctionCollectorManager<>( source, Count::new ); + public AggregationFunctionCollectorManager createCollectorManager(CollectorExecutionContext context) { + return 
new AggregationFunctionCollectorManager<>( source, CountValues::new ); } @Override - public CollectorKey, Long> getCollectorKey() { + public CollectorKey, Long> getCollectorKey() { return key; } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/DoubleAggregationFunctionCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/DoubleAggregationFunctionCollector.java index 3a441653d58..e5d121d27c7 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/DoubleAggregationFunctionCollector.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/DoubleAggregationFunctionCollector.java @@ -11,17 +11,17 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.LeafCollector; -import org.apache.lucene.search.Scorable; import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; -public class DoubleAggregationFunctionCollector> implements Collector { +public class DoubleAggregationFunctionCollector> extends SimpleCollector { private final LongMultiValuesSource valueSource; private final DoubleAggregationFunction aggregationFunction; private final Function longToDouble; + private LongMultiValues values; + public DoubleAggregationFunctionCollector(LongMultiValuesSource valueSource, DoubleAggregationFunction aggregationFunction, Function longToDouble) { this.valueSource = valueSource; @@ -38,8 +38,13 @@ public Double result() { } @Override - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { - return new AggregationFunctionLeafCollector( valueSource.getValues( context ) ); + public void collect(int doc) throws 
IOException { + if ( values.advanceExact( doc ) ) { + while ( values.hasNextValue() ) { + long value = values.nextValue(); + aggregationFunction.apply( longToDouble.apply( value ) ); + } + } } @Override @@ -47,26 +52,14 @@ public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; } - public class AggregationFunctionLeafCollector implements LeafCollector { - private final LongMultiValues values; - - public AggregationFunctionLeafCollector(LongMultiValues values) { - this.values = values; - } - - @Override - public void collect(int doc) throws IOException { - if ( values.advanceExact( doc ) ) { - while ( values.hasNextValue() ) { - long value = values.nextValue(); - aggregationFunction.apply( longToDouble.apply( value ) ); - } - } - } + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + values = valueSource.getValues( context ); + } - @Override - public void setScorer(Scorable scorer) { - // no-op by default - } + @Override + public void finish() throws IOException { + values = null; } + } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/BaseTermsCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/BaseTermsCollector.java new file mode 100644 index 00000000000..c139c430de6 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/BaseTermsCollector.java @@ -0,0 +1,16 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; + +public interface BaseTermsCollector { + + CollectorKey[] keys(); + + CollectorManager[] managers(); + +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/FacetsCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/FacetsCollectorFactory.java deleted file mode 100644 index ec3bf698880..00000000000 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/FacetsCollectorFactory.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.collector.impl; - -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.FacetsCollectorManager; - -public class FacetsCollectorFactory implements CollectorFactory { - public static final CollectorKey KEY = CollectorKey.create(); - - public static final CollectorFactory INSTANCE = - new FacetsCollectorFactory(); - - @Override - public FacetsCollectorManager createCollectorManager(CollectorExecutionContext context) { - return new FacetsCollectorManager(); - } - - @Override - public CollectorKey getCollectorKey() { - return KEY; - } -} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java new file mode 100644 index 00000000000..9e3875f0757 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java @@ -0,0 +1,96 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat 
Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; + +import com.carrotsearch.hppc.LongHashSet; +import com.carrotsearch.hppc.LongObjectHashMap; +import com.carrotsearch.hppc.cursors.LongObjectCursor; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; + +public class NumericTermsCollector extends SimpleCollector implements BaseTermsCollector { + + private final LongHashSet uniqueLeafIndicesForDocument = new LongHashSet(); + + private final LongMultiValuesSource valuesSource; + private final LongObjectHashMap segmentValues = new LongObjectHashMap<>(); + + private final CollectorKey[] keys; + private final CollectorManager[] managers; + + private LongMultiValues values; + private LeafReaderContext leafReaderContext; + + public NumericTermsCollector(LongMultiValuesSource valuesSource, CollectorKey[] keys, + CollectorManager[] managers) { + this.valuesSource = valuesSource; + this.keys = keys; + this.managers = managers; + } + + @Override + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) { + // or just recreate it on each document? + uniqueLeafIndicesForDocument.clear(); + + while ( values.hasNextValue() ) { + // Each document must be counted only once per range. 
+ long value = values.nextValue(); + if ( uniqueLeafIndicesForDocument.add( value ) ) { + TermCollectorSegmentValue segmentValue = segmentValues.get( value ); + if ( segmentValue == null ) { + segmentValue = new TermCollectorSegmentValue( managers, leafReaderContext ); + segmentValues.put( value, segmentValue ); + } + segmentValue.collect( doc ); + } + } + } + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE_NO_SCORES; + } + + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + this.values = valuesSource.getValues( context ); + this.leafReaderContext = context; + for ( LongObjectCursor value : segmentValues ) { + value.value.resetLeafCollectors( context ); + } + } + + @Override + public void finish() { + values = null; + } + + @Override + public CollectorKey[] keys() { + return keys; + } + + @Override + public CollectorManager[] managers() { + return managers; + } + + LongObjectHashMap segmentValues() { + return segmentValues; + } + +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java new file mode 100644 index 00000000000..81e0e9a9e92 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java @@ -0,0 +1,51 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; +import java.util.List; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; + +import org.apache.lucene.search.CollectorManager; + +public class NumericTermsCollectorFactory + implements CollectorFactory { + + public static CollectorFactory instance( + LongMultiValuesSource valuesSource, List> collectorFactories) { + return new NumericTermsCollectorFactory( valuesSource, collectorFactories ); + } + + private final CollectorKey key = CollectorKey.create(); + private final LongMultiValuesSource valuesSource; + private final List> collectorFactories; + + public NumericTermsCollectorFactory(LongMultiValuesSource valuesSource, + List> collectorFactories) { + this.valuesSource = valuesSource; + this.collectorFactories = collectorFactories; + } + + @SuppressWarnings({ "unchecked" }) + @Override + public NumericTermsCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { + CollectorKey[] keys = new CollectorKey[collectorFactories.size()]; + var managers = new CollectorManager[collectorFactories.size()]; + int index = 0; + for ( CollectorFactory factory : collectorFactories ) { + keys[index] = factory.getCollectorKey(); + CollectorManager collectorManager = factory.createCollectorManager( context ); + managers[index] = collectorManager; + index++; + } + return new NumericTermsCollectorManager( valuesSource, keys, managers ); + } + + @Override + public CollectorKey getCollectorKey() { + return key; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java new file mode 100644 index 00000000000..be8ec930b8d --- /dev/null +++ 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.util.Collection; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; + +public class NumericTermsCollectorManager + implements CollectorManager { + + private final LongMultiValuesSource valuesSource; + private final CollectorKey[] keys; + private final CollectorManager[] managers; + + public NumericTermsCollectorManager(LongMultiValuesSource valuesSource, + CollectorKey[] keys, CollectorManager[] managers) { + this.valuesSource = valuesSource; + this.keys = keys; + this.managers = managers; + } + + @Override + public NumericTermsCollector newCollector() { + return new NumericTermsCollector( valuesSource, keys, managers ); + } + + @Override + public TermResults reduce(Collection collection) { + if ( collection.isEmpty() ) { + return TermResults.EMPTY; + } + TermResults results = new TermResults( keys, managers ); + for ( NumericTermsCollector collector : collection ) { + results.add( collector.segmentValues() ); + } + return results; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java new file mode 100644 index 00000000000..95ba455a320 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java @@ -0,0 +1,214 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; +import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; + +import com.carrotsearch.hppc.IntArrayList; +import com.carrotsearch.hppc.IntHashSet; +import com.carrotsearch.hppc.LongArrayList; +import com.carrotsearch.hppc.LongIntHashMap; +import com.carrotsearch.hppc.LongIntMap; +import com.carrotsearch.hppc.cursors.IntCursor; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.LeafCollector; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; + +public class RangeCollector extends SimpleCollector { + + private final LongMultiValuesSource valuesSource; + private final long[] boundaries; + private final IntArrayList[] countsPerBoundaries; + + private final Collector[][] collectors; + private final CollectorKey[] keys; + private final LeafCollector[][] leafCollectors; + + private LongMultiValues values; + + public RangeCollector(LongMultiValuesSource valuesSource, EffectiveRange[] ranges, Collector[][] collectors, + CollectorKey[] keys) { + this.valuesSource = valuesSource; + this.collectors = collectors; + this.keys = keys; + + // Maps all range inclusive endpoints to int flags; 1 + // = start of interval, 2 = end of interval. 
We need to + // track the start vs end case separately because if a + // given point is both, then it must be its own + // elementary interval: + LongIntMap endsMap = new LongIntHashMap(); + + endsMap.put( Long.MIN_VALUE, 1 ); + endsMap.put( Long.MAX_VALUE, 2 ); + + for ( EffectiveRange range : ranges ) { + long min = range.min(); + long max = range.max(); + int cur = endsMap.get( min ); + if ( cur == 0 ) { + endsMap.put( min, 1 ); + } + else { + endsMap.put( min, cur | 1 ); + } + cur = endsMap.get( max ); + if ( cur == 0 ) { + endsMap.put( max, 2 ); + } + else { + endsMap.put( max, cur | 2 ); + } + } + + LongArrayList endsList = new LongArrayList( endsMap.keys() ); + Arrays.sort( endsList.buffer, 0, endsList.elementsCount ); + + // Build elementaryIntervals (a 1D Venn diagram): + List elementaryIntervals = new ArrayList<>(); + int upto0 = 1; + long v = endsList.get( 0 ); + long prev; + if ( endsMap.get( v ) == 3 ) { + elementaryIntervals.add( new EffectiveRange( v, v ) ); + prev = v + 1; + } + else { + prev = v; + } + + while ( upto0 < endsList.size() ) { + v = endsList.get( upto0 ); + int flags = endsMap.get( v ); + if ( flags == 3 ) { + // This point is both an end and a start; we need to + // separate it: + if ( v > prev ) { + elementaryIntervals.add( new EffectiveRange( prev, v - 1 ) ); + } + elementaryIntervals.add( new EffectiveRange( v, v ) ); + prev = v + 1; + } + else if ( flags == 1 ) { + // This point is only the start of an interval; + // attach it to next interval: + if ( v > prev ) { + elementaryIntervals.add( new EffectiveRange( prev, v - 1 ) ); + } + prev = v; + } + else { + assert flags == 2; + // This point is only the end of an interval; attach + // it to last interval: + elementaryIntervals.add( new EffectiveRange( prev, v ) ); + prev = v + 1; + } + upto0++; + } + + // Set boundaries (ends of each elementary interval): + boundaries = new long[elementaryIntervals.size()]; + countsPerBoundaries = new IntArrayList[boundaries.length]; + for ( int i 
= 0; i < boundaries.length; i++ ) { + EffectiveRange interval = elementaryIntervals.get( i ); + boundaries[i] = interval.max(); + IntArrayList list = new IntArrayList(); + countsPerBoundaries[i] = list; + for ( int j = 0; j < ranges.length; j++ ) { + if ( interval.min() >= ranges[j].min() && interval.max() <= ranges[j].max() ) { + list.add( j ); + } + } + } + + leafCollectors = new LeafCollector[keys.length][]; + for ( int i = 0; i < leafCollectors.length; i++ ) { + leafCollectors[i] = new LeafCollector[ranges.length]; + } + } + + private void processLeafWithIndex(int index, int doc) throws IOException { + for ( IntCursor cursor : countsPerBoundaries[index] ) { + for ( int i = 0; i < keys.length; i++ ) { + leafCollectors[i][cursor.value].collect( doc ); + } + } + } + + private int findLeafIndex(long v) { + // Binary search to find matched elementary range; we + // are guaranteed to find a match because the last + // boundary is Long.MAX_VALUE: + int lo = 0; + int hi = boundaries.length - 1; + while ( true ) { + int mid = ( lo + hi ) >>> 1; + if ( v <= boundaries[mid] ) { + if ( mid == 0 ) { + return 0; + } + else { + hi = mid - 1; + } + } + else if ( v > boundaries[mid + 1] ) { + lo = mid + 1; + } + else { + return mid + 1; + } + } + } + + @Override + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) { + IntHashSet uniqueLeafIndicesForDocument = new IntHashSet(); + while ( values.hasNextValue() ) { + // Each document must be counted only once per range. 
+ int leafIndex = findLeafIndex( values.nextValue() ); + if ( uniqueLeafIndicesForDocument.add( leafIndex ) ) { + processLeafWithIndex( leafIndex, doc ); + } + } + } + } + + public Collector[][] collectors() { + return collectors; + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE_NO_SCORES; + } + + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + values = valuesSource.getValues( context ); + for ( int i = 0; i < collectors.length; i++ ) { + for ( int j = 0; j < collectors[i].length; j++ ) { + leafCollectors[i][j] = collectors[i][j].getLeafCollector( context ); + } + } + } + + @Override + public void finish() throws IOException { + values = null; + } + +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java new file mode 100644 index 00000000000..f5cfb3ff06e --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java @@ -0,0 +1,54 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; +import java.util.List; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; +import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; + +import org.apache.lucene.search.CollectorManager; + +public class RangeCollectorFactory + implements CollectorFactory { + + public static CollectorFactory instance( + LongMultiValuesSource valuesSource, EffectiveRange[] ranges, List> collectorFactories) { + return new RangeCollectorFactory( valuesSource, ranges, collectorFactories ); + } + + public final CollectorKey key = CollectorKey.create(); + private final LongMultiValuesSource valuesSource; + private final EffectiveRange[] ranges; + private final List> collectorFactories; + + public RangeCollectorFactory(LongMultiValuesSource valuesSource, EffectiveRange[] ranges, + List> collectorFactories) { + this.valuesSource = valuesSource; + this.ranges = ranges; + this.collectorFactories = collectorFactories; + } + + @SuppressWarnings({ "unchecked" }) + @Override + public RangeCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { + CollectorKey[] keys = new CollectorKey[collectorFactories.size()]; + var managers = new CollectorManager[collectorFactories.size()]; + int index = 0; + for ( CollectorFactory collectorFactory : collectorFactories ) { + CollectorManager collectorManager = collectorFactory.createCollectorManager( context ); + keys[index] = collectorFactory.getCollectorKey(); + managers[index] = collectorManager; + index++; + } + return new RangeCollectorManager( valuesSource, ranges, keys, managers ); + } + + @Override + public CollectorKey getCollectorKey() { + return key; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java new file mode 100644 index 00000000000..8065fc580b1 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java @@ -0,0 +1,57 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; +import java.util.Collection; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; +import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; + +public class RangeCollectorManager implements CollectorManager { + + private final LongMultiValuesSource valuesSource; + private final EffectiveRange[] ranges; + private final CollectorKey[] keys; + private final CollectorManager[] managers; + + public RangeCollectorManager(LongMultiValuesSource valuesSource, EffectiveRange[] ranges, + CollectorKey[] keys, CollectorManager[] managers) { + this.valuesSource = valuesSource; + this.ranges = ranges; + this.keys = keys; + this.managers = managers; + } + + @Override + public RangeCollector newCollector() throws IOException { + Collector[][] collectors = new Collector[keys.length][]; + int index = 0; + for ( CollectorManager manager : managers ) { + Collector[] c = new Collector[ranges.length]; + collectors[index] = c; + for ( int j = 0; j < c.length; j++ ) { + c[j] = manager.newCollector(); + } + index++; + } + return new RangeCollector( valuesSource, ranges, collectors, keys ); + } + + @Override + public RangeResults reduce(Collection collection) { + if ( collection.isEmpty() ) { + return RangeResults.EMPTY; + } + RangeResults results = new RangeResults( keys, managers, ranges.length ); + for ( RangeCollector 
collector : collection ) { + results.add( collector.collectors() ); + } + return results; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeResults.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeResults.java new file mode 100644 index 00000000000..c54c5592554 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeResults.java @@ -0,0 +1,55 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; + +public class RangeResults { + + @SuppressWarnings("unchecked") + static final RangeResults EMPTY = new RangeResults( new CollectorKey[0], new CollectorManager[0], 0 ); + + private final CollectorKey[] collectorKeys; + private final CollectorManager[] managers; + + private final List[][] buckets; + + @SuppressWarnings("unchecked") + RangeResults(CollectorKey[] collectorKeys, CollectorManager[] managers, int ranges) { + this.collectorKeys = collectorKeys; + this.managers = managers; + this.buckets = new List[managers.length][]; + for ( int i = 0; i < buckets.length; i++ ) { + buckets[i] = new List[ranges]; + for ( int j = 0; j < buckets[i].length; j++ ) { + buckets[i][j] = new ArrayList<>(); + } + } + } + + void add(Collector[][] collectors) { + for ( int collectorIndex = 0; collectorIndex < collectors.length; collectorIndex++ ) { + for ( int rangeIndex = 0; rangeIndex < collectors[collectorIndex].length; rangeIndex++ ) { + buckets[collectorIndex][rangeIndex].add( collectors[collectorIndex][rangeIndex] ); + } + } + } + + public List[][] buckets() { + return buckets; + } + + public CollectorKey[] collectorKeys() { + return collectorKeys; + 
} + + public CollectorManager[] collectorManagers() { + return managers; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermCollectorSegmentValue.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermCollectorSegmentValue.java new file mode 100644 index 00000000000..24cce336229 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermCollectorSegmentValue.java @@ -0,0 +1,41 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.LeafCollector; + +class TermCollectorSegmentValue { + final Collector[] collectors; + final LeafCollector[] leafCollectors; + long count = 0L; + + TermCollectorSegmentValue(CollectorManager[] managers, LeafReaderContext leafReaderContext) + throws IOException { + this.collectors = new Collector[managers.length]; + this.leafCollectors = new LeafCollector[managers.length]; + for ( int i = 0; i < managers.length; i++ ) { + collectors[i] = managers[i].newCollector(); + leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); + } + } + + void collect(int doc) throws IOException { + count++; + for ( LeafCollector collector : leafCollectors ) { + collector.collect( doc ); + } + } + + void resetLeafCollectors(LeafReaderContext leafReaderContext) throws IOException { + for ( int i = 0; i < leafCollectors.length; i++ ) { + leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); + } + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermResults.java 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermResults.java new file mode 100644 index 00000000000..2f8e517a1ef --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermResults.java @@ -0,0 +1,102 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; + +import org.hibernate.search.backend.lucene.types.aggregation.impl.BucketOrder; +import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket; + +import com.carrotsearch.hppc.LongObjectHashMap; +import com.carrotsearch.hppc.procedures.LongObjectProcedure; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.util.PriorityQueue; + +public class TermResults { + + @SuppressWarnings("unchecked") + static final TermResults EMPTY = new TermResults( new CollectorKey[0], new CollectorManager[0] ); + + private final CollectorKey[] collectorKeys; + private final CollectorManager[] managers; + + private final LongObjectHashMap buckets = new LongObjectHashMap<>(); + + TermResults(CollectorKey[] collectorKeys, CollectorManager[] managers) { + this.collectorKeys = collectorKeys; + this.managers = managers; + } + + public List counts(BucketOrder order, int topN, int minDocCount) { + int size = Math.min( topN, buckets.size() ); + PriorityQueue pq = new HibernateSearchBucketOrderQueue( order, size ); + + buckets.forEach( (LongObjectProcedure) (key, value) -> { + if ( value.count >= minDocCount ) { + pq.insertWithOverflow( value ); + } + } ); + + List results = new LinkedList<>(); + while ( pq.size() != 0 ) { + LongBucket popped = pq.pop(); + results.add( 0, popped ); + } + + return results; + } + + void add(LongObjectHashMap segmentValues) { + for ( 
var segment : segmentValues ) { + LongBucket bucket = buckets.get( segment.key ); + if ( bucket == null ) { + bucket = new LongBucket( segment.key, segment.value.collectors, segment.value.count ); + buckets.put( segment.key, bucket ); + } + else { + bucket.add( segment.value.collectors, segment.value.count ); + } + } + } + + public void merge(LongObjectHashMap values) { + for ( var toadd : values ) { + LongBucket bucket = buckets.get( toadd.key ); + if ( bucket == null ) { + bucket = new LongBucket( toadd.key, toadd.value.collectors, toadd.value.count ); + buckets.put( toadd.key, bucket ); + } + else { + bucket.add( toadd.value ); + } + } + } + + public CollectorKey[] collectorKeys() { + return collectorKeys; + } + + public CollectorManager[] collectorManagers() { + return managers; + } + + private static class HibernateSearchBucketOrderQueue extends PriorityQueue { + private final Comparator comparator; + + public HibernateSearchBucketOrderQueue(BucketOrder order, int maxSize) { + super( maxSize ); + this.comparator = order.toLongBucketComparator(); + } + + @Override + protected boolean lessThan(LongBucket t1, LongBucket t2) { + return comparator.compare( t1, t2 ) > 0; + } + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java new file mode 100644 index 00000000000..de1dd651aad --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java @@ -0,0 +1,126 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValues; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; +import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket; + +import com.carrotsearch.hppc.LongHashSet; +import com.carrotsearch.hppc.LongObjectHashMap; +import com.carrotsearch.hppc.cursors.LongObjectCursor; + +import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; + +public class TextTermsCollector extends SimpleCollector implements BaseTermsCollector { + + private final LongHashSet uniqueLeafIndicesForDocument = new LongHashSet(); + + private final TextMultiValuesSource valuesSource; + private final LongObjectHashMap hashValues = new LongObjectHashMap<>(); + private final LongObjectHashMap segmentValues = new LongObjectHashMap<>(); + private final String field; + private SortedSetDocValues sortedSetValues; + + private final CollectorKey[] keys; + private final CollectorManager[] managers; + + private TextMultiValues values; + private LeafReaderContext leafReaderContext; + + public TextTermsCollector(String field, TextMultiValuesSource valuesSource, + CollectorKey[] keys, CollectorManager[] managers) { + this.field = field; + this.valuesSource = valuesSource; + this.keys = keys; + this.managers = managers; + } + + @Override + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) { + // or just recreate it on each document? 
+ uniqueLeafIndicesForDocument.clear(); + + while ( values.hasNextValue() ) { + // Each document must be counted only once per range. + long value = values.nextOrd(); + if ( uniqueLeafIndicesForDocument.add( value ) ) { + TermCollectorSegmentValue segmentValue = segmentValues.get( value ); + if ( segmentValue == null ) { + segmentValue = new TermCollectorSegmentValue( managers, leafReaderContext ); + segmentValues.put( value, segmentValue ); + } + segmentValue.collect( doc ); + } + } + } + } + + @Override + public CollectorKey[] keys() { + return keys; + } + + @Override + public CollectorManager[] managers() { + return managers; + } + + LongObjectHashMap segmentValues() { + return hashValues; + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE_NO_SCORES; + } + + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + initRootSortedSetDocValues( context ); + this.values = valuesSource.getValues( context ); + leafReaderContext = context; + } + + @Override + public void finish() throws IOException { + for ( LongObjectCursor value : segmentValues ) { + long globalOrd = sortedSetValues.lookupTerm( values.lookupOrd( value.key ) ); + LongBucket bucket = hashValues.get( globalOrd ); + if ( bucket == null ) { + bucket = new LongBucket( globalOrd, value.value.collectors, value.value.count ); + hashValues.put( globalOrd, bucket ); + } + else { + bucket.count += value.value.count; + for ( int i = 0; i < bucket.collectors.length; i++ ) { + bucket.collectors[i].add( value.value.collectors[i] ); + } + } + } + this.values = null; + this.segmentValues.clear(); + } + + private void initRootSortedSetDocValues(IndexReaderContext ctx) throws IOException { + if ( sortedSetValues != null || ctx == null ) { + return; + } + if ( ctx.isTopLevel ) { + this.sortedSetValues = MultiDocValues.getSortedSetValues( ctx.reader(), field ); + } + initRootSortedSetDocValues( ctx.parent ); + } +} diff --git 
a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java new file mode 100644 index 00000000000..7e95c8e270b --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java @@ -0,0 +1,53 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; +import java.util.List; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; + +import org.apache.lucene.search.CollectorManager; + +public class TextTermsCollectorFactory + implements CollectorFactory { + + public static CollectorFactory instance( + String field, TextMultiValuesSource valuesSource, List> collectorFactories) { + return new TextTermsCollectorFactory( field, valuesSource, collectorFactories ); + } + + public final CollectorKey key = CollectorKey.create(); + private final TextMultiValuesSource valuesSource; + private final String field; + private final List> collectorFactories; + + public TextTermsCollectorFactory(String field, TextMultiValuesSource valuesSource, + List> collectorFactories) { + this.field = field; + this.valuesSource = valuesSource; + this.collectorFactories = collectorFactories; + } + + @SuppressWarnings({ "unchecked" }) + @Override + public TextTermsCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { + CollectorKey[] keys = new CollectorKey[collectorFactories.size()]; + var managers = new CollectorManager[collectorFactories.size()]; + int index = 0; + for ( CollectorFactory factory : collectorFactories ) { + keys[index] = factory.getCollectorKey(); + CollectorManager collectorManager = 
factory.createCollectorManager( context ); + managers[index] = collectorManager; + index++; + } + return new TextTermsCollectorManager( field, valuesSource, keys, managers ); + } + + @Override + public CollectorKey getCollectorKey() { + return key; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java new file mode 100644 index 00000000000..37115c1d145 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java @@ -0,0 +1,46 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.util.Collection; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; + +public class TextTermsCollectorManager + implements CollectorManager { + + private final TextMultiValuesSource valuesSource; + private final String field; + private final CollectorKey[] keys; + private final CollectorManager[] managers; + + public TextTermsCollectorManager(String field, TextMultiValuesSource valuesSource, + CollectorKey[] keys, CollectorManager[] managers) { + this.field = field; + this.valuesSource = valuesSource; + this.keys = keys; + this.managers = managers; + } + + @Override + public TextTermsCollector newCollector() { + return new TextTermsCollector( field, valuesSource, keys, managers ); + } + + @Override + public TermResults reduce(Collection collection) { + if ( collection.isEmpty() ) { + return TermResults.EMPTY; + } + TermResults results = new TermResults( keys, managers ); + for ( TextTermsCollector collector : collection ) { + results.merge( 
collector.segmentValues() ); + } + return results; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValues.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValues.java index 0c50566780d..0749fff4f99 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValues.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValues.java @@ -7,6 +7,7 @@ import java.io.IOException; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.BytesRef; /** * A per-document, unordered sequence of text ordinals. @@ -46,6 +47,8 @@ protected TextMultiValues() { */ public abstract long nextOrd() throws IOException; + public abstract BytesRef lookupOrd(long ord) throws IOException; + /** * Returns the number of unique values. * @return number of unique values in this SortedDocValues. 
This is @@ -77,6 +80,11 @@ public long nextOrd() { throw new UnsupportedOperationException(); } + @Override + public BytesRef lookupOrd(long ord) { + throw new UnsupportedOperationException(); + } + @Override public long getValueCount() { return 0; @@ -113,6 +121,11 @@ public long nextOrd() throws IOException { return values.nextOrd(); } + @Override + public BytesRef lookupOrd(long ord) throws IOException { + return values.lookupOrd( ord ); + } + @Override public long getValueCount() { return values.getValueCount(); diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java deleted file mode 100644 index 1a62ef39013..00000000000 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. 
and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.util.Collection; -import java.util.function.ToLongFunction; - -import org.hibernate.search.util.common.data.Range; -import org.hibernate.search.util.common.data.RangeBoundInclusion; - -import org.apache.lucene.facet.range.LongRange; - -public class FacetCountsUtils { - - private FacetCountsUtils() { - } - - public static < - T extends Number> LongRange[] createLongRangesForIntegralValues(Collection> ranges) { - return createLongRanges( ranges, Number::longValue, Long.MIN_VALUE, Long.MAX_VALUE, false ); - } - - public static LongRange[] createLongRangesForFloatingPointValues(Collection> ranges, - ToLongFunction encoder, T negativeInfinity, T positiveInfinity) { - return createLongRanges( ranges, encoder, negativeInfinity, positiveInfinity, true ); - } - - private static LongRange[] createLongRanges(Collection> ranges, - ToLongFunction encoder, - T lowestPossibleValue, T highestPossibleValue, boolean extremaAreInfinity) { - LongRange[] longRanges = new LongRange[ranges.size()]; - int i = 0; - for ( Range range : ranges ) { - T lowerBoundValue = range.lowerBoundValue().orElse( null ); - T upperBoundValue = range.upperBoundValue().orElse( null ); - longRanges[i] = new LongRange( - String.valueOf( i ), - encoder.applyAsLong( lowerBoundValue == null ? lowestPossibleValue : lowerBoundValue ), - // The lower bound is included if it is explicitly included - RangeBoundInclusion.INCLUDED.equals( range.lowerBoundInclusion() ) - // ... or if it is infinity but infinity cannot be represented - || !extremaAreInfinity && lowerBoundValue == null, - encoder.applyAsLong( upperBoundValue == null ? highestPossibleValue : upperBoundValue ), - // The upper bound is included if it is explicitly included - RangeBoundInclusion.INCLUDED.equals( range.upperBoundInclusion() ) - // ... 
or if it is infinity but infinity cannot be represented - || !extremaAreInfinity && upperBoundValue == null - ); - ++i; - } - return longRanges; - } - -} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueFacetCounts.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueFacetCounts.java deleted file mode 100644 index f1662ffdb12..00000000000 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueFacetCounts.java +++ /dev/null @@ -1,194 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.io.IOException; -import java.util.Collections; -import java.util.List; - -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; - -import com.carrotsearch.hppc.LongHashSet; -import com.carrotsearch.hppc.LongIntHashMap; -import com.carrotsearch.hppc.LongIntMap; -import com.carrotsearch.hppc.cursors.LongIntCursor; -import com.carrotsearch.hppc.procedures.LongProcedure; - -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.LabelAndValue; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.PriorityQueue; - -/** - *

- * Copied with some changes from {@code org.apache.lucene.facet.LongValueFacetCounts} - * of Apache Lucene project. - */ -public class LongMultiValueFacetCounts extends Facets { - - private final int[] counts = new int[1024]; - - private final LongIntMap hashCounts = new LongIntHashMap(); - - private final String field; - - private int totCount; - - public LongMultiValueFacetCounts(String field, LongMultiValuesSource valueSource, FacetsCollector hits) throws IOException { - this.field = field; - count( valueSource, hits.getMatchingDocs() ); - } - - private void count(LongMultiValuesSource valueSource, List matchingDocs) throws IOException { - LongHashSet uniqueValuesForDocument = new LongHashSet(); - LongProcedure incrementCountForDocumentId = this::increment; - - for ( FacetsCollector.MatchingDocs hits : matchingDocs ) { - LongMultiValues fv = valueSource.getValues( hits.context ); - - DocIdSetIterator docs = hits.bits.iterator(); - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc() ) { - if ( fv.advanceExact( doc ) ) { - totCount++; - while ( fv.hasNextValue() ) { - // Each document must be counted only once per value. - uniqueValuesForDocument.add( fv.nextValue() ); - } - - uniqueValuesForDocument.forEach( incrementCountForDocumentId ); - uniqueValuesForDocument.clear(); - } - } - } - } - - private void increment(long value) { - if ( value >= 0 && value < counts.length ) { - counts[(int) value]++; - } - else { - hashCounts.addTo( value, 1 ); - } - } - - @Override - public FacetResult getAllChildren(String dim, String... path) { - throw new UnsupportedOperationException( - "Getting all children is not supported by " + this.getClass().getSimpleName() ); - } - - @Override - public FacetResult getTopChildren(int topN, String dim, String... 
path) { - if ( !dim.equals( field ) ) { - throw new IllegalArgumentException( "invalid dim \"" + dim + "\"; should be \"" + field + "\"" ); - } - if ( path.length != 0 ) { - throw new IllegalArgumentException( "path.length should be 0" ); - } - return getTopChildrenSortByCount( topN ); - } - - private static class Entry { - int count; - long value; - } - - public FacetResult getTopChildrenSortByCount(int topN) { - PriorityQueue pq = new PriorityQueue( Math.min( topN, counts.length + hashCounts.size() ) ) { - @Override - protected boolean lessThan(Entry a, Entry b) { - // sort by count descending, breaking ties by value ascending: - return a.count < b.count || ( a.count == b.count && a.value > b.value ); - } - }; - - int childCount = 0; - Entry e = null; - for ( int i = 0; i < counts.length; i++ ) { - if ( counts[i] != 0 ) { - childCount++; - if ( e == null ) { - e = new Entry(); - } - e.value = i; - e.count = counts[i]; - e = pq.insertWithOverflow( e ); - } - } - - if ( hashCounts.size() != 0 ) { - childCount += hashCounts.size(); - for ( LongIntCursor c : hashCounts ) { - int count = c.value; - if ( count != 0 ) { - e = insertEntry( pq, e, c, count ); - } - } - } - - LabelAndValue[] results = new LabelAndValue[pq.size()]; - while ( pq.size() != 0 ) { - Entry entry = pq.pop(); - results[pq.size()] = new LabelAndValue( Long.toString( entry.value ), entry.count ); - } - - return new FacetResult( field, new String[0], totCount, results, childCount ); - } - - private Entry insertEntry(PriorityQueue pq, - Entry e, LongIntCursor c, int count) { - if ( e == null ) { - e = new Entry(); - } - e.value = c.key; - e.count = count; - e = pq.insertWithOverflow( e ); - return e; - } - - @Override - public Number getSpecificValue(String dim, String... 
path) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public List getAllDims(int topN) throws IOException { - return Collections.singletonList( getTopChildren( topN, field ) ); - } - - @Override - public String toString() { - StringBuilder b = new StringBuilder(); - b.append( "LongValueFacetCounts totCount=" ); - b.append( totCount ); - b.append( ":\n" ); - for ( int i = 0; i < counts.length; i++ ) { - if ( counts[i] != 0 ) { - b.append( " " ); - b.append( i ); - b.append( " -> count=" ); - b.append( counts[i] ); - b.append( '\n' ); - } - } - - if ( hashCounts.size() != 0 ) { - for ( LongIntCursor c : hashCounts ) { - if ( c.value != 0 ) { - b.append( " " ); - b.append( c.key ); - b.append( " -> count=" ); - b.append( c.value ); - b.append( '\n' ); - } - } - } - - return b.toString(); - } -} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeCounter.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeCounter.java deleted file mode 100644 index b5f8673b46f..00000000000 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeCounter.java +++ /dev/null @@ -1,293 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import com.carrotsearch.hppc.IntArrayList; -import com.carrotsearch.hppc.LongArrayList; -import com.carrotsearch.hppc.LongIntHashMap; -import com.carrotsearch.hppc.LongIntMap; -import com.carrotsearch.hppc.cursors.IntCursor; - -import org.apache.lucene.facet.range.LongRange; - -/** - *

- * Copied with some changes from {@code org.apache.lucene.facet.range.LongRangeCounter} - * of Apache Lucene project. - */ -class LongMultiValueRangeCounter { - - final LongRangeNode root; - final long[] boundaries; - final int[] leafCounts; - - // Used during rollup - private int leafUpto; - private int missingCount; - - public LongMultiValueRangeCounter(LongRange[] ranges) { - // Maps all range inclusive endpoints to int flags; 1 - // = start of interval, 2 = end of interval. We need to - // track the start vs end case separately because if a - // given point is both, then it must be its own - // elementary interval: - LongIntMap endsMap = new LongIntHashMap(); - - endsMap.put( Long.MIN_VALUE, 1 ); - endsMap.put( Long.MAX_VALUE, 2 ); - - for ( LongRange range : ranges ) { - int cur = endsMap.get( range.min ); - if ( cur == 0 ) { - endsMap.put( range.min, 1 ); - } - else { - endsMap.put( range.min, cur | 1 ); - } - cur = endsMap.get( range.max ); - if ( cur == 0 ) { - endsMap.put( range.max, 2 ); - } - else { - endsMap.put( range.max, cur | 2 ); - } - } - - LongArrayList endsList = new LongArrayList( endsMap.keys() ); - Arrays.sort( endsList.buffer, 0, endsList.elementsCount ); - - // Build elementaryIntervals (a 1D Venn diagram): - List elementaryIntervals = new ArrayList<>(); - int upto0 = 1; - long v = endsList.get( 0 ); - long prev; - if ( endsMap.get( v ) == 3 ) { - elementaryIntervals.add( new InclusiveRange( v, v ) ); - prev = v + 1; - } - else { - prev = v; - } - - while ( upto0 < endsList.size() ) { - v = endsList.get( upto0 ); - int flags = endsMap.get( v ); - if ( flags == 3 ) { - // This point is both an end and a start; we need to - // separate it: - if ( v > prev ) { - elementaryIntervals.add( new InclusiveRange( prev, v - 1 ) ); - } - elementaryIntervals.add( new InclusiveRange( v, v ) ); - prev = v + 1; - } - else if ( flags == 1 ) { - // This point is only the start of an interval; - // attach it to next interval: - if ( v > prev ) { - 
elementaryIntervals.add( new InclusiveRange( prev, v - 1 ) ); - } - prev = v; - } - else { - assert flags == 2; - // This point is only the end of an interval; attach - // it to last interval: - elementaryIntervals.add( new InclusiveRange( prev, v ) ); - prev = v + 1; - } - upto0++; - } - - // Build binary tree on top of intervals: - root = split( 0, elementaryIntervals.size(), elementaryIntervals ); - - // Set outputs, so we know which range to output for - // each node in the tree: - for ( int i = 0; i < ranges.length; i++ ) { - root.addOutputs( i, ranges[i] ); - } - - // Set boundaries (ends of each elementary interval): - boundaries = new long[elementaryIntervals.size()]; - for ( int i = 0; i < boundaries.length; i++ ) { - boundaries[i] = elementaryIntervals.get( i ).end; - } - - leafCounts = new int[boundaries.length]; - } - - public void incrementCountForLeafWithIndex(int index) { - leafCounts[index]++; - } - - public int findLeafIndex(long v) { - // Binary search to find matched elementary range; we - // are guaranteed to find a match because the last - // boundary is Long.MAX_VALUE: - int lo = 0; - int hi = boundaries.length - 1; - while ( true ) { - int mid = ( lo + hi ) >>> 1; - if ( v <= boundaries[mid] ) { - if ( mid == 0 ) { - return 0; - } - else { - hi = mid - 1; - } - } - else if ( v > boundaries[mid + 1] ) { - lo = mid + 1; - } - else { - return mid + 1; - } - } - } - - /** Fills counts corresponding to the original input - * ranges, returning the missing count (how many hits - * didn't match any ranges). 
*/ - public int fillCounts(int[] counts) { - missingCount = 0; - leafUpto = 0; - rollup( root, counts, false ); - return missingCount; - } - - private int rollup(LongRangeNode node, int[] counts, boolean sawOutputs) { - int count; - sawOutputs |= node.outputs != null; - if ( node.left != null ) { - count = rollup( node.left, counts, sawOutputs ); - count += rollup( node.right, counts, sawOutputs ); - } - else { - // Leaf: - count = leafCounts[leafUpto]; - leafUpto++; - if ( !sawOutputs ) { - // This is a missing count (no output ranges were - // seen "above" us): - missingCount += count; - } - } - if ( node.outputs != null ) { - for ( IntCursor rangeIndexCursor : node.outputs ) { - counts[rangeIndexCursor.value] += count; - } - } - return count; - } - - private static LongRangeNode split(int start, int end, List elementaryIntervals) { - if ( start == end - 1 ) { - // leaf - InclusiveRange range = elementaryIntervals.get( start ); - return new LongRangeNode( range.start, range.end, null, null ); - } - else { - int mid = ( start + end ) >>> 1; - LongRangeNode left = split( start, mid, elementaryIntervals ); - LongRangeNode right = split( mid, end, elementaryIntervals ); - return new LongRangeNode( left.start, right.end, left, right ); - } - } - - private static final class InclusiveRange { - public final long start; - public final long end; - - public InclusiveRange(long start, long end) { - assert end >= start; - this.start = start; - this.end = end; - } - - @Override - public String toString() { - return start + " to " + end; - } - } - - /** Holds one node of the segment tree. 
*/ - public static final class LongRangeNode { - final LongRangeNode left; - final LongRangeNode right; - - // Our range, inclusive: - final long start; - final long end; - - // Which range indices to output when a query goes - // through this node: - IntArrayList outputs; - - public LongRangeNode(long start, long end, LongRangeNode left, LongRangeNode right) { - this.start = start; - this.end = end; - this.left = left; - this.right = right; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - toString( sb, 0 ); - return sb.toString(); - } - - static void indent(StringBuilder sb, int depth) { - for ( int i = 0; i < depth; i++ ) { - sb.append( " " ); - } - } - - /** Recursively assigns range outputs to each node. */ - void addOutputs(int index, LongRange range) { - if ( start >= range.min && end <= range.max ) { - // Our range is fully included in the incoming - // range; add to our output list: - if ( outputs == null ) { - outputs = new IntArrayList(); - } - outputs.add( index ); - } - else if ( left != null ) { - assert right != null; - // Recurse: - left.addOutputs( index, range ); - right.addOutputs( index, range ); - } - } - - void toString(StringBuilder sb, int depth) { - indent( sb, depth ); - if ( left == null ) { - assert right == null; - sb.append( "leaf: " ).append( start ).append( " to " ).append( end ); - } - else { - sb.append( "node: " ).append( start ).append( " to " ).append( end ); - } - if ( outputs != null ) { - sb.append( " outputs=" ); - sb.append( outputs ); - } - sb.append( '\n' ); - - if ( left != null ) { - assert right != null; - left.toString( sb, depth + 1 ); - right.toString( sb, depth + 1 ); - } - } - } -} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeFacetCounts.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeFacetCounts.java deleted file mode 100644 index 
cfaff59829c..00000000000 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeFacetCounts.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.io.IOException; -import java.util.List; - -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; - -import com.carrotsearch.hppc.IntHashSet; -import com.carrotsearch.hppc.procedures.IntProcedure; - -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.range.LongRange; -import org.apache.lucene.index.IndexReaderContext; -import org.apache.lucene.index.ReaderUtil; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Weight; - -/** - *

- * Copied with some changes from {@code org.apache.lucene.facet.range.LongRangeFacetCounts} - * of Apache Lucene project. - */ -public class LongMultiValueRangeFacetCounts extends MultiValueRangeFacetCounts { - - public LongMultiValueRangeFacetCounts(String field, LongMultiValuesSource valueSource, FacetsCollector hits, - LongRange... ranges) - throws IOException { - this( field, valueSource, hits, null, ranges ); - } - - public LongMultiValueRangeFacetCounts(String field, LongMultiValuesSource valueSource, FacetsCollector hits, - Query fastMatchQuery, LongRange... ranges) - throws IOException { - super( field, ranges, fastMatchQuery ); - count( valueSource, hits.getMatchingDocs() ); - } - - private void count(LongMultiValuesSource valueSource, List matchingDocs) throws IOException { - LongRange[] longRanges = (LongRange[]) this.ranges; - - IntHashSet uniqueLeafIndicesForDocument = new IntHashSet(); - LongMultiValueRangeCounter counter = new LongMultiValueRangeCounter( longRanges ); - IntProcedure incrementCountForLeafWithIndex = counter::incrementCountForLeafWithIndex; - - int missingCount = 0; - for ( FacetsCollector.MatchingDocs hits : matchingDocs ) { - LongMultiValues fv = valueSource.getValues( hits.context ); - - totCount += hits.totalHits; - final DocIdSetIterator fastMatchDocs; - if ( fastMatchQuery != null ) { - final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext( hits.context ); - final IndexSearcher searcher = new IndexSearcher( topLevelContext ); - searcher.setQueryCache( null ); - final Weight fastMatchWeight = - searcher.createWeight( searcher.rewrite( fastMatchQuery ), ScoreMode.COMPLETE_NO_SCORES, 1 ); - Scorer s = fastMatchWeight.scorer( hits.context ); - if ( s == null ) { - continue; - } - fastMatchDocs = s.iterator(); - } - else { - fastMatchDocs = null; - } - - DocIdSetIterator docs = hits.bits.iterator(); - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { - if ( fastMatchDocs != null ) { - int 
fastMatchDoc = fastMatchDocs.docID(); - if ( fastMatchDoc < doc ) { - fastMatchDoc = fastMatchDocs.advance( doc ); - } - - if ( doc != fastMatchDoc ) { - doc = docs.advance( fastMatchDoc ); - continue; - } - } - - if ( fv.advanceExact( doc ) ) { - while ( fv.hasNextValue() ) { - // Each document must be counted only once per range. - int leafIndex = counter.findLeafIndex( fv.nextValue() ); - uniqueLeafIndicesForDocument.add( leafIndex ); - } - - uniqueLeafIndicesForDocument.forEach( incrementCountForLeafWithIndex ); - uniqueLeafIndicesForDocument.clear(); - } - else { - missingCount++; - } - - doc = docs.nextDoc(); - } - } - - int x = counter.fillCounts( counts ); - - missingCount += x; - - totCount -= missingCount; - } -} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/MultiValueRangeFacetCounts.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/MultiValueRangeFacetCounts.java deleted file mode 100644 index 43cf7269294..00000000000 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/MultiValueRangeFacetCounts.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.io.IOException; -import java.util.Collections; -import java.util.List; - -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.LabelAndValue; -import org.apache.lucene.facet.range.Range; -import org.apache.lucene.search.Query; - -/** - *

- * Copied with some changes from {@code org.apache.lucene.facet.range.RangeFacetCounts} - * of Apache Lucene project. - */ -public class MultiValueRangeFacetCounts extends Facets { - - protected final Range[] ranges; - protected final int[] counts; - protected final Query fastMatchQuery; - protected final String field; - protected int totCount; - - protected MultiValueRangeFacetCounts(String field, Range[] ranges, Query fastMatchQuery) { - this.field = field; - this.ranges = ranges; - this.fastMatchQuery = fastMatchQuery; - counts = new int[ranges.length]; - } - - @Override - public FacetResult getAllChildren(String dim, String... path) { - throw new UnsupportedOperationException( - "Getting all children is not supported by " + this.getClass().getSimpleName() ); - } - - @Override - public FacetResult getTopChildren(int topN, String dim, String... path) { - if ( !dim.equals( field ) ) { - throw new IllegalArgumentException( "invalid dim \"" + dim + "\"; should be \"" + field + "\"" ); - } - if ( path.length != 0 ) { - throw new IllegalArgumentException( "path.length should be 0" ); - } - LabelAndValue[] labelValues = new LabelAndValue[counts.length]; - for ( int i = 0; i < counts.length; i++ ) { - labelValues[i] = new LabelAndValue( ranges[i].label, counts[i] ); - } - return new FacetResult( dim, path, totCount, labelValues, labelValues.length ); - } - - @Override - public Number getSpecificValue(String dim, String... 
path) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public List getAllDims(int topN) throws IOException { - return Collections.singletonList( getTopChildren( topN, field ) ); - } - - @Override - public String toString() { - StringBuilder b = new StringBuilder(); - b.append( "MultiValueRangeFacetCounts totCount=" ); - b.append( totCount ); - b.append( ":\n" ); - for ( int i = 0; i < ranges.length; i++ ) { - b.append( " " ); - b.append( ranges[i].label ); - b.append( " -> count=" ); - b.append( counts[i] ); - b.append( '\n' ); - } - return b.toString(); - } -} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/TextMultiValueFacetCounts.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/TextMultiValueFacetCounts.java deleted file mode 100644 index ee9d41c575e..00000000000 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/TextMultiValueFacetCounts.java +++ /dev/null @@ -1,258 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. 
and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.io.IOException; -import java.util.Collections; -import java.util.List; - -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValues; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; - -import com.carrotsearch.hppc.IntHashSet; -import com.carrotsearch.hppc.procedures.IntProcedure; - -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.FacetsCollector.MatchingDocs; -import org.apache.lucene.facet.LabelAndValue; -import org.apache.lucene.facet.TopOrdAndIntQueue; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.MultiDocValues; -import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; -import org.apache.lucene.index.OrdinalMap; -import org.apache.lucene.index.ReaderUtil; -import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.LongValues; - -/** - * Copied with some changes from {@code org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts} - * of Apache Lucene project. - */ -public class TextMultiValueFacetCounts extends Facets { - - final SortedSetDocValues dv; - final String field; - final int ordCount; - final int[] counts; - - public TextMultiValueFacetCounts(IndexReader reader, String field, TextMultiValuesSource valuesSource, FacetsCollector hits) - throws IOException { - this.field = field; - dv = MultiDocValues.getSortedSetValues( reader, field ); - if ( dv != null && dv.getValueCount() > Integer.MAX_VALUE ) { - // We may want to remove this limitation? - // Note that DefaultSortedSetDocValuesReaderState has the same limitation, - // so this is no worse than the "legacy" facets from Search 5. 
- throw new IllegalStateException( - "Cannot aggregate when more than " + Integer.MAX_VALUE + " terms are indexed" ); - } - ordCount = dv == null ? 0 : (int) dv.getValueCount(); - counts = new int[ordCount]; - count( reader, valuesSource, hits.getMatchingDocs() ); - } - - @Override - public FacetResult getAllChildren(String dim, String... path) { - throw new UnsupportedOperationException( - "Getting all children is not supported by " + this.getClass().getSimpleName() ); - } - - @Override - public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { - if ( topN <= 0 ) { - throw new IllegalArgumentException( "topN must be > 0 (got: " + topN + ")" ); - } - if ( !dim.equals( field ) ) { - throw new IllegalArgumentException( "invalid dim \"" + dim + "\"; should be \"" + field + "\"" ); - } - if ( path.length != 0 ) { - throw new IllegalArgumentException( "path.length should be 0" ); - } - return getTopChildrenSortByCount( topN ); - } - - private FacetResult getTopChildrenSortByCount(int topN) throws IOException { - if ( topN > ordCount ) { - // HSEARCH-4544 Avoid OutOfMemoryError when passing crazy high topN values - // We know there will never be more than "ordCount" values anyway. 
- topN = ordCount; - } - - TopOrdAndIntQueue q = null; - - int bottomCount = 0; - - int totCount = 0; - int childCount = 0; - - TopOrdAndIntQueue.OrdAndValue reuse = null; - - for ( int ord = 0; ord < ordCount; ord++ ) { - if ( counts[ord] > 0 ) { - totCount += counts[ord]; - childCount++; - if ( counts[ord] > bottomCount ) { - if ( reuse == null ) { - reuse = new TopOrdAndIntQueue.OrdAndValue(); - } - reuse.ord = ord; - reuse.value = counts[ord]; - if ( q == null ) { - // Lazy init, so we don't create this for the - // sparse case unnecessarily - q = new TopOrdAndIntQueue( topN ); - } - reuse = q.insertWithOverflow( reuse ); - if ( q.size() == topN ) { - bottomCount = q.top().value; - } - } - } - } - - if ( q == null ) { - return null; - } - - LabelAndValue[] labelValues = new LabelAndValue[q.size()]; - for ( int i = labelValues.length - 1; i >= 0; i-- ) { - TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop(); - final BytesRef term = dv.lookupOrd( ordAndValue.ord ); - labelValues[i] = new LabelAndValue( term.utf8ToString(), ordAndValue.value ); - } - - return new FacetResult( field, new String[0], totCount, labelValues, childCount ); - } - - private void countOneSegment(OrdinalMap ordinalMap, TextMultiValues segValues, int segOrd, MatchingDocs hits) - throws IOException { - if ( segValues == null ) { - // nothing to count - return; - } - IntHashSet uniqueOrdinalsForDocument = new IntHashSet(); - - DocIdSetIterator docs = hits.bits.iterator(); - - // TODO: yet another option is to count all segs - // first, only in seg-ord space, and then do a - // merge-sort-PQ in the end to only "resolve to - // global" those seg ords that can compete, if we know - // we just want top K? ie, this is the same algo - // that'd be used for merging facets across shards - // (distributed faceting). 
but this has much higher - // temp ram req'ts (sum of number of ords across all - // segs) - if ( ordinalMap != null ) { - final LongValues ordMap = ordinalMap.getGlobalOrds( segOrd ); - - int numSegOrds = (int) segValues.getValueCount(); - - if ( hits.totalHits < numSegOrds / 10 ) { - IntProcedure incrementCountForOrdinal = ord -> counts[ord]++; - // Remap every ord to global ord as we iterate: - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc() ) { - if ( !segValues.advanceExact( doc ) ) { - continue; // No value for this doc - } - while ( segValues.hasNextValue() ) { - int term = (int) segValues.nextOrd(); - int globalOrd = (int) ordMap.get( term ); - uniqueOrdinalsForDocument.add( globalOrd ); - } - uniqueOrdinalsForDocument.forEach( incrementCountForOrdinal ); - uniqueOrdinalsForDocument.clear(); - } - } - else { - // First count in seg-ord space: - final int[] segCounts = new int[numSegOrds]; - IntProcedure incrementCountForOrdinal = ord -> segCounts[ord]++; - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc() ) { - if ( !segValues.advanceExact( doc ) ) { - continue; // No value for this doc - } - while ( segValues.hasNextValue() ) { - int term = (int) segValues.nextOrd(); - uniqueOrdinalsForDocument.add( term ); - } - uniqueOrdinalsForDocument.forEach( incrementCountForOrdinal ); - uniqueOrdinalsForDocument.clear(); - } - - // Then, migrate to global ords: - for ( int ord = 0; ord < numSegOrds; ord++ ) { - int count = segCounts[ord]; - if ( count != 0 ) { - counts[(int) ordMap.get( ord )] += count; - } - } - } - } - else { - // No ord mapping (e.g., single segment index): - // just aggregate directly into counts. 
- IntProcedure incrementCountForOrdinal = ord -> counts[ord]++; - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc() ) { - if ( !segValues.advanceExact( doc ) ) { - continue; // No value for this doc - } - while ( segValues.hasNextValue() ) { - int term = (int) segValues.nextOrd(); - uniqueOrdinalsForDocument.add( term ); - } - uniqueOrdinalsForDocument.forEach( incrementCountForOrdinal ); - uniqueOrdinalsForDocument.clear(); - } - } - } - - /** - * Does all the "real work" of tallying up the counts. - */ - private void count(IndexReader reader, TextMultiValuesSource valuesSource, List matchingDocs) - throws IOException { - OrdinalMap ordinalMap; - - // TODO: is this right? really, we need a way to - // verify that this ordinalMap "matches" the leaves in - // matchingDocs... - if ( dv instanceof MultiSortedSetDocValues && matchingDocs.size() > 1 ) { - ordinalMap = ( (MultiSortedSetDocValues) dv ).mapping; - } - else { - ordinalMap = null; - } - - for ( MatchingDocs hits : matchingDocs ) { - - // LUCENE-5090: make sure the provided reader context "matches" - // the top-level reader passed to the - // SortedSetDocValuesReaderState, else cryptic - // AIOOBE can happen: - if ( ReaderUtil.getTopLevelContext( hits.context ).reader() != reader ) { - throw new IllegalStateException( - "the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader" ); - } - - countOneSegment( ordinalMap, valuesSource.getValues( hits.context ), hits.context.ord, hits ); - } - } - - @Override - public Number getSpecificValue(String dim, String... 
path) { - throw new UnsupportedOperationException(); - } - - @Override - public List getAllDims(int topN) throws IOException { - return Collections.singletonList( getTopChildren( topN, field ) ); - } - -} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java index f8ad0b4fca4..edb3f90d4f3 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java @@ -4,63 +4,23 @@ */ package org.hibernate.search.backend.lucene.search.aggregation.impl; -import java.util.Set; - import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; -import org.hibernate.search.backend.lucene.search.extraction.impl.HibernateSearchMultiCollectorManager; import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; -import org.hibernate.search.backend.lucene.search.query.impl.LuceneSearchQueryIndexScope; -import org.hibernate.search.engine.backend.session.spi.BackendSessionContext; import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; -import org.hibernate.search.engine.search.query.spi.QueryParameters; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Collector; import org.apache.lucene.search.Query; -public class AggregationExtractContext { - - private final LuceneSearchQueryIndexScope queryIndexScope; - private final BackendSessionContext sessionContext; - private final IndexReader indexReader; - private final FromDocumentValueConvertContext fromDocumentValueConvertContext; - private final 
HibernateSearchMultiCollectorManager.MultiCollectedResults multiCollectedResults; - private final Set routingKeys; - private final QueryParameters parameters; - - public AggregationExtractContext(LuceneSearchQueryIndexScope queryIndexScope, BackendSessionContext sessionContext, - IndexReader indexReader, - FromDocumentValueConvertContext fromDocumentValueConvertContext, - HibernateSearchMultiCollectorManager.MultiCollectedResults multiCollectedResults, Set routingKeys, - QueryParameters parameters) { - this.queryIndexScope = queryIndexScope; - this.sessionContext = sessionContext; - this.indexReader = indexReader; - this.fromDocumentValueConvertContext = fromDocumentValueConvertContext; - this.multiCollectedResults = multiCollectedResults; - this.routingKeys = routingKeys; - this.parameters = parameters; - } - - public PredicateRequestContext toPredicateRequestContext(String absolutePath) { - return PredicateRequestContext.withSession( queryIndexScope, sessionContext, routingKeys, parameters ) - .withNestedPath( absolutePath ); - } +public interface AggregationExtractContext { + PredicateRequestContext toPredicateRequestContext(String absolutePath); - public IndexReader getIndexReader() { - return indexReader; - } + IndexReader getIndexReader(); - public FromDocumentValueConvertContext fromDocumentValueConvertContext() { - return fromDocumentValueConvertContext; - } + FromDocumentValueConvertContext fromDocumentValueConvertContext(); - public T getFacets(CollectorKey key) { - return multiCollectedResults.get( key ); - } + T getCollectorResults(CollectorKey key); - public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { - return new NestedDocsProvider( nestedDocumentPath, nestedFilter ); - } + NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContext.java 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContext.java index b7b87fc573a..fbd865f17b7 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContext.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContext.java @@ -4,54 +4,23 @@ */ package org.hibernate.search.backend.lucene.search.aggregation.impl; -import java.util.Set; - import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; -import org.hibernate.search.backend.lucene.search.extraction.impl.ExtractionRequirements; import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; -import org.hibernate.search.backend.lucene.search.query.impl.LuceneSearchQueryIndexScope; -import org.hibernate.search.engine.backend.session.spi.BackendSessionContext; import org.hibernate.search.engine.search.common.NamedValues; -import org.hibernate.search.engine.search.query.spi.QueryParameters; import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.Query; -public final class AggregationRequestContext { - - private final LuceneSearchQueryIndexScope queryIndexScope; - private final BackendSessionContext sessionContext; - private final Set routingKeys; - private final ExtractionRequirements.Builder extractionRequirementsBuilder; - private final QueryParameters parameters; - - public AggregationRequestContext(LuceneSearchQueryIndexScope queryIndexScope, BackendSessionContext sessionContext, - Set routingKeys, ExtractionRequirements.Builder extractionRequirementsBuilder, - QueryParameters parameters) { - this.queryIndexScope = queryIndexScope; - this.sessionContext = sessionContext; - this.routingKeys = routingKeys; - 
this.extractionRequirementsBuilder = extractionRequirementsBuilder; - this.parameters = parameters; - } - - public > void requireCollector( - CollectorFactory collectorFactory) { - extractionRequirementsBuilder.requireCollectorForAllMatchingDocs( collectorFactory ); - } +public interface AggregationRequestContext { + > void requireCollector( + CollectorFactory collectorFactory + ); - public NamedValues queryParameters() { - return parameters; - } + NamedValues queryParameters(); - public PredicateRequestContext toPredicateRequestContext(String absolutePath) { - return PredicateRequestContext.withSession( queryIndexScope, sessionContext, routingKeys, parameters ) - .withNestedPath( absolutePath ); - } + PredicateRequestContext toPredicateRequestContext(String absolutePath); - public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { - return new NestedDocsProvider( nestedDocumentPath, nestedFilter ); - } + NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationExtractContext.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationExtractContext.java new file mode 100644 index 00000000000..c285af570d4 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationExtractContext.java @@ -0,0 +1,72 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.search.aggregation.impl; + +import java.util.Set; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.extraction.impl.HibernateSearchMultiCollectorManager; +import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; +import org.hibernate.search.backend.lucene.search.query.impl.LuceneSearchQueryIndexScope; +import org.hibernate.search.engine.backend.session.spi.BackendSessionContext; +import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; +import org.hibernate.search.engine.search.query.spi.QueryParameters; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Query; + +public class RootAggregationExtractContext implements AggregationExtractContext { + + private final LuceneSearchQueryIndexScope queryIndexScope; + private final BackendSessionContext sessionContext; + private final IndexReader indexReader; + private final FromDocumentValueConvertContext fromDocumentValueConvertContext; + private final HibernateSearchMultiCollectorManager.MultiCollectedResults multiCollectedResults; + private final Set routingKeys; + private final QueryParameters parameters; + + public RootAggregationExtractContext(LuceneSearchQueryIndexScope queryIndexScope, + BackendSessionContext sessionContext, + IndexReader indexReader, + FromDocumentValueConvertContext fromDocumentValueConvertContext, + HibernateSearchMultiCollectorManager.MultiCollectedResults multiCollectedResults, Set routingKeys, + QueryParameters parameters) { + this.queryIndexScope = queryIndexScope; + this.sessionContext = sessionContext; + this.indexReader = indexReader; + this.fromDocumentValueConvertContext = fromDocumentValueConvertContext; 
+ this.multiCollectedResults = multiCollectedResults; + this.routingKeys = routingKeys; + this.parameters = parameters; + } + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return PredicateRequestContext.withSession( queryIndexScope, sessionContext, routingKeys, parameters ) + .withNestedPath( absolutePath ); + } + + @Override + public IndexReader getIndexReader() { + return indexReader; + } + + @Override + public FromDocumentValueConvertContext fromDocumentValueConvertContext() { + return fromDocumentValueConvertContext; + } + + @Override + public T getCollectorResults(CollectorKey key) { + return multiCollectedResults.get( key ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return new NestedDocsProvider( nestedDocumentPath, nestedFilter ); + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationRequestContext.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationRequestContext.java new file mode 100644 index 00000000000..39e8fcd3bb6 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationRequestContext.java @@ -0,0 +1,63 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.search.aggregation.impl; + +import java.util.Set; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.extraction.impl.ExtractionRequirements; +import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; +import org.hibernate.search.backend.lucene.search.query.impl.LuceneSearchQueryIndexScope; +import org.hibernate.search.engine.backend.session.spi.BackendSessionContext; +import org.hibernate.search.engine.search.common.NamedValues; +import org.hibernate.search.engine.search.query.spi.QueryParameters; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.Query; + +public final class RootAggregationRequestContext implements AggregationRequestContext { + + private final LuceneSearchQueryIndexScope queryIndexScope; + private final BackendSessionContext sessionContext; + private final Set routingKeys; + private final ExtractionRequirements.Builder extractionRequirementsBuilder; + private final QueryParameters parameters; + + public RootAggregationRequestContext(LuceneSearchQueryIndexScope queryIndexScope, + BackendSessionContext sessionContext, + Set routingKeys, ExtractionRequirements.Builder extractionRequirementsBuilder, + QueryParameters parameters) { + this.queryIndexScope = queryIndexScope; + this.sessionContext = sessionContext; + this.routingKeys = routingKeys; + this.extractionRequirementsBuilder = extractionRequirementsBuilder; + this.parameters = parameters; + } + + @Override + public > void requireCollector( + CollectorFactory collectorFactory + ) { + extractionRequirementsBuilder.requireCollectorForAllMatchingDocs( collectorFactory ); + } + + @Override + public NamedValues queryParameters() { + return parameters; + 
} + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return PredicateRequestContext.withSession( queryIndexScope, sessionContext, routingKeys, parameters ) + .withNestedPath( absolutePath ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return new NestedDocsProvider( nestedDocumentPath, nestedFilter ); + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java index 9b84c33d833..9a90f34eaba 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java @@ -15,6 +15,7 @@ import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TopDocsDataCollectorExecutionContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; +import org.hibernate.search.backend.lucene.search.aggregation.impl.RootAggregationExtractContext; import org.hibernate.search.backend.lucene.search.extraction.impl.LuceneCollectors; import org.hibernate.search.backend.lucene.search.projection.impl.LuceneSearchProjection; import org.hibernate.search.backend.lucene.search.projection.impl.ProjectionExtractContext; @@ -112,7 +113,7 @@ private List extractHits(ProjectionHitMapper projectionHitMapper, int } private Map, ?> extractAggregations() throws IOException { - AggregationExtractContext aggregationExtractContext = new AggregationExtractContext( + AggregationExtractContext aggregationExtractContext = new RootAggregationExtractContext( 
requestContext.getQueryIndexScope(), requestContext.getSessionContext(), indexSearcher.getIndexReader(), diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java index 6bcf85e9ac6..18e904ce1d2 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java @@ -20,6 +20,7 @@ import org.hibernate.search.backend.lucene.orchestration.impl.LuceneSyncWorkOrchestrator; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; +import org.hibernate.search.backend.lucene.search.aggregation.impl.RootAggregationRequestContext; import org.hibernate.search.backend.lucene.search.extraction.impl.ExtractionRequirements; import org.hibernate.search.backend.lucene.search.highlighter.impl.LuceneAbstractSearchHighlighter; import org.hibernate.search.backend.lucene.search.predicate.impl.LuceneSearchPredicate; @@ -266,7 +267,7 @@ public LuceneSearchQuery build() { if ( aggregations != null ) { aggregationExtractors = new LinkedHashMap<>(); AggregationRequestContext aggregationRequestContext = - new AggregationRequestContext( scope, sessionContext, routingKeys, extractionRequirementsBuilder, + new RootAggregationRequestContext( scope, sessionContext, routingKeys, extractionRequirementsBuilder, parameters ); for ( Map.Entry, LuceneSearchAggregation> entry : aggregations.entrySet() ) { aggregationExtractors.put( entry.getKey(), entry.getValue().request( aggregationRequestContext ) ); diff --git 
a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java deleted file mode 100644 index 566a92e567f..00000000000 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java +++ /dev/null @@ -1,223 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.types.aggregation.impl; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.hibernate.search.backend.lucene.lowlevel.collector.impl.FacetsCollectorFactory; -import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; -import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; -import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; -import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; -import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; -import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; -import org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; -import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; -import org.hibernate.search.engine.search.common.ValueModel; - -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.LabelAndValue; -import org.apache.lucene.index.IndexReader; - -/** - * @param The type of field values exposed to the 
mapper. - * @param The type of terms returned by the Lucene Facets. - * @param The type of keys in the returned map. It can be {@code F} - * or a different type if value converters are used. - */ -public abstract class AbstractLuceneFacetsBasedTermsAggregation - extends AbstractLuceneBucketAggregation { - - private final ProjectionConverter fromFieldValueConverter; - - private final BucketOrder order; - private final int maxTermCount; - private final int minDocCount; - - AbstractLuceneFacetsBasedTermsAggregation(AbstractBuilder builder) { - super( builder ); - this.fromFieldValueConverter = builder.fromFieldValueConverter; - this.order = builder.order; - this.maxTermCount = builder.maxTermCount; - this.minDocCount = builder.minDocCount; - } - - @Override - public Extractor> request(AggregationRequestContext context) { - context.requireCollector( FacetsCollectorFactory.INSTANCE ); - - return extractor( context ); - } - - protected abstract Extractor> extractor(AggregationRequestContext context); - - protected abstract class AbstractExtractor implements Extractor> { - @Override - public final Map extract(AggregationExtractContext context) throws IOException { - FromDocumentValueConvertContext convertContext = context.fromDocumentValueConvertContext(); - - List> buckets = getTopBuckets( context ); - - if ( BucketOrder.COUNT_DESC.equals( order ) && ( minDocCount > 0 || buckets.size() >= maxTermCount ) ) { - /* - * Optimization: in this case, minDocCount and sorting can be safely ignored. - * We already have all the buckets we need, and they are already sorted. 
- */ - return toMap( convertContext, buckets ); - } - - if ( minDocCount <= 0 ) { - Set firstTerms = collectFirstTerms( context.getIndexReader(), order.isTermOrderDescending(), maxTermCount ); - // If some of the first terms are already in non-zero buckets, ignore them in the next step - for ( Bucket bucket : buckets ) { - firstTerms.remove( bucket.term ); - } - // Complete the list of buckets with zero-count terms - for ( T term : firstTerms ) { - buckets.add( new Bucket<>( term, 0L ) ); - } - } - - // Sort the list of buckets and trim it if necessary (there may be more buckets than we want in some cases) - buckets.sort( order.toBucketComparator( getAscendingTermComparator() ) ); - if ( buckets.size() > maxTermCount ) { - buckets.subList( maxTermCount, buckets.size() ).clear(); - } - - return toMap( convertContext, buckets ); - } - - abstract FacetResult getTopChildren(IndexReader reader, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider, int limit) - throws IOException; - - abstract Set collectFirstTerms(IndexReader reader, boolean descending, int limit) - throws IOException; - - abstract Comparator getAscendingTermComparator(); - - abstract T labelToTerm(String label); - - abstract V termToFieldValue(T key); - - private List> getTopBuckets(AggregationExtractContext context) throws IOException { - FacetsCollector facetsCollector = context.getFacets( FacetsCollectorFactory.KEY ); - - NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); - - /* - * TODO HSEARCH-3666 What if the sort order is by term value? - * Lucene returns facets in descending count order. - * If that's what we need, then we can ask Lucene to apply the "maxTermCount" limit directly. - * This is what we do here. 
- * But if we need a different sort, then having to retrieve the "top N" facets by document count - * becomes clearly sub-optimal: to properly implement this, we would need to retrieve - * *all* facets, and Lucene would allocate an array of Integer.MAX_VALUE elements. - * To improve on this, we would need to re-implement the facet collections. - */ - int limit = maxTermCount; - FacetResult facetResult = getTopChildren( context.getIndexReader(), facetsCollector, nestedDocsProvider, limit ); - - List> buckets = new ArrayList<>(); - - if ( facetResult != null ) { - // Add results for matching documents - for ( LabelAndValue labelAndValue : facetResult.labelValues ) { - long count = (Integer) labelAndValue.value; - if ( count >= minDocCount ) { - buckets.add( new Bucket<>( labelToTerm( labelAndValue.label ), count ) ); - } - } - } - - return buckets; - } - - private Map toMap(FromDocumentValueConvertContext convertContext, List> buckets) { - Map result = new LinkedHashMap<>(); // LinkedHashMap to preserve ordering - for ( Bucket bucket : buckets ) { - V decoded = termToFieldValue( bucket.term ); - K key = fromFieldValueConverter.fromDocumentValue( decoded, convertContext ); - result.put( key, bucket.count ); - } - return result; - } - } - - abstract static class AbstractTypeSelector implements TermsAggregationBuilder.TypeSelector { - protected final LuceneSearchIndexScope scope; - protected final LuceneSearchIndexValueFieldContext field; - - protected AbstractTypeSelector(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field) { - this.scope = scope; - this.field = field; - } - - @Override - public abstract AbstractBuilder type(Class expectedType, ValueModel valueModel); - } - - abstract static class AbstractBuilder - extends AbstractLuceneBucketAggregation.AbstractBuilder - implements TermsAggregationBuilder { - - private final ProjectionConverter fromFieldValueConverter; - - private BucketOrder order = BucketOrder.COUNT_DESC; - private int minDocCount 
= 1; - private int maxTermCount = 100; - - AbstractBuilder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, - ProjectionConverter fromFieldValueConverter) { - super( scope, field ); - this.fromFieldValueConverter = fromFieldValueConverter; - } - - @Override - public void orderByCountDescending() { - order( BucketOrder.COUNT_DESC ); - } - - @Override - public void orderByCountAscending() { - order( BucketOrder.COUNT_ASC ); - } - - @Override - public void orderByTermAscending() { - order( BucketOrder.TERM_ASC ); - } - - @Override - public void orderByTermDescending() { - order( BucketOrder.TERM_DESC ); - } - - @Override - public void minDocumentCount(int minDocumentCount) { - this.minDocCount = minDocumentCount; - } - - @Override - public void maxTermCount(int maxTermCount) { - this.maxTermCount = maxTermCount; - } - - @Override - public abstract AbstractLuceneFacetsBasedTermsAggregation build(); - - protected final void order(BucketOrder order) { - this.order = order; - } - } - -} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericFieldAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericFieldAggregation.java index bcdfae57cf0..5d0a866d193 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericFieldAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericFieldAggregation.java @@ -73,7 +73,7 @@ private LuceneNumericMetricFieldAggregationExtraction(CollectorKey coll @Override public K extract(AggregationExtractContext context) { - Long collector = context.getFacets( collectorKey ); + Long collector = context.getCollectorResults( collectorKey ); E e = codec.getDomain().sortedDocValueToTerm( collector ); F decode = codec.decode( e ); @@ 
-111,7 +111,7 @@ private LuceneNumericMetricFieldAggregationDoubleExtraction(CollectorKey c @SuppressWarnings("unchecked") @Override public K extract(AggregationExtractContext context) { - Long collector = context.getFacets( collectorKey ); + Long collector = context.getCollectorResults( collectorKey ); return (K) numericDomain.sortedDocValueToTerm( collector ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericLongAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericLongAggregation.java index 0e0c326fe22..67c9fe5da43 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericLongAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericLongAggregation.java @@ -44,7 +44,7 @@ public Set indexNames() { private class LuceneNumericMetricLongAggregationExtraction implements Extractor { @Override public Long extract(AggregationExtractContext context) { - return context.getFacets( collectorKey ); + return context.getCollectorResults( collectorKey ); } } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java new file mode 100644 index 00000000000..306f6c64bbd --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java @@ -0,0 +1,203 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +import java.io.IOException; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TermResults; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; +import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; +import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; +import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; +import org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; +import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; +import org.hibernate.search.engine.search.common.ValueModel; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; + +/** + * @param The type of field values exposed to the mapper. + * @param The type of terms returned by the Lucene Facets. + * @param The type of keys in the returned map. It can be {@code F} + * or a different type if value converters are used. 
+ */ +public abstract class AbstractLuceneMultivaluedTermsAggregation + extends AbstractLuceneBucketAggregation { + + protected final ProjectionConverter fromFieldValueConverter; + + protected final BucketOrder order; + protected final int maxTermCount; + protected final int minDocCount; + protected final LuceneSearchAggregation aggregation; + + AbstractLuceneMultivaluedTermsAggregation(AbstractBuilder builder) { + super( builder ); + this.fromFieldValueConverter = builder.fromFieldValueConverter; + this.order = builder.order; + this.maxTermCount = builder.maxTermCount; + this.minDocCount = builder.minDocCount; + this.aggregation = builder.aggregation; + } + + protected abstract class AbstractExtractor implements Extractor> { + protected final Extractor extractor; + + protected AbstractExtractor(Extractor extractor) { + this.extractor = extractor; + } + + @Override + public final Map extract(AggregationExtractContext context) throws IOException { + List> buckets = getTopBuckets( context ); + + if ( minDocCount == 0 && buckets.size() < maxTermCount ) { + Set firstTerms = collectFirstTerms( context.getIndexReader(), order.isTermOrderDescending(), maxTermCount ); + for ( Bucket bucket : buckets ) { + firstTerms.remove( bucket.term() ); + } + R zeroValue = createZeroValue( context ); + firstTerms.forEach( term -> buckets.add( new Bucket<>( term, 0, zeroValue ) ) ); + buckets.sort( order.toBucketComparator( getAscendingTermComparator() ) ); + } + + return toMap( context.fromDocumentValueConvertContext(), buckets ); + } + + protected abstract TermResults termResults(AggregationExtractContext context) throws IOException; + + protected R createZeroValue(AggregationExtractContext context) throws IOException { + LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); + var termResults = termResults( context ); + CollectorManager[] managers = termResults.collectorManagers(); + CollectorKey[] keys = termResults.collectorKeys(); + HashMap, 
Object> results = new HashMap<>(); + for ( int i = 0; i < keys.length; i++ ) { + results.put( keys[i], managers[i].reduce( List.of( managers[i].newCollector() ) ) ); + } + localContext.setResults( results ); + return extractor.extract( localContext ); + } + + abstract Set collectFirstTerms(IndexReader reader, boolean descending, int limit) + throws IOException; + + abstract Comparator getAscendingTermComparator(); + + abstract V termToFieldValue(T key); + + abstract List> getTopBuckets(AggregationExtractContext context) throws IOException; + + private Map toMap(FromDocumentValueConvertContext convertContext, List> buckets) { + Map result = new LinkedHashMap<>(); // LinkedHashMap to preserve ordering + for ( Bucket bucket : buckets ) { + V decoded = termToFieldValue( bucket.term() ); + K key = fromFieldValueConverter.fromDocumentValue( decoded, convertContext ); + result.put( key, bucket.value() ); + } + return result; + } + + protected Map, Object> prepareResults(LongBucket bucket, TermResults termResults) + throws IOException { + Map, Object> result = new HashMap<>(); + List[] collectors = bucket.collectors; + CollectorKey[] collectorKeys = termResults.collectorKeys(); + CollectorManager[] managers = termResults.collectorManagers(); + for ( int i = 0; i < collectorKeys.length; i++ ) { + result.put( collectorKeys[i], managers[i].reduce( collectors[i] ) ); + } + return result; + } + } + + abstract static class AbstractTypeSelector implements TermsAggregationBuilder.TypeSelector { + protected final LuceneSearchIndexScope scope; + protected final LuceneSearchIndexValueFieldContext field; + + protected AbstractTypeSelector(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field) { + this.scope = scope; + this.field = field; + } + + @Override + public abstract AbstractBuilder type(Class expectedType, ValueModel valueModel); + } + + abstract static class AbstractBuilder + extends AbstractLuceneBucketAggregation.AbstractBuilder + implements 
TermsAggregationBuilder { + + protected final LuceneSearchAggregation aggregation; + protected final ProjectionConverter fromFieldValueConverter; + protected BucketOrder order; + protected int minDocCount; + protected int maxTermCount; + + AbstractBuilder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter) { + this( scope, field, aggregation, fromFieldValueConverter, BucketOrder.COUNT_DESC, 1, 100 ); + } + + AbstractBuilder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter, + BucketOrder order, int minDocCount, int maxTermCount) { + super( scope, field ); + this.aggregation = aggregation; + this.fromFieldValueConverter = fromFieldValueConverter; + this.order = order; + this.minDocCount = minDocCount; + this.maxTermCount = maxTermCount; + } + + @Override + public void orderByCountDescending() { + order( BucketOrder.COUNT_DESC ); + } + + @Override + public void orderByCountAscending() { + order( BucketOrder.COUNT_ASC ); + } + + @Override + public void orderByTermAscending() { + order( BucketOrder.TERM_ASC ); + } + + @Override + public void orderByTermDescending() { + order( BucketOrder.TERM_DESC ); + } + + @Override + public void minDocumentCount(int minDocumentCount) { + this.minDocCount = minDocumentCount; + } + + @Override + public void maxTermCount(int maxTermCount) { + this.maxTermCount = maxTermCount; + } + + @Override + public abstract AbstractLuceneMultivaluedTermsAggregation build(); + + protected final void order(BucketOrder order) { + this.order = order; + } + } + +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java index 6c1c27c662a..7f01fc781ba 100644 --- 
a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java @@ -4,12 +4,5 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; -final class Bucket { - final F term; - final long count; - - Bucket(F term, long count) { - this.term = term; - this.count = count; - } +public record Bucket(F term, long count, R value) { } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java index 4767879901c..2f9ab4292ad 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java @@ -6,37 +6,59 @@ import java.util.Comparator; -enum BucketOrder { +public enum BucketOrder { COUNT_ASC { @Override - Comparator> toBucketComparatorInternal(Comparator termComparator) { + Comparator> toBucketComparatorInternal(Comparator termComparator) { return (left, right) -> { - int order = Long.compare( left.count, right.count ); + int order = Long.compare( left.count(), right.count() ); if ( order != 0 ) { return order; } - order = termComparator.compare( left.term, right.term ); + order = termComparator.compare( left.term(), right.term() ); return order; }; } + + @Override + Comparator toLongBucketComparatorInternal() { + return Comparator.comparingLong( LongBucket::count ).thenComparingLong( LongBucket::termOrd ); + } }, COUNT_DESC { @Override - Comparator> toBucketComparatorInternal(Comparator termComparator) { + Comparator> toBucketComparatorInternal(Comparator termComparator) { return (left, right) -> { - int order = Long.compare( right.count, left.count ); // reversed, because desc + int order = 
Long.compare( right.count(), left.count() ); // reversed, because desc if ( order != 0 ) { return order; } - order = termComparator.compare( left.term, right.term ); + order = termComparator.compare( left.term(), right.term() ); + return order; + }; + } + + @Override + Comparator toLongBucketComparatorInternal() { + return (left, right) -> { + int order = Long.compare( right.count(), left.count() ); // reversed, because desc + if ( order != 0 ) { + return order; + } + order = Long.compare( left.termOrd(), right.termOrd() ); return order; }; } }, TERM_ASC { @Override - Comparator> toBucketComparatorInternal(Comparator termComparator) { - return (left, right) -> termComparator.compare( left.term, right.term ); + Comparator> toBucketComparatorInternal(Comparator termComparator) { + return (left, right) -> termComparator.compare( left.term(), right.term() ); + } + + @Override + Comparator toLongBucketComparatorInternal() { + return Comparator.comparingLong( LongBucket::termOrd ); } }, TERM_DESC { @@ -46,17 +68,28 @@ boolean isTermOrderDescending() { } @Override - Comparator> toBucketComparatorInternal(Comparator termComparator) { - return (left, right) -> termComparator.compare( left.term, right.term ); + Comparator> toBucketComparatorInternal(Comparator termComparator) { + return (left, right) -> termComparator.compare( left.term(), right.term() ); + } + + @Override + Comparator toLongBucketComparatorInternal() { + return Comparator.comparingLong( LongBucket::termOrd ).reversed(); } }; - Comparator> toBucketComparator(Comparator termAscendingComparator) { + public Comparator> toBucketComparator(Comparator termAscendingComparator) { return toBucketComparatorInternal( isTermOrderDescending() ? 
termAscendingComparator.reversed() : termAscendingComparator ); } - abstract Comparator> toBucketComparatorInternal(Comparator termComparator); + public Comparator toLongBucketComparator() { + return toLongBucketComparatorInternal(); + } + + abstract Comparator> toBucketComparatorInternal(Comparator termComparator); + + abstract Comparator toLongBucketComparatorInternal(); boolean isTermOrderDescending() { return false; diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationExtractContext.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationExtractContext.java new file mode 100644 index 00000000000..2882036b99c --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationExtractContext.java @@ -0,0 +1,58 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +import java.util.Map; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; +import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; +import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Query; + +class LocalAggregationExtractContext implements AggregationExtractContext { + + private final AggregationExtractContext delegate; + + private Map, Object> results; + + LocalAggregationExtractContext(AggregationExtractContext delegate) { + this.delegate = delegate; + } + + @Override + public PredicateRequestContext 
toPredicateRequestContext(String absolutePath) { + return delegate.toPredicateRequestContext( absolutePath ); + } + + @Override + public IndexReader getIndexReader() { + return delegate.getIndexReader(); + } + + @Override + public FromDocumentValueConvertContext fromDocumentValueConvertContext() { + return delegate.fromDocumentValueConvertContext(); + } + + @SuppressWarnings("unchecked") + @Override + public T getCollectorResults(CollectorKey key) { + return (T) results.get( key ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return delegate.createNestedDocsProvider( nestedDocumentPath, nestedFilter ); + } + + public void setResults(Map, Object> results) { + this.results = results; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationRequestContext.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationRequestContext.java new file mode 100644 index 00000000000..97dc028f22a --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationRequestContext.java @@ -0,0 +1,60 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +import java.util.ArrayList; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; +import org.hibernate.search.engine.search.common.NamedValues; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.Query; + +class LocalAggregationRequestContext implements AggregationRequestContext { + + private final AggregationRequestContext delegate; + private final Set> localCollectorFactories = new LinkedHashSet<>(); + + LocalAggregationRequestContext(AggregationRequestContext delegate) { + this.delegate = delegate; + } + + @Override + public > void requireCollector( + CollectorFactory collectorFactory + ) { + localCollectorFactories.add( collectorFactory ); + } + + @Override + public NamedValues queryParameters() { + return delegate.queryParameters(); + } + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return delegate.toPredicateRequestContext( absolutePath ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return delegate.createNestedDocsProvider( nestedDocumentPath, nestedFilter ); + } + + public List> localCollectorFactories() { + return new ArrayList<>( localCollectorFactories ); + } + + public AggregationRequestContext actualContext() { + return delegate; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java new file mode 100644 index 00000000000..aad664b24b4 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java @@ -0,0 +1,65 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.lucene.search.Collector; + +public class LongBucket { + public final long termOrd; + public final List[] collectors; + public long count; + + @SuppressWarnings("unchecked") + public LongBucket(long termOrd, Collector[] collectors, long count) { + this.termOrd = termOrd; + this.collectors = new List[collectors.length]; + for ( int i = 0; i < collectors.length; i++ ) { + this.collectors[i] = new ArrayList<>(); + this.collectors[i].add( collectors[i] ); + } + this.count = count; + } + + public LongBucket(long termOrd, List[] collectors, long count) { + this.termOrd = termOrd; + this.collectors = collectors; + this.count = count; + } + + public void add(Collector[] collectors, long count) { + this.count += count; + for ( int i = 0; i < collectors.length; i++ ) { + this.collectors[i].add( collectors[i] ); + } + } + + public void add(LongBucket bucket) { + this.count += bucket.count; + for ( int i = 0; i < collectors.length; i++ ) { + this.collectors[i].addAll( bucket.collectors[i] ); + } + } + + public long count() { + return count; + } + + public long termOrd() { + return termOrd; + } + + @Override + public String toString() { + return "LongBucket{" + + "termOrd=" + termOrd + + ", count=" + count + + ", collectors=" + Arrays.toString( collectors ) + + '}'; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java index c392eee44b8..964419b735e 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java @@ -5,7 +5,7 @@ package org.hibernate.search.backend.lucene.types.aggregation.impl; import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CompensatedSumCollectorFactory; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountValuesCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; @@ -34,15 +34,15 @@ void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestConte compensatedSumCollectorKey = sumCollectorFactory.getCollectorKey(); context.requireCollector( sumCollectorFactory ); - CountCollectorFactory countCollectorFactory = new CountCollectorFactory( source ); - collectorKey = countCollectorFactory.getCollectorKey(); - context.requireCollector( countCollectorFactory ); + CountValuesCollectorFactory countValuesCollectorFactory = new CountValuesCollectorFactory( source ); + collectorKey = countValuesCollectorFactory.getCollectorKey(); + context.requireCollector( countValuesCollectorFactory ); } @Override E extractEncoded(AggregationExtractContext context, LuceneNumericDomain numericDomain) { - Double sum = context.getFacets( compensatedSumCollectorKey ); - Long counts = context.getFacets( collectorKey ); + Double sum = 
context.getCollectorResults( compensatedSumCollectorKey ); + Long counts = context.getCollectorResults( collectorKey ); double avg = ( sum / counts ); return numericDomain.doubleToTerm( avg ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java index 4ab59041635..597f8d57f9f 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java @@ -5,8 +5,8 @@ package org.hibernate.search.backend.lucene.types.aggregation.impl; import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.AggregationFunctionCollector; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.Count; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountValues; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountValuesCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.SumCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; @@ -28,7 +28,7 @@ public static Factory factory(AbstractLuceneNumericFieldCodec codec } // Supplementary collector used by the avg function - protected CollectorKey, Long> countCollectorKey; + protected CollectorKey, Long> countCollectorKey; LuceneAvgNumericFieldAggregation(Builder builder) { super( builder ); @@ -37,11 +37,11 @@ public static Factory 
factory(AbstractLuceneNumericFieldCodec codec @Override void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestContext context) { SumCollectorFactory sumCollectorFactory = new SumCollectorFactory( source ); - CountCollectorFactory countCollectorFactory = new CountCollectorFactory( source ); + CountValuesCollectorFactory countValuesCollectorFactory = new CountValuesCollectorFactory( source ); collectorKey = sumCollectorFactory.getCollectorKey(); - countCollectorKey = countCollectorFactory.getCollectorKey(); + countCollectorKey = countValuesCollectorFactory.getCollectorKey(); context.requireCollector( sumCollectorFactory ); - context.requireCollector( countCollectorFactory ); + context.requireCollector( countValuesCollectorFactory ); } private static class LuceneNumericMetricFieldAggregationExtraction implements Extractor { @@ -61,8 +61,8 @@ private LuceneNumericMetricFieldAggregationExtraction(CollectorKey coll @Override public K extract(AggregationExtractContext context) { - Long collector = context.getFacets( collectorKey ); - Long counts = context.getFacets( countCollectorKey ); + Long collector = context.getCollectorResults( collectorKey ); + Long counts = context.getCollectorResults( countCollectorKey ); Double avg = ( (double) collector / counts ); collector = NumberUtils.toLong( avg ); @@ -106,8 +106,8 @@ private LuceneNumericMetricFieldAggregationDoubleExtraction(CollectorKey c @Override public E extract(AggregationExtractContext context) { - Long collector = context.getFacets( collectorKey ); - Long counts = context.getFacets( countCollectorKey ); + Long collector = context.getCollectorResults( collectorKey ); + Long counts = context.getCollectorResults( countCollectorKey ); Double avg = ( (double) collector / counts ); collector = NumberUtils.toLong( avg ); return codec.getDomain().sortedDocValueToTerm( collector ); diff --git 
a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDistinctNumericLongAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDistinctNumericLongAggregation.java index 7e055d18150..f83e3d09acc 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDistinctNumericLongAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDistinctNumericLongAggregation.java @@ -4,7 +4,7 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountDistinctCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountDistinctValuesCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; @@ -25,7 +25,7 @@ public static Factory factory(AbstractLuceneNumericFieldCodec codec @Override void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestContext context) { - CountDistinctCollectorFactory collectorFactory = new CountDistinctCollectorFactory( source ); + CountDistinctValuesCollectorFactory collectorFactory = new CountDistinctValuesCollectorFactory( source ); collectorKey = collectorFactory.getCollectorKey(); context.requireCollector( collectorFactory ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java new file mode 100644 index 
00000000000..b35fd6dc169 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java @@ -0,0 +1,99 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +import java.util.Set; + +import org.hibernate.search.backend.lucene.logging.impl.QueryLog; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountDocuemntsCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; +import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexCompositeNodeContext; +import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; +import org.hibernate.search.engine.search.aggregation.spi.CountDocumentAggregationBuilder; +import org.hibernate.search.engine.search.common.spi.SearchQueryElementFactory; + +public class LuceneCountDocumentAggregation implements LuceneSearchAggregation { + + public static Factory factory() { + return Factory.INSTANCE; + } + + private final Set indexNames; + + LuceneCountDocumentAggregation(Builder builder) { + this.indexNames = builder.scope.hibernateSearchIndexNames(); + } + + @Override + public Extractor request(AggregationRequestContext context) { + CountDocuemntsCollectorFactory collectorFactory = CountDocuemntsCollectorFactory.instance(); + var collectorKey = collectorFactory.getCollectorKey(); + + context.requireCollector( collectorFactory ); + return new CountDocumentsExtractor( collectorKey ); + } + + private record CountDocumentsExtractor(CollectorKey 
collectorKey) implements Extractor { + + @Override + public Long extract(AggregationExtractContext context) { + return context.getCollectorResults( collectorKey ); + } + } + + @Override + public Set indexNames() { + return indexNames; + } + + protected static class Factory + implements + SearchQueryElementFactory, + LuceneSearchIndexCompositeNodeContext> { + + private static final Factory INSTANCE = new Factory(); + + private Factory() { + } + + @Override + public CountDocumentAggregationBuilder.TypeSelector create(LuceneSearchIndexScope scope, + LuceneSearchIndexCompositeNodeContext node) { + return new TypeSelector( scope ); + } + + @Override + public void checkCompatibleWith(SearchQueryElementFactory other) { + if ( !getClass().equals( other.getClass() ) ) { + throw QueryLog.INSTANCE.differentImplementationClassForQueryElement( getClass(), other.getClass() ); + } + } + } + + protected record TypeSelector(LuceneSearchIndexScope scope) implements CountDocumentAggregationBuilder.TypeSelector { + @Override + public CountDocumentAggregationBuilder type() { + return new Builder( scope ); + } + } + + public static class Builder implements CountDocumentAggregationBuilder { + + protected final LuceneSearchIndexScope scope; + + public Builder(LuceneSearchIndexScope scope) { + this.scope = scope; + } + + @Override + public LuceneCountDocumentAggregation build() { + return new LuceneCountDocumentAggregation( this ); + } + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountNumericLongAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountNumericLongAggregation.java index 7369a8fd21c..b9128f0eae4 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountNumericLongAggregation.java +++ 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountNumericLongAggregation.java @@ -4,7 +4,7 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountValuesCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; @@ -25,7 +25,7 @@ public static Factory factory(AbstractLuceneNumericFieldCodec codec @Override void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestContext context) { - CountCollectorFactory collectorFactory = new CountCollectorFactory( source ); + CountValuesCollectorFactory collectorFactory = new CountValuesCollectorFactory( source ); collectorKey = collectorFactory.getCollectorKey(); context.requireCollector( collectorFactory ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java index a60a074113a..19cd2204aab 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java @@ -6,54 +6,77 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.function.Function; -import 
org.hibernate.search.backend.lucene.lowlevel.collector.impl.FacetsCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollector; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeResults; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; import org.hibernate.search.backend.lucene.types.codec.impl.AbstractLuceneNumericFieldCodec; -import org.hibernate.search.backend.lucene.types.lowlevel.impl.LuceneNumericDomain; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.spi.RangeAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; import org.hibernate.search.util.common.data.Range; -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; /** * @param The type of field values. * @param The type of encoded field values. * @param The type of keys in the returned map. 
It can be {@code F} + * @param The type of aggregated values. * or a different type if value converters are used. */ -public class LuceneNumericRangeAggregation - extends AbstractLuceneBucketAggregation, Long> { +public class LuceneNumericRangeAggregation + extends AbstractLuceneBucketAggregation, V> { + private final LuceneSearchAggregation aggregation; private final AbstractLuceneNumericFieldCodec codec; private final List> rangesInOrder; private final List> encodedRangesInOrder; - private LuceneNumericRangeAggregation(Builder builder) { + private CollectorKey collectorKey; + + private LuceneNumericRangeAggregation(Builder builder) { super( builder ); + this.aggregation = builder.aggregation; this.codec = builder.codec; this.rangesInOrder = builder.rangesInOrder; this.encodedRangesInOrder = builder.encodedRangesInOrder; } @Override - public Extractor, Long>> request(AggregationRequestContext context) { - context.requireCollector( FacetsCollectorFactory.INSTANCE ); + public Extractor, V>> request(AggregationRequestContext context) { + NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); + JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( + absoluteFieldPath, nestedDocsProvider + ); + + LocalAggregationRequestContext localAggregationContext = new LocalAggregationRequestContext( context ); + Extractor extractor = aggregation.request( localAggregationContext ); + + var rangeFactory = RangeCollectorFactory.instance( source, + codec.getDomain().createEffectiveRanges( encodedRangesInOrder ), + localAggregationContext.localCollectorFactories() ); - return new LuceneNumericRangeAggregationExtractor(); + collectorKey = rangeFactory.getCollectorKey(); + context.requireCollector( rangeFactory ); + + return new LuceneNumericRangeAggregationExtractor( extractor ); } public static class Factory @@ -71,30 +94,38 @@ public Factory(AbstractLuceneNumericFieldCodec codec) { } } - private class 
LuceneNumericRangeAggregationExtractor implements Extractor, Long>> { - - @Override - public Map, Long> extract(AggregationExtractContext context) throws IOException { - LuceneNumericDomain numericDomain = codec.getDomain(); - - FacetsCollector facetsCollector = context.getFacets( FacetsCollectorFactory.KEY ); + private class LuceneNumericRangeAggregationExtractor implements Extractor, V>> { + private final Extractor extractor; - NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); + public LuceneNumericRangeAggregationExtractor(Extractor extractor) { + this.extractor = extractor; + } - Facets facetsCount = numericDomain.createRangeFacetCounts( - absoluteFieldPath, facetsCollector, encodedRangesInOrder, - nestedDocsProvider - ); + @Override + public Map, V> extract(AggregationExtractContext context) throws IOException { + RangeResults rangeResults = context.getCollectorResults( collectorKey ); - FacetResult facetResult = facetsCount.getTopChildren( rangesInOrder.size(), absoluteFieldPath ); + LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); - Map, Long> result = new LinkedHashMap<>(); + Map, V> result = new LinkedHashMap<>(); for ( int i = 0; i < rangesInOrder.size(); i++ ) { - result.put( rangesInOrder.get( i ), (long) (Integer) facetResult.labelValues[i].value ); + localContext.setResults( prepareResults( i, rangeResults ) ); + result.put( rangesInOrder.get( i ), extractor.extract( localContext ) ); } return result; } + + private Map, Object> prepareResults(int index, RangeResults rangeResults) throws IOException { + Map, Object> result = new HashMap<>(); + List[][] collectors = rangeResults.buckets(); + CollectorKey[] collectorKeys = rangeResults.collectorKeys(); + CollectorManager[] managers = rangeResults.collectorManagers(); + for ( int i = 0; i < collectorKeys.length; i++ ) { + result.put( collectorKeys[i], managers[i].reduce( collectors[i][index] ) ); + } + return result; + } } public 
static class TypeSelector implements RangeAggregationBuilder.TypeSelector { @@ -110,28 +141,34 @@ private TypeSelector(AbstractLuceneNumericFieldCodec codec, } @Override - public Builder type(Class expectedType, ValueModel valueModel) { - return new Builder<>( codec, - field.encodingContext().encoder( scope, field, codec, expectedType, valueModel ), - scope, field ); + public Builder type(Class expectedType, ValueModel valueModel) { + return new CountBuilder<>( + codec, field.encodingContext().encoder( scope, field, codec, expectedType, valueModel ), + scope, field + ); } } - public static class Builder - extends AbstractLuceneBucketAggregation.AbstractBuilder, Long> - implements RangeAggregationBuilder { + public static class Builder + extends AbstractLuceneBucketAggregation.AbstractBuilder, V> + implements RangeAggregationBuilder { private final AbstractLuceneNumericFieldCodec codec; private final Function convertAndEncode; - private final List> rangesInOrder = new ArrayList<>(); - private final List> encodedRangesInOrder = new ArrayList<>(); + private final LuceneSearchAggregation aggregation; + private final List> rangesInOrder; + private final List> encodedRangesInOrder; - public Builder(AbstractLuceneNumericFieldCodec codec, Function convertAndEncode, - LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field) { + protected Builder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + AbstractLuceneNumericFieldCodec codec, Function convertAndEncode, + LuceneSearchAggregation aggregation, List> rangesInOrder, List> encodedRangesInOrder) { super( scope, field ); this.codec = codec; this.convertAndEncode = convertAndEncode; + this.aggregation = aggregation; + this.rangesInOrder = rangesInOrder; + this.encodedRangesInOrder = encodedRangesInOrder; } @Override @@ -141,8 +178,26 @@ public void range(Range range) { } @Override - public LuceneNumericRangeAggregation build() { + public RangeAggregationBuilder 
withValue(SearchAggregation aggregation) { + return new Builder<>( scope, field, codec, convertAndEncode, LuceneSearchAggregation.from( scope, aggregation ), + new ArrayList<>( rangesInOrder ), new ArrayList<>( encodedRangesInOrder ) ); + } + + @Override + public LuceneNumericRangeAggregation build() { return new LuceneNumericRangeAggregation<>( this ); } } + + public static class CountBuilder extends Builder { + + protected CountBuilder(AbstractLuceneNumericFieldCodec codec, Function convertAndEncode, + LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field) { + super( scope, field, codec, convertAndEncode, + LuceneSearchAggregation.from( scope, + LuceneCountDocumentAggregation.factory().create( scope, null ).type().build() ), + new ArrayList<>(), new ArrayList<>() ); + } + } + } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java index e32d2da2d29..b070e7b7977 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java @@ -5,26 +5,34 @@ package org.hibernate.search.backend.lucene.types.aggregation.impl; import java.io.IOException; +import java.util.ArrayList; import java.util.Comparator; +import java.util.List; import java.util.Map; +import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import java.util.function.Function; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.NumericTermsCollector; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.NumericTermsCollectorFactory; +import 
org.hibernate.search.backend.lucene.lowlevel.collector.impl.TermResults; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; import org.hibernate.search.backend.lucene.types.codec.impl.AbstractLuceneNumericFieldCodec; import org.hibernate.search.backend.lucene.types.lowlevel.impl.LuceneNumericDomain; import org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; @@ -36,15 +44,16 @@ * @param The type of keys in the returned map. It can be {@code F} * or a different type if value converters are used. 
*/ -public class LuceneNumericTermsAggregation - extends AbstractLuceneFacetsBasedTermsAggregation { +public class LuceneNumericTermsAggregation + extends AbstractLuceneMultivaluedTermsAggregation { private final LuceneNumericDomain numericDomain; private final Comparator termComparator; private final Function decoder; + private CollectorKey collectorKey; - private LuceneNumericTermsAggregation(Builder builder) { + private LuceneNumericTermsAggregation(Builder builder) { super( builder ); this.numericDomain = builder.codec.getDomain(); this.termComparator = numericDomain.createComparator(); @@ -52,8 +61,20 @@ private LuceneNumericTermsAggregation(Builder builder) { } @Override - protected Extractor> extractor(AggregationRequestContext context) { - return new LuceneNumericTermsAggregationExtractor(); + public Extractor> request(AggregationRequestContext context) { + NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); + JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( + absoluteFieldPath, nestedDocsProvider + ); + LocalAggregationRequestContext localAggregationContext = new LocalAggregationRequestContext( context ); + Extractor extractor = aggregation.request( localAggregationContext ); + + var termsCollectorFactory = + NumericTermsCollectorFactory.instance( source, localAggregationContext.localCollectorFactories() ); + context.requireCollector( termsCollectorFactory ); + collectorKey = termsCollectorFactory.getCollectorKey(); + + return new LuceneNumericTermsAggregationExtractor( extractor ); } public static class Factory @@ -73,20 +94,51 @@ public TermsAggregationBuilder.TypeSelector create(LuceneSearchIndexScope sco } private class LuceneNumericTermsAggregationExtractor extends AbstractExtractor { + + private LuceneNumericTermsAggregationExtractor(Extractor extractor) { + super( extractor ); + } + @Override - FacetResult getTopChildren(IndexReader reader, FacetsCollector facetsCollector, - 
NestedDocsProvider nestedDocsProvider, int limit) - throws IOException { - Facets facetCounts = numericDomain.createTermsFacetCounts( - absoluteFieldPath, facetsCollector, nestedDocsProvider - ); - return facetCounts.getTopChildren( limit, absoluteFieldPath ); + protected TermResults termResults(AggregationExtractContext context) throws IOException { + return context.getCollectorResults( collectorKey ); + } + + @Override + Comparator getAscendingTermComparator() { + return termComparator; + } + + @Override + V termToFieldValue(E key) { + return decoder.apply( key ); + } + + @Override + List> getTopBuckets(AggregationExtractContext context) throws IOException { + var termResults = context.getCollectorResults( collectorKey ); + + LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); + + List counts = termResults.counts( order, maxTermCount, minDocCount ); + List> buckets = new ArrayList<>(); + for ( LongBucket bucket : counts ) { + localContext.setResults( prepareResults( bucket, termResults ) ); + buckets.add( + new Bucket<>( + numericDomain.sortedDocValueToTerm( bucket.termOrd() ), + bucket.count(), + extractor.extract( localContext ) + ) + ); + } + return buckets; } @Override - SortedSet collectFirstTerms(IndexReader reader, boolean descending, int limit) + Set collectFirstTerms(IndexReader reader, boolean descending, int limit) throws IOException { - TreeSet collectedTerms = new TreeSet<>( descending ? termComparator.reversed() : termComparator ); + SortedSet collectedTerms = new TreeSet<>( descending ? 
termComparator.reversed() : termComparator ); for ( LeafReaderContext leaf : reader.leaves() ) { final LeafReader atomicReader = leaf.reader(); SortedNumericDocValues docValues = atomicReader.getSortedNumericDocValues( absoluteFieldPath ); @@ -107,23 +159,9 @@ SortedSet collectFirstTerms(IndexReader reader, boolean descending, int limit return collectedTerms; } - @Override - Comparator getAscendingTermComparator() { - return termComparator; - } - - @Override - E labelToTerm(String termAsString) { - return numericDomain.sortedDocValueToTerm( Long.parseLong( termAsString ) ); - } - - @Override - V termToFieldValue(E term) { - return decoder.apply( term ); - } } - private static class TypeSelector extends AbstractTypeSelector { + private static class TypeSelector extends AbstractTypeSelector { private final AbstractLuceneNumericFieldCodec codec; private TypeSelector(AbstractLuceneNumericFieldCodec codec, @@ -134,16 +172,16 @@ private TypeSelector(AbstractLuceneNumericFieldCodec codec, @SuppressWarnings("unchecked") @Override - public Builder type(Class expectedType, ValueModel valueModel) { + public Builder type(Class expectedType, ValueModel valueModel) { if ( ValueModel.RAW.equals( valueModel ) ) { - return new Builder<>( codec, scope, field, + return new CountBuilder<>( codec, scope, field, ( (ProjectionConverter) field.type().rawProjectionConverter() ) .withConvertedType( expectedType, field ), Function.identity() ); } else { - return new Builder<>( codec, scope, field, + return new CountBuilder<>( codec, scope, field, field.type().projectionConverter( valueModel ).withConvertedType( expectedType, field ), codec::decode ); @@ -151,24 +189,52 @@ private TypeSelector(AbstractLuceneNumericFieldCodec codec, } } - private static class Builder - extends AbstractBuilder { + private static class CountBuilder extends Builder { + + private CountBuilder(AbstractLuceneNumericFieldCodec codec, LuceneSearchIndexScope scope, + LuceneSearchIndexValueFieldContext field, + 
ProjectionConverter fromFieldValueConverter, + Function decoder) { + super( codec, scope, field, LuceneSearchAggregation.from( scope, + LuceneCountDocumentAggregation.factory().create( scope, null ).type().build() ), fromFieldValueConverter, + decoder ); + } + } + + private static class Builder + extends AbstractBuilder { private final AbstractLuceneNumericFieldCodec codec; private final Function decoder; - public Builder(AbstractLuceneNumericFieldCodec codec, LuceneSearchIndexScope scope, - LuceneSearchIndexValueFieldContext field, ProjectionConverter fromFieldValueConverter, + private Builder(AbstractLuceneNumericFieldCodec codec, LuceneSearchIndexScope scope, + LuceneSearchIndexValueFieldContext field, LuceneSearchAggregation aggregation, + ProjectionConverter fromFieldValueConverter, Function decoder) { - super( scope, field, fromFieldValueConverter ); + super( scope, field, aggregation, fromFieldValueConverter ); + this.codec = codec; + this.decoder = decoder; + } + + private Builder(AbstractLuceneNumericFieldCodec codec, LuceneSearchIndexScope scope, + LuceneSearchIndexValueFieldContext field, + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter, + Function decoder, + BucketOrder order, int minDocCount, int maxTermCount) { + super( scope, field, aggregation, fromFieldValueConverter, order, minDocCount, maxTermCount ); this.codec = codec; this.decoder = decoder; } @Override - public LuceneNumericTermsAggregation build() { + public LuceneNumericTermsAggregation build() { return new LuceneNumericTermsAggregation<>( this ); } - } + @Override + public TermsAggregationBuilder withValue(SearchAggregation aggregation) { + return new Builder<>( codec, scope, field, LuceneSearchAggregation.from( scope, aggregation ), + fromFieldValueConverter, decoder, order, minDocCount, maxTermCount ); + } + } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneSumCompensatedSumAggregation.java 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneSumCompensatedSumAggregation.java index b91a81ca70f..2eee45d884b 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneSumCompensatedSumAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneSumCompensatedSumAggregation.java @@ -36,7 +36,7 @@ void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestConte @Override E extractEncoded(AggregationExtractContext context, LuceneNumericDomain numericDomain) { - Double sum = context.getFacets( compensatedSumCollectorKey ); + Double sum = context.getCollectorResults( compensatedSumCollectorKey ); return numericDomain.doubleToTerm( sum ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java index f8428ade788..d0980d10457 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java @@ -5,60 +5,78 @@ package org.hibernate.search.backend.lucene.types.aggregation.impl; import java.io.IOException; +import java.util.ArrayList; import java.util.Comparator; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TermResults; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TextTermsCollector; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TextTermsCollectorFactory; import 
org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningTextMultiValuesSource; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.TextMultiValueFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneValueFieldSearchQueryElementFactory; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; import org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.SortedSetDocValues; /** * @param The type of keys in the returned map. It can be {@code String} * or a different type if value converters are used. 
*/ -public class LuceneTextTermsAggregation - extends AbstractLuceneFacetsBasedTermsAggregation { +public class LuceneTextTermsAggregation + extends AbstractLuceneMultivaluedTermsAggregation { private static final Comparator STRING_COMPARATOR = Comparator.naturalOrder(); - private LuceneTextTermsAggregation(Builder builder) { + private CollectorKey collectorKey; + + private LuceneTextTermsAggregation(Builder builder) { super( builder ); } @Override - protected Extractor> extractor(AggregationRequestContext context) { - return new LuceneTextTermsAggregationExtractor(); + public Extractor> request(AggregationRequestContext context) { + NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); + JoiningTextMultiValuesSource source = JoiningTextMultiValuesSource.fromField( + absoluteFieldPath, nestedDocsProvider + ); + + LocalAggregationRequestContext localAggregationContext = new LocalAggregationRequestContext( context ); + Extractor extractor = aggregation.request( localAggregationContext ); + + var termsCollectorFactory = TextTermsCollectorFactory.instance( absoluteFieldPath, source, + localAggregationContext.localCollectorFactories() ); + context.requireCollector( termsCollectorFactory ); + collectorKey = termsCollectorFactory.getCollectorKey(); + + return new LuceneTextTermsAggregationExtractor( extractor ); } private class LuceneTextTermsAggregationExtractor extends AbstractExtractor { + + private LuceneTextTermsAggregationExtractor(Extractor extractor) { + super( extractor ); + } + @Override - FacetResult getTopChildren(IndexReader reader, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider, int limit) - throws IOException { - JoiningTextMultiValuesSource valueSource = JoiningTextMultiValuesSource.fromField( - absoluteFieldPath, nestedDocsProvider - ); - TextMultiValueFacetCounts facetCounts = new TextMultiValueFacetCounts( - reader, absoluteFieldPath, valueSource, facetsCollector - ); - - return 
facetCounts.getTopChildren( limit, absoluteFieldPath ); + protected TermResults termResults(AggregationExtractContext context) throws IOException { + return context.getCollectorResults( collectorKey ); } @Override @@ -95,13 +113,31 @@ Comparator getAscendingTermComparator() { } @Override - String labelToTerm(String label) { - return label; + String termToFieldValue(String key) { + return key; } @Override - String termToFieldValue(String key) { - return key; + List> getTopBuckets(AggregationExtractContext context) throws IOException { + var termResults = context.getCollectorResults( collectorKey ); + + LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); + + List results = termResults.counts( order, maxTermCount, minDocCount ); + + var dv = MultiDocValues.getSortedSetValues( context.getIndexReader(), absoluteFieldPath ); + List> buckets = new ArrayList<>(); + for ( LongBucket bucket : results ) { + localContext.setResults( prepareResults( bucket, termResults ) ); + buckets.add( + new Bucket<>( + dv.lookupOrd( bucket.termOrd() ).utf8ToString(), + bucket.count(), + extractor.extract( localContext ) + ) + ); + } + return buckets; } } @@ -113,40 +149,62 @@ public TypeSelector create(LuceneSearchIndexScope scope, LuceneSearchIndexVal } } - private static class TypeSelector extends AbstractTypeSelector { + private static class TypeSelector extends AbstractTypeSelector { private TypeSelector(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field) { super( scope, field ); } @SuppressWarnings("unchecked") @Override - public Builder type(Class expectedType, ValueModel valueModel) { + public Builder type(Class expectedType, ValueModel valueModel) { if ( ValueModel.RAW.equals( valueModel ) ) { - return new Builder<>( scope, field, + return new CountBuilder<>( scope, field, ( (ProjectionConverter) field.type().rawProjectionConverter() ) .withConvertedType( expectedType, field ) ); } else { - return new Builder<>( scope, 
field, + return new CountBuilder<>( scope, field, field.type().projectionConverter( valueModel ).withConvertedType( expectedType, field ) ); } } } - private static class Builder - extends AbstractBuilder { + private static class CountBuilder extends Builder { - private Builder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + private CountBuilder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, ProjectionConverter fromFieldValueConverter) { - super( scope, field, fromFieldValueConverter ); + super( scope, field, + LuceneSearchAggregation.from( scope, + LuceneCountDocumentAggregation.factory().create( scope, null ).type().build() ), + fromFieldValueConverter ); + } + } + + private static class Builder + extends AbstractBuilder { + + private Builder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter) { + super( scope, field, aggregation, fromFieldValueConverter ); + } + + private Builder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter, + BucketOrder order, int minDocCount, int maxTermCount) { + super( scope, field, aggregation, fromFieldValueConverter, order, minDocCount, maxTermCount ); } @Override - public LuceneTextTermsAggregation build() { + public LuceneTextTermsAggregation build() { return new LuceneTextTermsAggregation<>( this ); } + @Override + public TermsAggregationBuilder withValue(SearchAggregation aggregation) { + return new Builder<>( scope, field, LuceneSearchAggregation.from( scope, aggregation ), fromFieldValueConverter, + order, minDocCount, maxTermCount ); + } } } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java 
b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java index a5e8dfa4b42..250387a6fba 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java @@ -96,8 +96,8 @@ public LuceneIndexValueFieldType toIndexFieldType() { builder.queryElementFactory( AggregationTypeKeys.SUM, sumMetricAggregationFactory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.MIN, LuceneMinNumericFieldAggregation.factory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.MAX, LuceneMaxNumericFieldAggregation.factory( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT, LuceneCountNumericLongAggregation.factory( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT, + builder.queryElementFactory( AggregationTypeKeys.COUNT_VALUES, LuceneCountNumericLongAggregation.factory( codec ) ); + builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT_VALUES, LuceneCountDistinctNumericLongAggregation.factory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.AVG, avgMetricAggregationFactory( codec ) ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneTemporalIndexFieldTypeOptionsStep.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneTemporalIndexFieldTypeOptionsStep.java index c4a777fa8b9..08e8311752f 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneTemporalIndexFieldTypeOptionsStep.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneTemporalIndexFieldTypeOptionsStep.java @@ -102,8 +102,8 @@ public 
LuceneIndexValueFieldType toIndexFieldType() { } builder.queryElementFactory( AggregationTypeKeys.MIN, LuceneMinNumericFieldAggregation.factory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.MAX, LuceneMaxNumericFieldAggregation.factory( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT, LuceneCountNumericLongAggregation.factory( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT, + builder.queryElementFactory( AggregationTypeKeys.COUNT_VALUES, LuceneCountNumericLongAggregation.factory( codec ) ); + builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT_VALUES, LuceneCountDistinctNumericLongAggregation.factory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.AVG, LuceneAvgNumericFieldAggregation.factory( codec ) ); } diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java new file mode 100644 index 00000000000..f04c116c3d5 --- /dev/null +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java @@ -0,0 +1,64 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.lowlevel.impl; + +import java.util.Collection; +import java.util.function.ToLongFunction; + +import org.hibernate.search.util.common.data.Range; +import org.hibernate.search.util.common.data.RangeBoundInclusion; + +public record EffectiveRange(long min, long max) { + public static EffectiveRange[] createEffectiveRangesForIntegralValues( + Collection> ranges) { + return createEffectiveRangesForIntegralValues( ranges, Number::longValue, Long.MIN_VALUE, Long.MAX_VALUE, false ); + } + + public static EffectiveRange[] createEffectiveRangesForIntegralValues( + Collection> ranges, + ToLongFunction encoder, T negativeInfinity, T positiveInfinity) { + return createEffectiveRangesForIntegralValues( ranges, encoder, negativeInfinity, positiveInfinity, true ); + } + + private static EffectiveRange[] createEffectiveRangesForIntegralValues(Collection> ranges, + ToLongFunction encoder, + T lowestPossibleValue, T highestPossibleValue, boolean extremaAreInfinity) { + EffectiveRange[] effectiveRanges = new EffectiveRange[ranges.size()]; + int i = 0; + for ( Range range : ranges ) { + final T lowerBoundValue = range.lowerBoundValue().orElse( null ); + final T upperBoundValue = range.upperBoundValue().orElse( null ); + + + long min = encoder.applyAsLong( lowerBoundValue == null ? lowestPossibleValue : lowerBoundValue ); + long max = encoder.applyAsLong( upperBoundValue == null ? highestPossibleValue : upperBoundValue ); + + // The lower bound is included if it is explicitly included + // ... or if it is infinity but infinity cannot be represented + // so if it's none of the above we exclude the boundary by ++ it. + if ( + RangeBoundInclusion.EXCLUDED.equals( range.lowerBoundInclusion() ) + && ( extremaAreInfinity || lowerBoundValue != null ) ) { + ++min; + } + + // The upper bound is included if it is explicitly included + // ... 
or if it is infinity but infinity cannot be represented + // so if it's none of the above we exclude the boundary by -- it. + if ( + RangeBoundInclusion.EXCLUDED.equals( range.upperBoundInclusion() ) + && ( extremaAreInfinity || upperBoundValue != null ) ) { + --max; + } + + effectiveRanges[i] = new EffectiveRange( + min, + max + ); + ++i; + } + return effectiveRanges; + } +} diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java index d4ed1241e9a..12f319907b2 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java @@ -4,24 +4,17 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; import org.hibernate.search.backend.lucene.lowlevel.comparator.impl.DoubleValuesSourceComparator; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.DoubleMultiValuesToSingleValuesSource; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueRangeFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.facet.Facets; -import 
org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -93,37 +86,9 @@ public Double doubleToTerm(double doubleValue) { } @Override - public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException { - // As we don't need to apply any operation to terms except sometimes a sort, - // we can simply rely on raw, long values, whose order is the same as their corresponding double value. - // Values are ultimately converted back to the Double equivalent by calling sortedDocValueToTerm. - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueFacetCounts( absoluteFieldPath, source, facetsCollector ); - } - - @Override - public Facets createRangeFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException { - // As we don't need to apply any operation to terms except sometimes a sort, - // we can simply rely on raw, long values, whose order is the same as their corresponding double value. - // Values are ultimately converted back to the Double equivalent by calling sortedDocValueToTerm. 
- JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueRangeFacetCounts( - absoluteFieldPath, source, - facetsCollector, - FacetCountsUtils.createLongRangesForFloatingPointValues( - ranges, NumericUtils::doubleToSortableLong, - Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY - ) - ); + public EffectiveRange[] createEffectiveRanges(Collection> ranges) { + return EffectiveRange.createEffectiveRangesForIntegralValues( ranges, NumericUtils::doubleToSortableLong, + Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java index 48a7218ac16..333f1799939 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java @@ -4,25 +4,18 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; import org.hibernate.search.backend.lucene.lowlevel.comparator.impl.FloatValuesSourceComparator; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.DoubleMultiValuesToSingleValuesSource; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueRangeFacetCounts; import 
org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.document.FloatPoint; import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -94,37 +87,9 @@ public Float doubleToTerm(double doubleValue) { } @Override - public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException { - // As we don't need to apply any operation to terms except sometimes a sort, - // we can simply rely on raw, int values, whose order is the same as their corresponding float value. - // Values are ultimately converted back to the Float equivalent by calling sortedDocValueToTerm. - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromIntField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueFacetCounts( absoluteFieldPath, source, facetsCollector ); - } - - @Override - public Facets createRangeFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException { - // As we don't need to apply any operation to terms except sometimes a sort, - // we can simply rely on raw, int values, whose order is the same as their corresponding float value. - // Values are ultimately converted back to the Float equivalent by calling sortedDocValueToTerm. 
- JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromIntField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueRangeFacetCounts( - absoluteFieldPath, source, - facetsCollector, - FacetCountsUtils.createLongRangesForFloatingPointValues( - ranges, value -> (long) NumericUtils.floatToSortableInt( value ), - Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY - ) - ); + public EffectiveRange[] createEffectiveRanges(Collection> ranges) { + return EffectiveRange.createEffectiveRangesForIntegralValues( ranges, NumericUtils::floatToSortableInt, + Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java index db1a866a90b..e57cb0f0ae0 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java @@ -4,25 +4,18 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; import org.hibernate.search.backend.lucene.lowlevel.comparator.impl.IntValuesSourceComparator; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesToSingleValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueRangeFacetCounts; import 
org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -93,31 +86,8 @@ public double sortedDocValueToDouble(long longValue) { } @Override - public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException { - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromIntField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueFacetCounts( - absoluteFieldPath, source, - facetsCollector - ); - } - - @Override - public Facets createRangeFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException { - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromIntField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueRangeFacetCounts( - absoluteFieldPath, source, - facetsCollector, - FacetCountsUtils.createLongRangesForIntegralValues( ranges ) - ); + public EffectiveRange[] createEffectiveRanges(Collection> ranges) { + return EffectiveRange.createEffectiveRangesForIntegralValues( ranges ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java index b8df3919af0..6aab8e3a5d5 100644 --- 
a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java @@ -4,25 +4,18 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; import org.hibernate.search.backend.lucene.lowlevel.comparator.impl.LongValuesSourceComparator; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesToSingleValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueRangeFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -93,31 +86,8 @@ public Long doubleToTerm(double doubleValue) { } @Override - public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException { - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueFacetCounts( - absoluteFieldPath, 
source, - facetsCollector - ); - } - - @Override - public Facets createRangeFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException { - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueRangeFacetCounts( - absoluteFieldPath, source, - facetsCollector, - FacetCountsUtils.createLongRangesForIntegralValues( ranges ) - ); + public EffectiveRange[] createEffectiveRanges(Collection> ranges) { + return EffectiveRange.createEffectiveRangesForIntegralValues( ranges ); } @Override diff --git a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java index 2b82c7a9807..a25ade3387a 100644 --- a/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java +++ b/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java @@ -4,7 +4,6 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; @@ -12,8 +11,6 @@ import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.util.common.data.Range; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -43,14 +40,7 @@ public interface LuceneNumericDomain { E doubleToTerm(double doubleValue); - Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException; - - Facets 
createRangeFacetCounts(String absoluteFieldPath, - FacetsCollector facetsCollector, Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException; + EffectiveRange[] createEffectiveRanges(Collection> ranges); IndexableField createIndexField(String absoluteFieldPath, E numericValue); diff --git a/build/parents/build/pom.xml b/build/parents/build/pom.xml index 39bc988ac80..7aeb35bb7b1 100644 --- a/build/parents/build/pom.xml +++ b/build/parents/build/pom.xml @@ -448,11 +448,6 @@ lucene-join ${version.org.apache.lucene} - - org.apache.lucene - lucene-facet - ${version.org.apache.lucene} - org.apache.lucene lucene-highlighter diff --git a/documentation/src/main/asciidoc/migration/index.adoc b/documentation/src/main/asciidoc/migration/index.adoc index 52b9ec7974b..26a50389cbf 100644 --- a/documentation/src/main/asciidoc/migration/index.adoc +++ b/documentation/src/main/asciidoc/migration/index.adoc @@ -84,6 +84,9 @@ The https://hibernate.org/community/compatibility-policy/#code-categorization[AP in Hibernate Search {hibernateSearchVersion} is, in general, backward-compatible with Hibernate Search {hibernateSearchPreviousStableVersionShort}. +* Metric aggregations `count()` and `countDistinct()` are deprecated in favor of more +descriptive `countValues()` and `countDistinctValues()`, as these aggregations are counting the field values rather than documents. + [[spi]] == SPI diff --git a/documentation/src/main/asciidoc/public/reference/_search-dsl-aggregation.adoc b/documentation/src/main/asciidoc/public/reference/_search-dsl-aggregation.adoc index 2833ced27ba..ad9e9aafff4 100644 --- a/documentation/src/main/asciidoc/public/reference/_search-dsl-aggregation.adoc +++ b/documentation/src/main/asciidoc/public/reference/_search-dsl-aggregation.adoc @@ -174,14 +174,6 @@ i.e. the terms with the most matching documents appear first. Several other orders are available.
-[WARNING] -==== -With the Lucene backend, due to limitations of the current implementation, -using any order other than the default one (by descending count) -may lead to incorrect results. -See https://hibernate.atlassian.net/browse/HSEARCH-3666[HSEARCH-3666] for more information. -==== - You can order entries by ascending term value: // Search 5 anchors backward compatibility @@ -220,6 +212,23 @@ When ordering entries by ascending count in a `terms` aggregation, link:{elasticsearchDocUrl}/search-aggregations-bucket-terms-aggregation.html#search-aggregations-bucket-terms-aggregation-order[hit counts are approximate]. ==== +[[search-dsl-aggregation-terms-value]] +=== Aggregated value + +By default, the aggregated value represents the number of documents that fall into the group of a particular term. +With the `.value(..)` step in the aggregation definition, it is now possible to set the aggregated value to something other than the document count. +The `.value(..)` accepts any other aggregation, which will be applied to the documents within the aggregated group. + +.Total price of books per category +==== +[source, JAVA, indent=0, subs="+callouts"] +---- +include::{sourcedir}/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java[tags=terms-sum] +---- +<1> Define the path and type of the field whose values should be considered as terms for the aggregation. +<2> Define what the aggregated value should represent, e.g. the sum of all book prices within the genre. +==== + [[search-dsl-aggregation-terms-other]] === Other options @@ -318,6 +327,23 @@ include::{sourcedir}/org/hibernate/search/documentation/search/aggregation/Aggre See <> for more information. +[[search-dsl-aggregation-range-value]] +=== Aggregated value + +By default, the aggregated value represents the number of documents that fall into a particular, defined range.
+With the `.value(..)` step in the aggregation definition, it is now possible to set the aggregated value to something other than the document count. +The `.value(..)` accepts any other aggregation, which will be applied to the documents within the aggregated group. + +.Average rating of books per price range +==== +[source, JAVA, indent=0, subs="+callouts"] +---- +include::{sourcedir}/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java[tags=range-avg] +---- +<1> Define the path and type of the field whose value ranges should be considered for the aggregation. +<2> Define what the aggregated value should represent, e.g. the average rating of all books within the price range. +==== + [[search-dsl-aggregation-range-other]] === Other options @@ -329,14 +355,8 @@ but that can be < Define the target field path to which you want to apply the aggregation function and the expected returned type. ==== -=== Count metric aggregation +=== Count documents metric aggregation + +The `count documents` aggregation counts the number of documents. +While it is usually discouraged to use this aggregation at the root level, +as the result would be equivalent to the count returned by the search results in `SearchResultTotal`, +this aggregation can still be useful in defining aggregation values in other, more complex aggregations like +<> or <>. + +.Count the number of the science fiction books +==== +[source, JAVA, indent=0, subs="+callouts"] +---- +include::{sourcedir}/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java[tags=count-documents] +---- +<1> Apply the document count aggregation. For this function a `Long.class` value is always returned. +==== + +=== Count values metric aggregation -The `count` aggregation counts the number of documents in which the field is not empty. +The `count values` aggregation counts the number of non-empty field values. +This aggregation mostly makes sense when the aggregated field is multivalued.
+For single-valued fields this aggregation would result in the number of documents where the aggregated field is present. .Count the number of the science fiction books with prices ==== @@ -403,11 +442,11 @@ include::{sourcedir}/org/hibernate/search/documentation/search/aggregation/Aggre <1> Define the target field path to which you want to apply the aggregation function. For this function a `Long.class` value is always returned. ==== -=== Count distinct metric aggregation +=== Count distinct values metric aggregation -The `count distinct` aggregation counts the number of unique field values. +The `count distinct values` aggregation counts the number of unique field values. -.Count anytime the price field has a different value among all the science fiction books +.Count the number of all different price values among all the science fiction books ==== [source, JAVA, indent=0, subs="+callouts"] ---- @@ -418,7 +457,7 @@ include::{sourcedir}/org/hibernate/search/documentation/search/aggregation/Aggre === Avg metric aggregation -The `avg` aggregation calculates the average value of a given numeric or temporal field among the selected documents. +The `avg` aggregation calculates the average value of a given numeric or temporal field among the matched documents.
.Compute the average price of all science fiction books ==== diff --git a/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java b/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java index fb1e2ef83a1..26a6cd4c373 100644 --- a/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java +++ b/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/AggregationDslIT.java @@ -248,6 +248,141 @@ void terms() { entry( Genre.SCIENCE_FICTION, 3L ) ); } ); + + withinSearchSession( searchSession -> { + AggregationKey> countsByPriceKey = AggregationKey.of( "countsByPrice" ); + SearchResult result = searchSession.search( Book.class ) + .where( f -> f.matchAll() ) + .aggregation( countsByPriceKey, f -> f.terms() + .field( "price", Double.class ) + .orderByCountAscending() ) + .fetch( 20 ); + Map countsByPrice = result.aggregation( countsByPriceKey ); + assertThat( countsByPrice ) + .containsExactly( + entry( 7.99, 1L ), + entry( 15.99, 1L ), + entry( 19.99, 1L ), + entry( 24.99, 1L ) + ); + } ); + } + + @Test + void terms_value() { + withinSearchSession( searchSession -> { + // tag::terms-sum[] + AggregationKey> sumByCategoryKey = AggregationKey.of( "sumByCategory" ); + SearchResult result = searchSession.search( Book.class ) + .where( f -> f.matchAll() ) + .aggregation( + sumByCategoryKey, f -> f.terms() + .field( "genre", Genre.class ) // <1> + .value( f.sum().field( "price", Double.class ) ) // <2> + ) + .fetch( 20 ); + Map sumByPrice = result.aggregation( sumByCategoryKey ); + // end::terms-sum[] + assertThat( sumByPrice ) + .containsExactly( + entry( Genre.SCIENCE_FICTION, 60.97 ), + entry( Genre.CRIME_FICTION, 7.99 ) + ); + } ); + + withinSearchSession( searchSession -> { + // tag::terms-count[] + AggregationKey> countsByPriceKey = AggregationKey.of( "countsByPrice" ); + SearchResult result = 
searchSession.search( Book.class ) + .where( f -> f.matchAll() ) + .aggregation( + countsByPriceKey, f -> f.terms() + .field( "price", Double.class ) // <1> + .value( f.countDocuments() ) // <4> + ) + .fetch( 20 ); + Map countsByPrice = result.aggregation( countsByPriceKey ); + // end::terms-count[] + assertThat( countsByPrice ) + .containsExactly( + entry( 7.99, 1L ), + entry( 15.99, 1L ), + entry( 19.99, 1L ), + entry( 24.99, 1L ) + ); + } ); + + withinSearchSession( searchSession -> { + // tag::terms-count-implicit[] + AggregationKey> countsByPriceKey = AggregationKey.of( "countsByPrice" ); + SearchResult result = searchSession.search( Book.class ) + .where( f -> f.matchAll() ) + .aggregation( + countsByPriceKey, f -> f.terms() + .field( "price", Double.class ) // <1> + ) + .fetch( 20 ); + Map countsByPrice = result.aggregation( countsByPriceKey ); + // end::terms-count-implicit[] + assertThat( countsByPrice ) + .containsExactly( + entry( 7.99, 1L ), + entry( 15.99, 1L ), + entry( 19.99, 1L ), + entry( 24.99, 1L ) + ); + } ); + } + + @Test + void range_value() { + withinSearchSession( searchSession -> { + // tag::range-avg[] + AggregationKey, Double>> avgRatingByPriceKey = AggregationKey.of( "avgRatingByPrice" ); + SearchResult result = searchSession.search( Book.class ) + .where( f -> f.matchAll() ) + .aggregation( + avgRatingByPriceKey, f -> f.range() + .field( "price", Double.class ) // <1> + .range( 0.0, 10.0 ) + .range( 10.0, 20.0 ) + .range( 20.0, null ) + .value( f.avg().field( "ratings", Double.class, ValueModel.RAW ) ) // <2> + ) + .fetch( 20 ); + Map, Double> countsByPrice = result.aggregation( avgRatingByPriceKey ); + // end::range-avg[] + assertThat( countsByPrice ) + .containsExactly( + entry( Range.canonical( 0.0, 10.0 ), 4.0 ), + entry( Range.canonical( 10.0, 20.0 ), 3.6 ), + entry( Range.canonical( 20.0, null ), 3.2 ) + ); + } ); + + withinSearchSession( searchSession -> { + // tag::range-count[] + AggregationKey, Long>> countsByPriceKey = 
AggregationKey.of( "countsByPrice" ); + SearchResult result = searchSession.search( Book.class ) + .where( f -> f.matchAll() ) + .aggregation( + countsByPriceKey, f -> f.range() + .field( "price", Double.class ) // <1> + .range( 0.0, 10.0 ) // <2> + .range( 10.0, 20.0 ) + .range( 20.0, null ) // <3> + .value( f.countDocuments() ) // <4> + ) + .fetch( 20 ); + Map, Long> countsByPrice = result.aggregation( countsByPriceKey ); + // end::range-count[] + assertThat( countsByPrice ) + .containsExactly( + entry( Range.canonical( 0.0, 10.0 ), 1L ), + entry( Range.canonical( 10.0, 20.0 ), 2L ), + entry( Range.canonical( 20.0, null ), 1L ) + ); + } ); } @Test @@ -447,8 +582,8 @@ void sum() { .aggregation( sumPricesKey, f -> f.sum().field( "price", Double.class ) ) // <1> .fetch( 20 ); Double sumPrices = result.aggregation( sumPricesKey ); - assertThat( sumPrices ).isEqualTo( 60.97 ); // end::sums[] + assertThat( sumPrices ).isEqualTo( 60.97 ); } ); } @@ -462,8 +597,8 @@ void min() { .aggregation( oldestReleaseKey, f -> f.min().field( "releaseDate", Date.class ) ) // <1> .fetch( 20 ); Date oldestRelease = result.aggregation( oldestReleaseKey ); - assertThat( oldestRelease ).isEqualTo( Date.valueOf( "1950-12-02" ) ); // end::min[] + assertThat( oldestRelease ).isEqualTo( Date.valueOf( "1950-12-02" ) ); } ); } @@ -477,38 +612,53 @@ void max() { .aggregation( mostRecentReleaseKey, f -> f.max().field( "releaseDate", Date.class ) ) // <1> .fetch( 20 ); Date mostRecentRelease = result.aggregation( mostRecentReleaseKey ); - // end::max[] + assertThat( mostRecentRelease ).isEqualTo( Date.valueOf( "1983-01-01" ) ); } ); } @Test - void count() { + void countDocuments() { withinSearchSession( searchSession -> { - // tag::count[] - AggregationKey countPricesKey = AggregationKey.of( "countPrices" ); + // tag::count-documents[] + AggregationKey countBooksKey = AggregationKey.of( "countBooks" ); SearchResult result = searchSession.search( Book.class ) .where( f -> f.match().field( "genre" 
).matching( Genre.SCIENCE_FICTION ) ) - .aggregation( countPricesKey, f -> f.count().field( "price" ) ) // <1> + .aggregation( countBooksKey, f -> f.countDocuments() ) // <1> .fetch( 20 ); - Long countPrices = result.aggregation( countPricesKey ); + Long countPrices = result.aggregation( countBooksKey ); + // end::count-documents[] assertThat( countPrices ).isEqualTo( 3L ); + } ); + } + + @Test + void countValues() { + withinSearchSession( searchSession -> { + // tag::count[] + AggregationKey countRatingsKey = AggregationKey.of( "countRatings" ); + SearchResult result = searchSession.search( Book.class ) + .where( f -> f.match().field( "genre" ).matching( Genre.SCIENCE_FICTION ) ) + .aggregation( countRatingsKey, f -> f.countValues().field( "ratings" ) ) // <1> + .fetch( 20 ); + Long countPrices = result.aggregation( countRatingsKey ); // end::count[] + assertThat( countPrices ).isEqualTo( 15L ); } ); } @Test - void countDistinct() { + void countDistinctValues() { withinSearchSession( searchSession -> { // tag::count-distinct[] AggregationKey countDistinctPricesKey = AggregationKey.of( "countDistinctPrices" ); SearchResult result = searchSession.search( Book.class ) .where( f -> f.match().field( "genre" ).matching( Genre.SCIENCE_FICTION ) ) - .aggregation( countDistinctPricesKey, f -> f.countDistinct().field( "price" ) ) // <1> + .aggregation( countDistinctPricesKey, f -> f.countDistinctValues().field( "price" ) ) // <1> .fetch( 20 ); Long countDistinctPrices = result.aggregation( countDistinctPricesKey ); - assertThat( countDistinctPrices ).isEqualTo( 3L ); // end::count-distinct[] + assertThat( countDistinctPrices ).isEqualTo( 3L ); } ); } @@ -542,6 +692,7 @@ private void initData() { book1.setPrice( 24.99 ); book1.setGenre( Genre.SCIENCE_FICTION ); book1.setReleaseDate( Date.valueOf( "1950-12-02" ) ); + book1.setRatings( List.of( 5, 5, 4, 2, 0 ) ); addEdition( book1, "Mass Market Paperback, 1st Edition", 9.99 ); addEdition( book1, "Kindle", 9.99 ); @@ -551,6 
+702,7 @@ private void initData() { book2.setPrice( 19.99 ); book2.setGenre( Genre.SCIENCE_FICTION ); book2.setReleaseDate( Date.valueOf( "1953-10-01" ) ); + book2.setRatings( List.of( 5, 5, 3, 3, 5 ) ); addEdition( book2, "Mass Market Paperback, 12th Edition", 4.99 ); addEdition( book2, "Kindle", 19.99 ); @@ -560,6 +712,7 @@ private void initData() { book3.setPrice( 15.99 ); book3.setGenre( Genre.SCIENCE_FICTION ); book3.setReleaseDate( Date.valueOf( "1983-01-01" ) ); + book3.setRatings( List.of( 3, 3, 3, 3, 3 ) ); addEdition( book3, "Mass Market Paperback, 59th Edition", 3.99 ); addEdition( book3, "Kindle", 5.99 ); @@ -569,6 +722,7 @@ private void initData() { book4.setPrice( 7.99 ); book4.setGenre( Genre.CRIME_FICTION ); book4.setReleaseDate( Date.valueOf( "2008-02-05" ) ); + book4.setRatings( List.of( 4, 4, 4, 4, 4 ) ); addEdition( book4, "Mass Market Paperback, 2nd Edition", 10.99 ); addEdition( book4, "Kindle", 12.99 ); diff --git a/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/Book.java b/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/Book.java index 6c58737d78c..458b5e41eee 100644 --- a/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/Book.java +++ b/documentation/src/test/java/org/hibernate/search/documentation/search/aggregation/Book.java @@ -9,6 +9,7 @@ import java.util.List; import jakarta.persistence.CascadeType; +import jakarta.persistence.ElementCollection; import jakarta.persistence.Entity; import jakarta.persistence.Id; import jakarta.persistence.OneToMany; @@ -42,6 +43,10 @@ public class Book { @GenericField(aggregable = Aggregable.YES) private Date releaseDate; + @GenericField(aggregable = Aggregable.YES) + @ElementCollection + private List ratings; + @OneToMany(mappedBy = "book", cascade = CascadeType.ALL) @OrderColumn @IndexedEmbedded(structure = ObjectStructure.NESTED) @@ -97,4 +102,12 @@ public List getEditions() { public void 
setEditions(List editions) { this.editions = editions; } + + public List getRatings() { + return ratings; + } + + public void setRatings(List ratings) { + this.ratings = ratings; + } } diff --git a/engine/src/main/java/org/hibernate/search/engine/backend/types/IndexFieldTraits.java b/engine/src/main/java/org/hibernate/search/engine/backend/types/IndexFieldTraits.java index e020d46c071..70a7811f6a0 100644 --- a/engine/src/main/java/org/hibernate/search/engine/backend/types/IndexFieldTraits.java +++ b/engine/src/main/java/org/hibernate/search/engine/backend/types/IndexFieldTraits.java @@ -103,8 +103,19 @@ private Aggregations() { public static final String SUM = "aggregation:sum"; public static final String MIN = "aggregation:min"; public static final String MAX = "aggregation:max"; - public static final String COUNT = "aggregation:count"; - public static final String COUNT_DISTINCT = "aggregation:countDistinct"; + /** + * @deprecated Use {@link #COUNT_VALUES} instead. + */ + @Deprecated(since = "8.1", forRemoval = true) + public static final String COUNT = "aggregation:countValues"; + /** + * @deprecated Use {@link #COUNT_DISTINCT_VALUES} instead. 
+ */ + @Deprecated(since = "8.1", forRemoval = true) + public static final String COUNT_DISTINCT = "aggregation:countDistinctValues"; + public static final String COUNT_VALUES = "aggregation:countValues"; + public static final String COUNT_DISTINCT_VALUES = "aggregation:countDistinctValues"; + public static final String COUNT_DOCUMENTS = "aggregation:countDocuments"; public static final String AVG = "aggregation:avg"; } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctAggregationFieldStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctValuesAggregationFieldStep.java similarity index 69% rename from engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctAggregationFieldStep.java rename to engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctValuesAggregationFieldStep.java index ab189dde2b3..a5661cc6e35 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctAggregationFieldStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctValuesAggregationFieldStep.java @@ -7,7 +7,7 @@ import java.util.function.Function; import org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; -import org.hibernate.search.engine.search.reference.aggregation.CountAggregationFieldReference; +import org.hibernate.search.engine.search.reference.aggregation.CountValuesAggregationFieldReference; import org.hibernate.search.util.common.annotation.Incubating; /** @@ -17,24 +17,25 @@ * @param The type of factory used to create predicates in {@link AggregationFilterStep#filter(Function)}. */ @Incubating -public interface CountDistinctAggregationFieldStep> { +public interface CountDistinctValuesAggregationFieldStep> { /** - * Target the given field in the count distinct aggregation. 
+ * Target the given field in the count distinct values aggregation. * * @param fieldPath The path to the index field to aggregate. * @return The next step. */ - CountDistinctAggregationOptionsStep field(String fieldPath); + CountDistinctValuesAggregationOptionsStep field(String fieldPath); /** - * Target the given field in the avg aggregation. + * Target the given field in the count distinct values aggregation. * * @param fieldReference The field reference representing a definition of the index field to aggregate. * @return The next step. */ @Incubating - default CountDistinctAggregationOptionsStep field(CountAggregationFieldReference fieldReference) { + default CountDistinctValuesAggregationOptionsStep field( + CountValuesAggregationFieldReference fieldReference) { return field( fieldReference.absolutePath() ); } } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctAggregationOptionsStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctValuesAggregationOptionsStep.java similarity index 86% rename from engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctAggregationOptionsStep.java rename to engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctValuesAggregationOptionsStep.java index 2aca785be30..10eeda388ca 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctAggregationOptionsStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDistinctValuesAggregationOptionsStep.java @@ -17,9 +17,9 @@ * @param The type of factory used to create predicates in {@link #filter(Function)}. 
*/ @Incubating -public interface CountDistinctAggregationOptionsStep< +public interface CountDistinctValuesAggregationOptionsStep< SR, - S extends CountDistinctAggregationOptionsStep, + S extends CountDistinctValuesAggregationOptionsStep, PDF extends TypedSearchPredicateFactory> extends AggregationFinalStep, AggregationFilterStep { diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDocumentsAggregationFinalStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDocumentsAggregationFinalStep.java new file mode 100644 index 00000000000..dd92e01c396 --- /dev/null +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountDocumentsAggregationFinalStep.java @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.engine.search.aggregation.dsl; + +import org.hibernate.search.util.common.annotation.Incubating; + +/** + * The initial and final step in a "count documents" aggregation definition. 
+ */ +@Incubating +public interface CountDocumentsAggregationFinalStep extends AggregationFinalStep { + +} diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountAggregationFieldStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountValuesAggregationFieldStep.java similarity index 64% rename from engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountAggregationFieldStep.java rename to engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountValuesAggregationFieldStep.java index a2ccf66f9f9..0f6a699173e 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountAggregationFieldStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountValuesAggregationFieldStep.java @@ -7,34 +7,34 @@ import java.util.function.Function; import org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; -import org.hibernate.search.engine.search.reference.aggregation.CountAggregationFieldReference; +import org.hibernate.search.engine.search.reference.aggregation.CountValuesAggregationFieldReference; import org.hibernate.search.util.common.annotation.Incubating; /** - * The initial step in a "count" aggregation definition, where the target field can be set. + * The initial step in a "count values" aggregation definition, where the target field can be set. * * @param Scope root type. * @param The type of factory used to create predicates in {@link AggregationFilterStep#filter(Function)}. */ @Incubating -public interface CountAggregationFieldStep> { +public interface CountValuesAggregationFieldStep> { /** - * Target the given field in the count aggregation. + * Target the given field in the count values aggregation. * * @param fieldPath The path to the index field to aggregate. * @return The next step. 
*/ - CountAggregationOptionsStep field(String fieldPath); + CountValuesAggregationOptionsStep field(String fieldPath); /** - * Target the given field in the avg aggregation. + * Target the given field in the count values aggregation. * * @param fieldReference The field reference representing a definition of the index field to aggregate. * @return The next step. */ @Incubating - default CountAggregationOptionsStep field(CountAggregationFieldReference fieldReference) { + default CountValuesAggregationOptionsStep field(CountValuesAggregationFieldReference fieldReference) { return field( fieldReference.absolutePath() ); } } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountAggregationOptionsStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountValuesAggregationOptionsStep.java similarity index 87% rename from engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountAggregationOptionsStep.java rename to engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountValuesAggregationOptionsStep.java index 1e960e720f5..f11ce3cd017 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountAggregationOptionsStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/CountValuesAggregationOptionsStep.java @@ -17,9 +17,9 @@ * @param The type of factory used to create predicates in {@link #filter(Function)}. 
*/ @Incubating -public interface CountAggregationOptionsStep< +public interface CountValuesAggregationOptionsStep< SR, - S extends CountAggregationOptionsStep, + S extends CountValuesAggregationOptionsStep, PDF extends TypedSearchPredicateFactory> extends AggregationFinalStep, AggregationFilterStep { diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/ExtendedSearchAggregationFactory.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/ExtendedSearchAggregationFactory.java index 35d76c953f1..dfbaad2d0c5 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/ExtendedSearchAggregationFactory.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/ExtendedSearchAggregationFactory.java @@ -43,11 +43,28 @@ public interface ExtendedSearchAggregationFactory< @Override MaxAggregationFieldStep max(); + @SuppressWarnings("removal") + @Deprecated(since = "8.1", forRemoval = true) @Override - CountAggregationFieldStep count(); + default CountValuesAggregationFieldStep count() { + return countValues(); + } @Override - CountDistinctAggregationFieldStep countDistinct(); + CountValuesAggregationFieldStep countValues(); + + @SuppressWarnings("removal") + @Deprecated(since = "8.1", forRemoval = true) + @Override + default CountDistinctValuesAggregationFieldStep countDistinct() { + return countDistinctValues(); + } + + @Override + CountDistinctValuesAggregationFieldStep countDistinctValues(); + + @Override + CountDocumentsAggregationFinalStep countDocuments(); @Override AvgAggregationFieldStep avg(); diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/MaxAggregationFieldStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/MaxAggregationFieldStep.java index 3d2c8bf4977..c26d6c8e2d7 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/MaxAggregationFieldStep.java +++ 
b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/MaxAggregationFieldStep.java @@ -45,7 +45,7 @@ public interface MaxAggregationFieldStep MaxAggregationOptionsStep field(String fieldPath, Class type, ValueModel valueModel); /** - * Target the given field in the avg aggregation. + * Target the given field in the max aggregation. * * @param fieldReference The field reference representing a definition of the index field to aggregate. * @param The type of field values. diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/MinAggregationFieldStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/MinAggregationFieldStep.java index 22e9a6991fa..4e5447b07fe 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/MinAggregationFieldStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/MinAggregationFieldStep.java @@ -45,7 +45,7 @@ public interface MinAggregationFieldStep MinAggregationOptionsStep field(String fieldPath, Class type, ValueModel valueModel); /** - * Target the given field in the avg aggregation. + * Target the given field in the min aggregation. * * @param fieldReference The field reference representing a definition of the index field to aggregate. * @param The type of field values. diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationFieldStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationFieldStep.java index 6c4c05a3a63..e717e361954 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationFieldStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationFieldStep.java @@ -26,7 +26,7 @@ public interface RangeAggregationFieldStep The type of field values. * @return The next step. 
*/ - default RangeAggregationRangeStep field(String fieldPath, Class type) { + default RangeAggregationRangeStep field(String fieldPath, Class type) { return field( fieldPath, type, ValueModel.MAPPING ); } @@ -42,7 +42,7 @@ public interface RangeAggregationFieldStep RangeAggregationRangeStep field(String fieldPath, Class type, + default RangeAggregationRangeStep field(String fieldPath, Class type, org.hibernate.search.engine.search.common.ValueConvert convert) { return field( fieldPath, type, org.hibernate.search.engine.search.common.ValueConvert.toValueModel( convert ) ); @@ -58,7 +58,7 @@ public interface RangeAggregationFieldStep RangeAggregationRangeStep field(String fieldPath, Class type, ValueModel valueModel); + RangeAggregationRangeStep field(String fieldPath, Class type, ValueModel valueModel); /** * Target the given field in the range aggregation. @@ -67,7 +67,8 @@ public interface RangeAggregationFieldStep The type of field values. * @return The next step. */ - default RangeAggregationRangeStep field(RangeAggregationFieldReference fieldReference) { + default RangeAggregationRangeStep field( + RangeAggregationFieldReference fieldReference) { return field( fieldReference.absolutePath(), fieldReference.aggregationType(), fieldReference.valueModel() ); } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeMoreStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeMoreStep.java index a9ec19feda3..2094836036c 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeMoreStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeMoreStep.java @@ -20,14 +20,16 @@ * @param The type of factory used to create predicates in {@link #filter(Function)}. * @param The type of the next step. * @param The type of the targeted field. 
+ * @param The type of the aggregated value. */ public interface RangeAggregationRangeMoreStep< SR, - S extends RangeAggregationRangeMoreStep, - N extends RangeAggregationOptionsStep, Long>>, + S extends RangeAggregationRangeMoreStep, + N extends RangeAggregationOptionsStep, A>>, PDF extends TypedSearchPredicateFactory, - F> - extends RangeAggregationOptionsStep, Long>>, - RangeAggregationRangeStep { - + F, + A> + extends RangeAggregationOptionsStep, A>>, + RangeAggregationRangeStep, + RangeAggregationValueStep { } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeStep.java index 42e99fc17dd..c1a6c1f8e0e 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationRangeStep.java @@ -17,12 +17,14 @@ * @param The type of the next step. * @param The type of factory used to create predicates in {@link AggregationFilterStep#filter(Function)}. * @param The type of the targeted field. + * @param The type of the aggregated value. 
*/ public interface RangeAggregationRangeStep< SR, - N extends RangeAggregationRangeMoreStep, + N extends RangeAggregationRangeMoreStep, PDF extends TypedSearchPredicateFactory, - F> { + F, + A> { /** * Add a bucket for the range {@code [lowerBound, upperBound)} (lower bound included, upper bound excluded), diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationValueStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationValueStep.java new file mode 100644 index 00000000000..6b7bce23f0c --- /dev/null +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/RangeAggregationValueStep.java @@ -0,0 +1,54 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.engine.search.aggregation.dsl; + +import java.util.Map; +import java.util.function.Function; + +import org.hibernate.search.engine.search.aggregation.SearchAggregation; +import org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; +import org.hibernate.search.util.common.annotation.Incubating; +import org.hibernate.search.util.common.data.Range; + +/** + * The step in a "range" aggregation definition where the aggregation value for the range can be set. + * + * @param Scope root type. + * @param The type of factory used to create predicates in {@link RangeAggregationOptionsStep#filter(Function)}. + * @param The type of the targeted field. + */ +@Incubating +public interface RangeAggregationValueStep< + SR, + PDF extends TypedSearchPredicateFactory, + F> { + /** + * Specify which aggregation to apply to the documents within the range. + *

+ * This allows to "group" the documents by "ranges" and then apply one of the aggregations from {@link SearchAggregationFactory} + * to the documents in that group. + * + * @param aggregation The aggregation to apply to the documents within each range. + * @return The next step in range aggregation definition. + * @param The type of the aggregated results within a range. + */ + @Incubating + RangeAggregationOptionsStep, T>> value(SearchAggregation aggregation); + + /** + * Specify which aggregation to apply to the documents within the range. + *

+ * This allows to "group" the documents by "ranges" and then apply one of the aggregations from {@link SearchAggregationFactory} + * to the documents in that group. + * + * @param aggregation The aggregation to apply to the documents within each range. + * @return The next step in range aggregation definition. + * @param The type of the aggregated results within a range. + */ + @Incubating + default RangeAggregationOptionsStep, T>> value(AggregationFinalStep aggregation) { + return value( aggregation.toAggregation() ); + } +} diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SearchAggregationFactory.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SearchAggregationFactory.java index d2cac72618b..d4066a83569 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SearchAggregationFactory.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SearchAggregationFactory.java @@ -72,6 +72,8 @@ public interface SearchAggregationFactory { /** * Perform the sum metric aggregation. + *

+ * Sums up the field values. * * @return The next step. */ @@ -80,6 +82,8 @@ public interface SearchAggregationFactory { /** * Perform the min metric aggregation. + *

+ * Provides the minimum value among the field values. * * @return The next step. */ @@ -88,6 +92,8 @@ public interface SearchAggregationFactory { /** * Perform the max metric aggregation. + *

+ * Provides the maximum value among the field values. * * @return The next step. */ @@ -95,23 +101,73 @@ public interface SearchAggregationFactory { MaxAggregationFieldStep max(); /** - * Perform the count metric aggregation. + * Perform the count values metric aggregation. + *

+ * Counts the number of non-empty field values. + *

+ * As this aggregation counts the field values for multi-valued fields the resulting count + may be greater than the number of the matched documents. + * + * @return The next step. + * @deprecated Use {@link #countValues()} instead. + */ + @Deprecated(since = "8.1", forRemoval = true) + @Incubating + default CountValuesAggregationFieldStep count() { + return countValues(); + } + + /** + * Perform the count values metric aggregation. + *

+ * Counts the number of non-empty field values. + *

+ * As this aggregation counts the field values for multi-valued fields the resulting count + may be greater than the number of the matched documents. + * + * @return The next step. + */ + CountValuesAggregationFieldStep countValues(); + + /** + * Perform the count distinct values metric aggregation. + *

+ * Counts the number of unique field values. + * + * @return The next step. + * @deprecated Use {@link #countDistinctValues()} instead. + */ + @Deprecated(since = "8.1", forRemoval = true) + @Incubating + default CountDistinctValuesAggregationFieldStep countDistinct() { + return countDistinctValues(); + } + + /** + * Perform the count distinct values metric aggregation. + *

+ * Counts the number of unique field values. * * @return The next step. */ @Incubating - CountAggregationFieldStep count(); + CountDistinctValuesAggregationFieldStep countDistinctValues(); /** - * Perform the count distinct metric aggregation. + * Perform the count documents metric aggregation. + *

+ * Counts the number of matched documents. + * This aggregation may be useful for building {@link #range()} or {@link #terms()} aggregations. * * @return The next step. */ @Incubating - CountDistinctAggregationFieldStep countDistinct(); + CountDocumentsAggregationFinalStep countDocuments(); /** - * Perform the avg metric aggregation. + * Perform the avg metric aggregation. +

+ * Calculates the average value of a given numeric or temporal field among the matched documents. * * @return the next step. */ diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SumAggregationFieldStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SumAggregationFieldStep.java index 9d7e1aba7c5..d4570a76200 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SumAggregationFieldStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/SumAggregationFieldStep.java @@ -45,7 +45,7 @@ public interface SumAggregationFieldStep SumAggregationOptionsStep field(String fieldPath, Class type, ValueModel valueModel); /** - * Target the given field in the avg aggregation. + * Target the given field in the sum aggregation. * * @param fieldReference The field reference representing a definition of the index field to aggregate. * @param The type of field values. diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationFieldStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationFieldStep.java index 6d4ae9fd595..f61a80db2d4 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationFieldStep.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationFieldStep.java @@ -27,7 +27,7 @@ public interface TermsAggregationFieldStep The type of field values. * @return The next step.
*/ - default TermsAggregationOptionsStep> field(String fieldPath, Class type) { + default TermsAggregationValueStep> field(String fieldPath, Class type) { return field( fieldPath, type, ValueModel.MAPPING ); } @@ -43,7 +43,7 @@ public interface TermsAggregationFieldStep TermsAggregationOptionsStep> field(String fieldPath, Class type, + default TermsAggregationValueStep> field(String fieldPath, Class type, org.hibernate.search.engine.search.common.ValueConvert convert) { return field( fieldPath, type, org.hibernate.search.engine.search.common.ValueConvert.toValueModel( convert ) ); @@ -59,7 +59,7 @@ public interface TermsAggregationFieldStep TermsAggregationOptionsStep> field(String fieldPath, Class type, + TermsAggregationValueStep> field(String fieldPath, Class type, ValueModel valueModel); /** @@ -69,7 +69,7 @@ public interface TermsAggregationFieldStep The type of field values. * @return The next step. */ - default TermsAggregationOptionsStep> field( + default TermsAggregationValueStep> field( TermsAggregationFieldReference fieldReference) { return field( fieldReference.absolutePath(), fieldReference.aggregationType(), fieldReference.valueModel() ); } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationValueStep.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationValueStep.java new file mode 100644 index 00000000000..0e4420e8559 --- /dev/null +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TermsAggregationValueStep.java @@ -0,0 +1,56 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.engine.search.aggregation.dsl; + +import java.util.Map; +import java.util.function.Function; + +import org.hibernate.search.engine.search.aggregation.SearchAggregation; +import org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; +import org.hibernate.search.util.common.annotation.Incubating; + +/** + * The step in a "terms" aggregation definition where the aggregation value for the term can be set. + * + * @param Scope root type. + * @param The type of factory used to create predicates in {@link TermsAggregationOptionsStep#filter(Function)}. + * @param The type of the targeted field. + */ +@Incubating +public interface TermsAggregationValueStep< + SR, + S extends TermsAggregationOptionsStep, + PDF extends TypedSearchPredicateFactory, + F, + A> extends TermsAggregationOptionsStep { + + /** + * Specify which aggregation to apply to the documents with same terms. + *

+ * This allows to "group" the documents by "terms" and then apply one of the aggregations from {@link SearchAggregationFactory} + * to the documents in that group. + * + * @param aggregation The aggregation to apply to the documents for each term. + * @return The next step in terms aggregation definition. + * @param The type of the aggregated results for a term. + */ + @Incubating + TermsAggregationOptionsStep> value(SearchAggregation aggregation); + + /** + * Specify which aggregation to apply to the documents with same terms. + *

+ * This allows to "group" the documents by "terms" and then apply one of the aggregations from {@link SearchAggregationFactory} + * to the documents in that group. + * + * @param aggregation The aggregation to apply to the documents for each term. + * @return The next step in terms aggregation definition. + * @param The type of the aggregated results for a term. + */ + @Incubating + default TermsAggregationOptionsStep> value(AggregationFinalStep aggregation) { + return value( aggregation.toAggregation() ); + } +} diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TypedSearchAggregationFactory.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TypedSearchAggregationFactory.java index 517e72a8e4d..3139f2eab7f 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TypedSearchAggregationFactory.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/TypedSearchAggregationFactory.java @@ -73,6 +73,8 @@ public interface TypedSearchAggregationFactory extends SearchAggregationFact /** * Perform the sum metric aggregation. + *

+ * Sums up the field values. * * @return The next step. */ @@ -82,6 +84,8 @@ public interface TypedSearchAggregationFactory extends SearchAggregationFact /** * Perform the min metric aggregation. + *

+ * Provides the minimum value among the field values. * * @return The next step. */ @@ -91,6 +95,8 @@ public interface TypedSearchAggregationFactory extends SearchAggregationFact /** * Perform the max metric aggregation. + *

+ * Provides the maximum value among the field values. * * @return The next step. */ @@ -98,26 +104,83 @@ public interface TypedSearchAggregationFactory extends SearchAggregationFact @Incubating MaxAggregationFieldStep max(); + /** - * Perform the count metric aggregation. + * Perform the count values metric aggregation. + *

+ * Counts the number of non-empty field values. + *

+ * As this aggregation counts the field values for multi-valued fields the resulting count + may be greater than the number of the matched documents. + * + * @return The next step. + * @deprecated Use {@link #countValues()} instead. + */ + @SuppressWarnings("removal") + @Deprecated(since = "8.1", forRemoval = true) + @Incubating + @Override + default CountValuesAggregationFieldStep count() { + return countValues(); + } + + /** + * Perform the count values metric aggregation. + *

+ * Counts the number of non-empty field values. + *

+ * As this aggregation counts the field values for multi-valued fields the resulting count + may be greater than the number of the matched documents. * * @return The next step. */ @Override @Incubating - CountAggregationFieldStep count(); + CountValuesAggregationFieldStep countValues(); /** - * Perform the count distinct metric aggregation. + * Perform the count distinct values metric aggregation. + *

+ * Counts the number of unique field values. + * + * @return The next step. + * @deprecated Use {@link #countDistinctValues()} instead. + */ + @SuppressWarnings("removal") + @Deprecated(since = "8.1", forRemoval = true) + @Incubating + @Override + default CountDistinctValuesAggregationFieldStep countDistinct() { + return countDistinctValues(); + } + + /** + * Perform the count distinct values metric aggregation. + *

+ * Counts the number of unique field values. + * + * @return The next step. + */ + @Override + @Incubating + CountDistinctValuesAggregationFieldStep countDistinctValues(); + + /** + * Perform the count documents metric aggregation. + *

+ * Counts the number of matched documents. + * This aggregation may be useful for building {@link #range()} or {@link #terms()} aggregations. * * @return The next step. */ @Override @Incubating - CountDistinctAggregationFieldStep countDistinct(); + CountDocumentsAggregationFinalStep countDocuments(); /** * Perform the avg metric aggregation. + *

+ * Calculates the average value of a given numeric or temporal field among the matched documents. * * @return the next step. */ diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctAggregationFieldStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctValuesAggregationFieldStepImpl.java similarity index 62% rename from engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctAggregationFieldStepImpl.java rename to engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctValuesAggregationFieldStepImpl.java index fd67730f29f..d98d0ffbaeb 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctAggregationFieldStepImpl.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctValuesAggregationFieldStepImpl.java @@ -4,25 +4,25 @@ */ package org.hibernate.search.engine.search.aggregation.dsl.impl; -import org.hibernate.search.engine.search.aggregation.dsl.CountDistinctAggregationFieldStep; -import org.hibernate.search.engine.search.aggregation.dsl.CountDistinctAggregationOptionsStep; +import org.hibernate.search.engine.search.aggregation.dsl.CountDistinctValuesAggregationFieldStep; +import org.hibernate.search.engine.search.aggregation.dsl.CountDistinctValuesAggregationOptionsStep; import org.hibernate.search.engine.search.aggregation.dsl.spi.SearchAggregationDslContext; import org.hibernate.search.engine.search.aggregation.spi.AggregationTypeKeys; import org.hibernate.search.engine.search.aggregation.spi.SearchFilterableAggregationBuilder; import org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; -public class CountDistinctAggregationFieldStepImpl> - implements CountDistinctAggregationFieldStep { +public class CountDistinctValuesAggregationFieldStepImpl> + implements 
CountDistinctValuesAggregationFieldStep { private final SearchAggregationDslContext dslContext; - public CountDistinctAggregationFieldStepImpl(SearchAggregationDslContext dslContext) { + public CountDistinctValuesAggregationFieldStepImpl(SearchAggregationDslContext dslContext) { this.dslContext = dslContext; } @Override - public CountDistinctAggregationOptionsStep field(String fieldPath) { + public CountDistinctValuesAggregationOptionsStep field(String fieldPath) { SearchFilterableAggregationBuilder builder = dslContext.scope() - .fieldQueryElement( fieldPath, AggregationTypeKeys.COUNT_DISTINCT ); - return new CountDistinctAggregationOptionsStepImpl<>( builder, dslContext ); + .fieldQueryElement( fieldPath, AggregationTypeKeys.COUNT_DISTINCT_VALUES ); + return new CountDistinctValuesAggregationOptionsStepImpl<>( builder, dslContext ); } } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctAggregationOptionsStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctValuesAggregationOptionsStepImpl.java similarity index 72% rename from engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctAggregationOptionsStepImpl.java rename to engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctValuesAggregationOptionsStepImpl.java index 293349b2e5a..f776f109a61 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctAggregationOptionsStepImpl.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDistinctValuesAggregationOptionsStepImpl.java @@ -7,33 +7,33 @@ import java.util.function.Function; import org.hibernate.search.engine.search.aggregation.SearchAggregation; -import org.hibernate.search.engine.search.aggregation.dsl.CountDistinctAggregationOptionsStep; +import 
org.hibernate.search.engine.search.aggregation.dsl.CountDistinctValuesAggregationOptionsStep; import org.hibernate.search.engine.search.aggregation.dsl.spi.SearchAggregationDslContext; import org.hibernate.search.engine.search.aggregation.spi.SearchFilterableAggregationBuilder; import org.hibernate.search.engine.search.predicate.SearchPredicate; import org.hibernate.search.engine.search.predicate.dsl.PredicateFinalStep; import org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; -class CountDistinctAggregationOptionsStepImpl> - implements CountDistinctAggregationOptionsStep, PDF> { +class CountDistinctValuesAggregationOptionsStepImpl> + implements CountDistinctValuesAggregationOptionsStep, PDF> { private final SearchFilterableAggregationBuilder builder; private final SearchAggregationDslContext dslContext; - CountDistinctAggregationOptionsStepImpl(SearchFilterableAggregationBuilder builder, + CountDistinctValuesAggregationOptionsStepImpl(SearchFilterableAggregationBuilder builder, SearchAggregationDslContext dslContext) { this.builder = builder; this.dslContext = dslContext; } @Override - public CountDistinctAggregationOptionsStepImpl filter( + public CountDistinctValuesAggregationOptionsStepImpl filter( Function clauseContributor) { SearchPredicate predicate = clauseContributor.apply( dslContext.predicateFactory() ).toPredicate(); return filter( predicate ); } @Override - public CountDistinctAggregationOptionsStepImpl filter(SearchPredicate searchPredicate) { + public CountDistinctValuesAggregationOptionsStepImpl filter(SearchPredicate searchPredicate) { builder.filter( searchPredicate ); return this; } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDocumentsAggregationFinalStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDocumentsAggregationFinalStepImpl.java new file mode 100644 index 00000000000..ef32043df96 --- /dev/null +++ 
b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountDocumentsAggregationFinalStepImpl.java @@ -0,0 +1,25 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.engine.search.aggregation.dsl.impl; + +import org.hibernate.search.engine.search.aggregation.SearchAggregation; +import org.hibernate.search.engine.search.aggregation.dsl.CountDocumentsAggregationFinalStep; +import org.hibernate.search.engine.search.aggregation.dsl.spi.SearchAggregationDslContext; +import org.hibernate.search.engine.search.aggregation.spi.AggregationTypeKeys; + +public class CountDocumentsAggregationFinalStepImpl + implements CountDocumentsAggregationFinalStep { + private final SearchAggregationDslContext dslContext; + + public CountDocumentsAggregationFinalStepImpl(SearchAggregationDslContext dslContext) { + this.dslContext = dslContext; + } + + @Override + public SearchAggregation toAggregation() { + return dslContext.scope() + .rootQueryElement( AggregationTypeKeys.COUNT_DOCUMENTS ).type().build(); + } +} diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountAggregationFieldStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountValuesAggregationFieldStepImpl.java similarity index 51% rename from engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountAggregationFieldStepImpl.java rename to engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountValuesAggregationFieldStepImpl.java index 4a2ee3c98dd..3f890b8b6d3 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountAggregationFieldStepImpl.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountValuesAggregationFieldStepImpl.java @@ -4,25 +4,25 @@ */ package org.hibernate.search.engine.search.aggregation.dsl.impl; -import 
org.hibernate.search.engine.search.aggregation.dsl.CountAggregationFieldStep; -import org.hibernate.search.engine.search.aggregation.dsl.CountAggregationOptionsStep; +import org.hibernate.search.engine.search.aggregation.dsl.CountValuesAggregationFieldStep; +import org.hibernate.search.engine.search.aggregation.dsl.CountValuesAggregationOptionsStep; import org.hibernate.search.engine.search.aggregation.dsl.spi.SearchAggregationDslContext; import org.hibernate.search.engine.search.aggregation.spi.AggregationTypeKeys; import org.hibernate.search.engine.search.aggregation.spi.SearchFilterableAggregationBuilder; import org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; -public class CountAggregationFieldStepImpl> - implements CountAggregationFieldStep { +public class CountValuesAggregationFieldStepImpl> + implements CountValuesAggregationFieldStep { private final SearchAggregationDslContext dslContext; - public CountAggregationFieldStepImpl(SearchAggregationDslContext dslContext) { + public CountValuesAggregationFieldStepImpl(SearchAggregationDslContext dslContext) { this.dslContext = dslContext; } @Override - public CountAggregationOptionsStep field(String fieldPath) { + public CountValuesAggregationOptionsStep field(String fieldPath) { SearchFilterableAggregationBuilder builder = dslContext.scope() - .fieldQueryElement( fieldPath, AggregationTypeKeys.COUNT ); - return new CountAggregationOptionsStepImpl<>( builder, dslContext ); + .fieldQueryElement( fieldPath, AggregationTypeKeys.COUNT_VALUES ); + return new CountValuesAggregationOptionsStepImpl<>( builder, dslContext ); } } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountAggregationOptionsStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountValuesAggregationOptionsStepImpl.java similarity index 71% rename from 
engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountAggregationOptionsStepImpl.java rename to engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountValuesAggregationOptionsStepImpl.java index 58cb8bad195..b1547ef39f3 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountAggregationOptionsStepImpl.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/CountValuesAggregationOptionsStepImpl.java @@ -7,33 +7,33 @@ import java.util.function.Function; import org.hibernate.search.engine.search.aggregation.SearchAggregation; -import org.hibernate.search.engine.search.aggregation.dsl.CountAggregationOptionsStep; +import org.hibernate.search.engine.search.aggregation.dsl.CountValuesAggregationOptionsStep; import org.hibernate.search.engine.search.aggregation.dsl.spi.SearchAggregationDslContext; import org.hibernate.search.engine.search.aggregation.spi.SearchFilterableAggregationBuilder; import org.hibernate.search.engine.search.predicate.SearchPredicate; import org.hibernate.search.engine.search.predicate.dsl.PredicateFinalStep; import org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; -class CountAggregationOptionsStepImpl> - implements CountAggregationOptionsStep, PDF> { +class CountValuesAggregationOptionsStepImpl> + implements CountValuesAggregationOptionsStep, PDF> { private final SearchFilterableAggregationBuilder builder; private final SearchAggregationDslContext dslContext; - CountAggregationOptionsStepImpl(SearchFilterableAggregationBuilder builder, + CountValuesAggregationOptionsStepImpl(SearchFilterableAggregationBuilder builder, SearchAggregationDslContext dslContext) { this.builder = builder; this.dslContext = dslContext; } @Override - public CountAggregationOptionsStepImpl filter( + public CountValuesAggregationOptionsStepImpl filter( Function clauseContributor) { SearchPredicate predicate = 
clauseContributor.apply( dslContext.predicateFactory() ).toPredicate(); return filter( predicate ); } @Override - public CountAggregationOptionsStepImpl filter(SearchPredicate searchPredicate) { + public CountValuesAggregationOptionsStepImpl filter(SearchPredicate searchPredicate) { builder.filter( searchPredicate ); return this; } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/RangeAggregationFieldStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/RangeAggregationFieldStepImpl.java index dcfb149186d..f696fa7337d 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/RangeAggregationFieldStepImpl.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/RangeAggregationFieldStepImpl.java @@ -22,11 +22,11 @@ public RangeAggregationFieldStepImpl(SearchAggregationDslContext RangeAggregationRangeStep field(String fieldPath, Class type, + public RangeAggregationRangeStep field(String fieldPath, Class type, ValueModel valueModel) { Contracts.assertNotNull( fieldPath, "fieldPath" ); Contracts.assertNotNull( type, "type" ); - RangeAggregationBuilder builder = dslContext.scope() + RangeAggregationBuilder builder = dslContext.scope() .fieldQueryElement( fieldPath, AggregationTypeKeys.RANGE ).type( type, valueModel ); return new RangeAggregationRangeStepImpl<>( builder, dslContext ); } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/RangeAggregationRangeStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/RangeAggregationRangeStepImpl.java index ff5e3c575f3..cd18a0f8580 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/RangeAggregationRangeStepImpl.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/RangeAggregationRangeStepImpl.java @@ -19,31 +19,32 @@ import 
org.hibernate.search.util.common.data.Range; import org.hibernate.search.util.common.impl.Contracts; -class RangeAggregationRangeStepImpl, F> - implements RangeAggregationRangeStep, PDF, F>, +class RangeAggregationRangeStepImpl, F, A> + implements RangeAggregationRangeStep, PDF, F, A>, RangeAggregationRangeMoreStep, - RangeAggregationRangeStepImpl, + RangeAggregationRangeStepImpl, + RangeAggregationRangeStepImpl, PDF, - F> { - private final RangeAggregationBuilder builder; + F, + A> { + private final RangeAggregationBuilder builder; private final SearchAggregationDslContext dslContext; - RangeAggregationRangeStepImpl(RangeAggregationBuilder builder, + RangeAggregationRangeStepImpl(RangeAggregationBuilder builder, SearchAggregationDslContext dslContext) { this.builder = builder; this.dslContext = dslContext; } @Override - public RangeAggregationRangeStepImpl range(Range range) { + public RangeAggregationRangeStepImpl range(Range range) { Contracts.assertNotNull( range, "range" ); builder.range( range ); return this; } @Override - public RangeAggregationRangeStepImpl ranges(Collection> ranges) { + public RangeAggregationRangeStepImpl ranges(Collection> ranges) { Contracts.assertNotNull( ranges, "ranges" ); for ( Range range : ranges ) { range( range ); @@ -52,7 +53,7 @@ public RangeAggregationRangeStepImpl ranges(Collection filter( + public RangeAggregationRangeStepImpl filter( Function clauseContributor) { SearchPredicate predicate = clauseContributor.apply( dslContext.predicateFactory() ).toPredicate(); @@ -60,13 +61,18 @@ public RangeAggregationRangeStepImpl filter( } @Override - public RangeAggregationRangeStepImpl filter(SearchPredicate searchPredicate) { + public RangeAggregationRangeStepImpl filter(SearchPredicate searchPredicate) { builder.filter( searchPredicate ); return this; } @Override - public SearchAggregation, Long>> toAggregation() { + public SearchAggregation, A>> toAggregation() { return builder.build(); } + + @Override + public 
RangeAggregationRangeStepImpl value(SearchAggregation aggregation) { + return new RangeAggregationRangeStepImpl<>( builder.withValue( aggregation ), dslContext ); + } } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationFieldStepImpl.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationFieldStepImpl.java index 6f4d7ac29af..8b37ad8bde2 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationFieldStepImpl.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationFieldStepImpl.java @@ -7,7 +7,7 @@ import java.util.Map; import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationFieldStep; -import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationOptionsStep; +import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationValueStep; import org.hibernate.search.engine.search.aggregation.dsl.spi.SearchAggregationDslContext; import org.hibernate.search.engine.search.aggregation.spi.AggregationTypeKeys; import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; @@ -24,11 +24,11 @@ public TermsAggregationFieldStepImpl(SearchAggregationDslContext TermsAggregationOptionsStep> field(String fieldPath, Class type, + public TermsAggregationValueStep> field(String fieldPath, Class type, ValueModel valueModel) { Contracts.assertNotNull( fieldPath, "fieldPath" ); Contracts.assertNotNull( type, "type" ); - TermsAggregationBuilder builder = dslContext.scope() + TermsAggregationBuilder builder = dslContext.scope() .fieldQueryElement( fieldPath, AggregationTypeKeys.TERMS ).type( type, valueModel ); return new TermsAggregationOptionsStepImpl<>( builder, dslContext ); } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationOptionsStepImpl.java 
b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationOptionsStepImpl.java index 687a58cb583..3c7fef57e17 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationOptionsStepImpl.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/impl/TermsAggregationOptionsStepImpl.java @@ -9,6 +9,7 @@ import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationOptionsStep; +import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationValueStep; import org.hibernate.search.engine.search.aggregation.dsl.spi.SearchAggregationDslContext; import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; import org.hibernate.search.engine.search.predicate.SearchPredicate; @@ -16,57 +17,57 @@ import org.hibernate.search.engine.search.predicate.dsl.TypedSearchPredicateFactory; import org.hibernate.search.util.common.impl.Contracts; -class TermsAggregationOptionsStepImpl, F> - implements TermsAggregationOptionsStep, PDF, F, Map> { - private final TermsAggregationBuilder builder; +class TermsAggregationOptionsStepImpl, F, V> + implements TermsAggregationValueStep, PDF, F, Map> { + private final TermsAggregationBuilder builder; private final SearchAggregationDslContext dslContext; - TermsAggregationOptionsStepImpl(TermsAggregationBuilder builder, + TermsAggregationOptionsStepImpl(TermsAggregationBuilder builder, SearchAggregationDslContext dslContext) { this.builder = builder; this.dslContext = dslContext; } @Override - public TermsAggregationOptionsStepImpl orderByCountDescending() { + public TermsAggregationOptionsStepImpl orderByCountDescending() { builder.orderByCountDescending(); return this; } @Override - public TermsAggregationOptionsStepImpl orderByCountAscending() { + public TermsAggregationOptionsStepImpl orderByCountAscending() { 
builder.orderByCountAscending(); return this; } @Override - public TermsAggregationOptionsStepImpl orderByTermAscending() { + public TermsAggregationOptionsStepImpl orderByTermAscending() { builder.orderByTermAscending(); return this; } @Override - public TermsAggregationOptionsStepImpl orderByTermDescending() { + public TermsAggregationOptionsStepImpl orderByTermDescending() { builder.orderByTermDescending(); return this; } @Override - public TermsAggregationOptionsStepImpl minDocumentCount(int minDocumentCount) { + public TermsAggregationOptionsStepImpl minDocumentCount(int minDocumentCount) { Contracts.assertPositiveOrZero( minDocumentCount, "minDocumentCount" ); builder.minDocumentCount( minDocumentCount ); return this; } @Override - public TermsAggregationOptionsStepImpl maxTermCount(int maxTermCount) { + public TermsAggregationOptionsStepImpl maxTermCount(int maxTermCount) { Contracts.assertStrictlyPositive( maxTermCount, "maxTermCount" ); builder.maxTermCount( maxTermCount ); return this; } @Override - public TermsAggregationOptionsStepImpl filter( + public TermsAggregationOptionsStepImpl filter( Function clauseContributor) { SearchPredicate predicate = clauseContributor.apply( dslContext.predicateFactory() ).toPredicate(); @@ -74,13 +75,18 @@ public TermsAggregationOptionsStepImpl filter( } @Override - public TermsAggregationOptionsStepImpl filter(SearchPredicate searchPredicate) { + public TermsAggregationOptionsStepImpl filter(SearchPredicate searchPredicate) { builder.filter( searchPredicate ); return this; } @Override - public SearchAggregation> toAggregation() { + public SearchAggregation> toAggregation() { return builder.build(); } + + @Override + public TermsAggregationOptionsStep> value(SearchAggregation aggregation) { + return new TermsAggregationOptionsStepImpl<>( builder.withValue( aggregation ), dslContext ); + } } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/spi/AbstractSearchAggregationFactory.java 
b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/spi/AbstractSearchAggregationFactory.java index 61c1d352c07..582964579c2 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/spi/AbstractSearchAggregationFactory.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/dsl/spi/AbstractSearchAggregationFactory.java @@ -9,8 +9,9 @@ import org.hibernate.search.engine.common.dsl.spi.DslExtensionState; import org.hibernate.search.engine.search.aggregation.dsl.AggregationFinalStep; import org.hibernate.search.engine.search.aggregation.dsl.AvgAggregationFieldStep; -import org.hibernate.search.engine.search.aggregation.dsl.CountAggregationFieldStep; -import org.hibernate.search.engine.search.aggregation.dsl.CountDistinctAggregationFieldStep; +import org.hibernate.search.engine.search.aggregation.dsl.CountDistinctValuesAggregationFieldStep; +import org.hibernate.search.engine.search.aggregation.dsl.CountDocumentsAggregationFinalStep; +import org.hibernate.search.engine.search.aggregation.dsl.CountValuesAggregationFieldStep; import org.hibernate.search.engine.search.aggregation.dsl.ExtendedSearchAggregationFactory; import org.hibernate.search.engine.search.aggregation.dsl.MaxAggregationFieldStep; import org.hibernate.search.engine.search.aggregation.dsl.MinAggregationFieldStep; @@ -19,8 +20,9 @@ import org.hibernate.search.engine.search.aggregation.dsl.SumAggregationFieldStep; import org.hibernate.search.engine.search.aggregation.dsl.TermsAggregationFieldStep; import org.hibernate.search.engine.search.aggregation.dsl.impl.AvgAggregationFieldStepImpl; -import org.hibernate.search.engine.search.aggregation.dsl.impl.CountAggregationFieldStepImpl; -import org.hibernate.search.engine.search.aggregation.dsl.impl.CountDistinctAggregationFieldStepImpl; +import org.hibernate.search.engine.search.aggregation.dsl.impl.CountDistinctValuesAggregationFieldStepImpl; +import 
org.hibernate.search.engine.search.aggregation.dsl.impl.CountDocumentsAggregationFinalStepImpl; +import org.hibernate.search.engine.search.aggregation.dsl.impl.CountValuesAggregationFieldStepImpl; import org.hibernate.search.engine.search.aggregation.dsl.impl.MaxAggregationFieldStepImpl; import org.hibernate.search.engine.search.aggregation.dsl.impl.MinAggregationFieldStepImpl; import org.hibernate.search.engine.search.aggregation.dsl.impl.RangeAggregationFieldStepImpl; @@ -70,13 +72,18 @@ public MaxAggregationFieldStep max() { } @Override - public CountAggregationFieldStep count() { - return new CountAggregationFieldStepImpl<>( dslContext ); + public CountValuesAggregationFieldStep countValues() { + return new CountValuesAggregationFieldStepImpl<>( dslContext ); } @Override - public CountDistinctAggregationFieldStep countDistinct() { - return new CountDistinctAggregationFieldStepImpl<>( dslContext ); + public CountDistinctValuesAggregationFieldStep countDistinctValues() { + return new CountDistinctValuesAggregationFieldStepImpl<>( dslContext ); + } + + @Override + public CountDocumentsAggregationFinalStep countDocuments() { + return new CountDocumentsAggregationFinalStepImpl( dslContext ); } public AvgAggregationFieldStep avg() { diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/AggregationTypeKeys.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/AggregationTypeKeys.java index 0c791a21ece..3edc3e6a5e5 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/AggregationTypeKeys.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/AggregationTypeKeys.java @@ -24,11 +24,14 @@ private AggregationTypeKeys() { of( IndexFieldTraits.Aggregations.MIN ); public static final SearchQueryElementTypeKey MAX = of( IndexFieldTraits.Aggregations.MAX ); - public static final SearchQueryElementTypeKey> COUNT = - of( IndexFieldTraits.Aggregations.COUNT ); - 
public static final SearchQueryElementTypeKey> COUNT_DISTINCT = - of( IndexFieldTraits.Aggregations.COUNT_DISTINCT ); + public static final SearchQueryElementTypeKey> COUNT_VALUES = + of( IndexFieldTraits.Aggregations.COUNT_VALUES ); + public static final SearchQueryElementTypeKey> COUNT_DISTINCT_VALUES = + of( IndexFieldTraits.Aggregations.COUNT_DISTINCT_VALUES ); public static final SearchQueryElementTypeKey AVG = of( IndexFieldTraits.Aggregations.AVG ); + public static final SearchQueryElementTypeKey COUNT_DOCUMENTS = + of( IndexFieldTraits.Aggregations.COUNT_DOCUMENTS ); + } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/CountDocumentAggregationBuilder.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/CountDocumentAggregationBuilder.java new file mode 100644 index 00000000000..29c7a79f423 --- /dev/null +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/CountDocumentAggregationBuilder.java @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.engine.search.aggregation.spi; + +public interface CountDocumentAggregationBuilder extends SearchAggregationBuilder { + + interface TypeSelector { + CountDocumentAggregationBuilder type(); + } + +} diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/RangeAggregationBuilder.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/RangeAggregationBuilder.java index 18cb723153d..15c3258e2c3 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/RangeAggregationBuilder.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/RangeAggregationBuilder.java @@ -6,17 +6,20 @@ import java.util.Map; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.common.ValueModel; import org.hibernate.search.engine.search.predicate.SearchPredicate; import org.hibernate.search.util.common.data.Range; -public interface RangeAggregationBuilder extends SearchAggregationBuilder, Long>> { +public interface RangeAggregationBuilder extends SearchAggregationBuilder, A>> { interface TypeSelector { - RangeAggregationBuilder type(Class expectedType, ValueModel valueModel); + RangeAggregationBuilder type(Class expectedType, ValueModel valueModel); } void filter(SearchPredicate filter); void range(Range range); + + RangeAggregationBuilder withValue(SearchAggregation aggregation); } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/TermsAggregationBuilder.java b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/TermsAggregationBuilder.java index 40414b5cd92..f6ca70c19ba 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/TermsAggregationBuilder.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/aggregation/spi/TermsAggregationBuilder.java @@ -6,13 +6,14 @@ import 
java.util.Map; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.common.ValueModel; import org.hibernate.search.engine.search.predicate.SearchPredicate; -public interface TermsAggregationBuilder extends SearchAggregationBuilder> { +public interface TermsAggregationBuilder extends SearchAggregationBuilder> { interface TypeSelector { - TermsAggregationBuilder type(Class expectedType, ValueModel valueModel); + TermsAggregationBuilder type(Class expectedType, ValueModel valueModel); } void filter(SearchPredicate filter); @@ -29,4 +30,6 @@ interface TypeSelector { void maxTermCount(int maxTermCount); + TermsAggregationBuilder withValue(SearchAggregation aggregation); + } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/AnyAggregationReference.java b/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/AnyAggregationReference.java index 0e148348e39..e8886faf11c 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/AnyAggregationReference.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/AnyAggregationReference.java @@ -26,8 +26,8 @@ public record AnyAggregationReference( String absolutePath, Class scopeRootType, ValueModel valueModel, Class aggregationType) implements AvgAggregationFieldReference, - CountAggregationFieldReference, - CountDistinctAggregationFieldReference, + CountValuesAggregationFieldReference, + CountDistinctValuesAggregationFieldReference, MaxAggregationFieldReference, MinAggregationFieldReference, RangeAggregationFieldReference, diff --git a/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountDistinctAggregationFieldReference.java b/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountDistinctValuesAggregationFieldReference.java similarity index 69% rename from 
engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountDistinctAggregationFieldReference.java rename to engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountDistinctValuesAggregationFieldReference.java index 34902cc99b2..bce6e722387 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountDistinctAggregationFieldReference.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountDistinctValuesAggregationFieldReference.java @@ -7,5 +7,5 @@ import org.hibernate.search.util.common.annotation.Incubating; @Incubating -public interface CountDistinctAggregationFieldReference extends AggregationFieldReference { +public interface CountDistinctValuesAggregationFieldReference extends AggregationFieldReference { } diff --git a/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountAggregationFieldReference.java b/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountValuesAggregationFieldReference.java similarity index 70% rename from engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountAggregationFieldReference.java rename to engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountValuesAggregationFieldReference.java index 68e64836aab..7b1fd725532 100644 --- a/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountAggregationFieldReference.java +++ b/engine/src/main/java/org/hibernate/search/engine/search/reference/aggregation/CountValuesAggregationFieldReference.java @@ -7,5 +7,5 @@ import org.hibernate.search.util.common.annotation.Incubating; @Incubating -public interface CountAggregationFieldReference extends AggregationFieldReference { +public interface CountValuesAggregationFieldReference extends AggregationFieldReference { } diff --git 
a/integrationtest/backend/lucene/src/test/java/org/hibernate/search/integrationtest/backend/lucene/testsupport/util/LuceneTckBackendFeatures.java b/integrationtest/backend/lucene/src/test/java/org/hibernate/search/integrationtest/backend/lucene/testsupport/util/LuceneTckBackendFeatures.java index fb41d9eb9da..69ebd14a0f8 100644 --- a/integrationtest/backend/lucene/src/test/java/org/hibernate/search/integrationtest/backend/lucene/testsupport/util/LuceneTckBackendFeatures.java +++ b/integrationtest/backend/lucene/src/test/java/org/hibernate/search/integrationtest/backend/lucene/testsupport/util/LuceneTckBackendFeatures.java @@ -46,12 +46,6 @@ class LuceneTckBackendFeatures extends TckBackendFeatures { - @Override - public boolean nonDefaultOrderInTermsAggregations() { - // TODO HSEARCH-3666 Lucene terms aggregations (discrete facets) may return wrong results for any sort other than the default one - return false; - } - @Override public boolean projectionPreservesNulls() { return false; diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricNumericFieldsAggregationsIT.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricNumericFieldsAggregationsIT.java index 994c6e7eae2..0d6c297aefe 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricNumericFieldsAggregationsIT.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricNumericFieldsAggregationsIT.java @@ -70,6 +70,10 @@ class MetricNumericFieldsAggregationsIT { private final AggregationKey avgFloats = AggregationKey.of( "avgFloats" ); private final AggregationKey avgBigIntegers = AggregationKey.of( "avgBigIntegers" ); private final AggregationKey avgBigDecimals = AggregationKey.of( "avgBigDecimals" ); + private final AggregationKey 
countDocuments = AggregationKey.of( "countDocuments" ); + private final AggregationKey countValuesIntegerMultiValued = AggregationKey.of( "countValuesIntegerMultiValued" ); + private final AggregationKey countDistinctValuesIntegerMultiValued = + AggregationKey.of( "countDistinctValuesIntegerMultiValued" ); @BeforeEach void setup() { @@ -117,6 +121,9 @@ void test_filteringResults() { assertThat( result.aggregation( avgBigIntegers ) ).isEqualTo( BigInteger.valueOf( 5 ) ); assertThat( result.aggregation( avgBigDecimals ).setScale( 2, RoundingMode.CEILING ) ) .isEqualTo( BigDecimal.valueOf( 580, 2 ) ); + assertThat( result.aggregation( countDocuments ) ).isEqualTo( result.total().hitCount() ); + assertThat( result.aggregation( countValuesIntegerMultiValued ) ).isEqualTo( 25 ); + assertThat( result.aggregation( countDistinctValuesIntegerMultiValued ) ).isEqualTo( 3 ); } @Test @@ -159,6 +166,9 @@ void test_allResults() { assertThat( result.aggregation( avgBigIntegers ) ).isEqualTo( BigInteger.valueOf( 5 ) ); assertThat( result.aggregation( avgBigDecimals ).setScale( 2, RoundingMode.CEILING ) ) .isEqualTo( BigDecimal.valueOf( 550, 2 ) ); + assertThat( result.aggregation( countDocuments ) ).isEqualTo( result.total().hitCount() ); + assertThat( result.aggregation( countValuesIntegerMultiValued ) ).isEqualTo( 50 ); + assertThat( result.aggregation( countDistinctValuesIntegerMultiValued ) ).isEqualTo( 6 ); } private SearchQuery defineAggregations( @@ -180,10 +190,10 @@ private SearchQuery defineAggregations( .aggregation( maxIntegers, f -> f.max().field( "integer", Integer.class ) ) .aggregation( maxIntegersAsString, f -> f.max().field( "integer", String.class, ValueModel.STRING ) ) .aggregation( maxConverted, f -> f.max().field( "converted", String.class ) ) - .aggregation( countIntegers, f -> f.count().field( "integer" ) ) - .aggregation( countConverted, f -> f.count().field( "converted" ) ) - .aggregation( countDistinctIntegers, f -> f.countDistinct().field( "integer" ) ) 
- .aggregation( countDistinctConverted, f -> f.countDistinct().field( "converted" ) ) + .aggregation( countIntegers, f -> f.countValues().field( "integer" ) ) + .aggregation( countConverted, f -> f.countValues().field( "converted" ) ) + .aggregation( countDistinctIntegers, f -> f.countDistinctValues().field( "integer" ) ) + .aggregation( countDistinctConverted, f -> f.countDistinctValues().field( "converted" ) ) .aggregation( avgIntegers, f -> f.avg().field( "integer", Integer.class ) ) .aggregation( avgIntegersAsString, f -> f.avg().field( "integer", String.class, ValueModel.STRING ) ) .aggregation( avgConverted, f -> f.avg().field( "converted", String.class ) ) @@ -201,6 +211,10 @@ private SearchQuery defineAggregations( .aggregation( avgFloats, f -> f.avg().field( "floatF", Float.class ) ) .aggregation( avgBigIntegers, f -> f.avg().field( "bigInteger", BigInteger.class ) ) .aggregation( avgBigDecimals, f -> f.avg().field( "bigDecimal", BigDecimal.class ) ) + .aggregation( countDocuments, f -> f.countDocuments() ) + .aggregation( countDistinctValuesIntegerMultiValued, + f -> f.countDistinctValues().field( "integerMultiValued" ) ) + .aggregation( countValuesIntegerMultiValued, f -> f.countValues().field( "integerMultiValued" ) ) .toQuery(); } @@ -223,6 +237,10 @@ private void initData() { document.addValue( mainIndex.binding().bigDecimal, BigDecimal.valueOf( value ) ); document.addValue( mainIndex.binding().style, style ); + for ( int j = 0; j < 5; j++ ) { + document.addValue( mainIndex.binding().integerMultiValued, value ); + } + DocumentElement object = document.addObject( mainIndex.binding().object ); object.addValue( mainIndex.binding().nestedInteger, value ); } ); @@ -234,6 +252,7 @@ private void initData() { @SuppressWarnings("unused") private static class IndexBinding { final IndexFieldReference integer; + final IndexFieldReference integerMultiValued; final IndexFieldReference converted; final IndexFieldReference doubleF; final IndexFieldReference floatF; 
@@ -245,6 +264,8 @@ private static class IndexBinding { IndexBinding(IndexSchemaElement root) { integer = root.field( "integer", f -> f.asInteger().aggregable( Aggregable.YES ) ).toReference(); + integerMultiValued = root.field( "integerMultiValued", f -> f.asInteger().aggregable( Aggregable.YES ) ) + .multiValued().toReference(); converted = root.field( "converted", f -> f.asInteger().aggregable( Aggregable.YES ) .projectionConverter( String.class, (value, context) -> value.toString() ) ).toReference(); doubleF = root.field( "doubleF", f -> f.asDouble().aggregable( Aggregable.YES ) ).toReference(); diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricTemporalFieldsAggregationsIT.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricTemporalFieldsAggregationsIT.java index 090edb93a82..0175d77251f 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricTemporalFieldsAggregationsIT.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/MetricTemporalFieldsAggregationsIT.java @@ -121,10 +121,10 @@ private SearchQuery defineAggregations( .aggregation( minConverted, f -> f.min().field( "converted", String.class ) ) .aggregation( maxDates, f -> f.max().field( "date", LocalDate.class ) ) .aggregation( maxConverted, f -> f.max().field( "converted", String.class ) ) - .aggregation( countDates, f -> f.count().field( "date" ) ) - .aggregation( countConverted, f -> f.count().field( "converted" ) ) - .aggregation( countDistinctDates, f -> f.countDistinct().field( "date" ) ) - .aggregation( countDistinctConverted, f -> f.countDistinct().field( "converted" ) ) + .aggregation( countDates, f -> f.countValues().field( "date" ) ) + .aggregation( countConverted, f -> f.countValues().field( "converted" ) ) 
+ .aggregation( countDistinctDates, f -> f.countDistinctValues().field( "date" ) ) + .aggregation( countDistinctConverted, f -> f.countDistinctValues().field( "converted" ) ) .aggregation( avgDates, f -> f.avg().field( "date", LocalDate.class ) ) .aggregation( avgConverted, f -> f.avg().field( "converted", String.class ) ) .toQuery(); diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/RangeAggregationSpecificsIT.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/RangeAggregationSpecificsIT.java index 67587eeb285..34b45ddaf9a 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/RangeAggregationSpecificsIT.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/RangeAggregationSpecificsIT.java @@ -26,12 +26,15 @@ import org.hibernate.search.engine.backend.types.Aggregable; import org.hibernate.search.engine.backend.types.Searchable; import org.hibernate.search.engine.search.aggregation.AggregationKey; +import org.hibernate.search.engine.search.aggregation.dsl.AggregationFinalStep; import org.hibernate.search.engine.search.aggregation.dsl.SearchAggregationFactory; import org.hibernate.search.engine.search.query.dsl.SearchQueryOptionsStep; import org.hibernate.search.integrationtest.backend.tck.testsupport.operations.AggregationDescriptor; import org.hibernate.search.integrationtest.backend.tck.testsupport.operations.RangeAggregationDescriptor; import org.hibernate.search.integrationtest.backend.tck.testsupport.types.FieldTypeDescriptor; +import org.hibernate.search.integrationtest.backend.tck.testsupport.types.IntegerFieldTypeDescriptor; import org.hibernate.search.integrationtest.backend.tck.testsupport.types.StandardFieldTypeDescriptor; +import 
org.hibernate.search.integrationtest.backend.tck.testsupport.util.SimpleFieldModel; import org.hibernate.search.integrationtest.backend.tck.testsupport.util.SimpleFieldModelsByType; import org.hibernate.search.integrationtest.backend.tck.testsupport.util.TckConfiguration; import org.hibernate.search.integrationtest.backend.tck.testsupport.util.ValueWrapper; @@ -558,6 +561,327 @@ void rangeOverlap_parmeters(FieldTypeDescriptor fieldType, DataSet data ); } + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void rangesBucket_countDocuments(FieldTypeDescriptor fieldType, DataSet dataSet) { + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey, Long>> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( + matchAllQuery() + .aggregation( aggregationKey, f -> f.range().field( fieldPath, fieldType.getJavaType() ) + .ranges( Arrays.asList( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), + Range.canonical( dataSet.ascendingValues.get( 5 ), null ) + ) ).value( f.countDocuments() ) + ) + .routing( dataSet.name ) + .toQuery() + ).aggregation( + aggregationKey, + containsExactly( c -> { + c.accept( Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), 3L ); + c.accept( Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), + 2L ); + c.accept( Range.canonical( dataSet.ascendingValues.get( 5 ), null ), 2L ); + } ) + ); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void rangesBucket_min(FieldTypeDescriptor fieldType, DataSet dataSet) { + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey, F>> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( + matchAllQuery() + .aggregation( + aggregationKey, f -> f.range().field( fieldPath, fieldType.getJavaType() ) + .ranges( 
Arrays.asList( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Range.canonical( dataSet.ascendingValues.get( 3 ), + dataSet.ascendingValues.get( 5 ) ), + Range.canonical( dataSet.ascendingValues.get( 5 ), null ) + ) ) + .value( (AggregationFinalStep) f.min().field( fieldPath, fieldType.getJavaType() ) ) + ) + .routing( dataSet.name ) + .toQuery() + ).aggregation( + aggregationKey, + containsExactly( c -> { + c.accept( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + dataSet.fieldType.normalize( dataSet.ascendingValues.get( 0 ) ) + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), + dataSet.fieldType.normalize( dataSet.ascendingValues.get( 3 ) ) + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 5 ), null ), + dataSet.fieldType.normalize( dataSet.ascendingValues.get( 5 ) ) + ); + } ) + ); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void rangesBucket_max(FieldTypeDescriptor fieldType, DataSet dataSet) { + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey, F>> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( + matchAllQuery() + .aggregation( + aggregationKey, f -> f.range().field( fieldPath, fieldType.getJavaType() ) + .ranges( Arrays.asList( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Range.canonical( dataSet.ascendingValues.get( 3 ), + dataSet.ascendingValues.get( 5 ) ), + Range.canonical( dataSet.ascendingValues.get( 5 ), null ) + ) ) + .value( (AggregationFinalStep) f.max().field( fieldPath, fieldType.getJavaType() ) ) + ) + .routing( dataSet.name ) + .toQuery() + ).aggregation( + aggregationKey, + containsExactly( c -> { + c.accept( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + dataSet.fieldType.normalize( dataSet.ascendingValues.get( 2 ) ) + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 3 ), 
dataSet.ascendingValues.get( 5 ) ), + dataSet.fieldType.normalize( dataSet.ascendingValues.get( 4 ) ) + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 5 ), null ), + dataSet.fieldType.normalize( dataSet.ascendingValues.get( 6 ) ) + ); + } ) + ); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void rangesBucket_countValues(FieldTypeDescriptor fieldType, DataSet dataSet) { + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey, Long>> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( + matchAllQuery() + .aggregation( + aggregationKey, f -> f.range().field( fieldPath, fieldType.getJavaType() ) + .ranges( Arrays.asList( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Range.canonical( dataSet.ascendingValues.get( 3 ), + dataSet.ascendingValues.get( 5 ) ), + Range.canonical( dataSet.ascendingValues.get( 5 ), null ) + ) ).value( f.countValues().field( index.binding().bucketMultiValue.relativeFieldName ) ) + ) + .routing( dataSet.name ) + .toQuery() + ).aggregation( + aggregationKey, + containsExactly( c -> { + c.accept( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + 12L + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), + 8L + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 5 ), null ), + 8L + ); + } ) + ); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void rangesBucket_countDistinctValues(FieldTypeDescriptor fieldType, DataSet dataSet) { + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey, Long>> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( + matchAllQuery() + .aggregation( + aggregationKey, f -> f.range().field( fieldPath, fieldType.getJavaType() ) + .ranges( Arrays.asList( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Range.canonical( 
dataSet.ascendingValues.get( 3 ), + dataSet.ascendingValues.get( 5 ) ), + Range.canonical( dataSet.ascendingValues.get( 5 ), null ) + ) ) + .value( f.countDistinctValues() + .field( index.binding().bucketMultiValue.relativeFieldName ) ) + ) + .routing( dataSet.name ) + .toQuery() + ).aggregation( + aggregationKey, + containsExactly( c -> { + c.accept( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + 5L // 10 * 0 0 0 0 -- hence odd number in this range + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), + 4L + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 5 ), null ), + 4L + ); + } ) + ); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void rangesBucket_terms_countImplicit(FieldTypeDescriptor fieldType, DataSet dataSet) { + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey, Map>> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( + matchAllQuery() + .aggregation( + aggregationKey, f -> f.range().field( fieldPath, fieldType.getJavaType() ) + .ranges( Arrays.asList( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Range.canonical( dataSet.ascendingValues.get( 3 ), + dataSet.ascendingValues.get( 5 ) ), + Range.canonical( dataSet.ascendingValues.get( 5 ), null ) + ) ) + .value( (AggregationFinalStep>) f.terms() + .field( index.binding().bucketMultiValue.relativeFieldName, Integer.class ) ) + ) + .routing( dataSet.name ) + .toQuery() + ).aggregation( + aggregationKey, + containsExactly( c -> { + c.accept( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Map.of( 0, 1L, 1, 1L, 2, 1L, 10, 1L, 20, 1L ) + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), + Map.of( 3, 1L, 4, 1L, 30, 1L, 40, 1L ) + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 5 ), null ), + Map.of( 5, 1L, 6, 1L, 50, 1L, 60, 1L ) + 
); + } ) + ); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void rangesBucket_terms_sum(FieldTypeDescriptor fieldType, DataSet dataSet) { + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey, Map>> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( + matchAllQuery() + .aggregation( + aggregationKey, f -> f.range().field( fieldPath, fieldType.getJavaType() ) + .ranges( Arrays.asList( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Range.canonical( dataSet.ascendingValues.get( 3 ), + dataSet.ascendingValues.get( 5 ) ), + Range.canonical( dataSet.ascendingValues.get( 5 ), null ) + ) ) + .value( (AggregationFinalStep>) f.terms() + .field( index.binding().bucketMultiValue.relativeFieldName, Integer.class ) + .value( (AggregationFinalStep) f.sum().field( + index.binding().bucketMultiValue.relativeFieldName, Integer.class ) ) ) + ) + .routing( dataSet.name ) + .toQuery() + ).aggregation( + aggregationKey, + containsExactly( c -> { + c.accept( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Map.of( 0, 0, 1, 13, 2, 26, 10, 13, 20, 26 ) + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), + Map.of( 3, 39, 4, 52, 30, 39, 40, 52 ) + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 5 ), null ), + Map.of( 5, 65, 6, 78, 50, 65, 60, 78 ) + ); + } ) + ); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void rangesBucket_range_countExplicit(FieldTypeDescriptor fieldType, DataSet dataSet) { + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey, Map, Long>>> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( + matchAllQuery() + .aggregation( + aggregationKey, f -> f.range().field( fieldPath, fieldType.getJavaType() ) + .ranges( Arrays.asList( + Range.canonical( null, dataSet.ascendingValues.get( 5 
) ), + Range.canonical( dataSet.ascendingValues.get( 5 ), null ) + ) ).value( f.range().field( fieldPath, fieldType.getJavaType() ) + .ranges( Arrays.asList( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), + Range.canonical( dataSet.ascendingValues.get( 3 ), + dataSet.ascendingValues.get( 5 ) ), + Range.canonical( dataSet.ascendingValues.get( 5 ), null ) + ) ) ) + ) + .routing( dataSet.name ) + .toQuery() + ).aggregation( + aggregationKey, + containsExactly( c -> { + c.accept( + Range.canonical( null, dataSet.ascendingValues.get( 5 ) ), + Map.of( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), 3L, + Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), 2L, + Range.canonical( dataSet.ascendingValues.get( 5 ), null ), 0L + ) + ); + c.accept( + Range.canonical( dataSet.ascendingValues.get( 5 ), null ), + Map.of( + Range.canonical( null, dataSet.ascendingValues.get( 3 ) ), 0L, + Range.canonical( dataSet.ascendingValues.get( 3 ), dataSet.ascendingValues.get( 5 ) ), 0L, + Range.canonical( dataSet.ascendingValues.get( 5 ), null ), 2L + ) + ); + } ) + ); + } + + private void assumeNonCanonicalRangesSupported() { assumeTrue( TckConfiguration.get().getBackendFeatures().nonCanonicalRangeInAggregations(), @@ -565,7 +889,7 @@ private void assumeNonCanonicalRangesSupported() { ); } - private SearchQueryOptionsStep matchAllQuery() { + private SearchQueryOptionsStep matchAllQuery() { return index.createScope().query().where( f -> f.matchAll() ); } @@ -593,10 +917,18 @@ private DataSet(FieldTypeDescriptor fieldType) { private void init() { BulkIndexer indexer = index.bulkIndexer(); for ( int i = 0; i < documentFieldValues.size(); i++ ) { - F value = documentFieldValues.get( i ); + final F value = documentFieldValues.get( i ); + final int bucketValue = i; indexer.add( name + "_document_" + i, name, document -> { document.addValue( index.binding().fieldModels.get( fieldType ).reference, value ); document.addValue( 
index.binding().fieldWithConverterModels.get( fieldType ).reference, value ); + + document.addValue( index.binding().bucketValue.reference, bucketValue ); + + document.addValue( index.binding().bucketMultiValue.reference, bucketValue ); + document.addValue( index.binding().bucketMultiValue.reference, bucketValue ); + document.addValue( index.binding().bucketMultiValue.reference, bucketValue ); + document.addValue( index.binding().bucketMultiValue.reference, bucketValue * 10 ); } ); } indexer.add( name + "_document_empty", name, document -> {} ); @@ -608,6 +940,8 @@ private static class IndexBinding { final SimpleFieldModelsByType fieldModels; final SimpleFieldModelsByType fieldWithConverterModels; final SimpleFieldModelsByType fieldWithAggregationDisabledModels; + final SimpleFieldModel bucketValue; + final SimpleFieldModel bucketMultiValue; IndexBinding(IndexSchemaElement root) { fieldModels = SimpleFieldModelsByType.mapAll( supportedFieldTypes, root, @@ -622,6 +956,11 @@ private static class IndexBinding { fieldWithAggregationDisabledModels = SimpleFieldModelsByType.mapAll( supportedFieldTypes, root, "nonAggregable_", c -> c.aggregable( Aggregable.NO ) ); + bucketValue = SimpleFieldModel.mapper( IntegerFieldTypeDescriptor.INSTANCE, c -> c.aggregable( Aggregable.YES ) ) + .map( root, "bucketValue" ); + bucketMultiValue = + SimpleFieldModel.mapper( IntegerFieldTypeDescriptor.INSTANCE, c -> c.aggregable( Aggregable.YES ) ) + .mapMultiValued( root, "bucketMultiValue" ); } } diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/SingleFieldAggregationBaseIT.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/SingleFieldAggregationBaseIT.java index 673095287e0..3a0381984b3 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/SingleFieldAggregationBaseIT.java +++ 
b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/SingleFieldAggregationBaseIT.java @@ -32,6 +32,7 @@ import org.hibernate.search.integrationtest.backend.tck.testsupport.operations.expectations.AggregationScenario; import org.hibernate.search.integrationtest.backend.tck.testsupport.operations.expectations.SupportedSingleFieldAggregationExpectations; import org.hibernate.search.integrationtest.backend.tck.testsupport.types.FieldTypeDescriptor; +import org.hibernate.search.integrationtest.backend.tck.testsupport.types.LocalDateFieldTypeDescriptor; import org.hibernate.search.integrationtest.backend.tck.testsupport.types.StandardFieldTypeDescriptor; import org.hibernate.search.integrationtest.backend.tck.testsupport.util.TestedFieldStructure; import org.hibernate.search.integrationtest.backend.tck.testsupport.util.extension.SearchSetupHelper; @@ -66,8 +67,12 @@ class SingleFieldAggregationBaseIT { Optional> expectations = aggregationDescriptor.getSingleFieldAggregationExpectations( fieldTypeDescriptor ).getSupported(); if ( expectations.isPresent() ) { + if ( !LocalDateFieldTypeDescriptor.INSTANCE.equals( fieldTypeDescriptor ) ) { + continue; + } + + supportedFieldTypes.add( fieldTypeDescriptor ); for ( TestedFieldStructure fieldStructure : TestedFieldStructure.all() ) { - supportedFieldTypes.add( fieldTypeDescriptor ); DataSet dataSet = new DataSet<>( expectations.get(), fieldStructure ); dataSets.add( dataSet ); parameters.add( Arguments.of( expectations.get(), fieldStructure, dataSet ) ); diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/TermsAggregationSpecificsIT.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/TermsAggregationSpecificsIT.java index a3b9fdeaa50..bcf966ffa7e 100644 --- 
a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/TermsAggregationSpecificsIT.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/search/aggregation/TermsAggregationSpecificsIT.java @@ -27,13 +27,13 @@ import org.hibernate.search.engine.backend.types.Aggregable; import org.hibernate.search.engine.backend.types.Searchable; import org.hibernate.search.engine.search.aggregation.AggregationKey; +import org.hibernate.search.engine.search.aggregation.dsl.AggregationFinalStep; import org.hibernate.search.engine.search.query.dsl.SearchQueryOptionsStep; import org.hibernate.search.integrationtest.backend.tck.testsupport.operations.AggregationDescriptor; import org.hibernate.search.integrationtest.backend.tck.testsupport.operations.TermsAggregationDescriptor; import org.hibernate.search.integrationtest.backend.tck.testsupport.types.FieldTypeDescriptor; import org.hibernate.search.integrationtest.backend.tck.testsupport.types.StandardFieldTypeDescriptor; import org.hibernate.search.integrationtest.backend.tck.testsupport.util.SimpleFieldModelsByType; -import org.hibernate.search.integrationtest.backend.tck.testsupport.util.TckConfiguration; import org.hibernate.search.integrationtest.backend.tck.testsupport.util.ValueWrapper; import org.hibernate.search.integrationtest.backend.tck.testsupport.util.extension.SearchSetupHelper; import org.hibernate.search.util.impl.integrationtest.mapper.stub.BulkIndexer; @@ -230,8 +230,6 @@ void orderByCountDescending(FieldTypeDescriptor fieldType, DataSet data @MethodSource("params") @PortedFromSearch5(original = "org.hibernate.search.test.query.facet.SimpleFacetingTest.testCountSortOrderAsc") void orderByCountAscending(FieldTypeDescriptor fieldType, DataSet dataSet) { - assumeNonDefaultOrdersSupported(); - String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; AggregationKey> aggregationKey = 
AggregationKey.of( AGGREGATION_NAME ); @@ -258,8 +256,6 @@ void orderByCountAscending(FieldTypeDescriptor fieldType, DataSet dataS @ParameterizedTest(name = "{0}") @MethodSource("params") void orderByTermDescending(FieldTypeDescriptor fieldType, DataSet dataSet) { - assumeNonDefaultOrdersSupported(); - String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; AggregationKey> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); @@ -287,8 +283,6 @@ void orderByTermDescending(FieldTypeDescriptor fieldType, DataSet dataS @MethodSource("params") @PortedFromSearch5(original = "org.hibernate.search.test.query.facet.SimpleFacetingTest.testAlphabeticalSortOrder") void orderByTermAscending(FieldTypeDescriptor fieldType, DataSet dataSet) { - assumeNonDefaultOrdersSupported(); - String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; AggregationKey> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); @@ -412,8 +406,6 @@ void minDocumentCount_zero_noMatch(FieldTypeDescriptor fieldType, DataSet< @ParameterizedTest(name = "{0}") @MethodSource("params") void minDocumentCount_zero_noMatch_orderByTermDescending(FieldTypeDescriptor fieldType, DataSet dataSet) { - assumeNonDefaultOrdersSupported(); - String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; AggregationKey> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); @@ -491,8 +483,6 @@ void maxTermCount_positive(FieldTypeDescriptor fieldType, DataSet dataS @ParameterizedTest(name = "{0}") @MethodSource("params") void maxTermCount_positive_orderByTermAscending(FieldTypeDescriptor fieldType, DataSet dataSet) { - assumeNonDefaultOrdersSupported(); - String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; AggregationKey> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); @@ -521,8 +511,6 @@ void maxTermCount_positive_orderByTermAscending(FieldTypeDescriptor fieldT @ParameterizedTest(name = "{0}") 
@MethodSource("params") void maxTermCount_positive_orderByCountAscending(FieldTypeDescriptor fieldType, DataSet dataSet) { - assumeNonDefaultOrdersSupported(); - String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; AggregationKey> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); @@ -622,15 +610,92 @@ void maxTermCount_veryLarge(FieldTypeDescriptor fieldType, DataSet data ); } - private SearchQueryOptionsStep matchAllQuery() { - return index.createScope().query().where( f -> f.matchAll() ); + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void terms_explicitDocCount(FieldTypeDescriptor fieldType, DataSet dataSet) { + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( matchAllQuery() + .aggregation( + aggregationKey, f -> f.terms().field( fieldPath, fieldType.getJavaType() ) + .value( f.countDocuments() ) + ) + .routing( dataSet.name ) ) + .aggregation( + aggregationKey, + // All buckets should be returned. + containsInAnyOrder( + c -> { + for ( F value : dataSet.valuesInDescendingOrder ) { + c.accept( value, (long) dataSet.documentIdPerTerm.get( value ).size() ); + } + }, fieldType + ) + ); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void terms_min(FieldTypeDescriptor fieldType, DataSet dataSet) { + assumeTrue( fieldType.supportsMetricAggregation(), + "Since the value is a metric aggregation on the same field, we want to be sure that only those fields that support it are included." 
); + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( matchAllQuery() + .aggregation( + aggregationKey, f -> f.terms().field( fieldPath, fieldType.getJavaType() ) + // while maybe silly as min/max == the same term as the key it is here just to test the nesting and aggregations: + .value( (AggregationFinalStep) f.min().field( fieldPath, fieldType.getJavaType() ) ) + ) + .routing( dataSet.name ) ) + .aggregation( + aggregationKey, + // All buckets should be returned. + containsInAnyOrder( + c -> { + for ( F value : dataSet.valuesInDescendingOrder ) { + c.accept( value, fieldType.normalize( value ) ); + } + }, fieldType + ) + ); + } + + @ParameterizedTest(name = "{0}") + @MethodSource("params") + void terms_max(FieldTypeDescriptor fieldType, DataSet dataSet) { + assumeTrue( fieldType.supportsMetricAggregation(), + "Since the value is a metric aggregation on the same field, we want to be sure that only those fields that support it are included." ); + String fieldPath = index.binding().fieldModels.get( fieldType ).relativeFieldName; + + AggregationKey> aggregationKey = AggregationKey.of( AGGREGATION_NAME ); + + assertThatQuery( matchAllQuery() + .aggregation( + aggregationKey, f -> f.terms().field( fieldPath, fieldType.getJavaType() ) + // while maybe silly as min/max == the same term as the key it is here just to test the nesting and aggregations: + .value( (AggregationFinalStep) f.max().field( fieldPath, fieldType.getJavaType() ) ) + ) + .routing( dataSet.name ) ) + .aggregation( + aggregationKey, + // All buckets should be returned. 
+ containsInAnyOrder( + c -> { + for ( F value : dataSet.valuesInDescendingOrder ) { + c.accept( value, fieldType.normalize( value ) ); + } + }, fieldType + ) + ); } - private void assumeNonDefaultOrdersSupported() { - assumeTrue( - TckConfiguration.get().getBackendFeatures().nonDefaultOrderInTermsAggregations(), - "Non-default orders are not supported for terms aggregations with this backend" - ); + private SearchQueryOptionsStep matchAllQuery() { + return index.createScope().query().where( f -> f.matchAll() ); } @SuppressWarnings("unchecked") diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/operations/MetricAggregationsTestCase.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/operations/MetricAggregationsTestCase.java index 58dcf4ee8a6..0e273c4d8ff 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/operations/MetricAggregationsTestCase.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/operations/MetricAggregationsTestCase.java @@ -79,8 +79,8 @@ public Result testMetricsAggregation(StubMappingScope scope, SingleFieldI .where( SearchPredicateFactory::matchAll ) .aggregation( result.minKey, f -> f.min().field( fieldPath, javaClass, valueModel ) ) .aggregation( result.maxKey, f -> f.max().field( fieldPath, javaClass, valueModel ) ) - .aggregation( result.countKey, f -> f.count().field( fieldPath ) ) - .aggregation( result.countDistinctKey, f -> f.countDistinct().field( fieldPath ) ) + .aggregation( result.countKey, f -> f.countValues().field( fieldPath ) ) + .aggregation( result.countDistinctKey, f -> f.countDistinctValues().field( fieldPath ) ) .aggregation( result.avgKey, f -> f.avg().field( fieldPath, javaClass, valueModel ) ); if ( metricAggregationsValues.sum() != null ) { diff --git 
a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/FieldTypeDescriptor.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/FieldTypeDescriptor.java index db7fd706351..78b29bb4d6a 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/FieldTypeDescriptor.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/FieldTypeDescriptor.java @@ -244,4 +244,8 @@ public String format(F value) { return Objects.toString( value, null ); } + public F normalize(F value) { + return value; + } + } diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/OffsetDateTimeFieldTypeDescriptor.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/OffsetDateTimeFieldTypeDescriptor.java index 4f7f5c73ecb..aea17ac062c 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/OffsetDateTimeFieldTypeDescriptor.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/OffsetDateTimeFieldTypeDescriptor.java @@ -171,4 +171,9 @@ public Optional> getIndex public String format(OffsetDateTime value) { return DateTimeFormatter.ISO_OFFSET_DATE_TIME.format( value ); } + + @Override + public OffsetDateTime normalize(OffsetDateTime value) { + return value == null ? 
null : value.toInstant().atOffset( ZoneOffset.UTC ); + } } diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/OffsetTimeFieldTypeDescriptor.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/OffsetTimeFieldTypeDescriptor.java index 5e01be081e6..5841c7db880 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/OffsetTimeFieldTypeDescriptor.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/OffsetTimeFieldTypeDescriptor.java @@ -134,4 +134,9 @@ public Optional> getIndexNull public String format(OffsetTime value) { return FormatUtils.format( value ); } + + @Override + public OffsetTime normalize(OffsetTime value) { + return value == null ? null : value.withOffsetSameInstant( ZoneOffset.UTC ); + } } diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/ZonedDateTimeFieldTypeDescriptor.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/ZonedDateTimeFieldTypeDescriptor.java index 2c5f74349c8..ed434d213a8 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/ZonedDateTimeFieldTypeDescriptor.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/types/ZonedDateTimeFieldTypeDescriptor.java @@ -193,4 +193,9 @@ public Optional> getIndexN public String format(ZonedDateTime value) { return DateTimeFormatter.ISO_ZONED_DATE_TIME.format( value ); } + + @Override + public ZonedDateTime normalize(ZonedDateTime value) { + return value == null ? 
null : value.withZoneSameInstant( ZoneOffset.UTC ); + } } diff --git a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/util/TckBackendFeatures.java b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/util/TckBackendFeatures.java index df62b670195..a563381bf0c 100644 --- a/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/util/TckBackendFeatures.java +++ b/integrationtest/backend/tck/src/main/java/org/hibernate/search/integrationtest/backend/tck/testsupport/util/TckBackendFeatures.java @@ -32,10 +32,6 @@ public boolean nonCanonicalRangeInAggregations() { return true; } - public boolean nonDefaultOrderInTermsAggregations() { - return true; - } - public boolean projectionPreservesNulls() { return true; } diff --git a/integrationtest/metamodel/orm-elasticsearch/src/test/java/org/hibernate/search/integrationtest/metamodel/orm/elasticsearch/AggregationTypesIT.java b/integrationtest/metamodel/orm-elasticsearch/src/test/java/org/hibernate/search/integrationtest/metamodel/orm/elasticsearch/AggregationTypesIT.java index 51cd1aff3e4..cde1ecf1b4f 100644 --- a/integrationtest/metamodel/orm-elasticsearch/src/test/java/org/hibernate/search/integrationtest/metamodel/orm/elasticsearch/AggregationTypesIT.java +++ b/integrationtest/metamodel/orm-elasticsearch/src/test/java/org/hibernate/search/integrationtest/metamodel/orm/elasticsearch/AggregationTypesIT.java @@ -65,9 +65,9 @@ void smoke() { .aggregation( AggregationKey.of( "sum" ), f -> f.sum().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) .aggregation( AggregationKey.of( "count" ), - f -> f.count().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) + f -> f.countValues().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) .aggregation( AggregationKey.of( "countDistinct" ), - f -> f.countDistinct().field( 
AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) + f -> f.countDistinctValues().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) .aggregation( AggregationKey.of( "min" ), f -> f.min().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) .aggregation( AggregationKey.of( "max" ), diff --git a/integrationtest/metamodel/orm-lucene/src/test/java/org/hibernate/search/integrationtest/metamodel/orm/lucene/AggregationTypesIT.java b/integrationtest/metamodel/orm-lucene/src/test/java/org/hibernate/search/integrationtest/metamodel/orm/lucene/AggregationTypesIT.java index 19a94f8ef69..080ece0bef1 100644 --- a/integrationtest/metamodel/orm-lucene/src/test/java/org/hibernate/search/integrationtest/metamodel/orm/lucene/AggregationTypesIT.java +++ b/integrationtest/metamodel/orm-lucene/src/test/java/org/hibernate/search/integrationtest/metamodel/orm/lucene/AggregationTypesIT.java @@ -65,9 +65,9 @@ void smoke() { .aggregation( AggregationKey.of( "sum" ), f -> f.sum().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) .aggregation( AggregationKey.of( "count" ), - f -> f.count().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) + f -> f.countValues().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) .aggregation( AggregationKey.of( "countDistinct" ), - f -> f.countDistinct().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) + f -> f.countDistinctValues().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) .aggregation( AggregationKey.of( "min" ), f -> f.min().field( AggregationTypesIT_IndexedEntity__.INDEX.myNumber ) ) .aggregation( AggregationKey.of( "max" ), diff --git a/integrationtest/metamodel/standalone-elasticsearch/src/test/java/org/hibernate/search/integrationtest/metamodel/standalone/elasticsearch/AggregationTypesIT.java b/integrationtest/metamodel/standalone-elasticsearch/src/test/java/org/hibernate/search/integrationtest/metamodel/standalone/elasticsearch/AggregationTypesIT.java index 
46f5b9078dc..f169ab32954 100644 --- a/integrationtest/metamodel/standalone-elasticsearch/src/test/java/org/hibernate/search/integrationtest/metamodel/standalone/elasticsearch/AggregationTypesIT.java +++ b/integrationtest/metamodel/standalone-elasticsearch/src/test/java/org/hibernate/search/integrationtest/metamodel/standalone/elasticsearch/AggregationTypesIT.java @@ -86,9 +86,9 @@ void smoke() { .aggregation( AggregationKey.of( "sum" ), f -> f.sum().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) .aggregation( AggregationKey.of( "count" ), - f -> f.count().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) + f -> f.countValues().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) .aggregation( AggregationKey.of( "countDistinct" ), - f -> f.countDistinct().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) + f -> f.countDistinctValues().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) .aggregation( AggregationKey.of( "min" ), f -> f.min().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) .aggregation( AggregationKey.of( "max" ), diff --git a/integrationtest/metamodel/standalone-lucene/src/test/java/org/hibernate/search/integrationtest/metamodel/standalone/lucene/AggregationTypesIT.java b/integrationtest/metamodel/standalone-lucene/src/test/java/org/hibernate/search/integrationtest/metamodel/standalone/lucene/AggregationTypesIT.java index 6312704dcc1..1ea4c7c53ea 100644 --- a/integrationtest/metamodel/standalone-lucene/src/test/java/org/hibernate/search/integrationtest/metamodel/standalone/lucene/AggregationTypesIT.java +++ b/integrationtest/metamodel/standalone-lucene/src/test/java/org/hibernate/search/integrationtest/metamodel/standalone/lucene/AggregationTypesIT.java @@ -86,9 +86,9 @@ void smoke() { .aggregation( AggregationKey.of( "sum" ), f -> f.sum().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) .aggregation( AggregationKey.of( "count" ), - f -> f.count().field( 
AggregationTypesIT_IndexedEntity__.INDEX.number ) ) + f -> f.countValues().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) .aggregation( AggregationKey.of( "countDistinct" ), - f -> f.countDistinct().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) + f -> f.countDistinctValues().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) .aggregation( AggregationKey.of( "min" ), f -> f.min().field( AggregationTypesIT_IndexedEntity__.INDEX.number ) ) .aggregation( AggregationKey.of( "max" ), diff --git a/lucene-next/backend/lucene/pom.xml b/lucene-next/backend/lucene/pom.xml index 04ab0dc105f..048b6e89136 100644 --- a/lucene-next/backend/lucene/pom.xml +++ b/lucene-next/backend/lucene/pom.xml @@ -44,10 +44,6 @@ org.apache.lucene lucene-join - - org.apache.lucene - lucene-facet - com.carrotsearch hppc diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/cfg/LuceneBackendSettings.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/cfg/LuceneBackendSettings.java index 5ba2a7e108e..43c7c070297 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/cfg/LuceneBackendSettings.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/cfg/LuceneBackendSettings.java @@ -36,8 +36,8 @@ private LuceneBackendSettings() { *

* This should be set in order to get consistent behavior when Lucene is upgraded. *

- * Expects a Lucene {@link org.apache.lucene.util.Version} object, - * or a String accepted by {@link org.apache.lucene.util.Version#parseLeniently(java.lang.String)} + * Expects a Lucene {@link Version} object, + * or a String accepted by {@link Version#parseLeniently(String)} *

* Defaults to {@link Defaults#LUCENE_VERSION}, which may change when Hibernate Search or Lucene is upgraded, * and therefore does not offer any backwards-compatibility guarantees. diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/cfg/LuceneIndexSettings.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/cfg/LuceneIndexSettings.java index b98bb46e226..a0a5f322ed6 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/cfg/LuceneIndexSettings.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/cfg/LuceneIndexSettings.java @@ -87,7 +87,7 @@ private LuceneIndexSettings() { *

* Expects a {@link IOStrategyName} value, or a String representation of such value. *

- * Defaults to {@link LuceneIndexSettings.Defaults#IO_STRATEGY}. + * Defaults to {@link Defaults#IO_STRATEGY}. */ public static final String IO_STRATEGY = IO_PREFIX + IORadicals.STRATEGY; @@ -115,7 +115,7 @@ private LuceneIndexSettings() { * Expects a positive Integer value in milliseconds, such as {@code 1000}, * or a String that can be parsed into such Integer value. *

- * Defaults to {@link LuceneIndexSettings.Defaults#IO_COMMIT_INTERVAL}. + * Defaults to {@link Defaults#IO_COMMIT_INTERVAL}. */ public static final String IO_COMMIT_INTERVAL = IO_PREFIX + IORadicals.COMMIT_INTERVAL; @@ -140,7 +140,7 @@ private LuceneIndexSettings() { * Expects a positive Integer value in milliseconds, such as {@code 1000}, * or a String that can be parsed into such Integer value. *

- * Defaults to {@link LuceneIndexSettings.Defaults#IO_REFRESH_INTERVAL}. + * Defaults to {@link Defaults#IO_REFRESH_INTERVAL}. */ public static final String IO_REFRESH_INTERVAL = IO_PREFIX + IORadicals.REFRESH_INTERVAL; @@ -276,7 +276,7 @@ private LuceneIndexSettings() { * Expects a String, such as "hash". * See the reference documentation for a list of available values. *

- * Defaults to {@link LuceneIndexSettings.Defaults#SHARDING_STRATEGY} (no sharding). + * Defaults to {@link Defaults#SHARDING_STRATEGY} (no sharding). */ public static final String SHARDING_STRATEGY = SHARDING_PREFIX + ShardingRadicals.STRATEGY; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java index 5b4099e7b14..574815f8fdd 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/document/model/dsl/impl/LuceneIndexRootBuilder.java @@ -19,6 +19,7 @@ import org.hibernate.search.backend.lucene.document.model.impl.LuceneIndexRoot; import org.hibernate.search.backend.lucene.document.model.impl.LuceneIndexValueField; import org.hibernate.search.backend.lucene.document.model.impl.LuceneIndexValueFieldTemplate; +import org.hibernate.search.backend.lucene.types.aggregation.impl.LuceneCountDocumentAggregation; import org.hibernate.search.backend.lucene.types.dsl.LuceneIndexFieldTypeFactory; import org.hibernate.search.backend.lucene.types.dsl.impl.LuceneIndexFieldTypeFactoryImpl; import org.hibernate.search.backend.lucene.types.impl.LuceneIndexCompositeNodeType; @@ -33,6 +34,7 @@ import org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; import org.hibernate.search.engine.mapper.mapping.building.spi.IndexFieldTypeDefaultsProvider; import org.hibernate.search.engine.reporting.spi.EventContexts; +import org.hibernate.search.engine.search.aggregation.spi.AggregationTypeKeys; import org.hibernate.search.util.common.reporting.EventContext; public class LuceneIndexRootBuilder extends AbstractLuceneIndexCompositeNodeBuilder @@ -55,6 +57,8 @@ public 
LuceneIndexRootBuilder(EventContext indexEventContext, this.backendMapperContext = backendMapperContext; this.mappedTypeName = mappedTypeName; this.analysisDefinitionRegistry = analysisDefinitionRegistry; + + this.typeBuilder.queryElementFactory( AggregationTypeKeys.COUNT_DOCUMENTS, LuceneCountDocumentAggregation.factory() ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunction.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunction.java index 843a4717f93..bdac2557c0f 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunction.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunction.java @@ -14,4 +14,8 @@ public interface AggregationFunction> { R implementation(); + default boolean acceptMultipleValues() { + return true; + } + } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java index e9d0ddf3bdc..28988a10f8a 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/AggregationFunctionCollector.java @@ -10,16 +10,16 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.Collector; -import 
org.apache.lucene.search.LeafCollector; -import org.apache.lucene.search.Scorable; import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; -public class AggregationFunctionCollector> implements Collector { +public class AggregationFunctionCollector> extends SimpleCollector { private final LongMultiValuesSource valueSource; private final AggregationFunction aggregationFunction; + private LongMultiValues values; + public AggregationFunctionCollector(LongMultiValuesSource valueSource, AggregationFunction aggregationFunction) { this.valueSource = valueSource; this.aggregationFunction = aggregationFunction; @@ -34,8 +34,16 @@ public Long result() { } @Override - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { - return new AggregationFunctionLeafCollector( valueSource.getValues( context ) ); + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) { + while ( values.hasNextValue() ) { + long value = values.nextValue(); + aggregationFunction.apply( value ); + if ( !aggregationFunction.acceptMultipleValues() ) { + break; + } + } + } } @Override @@ -43,26 +51,13 @@ public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; } - public class AggregationFunctionLeafCollector implements LeafCollector { - private final LongMultiValues values; - - public AggregationFunctionLeafCollector(LongMultiValues values) { - this.values = values; - } - - @Override - public void collect(int doc) throws IOException { - if ( values.advanceExact( doc ) ) { - while ( values.hasNextValue() ) { - long value = values.nextValue(); - aggregationFunction.apply( value ); - } - } - } + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + values = valueSource.getValues( context ); + } - @Override - public void setScorer(Scorable scorer) { - // no-op by default - } + @Override + public void finish() throws IOException { + values = null; } } diff --git 
a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValues.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValues.java new file mode 100644 index 00000000000..daa0852e4ae --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValues.java @@ -0,0 +1,47 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +import java.util.BitSet; + +import com.carrotsearch.hppc.LongHashSet; + +/** + *

+ * The algorithm to collect distinct elements is inspired by {@code org.apache.lucene.facet.LongValueFacetCounts} + * of Apache Lucene project. + */ +public class CountDistinctValues implements AggregationFunction { + + private final BitSet counts = new BitSet( 1024 ); + private final LongHashSet hashCounts = new LongHashSet(); + + @Override + public void apply(long value) { + if ( value >= 0 && value < counts.size() ) { + counts.set( (int) value ); + } + else { + hashCounts.add( value ); + } + } + + @Override + public void merge(AggregationFunction sibling) { + CountDistinctValues other = sibling.implementation(); + counts.or( other.counts ); + hashCounts.addAll( other.hashCounts ); + } + + @Override + public Long result() { + return (long) counts.cardinality() + hashCounts.size(); + } + + @Override + public CountDistinctValues implementation() { + return this; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValuesCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValuesCollectorFactory.java new file mode 100644 index 00000000000..e1300ef9fcf --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDistinctValuesCollectorFactory.java @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorExecutionContext; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; + +public class CountDistinctValuesCollectorFactory + implements + CollectorFactory, + Long, + AggregationFunctionCollectorManager> { + + private final JoiningLongMultiValuesSource source; + private final CollectorKey, Long> key = CollectorKey.create(); + + public CountDistinctValuesCollectorFactory(JoiningLongMultiValuesSource source) { + this.source = source; + } + + @Override + public AggregationFunctionCollectorManager createCollectorManager(CollectorExecutionContext context) { + return new AggregationFunctionCollectorManager<>( source, CountDistinctValues::new ); + } + + @Override + public CollectorKey, Long> getCollectorKey() { + return key; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocuemntsCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocuemntsCollectorFactory.java new file mode 100644 index 00000000000..6e3c0e419d1 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocuemntsCollectorFactory.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +import java.io.IOException; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorExecutionContext; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; + +public class CountDocuemntsCollectorFactory + implements CollectorFactory { + + private final CollectorKey key = CollectorKey.create(); + + public static CountDocuemntsCollectorFactory instance() { + return new CountDocuemntsCollectorFactory(); + } + + @Override + public CountDocumentsCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { + return new CountDocumentsCollectorManager(); + } + + @Override + public CollectorKey getCollectorKey() { + return key; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollector.java new file mode 100644 index 00000000000..b494f0e45c8 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollector.java @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +import java.io.IOException; + +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; + +public class CountDocumentsCollector extends SimpleCollector { + + private long count = 0L; + + @Override + public void collect(int doc) throws IOException { + count++; + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE; + } + + public long count() { + return count; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollectorManager.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollectorManager.java new file mode 100644 index 00000000000..5f0c875d08c --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountDocumentsCollectorManager.java @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +import java.io.IOException; +import java.util.Collection; + +import org.apache.lucene.search.CollectorManager; + +public class CountDocumentsCollectorManager implements CollectorManager { + + @Override + public CountDocumentsCollector newCollector() throws IOException { + return new CountDocumentsCollector(); + } + + @Override + public Long reduce(Collection collectors) throws IOException { + long count = 0L; + for ( CountDocumentsCollector collector : collectors ) { + count += collector.count(); + } + return count; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValues.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValues.java new file mode 100644 index 00000000000..fb4b7dedce0 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValues.java @@ -0,0 +1,30 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +public class CountValues implements AggregationFunction { + + private long count = 0L; + + @Override + public void apply(long value) { + count++; + } + + @Override + public void merge(AggregationFunction sibling) { + count += sibling.implementation().count; + } + + @Override + public Long result() { + return count; + } + + @Override + public CountValues implementation() { + return this; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValuesCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValuesCollectorFactory.java new file mode 100644 index 00000000000..337615e81d2 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/CountValuesCollectorFactory.java @@ -0,0 +1,32 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorExecutionContext; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; + +public class CountValuesCollectorFactory + implements + CollectorFactory, Long, AggregationFunctionCollectorManager> { + + private final JoiningLongMultiValuesSource source; + private final CollectorKey, Long> key = CollectorKey.create(); + + public CountValuesCollectorFactory(JoiningLongMultiValuesSource source) { + this.source = source; + } + + @Override + public AggregationFunctionCollectorManager createCollectorManager(CollectorExecutionContext context) { + return new AggregationFunctionCollectorManager<>( source, CountValues::new ); + } + + @Override + public CollectorKey, Long> getCollectorKey() { + return key; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/DoubleAggregationFunctionCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/DoubleAggregationFunctionCollector.java index 3a441653d58..aae6609a773 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/DoubleAggregationFunctionCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/aggregation/collector/impl/DoubleAggregationFunctionCollector.java @@ -11,17 +11,17 @@ import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.Collector; -import 
org.apache.lucene.search.LeafCollector; -import org.apache.lucene.search.Scorable; import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; -public class DoubleAggregationFunctionCollector> implements Collector { +public class DoubleAggregationFunctionCollector> extends SimpleCollector { private final LongMultiValuesSource valueSource; private final DoubleAggregationFunction aggregationFunction; private final Function longToDouble; + private LongMultiValues values; + public DoubleAggregationFunctionCollector(LongMultiValuesSource valueSource, DoubleAggregationFunction aggregationFunction, Function longToDouble) { this.valueSource = valueSource; @@ -38,8 +38,13 @@ public Double result() { } @Override - public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException { - return new AggregationFunctionLeafCollector( valueSource.getValues( context ) ); + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) { + while ( values.hasNextValue() ) { + long value = values.nextValue(); + aggregationFunction.apply( longToDouble.apply( value ) ); + } + } } @Override @@ -47,26 +52,13 @@ public ScoreMode scoreMode() { return ScoreMode.COMPLETE_NO_SCORES; } - public class AggregationFunctionLeafCollector implements LeafCollector { - private final LongMultiValues values; - - public AggregationFunctionLeafCollector(LongMultiValues values) { - this.values = values; - } - - @Override - public void collect(int doc) throws IOException { - if ( values.advanceExact( doc ) ) { - while ( values.hasNextValue() ) { - long value = values.nextValue(); - aggregationFunction.apply( longToDouble.apply( value ) ); - } - } - } + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + values = valueSource.getValues( context ); + } - @Override - public void setScorer(Scorable scorer) { - // no-op by default - } + @Override + public void finish() throws IOException { + values = 
null; } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/BaseTermsCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/BaseTermsCollector.java new file mode 100644 index 00000000000..c139c430de6 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/BaseTermsCollector.java @@ -0,0 +1,16 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; + +public interface BaseTermsCollector { + + CollectorKey[] keys(); + + CollectorManager[] managers(); + +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/FacetsCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/FacetsCollectorFactory.java deleted file mode 100644 index ec3bf698880..00000000000 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/FacetsCollectorFactory.java +++ /dev/null @@ -1,25 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. 
and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.collector.impl; - -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.FacetsCollectorManager; - -public class FacetsCollectorFactory implements CollectorFactory { - public static final CollectorKey KEY = CollectorKey.create(); - - public static final CollectorFactory INSTANCE = - new FacetsCollectorFactory(); - - @Override - public FacetsCollectorManager createCollectorManager(CollectorExecutionContext context) { - return new FacetsCollectorManager(); - } - - @Override - public CollectorKey getCollectorKey() { - return KEY; - } -} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java new file mode 100644 index 00000000000..9e3875f0757 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollector.java @@ -0,0 +1,96 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; + +import com.carrotsearch.hppc.LongHashSet; +import com.carrotsearch.hppc.LongObjectHashMap; +import com.carrotsearch.hppc.cursors.LongObjectCursor; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; + +public class NumericTermsCollector extends SimpleCollector implements BaseTermsCollector { + + private final LongHashSet uniqueLeafIndicesForDocument = new LongHashSet(); + + private final LongMultiValuesSource valuesSource; + private final LongObjectHashMap segmentValues = new LongObjectHashMap<>(); + + private final CollectorKey[] keys; + private final CollectorManager[] managers; + + private LongMultiValues values; + private LeafReaderContext leafReaderContext; + + public NumericTermsCollector(LongMultiValuesSource valuesSource, CollectorKey[] keys, + CollectorManager[] managers) { + this.valuesSource = valuesSource; + this.keys = keys; + this.managers = managers; + } + + @Override + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) { + // or just recreate it on each document? + uniqueLeafIndicesForDocument.clear(); + + while ( values.hasNextValue() ) { + // Each document must be counted only once per range. 
+ long value = values.nextValue(); + if ( uniqueLeafIndicesForDocument.add( value ) ) { + TermCollectorSegmentValue segmentValue = segmentValues.get( value ); + if ( segmentValue == null ) { + segmentValue = new TermCollectorSegmentValue( managers, leafReaderContext ); + segmentValues.put( value, segmentValue ); + } + segmentValue.collect( doc ); + } + } + } + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE_NO_SCORES; + } + + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + this.values = valuesSource.getValues( context ); + this.leafReaderContext = context; + for ( LongObjectCursor value : segmentValues ) { + value.value.resetLeafCollectors( context ); + } + } + + @Override + public void finish() { + values = null; + } + + @Override + public CollectorKey[] keys() { + return keys; + } + + @Override + public CollectorManager[] managers() { + return managers; + } + + LongObjectHashMap segmentValues() { + return segmentValues; + } + +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java new file mode 100644 index 00000000000..81e0e9a9e92 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorFactory.java @@ -0,0 +1,51 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; +import java.util.List; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; + +import org.apache.lucene.search.CollectorManager; + +public class NumericTermsCollectorFactory + implements CollectorFactory { + + public static CollectorFactory instance( + LongMultiValuesSource valuesSource, List> collectorFactories) { + return new NumericTermsCollectorFactory( valuesSource, collectorFactories ); + } + + private final CollectorKey key = CollectorKey.create(); + private final LongMultiValuesSource valuesSource; + private final List> collectorFactories; + + public NumericTermsCollectorFactory(LongMultiValuesSource valuesSource, + List> collectorFactories) { + this.valuesSource = valuesSource; + this.collectorFactories = collectorFactories; + } + + @SuppressWarnings({ "unchecked" }) + @Override + public NumericTermsCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { + CollectorKey[] keys = new CollectorKey[collectorFactories.size()]; + var managers = new CollectorManager[collectorFactories.size()]; + int index = 0; + for ( CollectorFactory factory : collectorFactories ) { + keys[index] = factory.getCollectorKey(); + CollectorManager collectorManager = factory.createCollectorManager( context ); + managers[index] = collectorManager; + index++; + } + return new NumericTermsCollectorManager( valuesSource, keys, managers ); + } + + @Override + public CollectorKey getCollectorKey() { + return key; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java new file mode 100644 index 00000000000..be8ec930b8d --- /dev/null +++ 
b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/NumericTermsCollectorManager.java @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.util.Collection; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; + +public class NumericTermsCollectorManager + implements CollectorManager { + + private final LongMultiValuesSource valuesSource; + private final CollectorKey[] keys; + private final CollectorManager[] managers; + + public NumericTermsCollectorManager(LongMultiValuesSource valuesSource, + CollectorKey[] keys, CollectorManager[] managers) { + this.valuesSource = valuesSource; + this.keys = keys; + this.managers = managers; + } + + @Override + public NumericTermsCollector newCollector() { + return new NumericTermsCollector( valuesSource, keys, managers ); + } + + @Override + public TermResults reduce(Collection collection) { + if ( collection.isEmpty() ) { + return TermResults.EMPTY; + } + TermResults results = new TermResults( keys, managers ); + for ( NumericTermsCollector collector : collection ) { + results.add( collector.segmentValues() ); + } + return results; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java new file mode 100644 index 00000000000..95ba455a320 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollector.java @@ -0,0 +1,214 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; +import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; + +import com.carrotsearch.hppc.IntArrayList; +import com.carrotsearch.hppc.IntHashSet; +import com.carrotsearch.hppc.LongArrayList; +import com.carrotsearch.hppc.LongIntHashMap; +import com.carrotsearch.hppc.LongIntMap; +import com.carrotsearch.hppc.cursors.IntCursor; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.LeafCollector; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; + +public class RangeCollector extends SimpleCollector { + + private final LongMultiValuesSource valuesSource; + private final long[] boundaries; + private final IntArrayList[] countsPerBoundaries; + + private final Collector[][] collectors; + private final CollectorKey[] keys; + private final LeafCollector[][] leafCollectors; + + private LongMultiValues values; + + public RangeCollector(LongMultiValuesSource valuesSource, EffectiveRange[] ranges, Collector[][] collectors, + CollectorKey[] keys) { + this.valuesSource = valuesSource; + this.collectors = collectors; + this.keys = keys; + + // Maps all range inclusive endpoints to int flags; 1 + // = start of interval, 2 = end of interval. 
We need to + // track the start vs end case separately because if a + // given point is both, then it must be its own + // elementary interval: + LongIntMap endsMap = new LongIntHashMap(); + + endsMap.put( Long.MIN_VALUE, 1 ); + endsMap.put( Long.MAX_VALUE, 2 ); + + for ( EffectiveRange range : ranges ) { + long min = range.min(); + long max = range.max(); + int cur = endsMap.get( min ); + if ( cur == 0 ) { + endsMap.put( min, 1 ); + } + else { + endsMap.put( min, cur | 1 ); + } + cur = endsMap.get( max ); + if ( cur == 0 ) { + endsMap.put( max, 2 ); + } + else { + endsMap.put( max, cur | 2 ); + } + } + + LongArrayList endsList = new LongArrayList( endsMap.keys() ); + Arrays.sort( endsList.buffer, 0, endsList.elementsCount ); + + // Build elementaryIntervals (a 1D Venn diagram): + List elementaryIntervals = new ArrayList<>(); + int upto0 = 1; + long v = endsList.get( 0 ); + long prev; + if ( endsMap.get( v ) == 3 ) { + elementaryIntervals.add( new EffectiveRange( v, v ) ); + prev = v + 1; + } + else { + prev = v; + } + + while ( upto0 < endsList.size() ) { + v = endsList.get( upto0 ); + int flags = endsMap.get( v ); + if ( flags == 3 ) { + // This point is both an end and a start; we need to + // separate it: + if ( v > prev ) { + elementaryIntervals.add( new EffectiveRange( prev, v - 1 ) ); + } + elementaryIntervals.add( new EffectiveRange( v, v ) ); + prev = v + 1; + } + else if ( flags == 1 ) { + // This point is only the start of an interval; + // attach it to next interval: + if ( v > prev ) { + elementaryIntervals.add( new EffectiveRange( prev, v - 1 ) ); + } + prev = v; + } + else { + assert flags == 2; + // This point is only the end of an interval; attach + // it to last interval: + elementaryIntervals.add( new EffectiveRange( prev, v ) ); + prev = v + 1; + } + upto0++; + } + + // Set boundaries (ends of each elementary interval): + boundaries = new long[elementaryIntervals.size()]; + countsPerBoundaries = new IntArrayList[boundaries.length]; + for ( int i 
= 0; i < boundaries.length; i++ ) { + EffectiveRange interval = elementaryIntervals.get( i ); + boundaries[i] = interval.max(); + IntArrayList list = new IntArrayList(); + countsPerBoundaries[i] = list; + for ( int j = 0; j < ranges.length; j++ ) { + if ( interval.min() >= ranges[j].min() && interval.max() <= ranges[j].max() ) { + list.add( j ); + } + } + } + + leafCollectors = new LeafCollector[keys.length][]; + for ( int i = 0; i < leafCollectors.length; i++ ) { + leafCollectors[i] = new LeafCollector[ranges.length]; + } + } + + private void processLeafWithIndex(int index, int doc) throws IOException { + for ( IntCursor cursor : countsPerBoundaries[index] ) { + for ( int i = 0; i < keys.length; i++ ) { + leafCollectors[i][cursor.value].collect( doc ); + } + } + } + + private int findLeafIndex(long v) { + // Binary search to find matched elementary range; we + // are guaranteed to find a match because the last + // boundary is Long.MAX_VALUE: + int lo = 0; + int hi = boundaries.length - 1; + while ( true ) { + int mid = ( lo + hi ) >>> 1; + if ( v <= boundaries[mid] ) { + if ( mid == 0 ) { + return 0; + } + else { + hi = mid - 1; + } + } + else if ( v > boundaries[mid + 1] ) { + lo = mid + 1; + } + else { + return mid + 1; + } + } + } + + @Override + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) { + IntHashSet uniqueLeafIndicesForDocument = new IntHashSet(); + while ( values.hasNextValue() ) { + // Each document must be counted only once per range. 
+ int leafIndex = findLeafIndex( values.nextValue() ); + if ( uniqueLeafIndicesForDocument.add( leafIndex ) ) { + processLeafWithIndex( leafIndex, doc ); + } + } + } + } + + public Collector[][] collectors() { + return collectors; + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE_NO_SCORES; + } + + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + values = valuesSource.getValues( context ); + for ( int i = 0; i < collectors.length; i++ ) { + for ( int j = 0; j < collectors[i].length; j++ ) { + leafCollectors[i][j] = collectors[i][j].getLeafCollector( context ); + } + } + } + + @Override + public void finish() throws IOException { + values = null; + } + +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java new file mode 100644 index 00000000000..f5cfb3ff06e --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorFactory.java @@ -0,0 +1,54 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; +import java.util.List; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; +import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; + +import org.apache.lucene.search.CollectorManager; + +public class RangeCollectorFactory + implements CollectorFactory { + + public static CollectorFactory instance( + LongMultiValuesSource valuesSource, EffectiveRange[] ranges, List> collectorFactories) { + return new RangeCollectorFactory( valuesSource, ranges, collectorFactories ); + } + + public final CollectorKey key = CollectorKey.create(); + private final LongMultiValuesSource valuesSource; + private final EffectiveRange[] ranges; + private final List> collectorFactories; + + public RangeCollectorFactory(LongMultiValuesSource valuesSource, EffectiveRange[] ranges, + List> collectorFactories) { + this.valuesSource = valuesSource; + this.ranges = ranges; + this.collectorFactories = collectorFactories; + } + + @SuppressWarnings({ "unchecked" }) + @Override + public RangeCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { + CollectorKey[] keys = new CollectorKey[collectorFactories.size()]; + var managers = new CollectorManager[collectorFactories.size()]; + int index = 0; + for ( CollectorFactory collectorFactory : collectorFactories ) { + CollectorManager collectorManager = collectorFactory.createCollectorManager( context ); + keys[index] = collectorFactory.getCollectorKey(); + managers[index] = collectorManager; + index++; + } + return new RangeCollectorManager( valuesSource, ranges, keys, managers ); + } + + @Override + public CollectorKey getCollectorKey() { + return key; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java 
b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java new file mode 100644 index 00000000000..8065fc580b1 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeCollectorManager.java @@ -0,0 +1,57 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; +import java.util.Collection; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; +import org.hibernate.search.backend.lucene.types.lowlevel.impl.EffectiveRange; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; + +public class RangeCollectorManager implements CollectorManager { + + private final LongMultiValuesSource valuesSource; + private final EffectiveRange[] ranges; + private final CollectorKey[] keys; + private final CollectorManager[] managers; + + public RangeCollectorManager(LongMultiValuesSource valuesSource, EffectiveRange[] ranges, + CollectorKey[] keys, CollectorManager[] managers) { + this.valuesSource = valuesSource; + this.ranges = ranges; + this.keys = keys; + this.managers = managers; + } + + @Override + public RangeCollector newCollector() throws IOException { + Collector[][] collectors = new Collector[keys.length][]; + int index = 0; + for ( CollectorManager manager : managers ) { + Collector[] c = new Collector[ranges.length]; + collectors[index] = c; + for ( int j = 0; j < c.length; j++ ) { + c[j] = manager.newCollector(); + } + index++; + } + return new RangeCollector( valuesSource, ranges, collectors, keys ); + } + + @Override + public RangeResults reduce(Collection collection) { + if ( collection.isEmpty() ) { + return RangeResults.EMPTY; + } + RangeResults results = new RangeResults( keys, managers, ranges.length ); 
+ for ( RangeCollector collector : collection ) { + results.add( collector.collectors() ); + } + return results; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeResults.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeResults.java new file mode 100644 index 00000000000..c54c5592554 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/RangeResults.java @@ -0,0 +1,55 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; + +public class RangeResults { + + @SuppressWarnings("unchecked") + static final RangeResults EMPTY = new RangeResults( new CollectorKey[0], new CollectorManager[0], 0 ); + + private final CollectorKey[] collectorKeys; + private final CollectorManager[] managers; + + private final List[][] buckets; + + @SuppressWarnings("unchecked") + RangeResults(CollectorKey[] collectorKeys, CollectorManager[] managers, int ranges) { + this.collectorKeys = collectorKeys; + this.managers = managers; + this.buckets = new List[managers.length][]; + for ( int i = 0; i < buckets.length; i++ ) { + buckets[i] = new List[ranges]; + for ( int j = 0; j < buckets[i].length; j++ ) { + buckets[i][j] = new ArrayList<>(); + } + } + } + + void add(Collector[][] collectors) { + for ( int collectorIndex = 0; collectorIndex < collectors.length; collectorIndex++ ) { + for ( int rangeIndex = 0; rangeIndex < collectors[collectorIndex].length; rangeIndex++ ) { + buckets[collectorIndex][rangeIndex].add( collectors[collectorIndex][rangeIndex] ); + } + } + } + + public List[][] buckets() { + return buckets; + } + + public 
CollectorKey[] collectorKeys() { + return collectorKeys; + } + + public CollectorManager[] collectorManagers() { + return managers; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermCollectorSegmentValue.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermCollectorSegmentValue.java new file mode 100644 index 00000000000..24cce336229 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermCollectorSegmentValue.java @@ -0,0 +1,41 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.LeafCollector; + +class TermCollectorSegmentValue { + final Collector[] collectors; + final LeafCollector[] leafCollectors; + long count = 0L; + + TermCollectorSegmentValue(CollectorManager[] managers, LeafReaderContext leafReaderContext) + throws IOException { + this.collectors = new Collector[managers.length]; + this.leafCollectors = new LeafCollector[managers.length]; + for ( int i = 0; i < managers.length; i++ ) { + collectors[i] = managers[i].newCollector(); + leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); + } + } + + void collect(int doc) throws IOException { + count++; + for ( LeafCollector collector : leafCollectors ) { + collector.collect( doc ); + } + } + + void resetLeafCollectors(LeafReaderContext leafReaderContext) throws IOException { + for ( int i = 0; i < leafCollectors.length; i++ ) { + leafCollectors[i] = collectors[i].getLeafCollector( leafReaderContext ); + } + } +} diff --git 
a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermResults.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermResults.java new file mode 100644 index 00000000000..2f8e517a1ef --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TermResults.java @@ -0,0 +1,102 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; + +import org.hibernate.search.backend.lucene.types.aggregation.impl.BucketOrder; +import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket; + +import com.carrotsearch.hppc.LongObjectHashMap; +import com.carrotsearch.hppc.procedures.LongObjectProcedure; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.util.PriorityQueue; + +public class TermResults { + + @SuppressWarnings("unchecked") + static final TermResults EMPTY = new TermResults( new CollectorKey[0], new CollectorManager[0] ); + + private final CollectorKey[] collectorKeys; + private final CollectorManager[] managers; + + private final LongObjectHashMap buckets = new LongObjectHashMap<>(); + + TermResults(CollectorKey[] collectorKeys, CollectorManager[] managers) { + this.collectorKeys = collectorKeys; + this.managers = managers; + } + + public List counts(BucketOrder order, int topN, int minDocCount) { + int size = Math.min( topN, buckets.size() ); + PriorityQueue pq = new HibernateSearchBucketOrderQueue( order, size ); + + buckets.forEach( (LongObjectProcedure) (key, value) -> { + if ( value.count >= minDocCount ) { + pq.insertWithOverflow( value ); + } + } ); + + List results = new LinkedList<>(); + while ( pq.size() != 0 ) { 
+ LongBucket popped = pq.pop(); + results.add( 0, popped ); + } + + return results; + } + + void add(LongObjectHashMap segmentValues) { + for ( var segment : segmentValues ) { + LongBucket bucket = buckets.get( segment.key ); + if ( bucket == null ) { + bucket = new LongBucket( segment.key, segment.value.collectors, segment.value.count ); + buckets.put( segment.key, bucket ); + } + else { + bucket.add( segment.value.collectors, segment.value.count ); + } + } + } + + public void merge(LongObjectHashMap values) { + for ( var toadd : values ) { + LongBucket bucket = buckets.get( toadd.key ); + if ( bucket == null ) { + bucket = new LongBucket( toadd.key, toadd.value.collectors, toadd.value.count ); + buckets.put( toadd.key, bucket ); + } + else { + bucket.add( toadd.value ); + } + } + } + + public CollectorKey[] collectorKeys() { + return collectorKeys; + } + + public CollectorManager[] collectorManagers() { + return managers; + } + + private static class HibernateSearchBucketOrderQueue extends PriorityQueue { + private final Comparator comparator; + + public HibernateSearchBucketOrderQueue(BucketOrder order, int maxSize) { + super( maxSize ); + this.comparator = order.toLongBucketComparator(); + } + + @Override + protected boolean lessThan(LongBucket t1, LongBucket t2) { + return comparator.compare( t1, t2 ) > 0; + } + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java new file mode 100644 index 00000000000..de1dd651aad --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollector.java @@ -0,0 +1,126 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValues; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; +import org.hibernate.search.backend.lucene.types.aggregation.impl.LongBucket; + +import com.carrotsearch.hppc.LongHashSet; +import com.carrotsearch.hppc.LongObjectHashMap; +import com.carrotsearch.hppc.cursors.LongObjectCursor; + +import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.SimpleCollector; + +public class TextTermsCollector extends SimpleCollector implements BaseTermsCollector { + + private final LongHashSet uniqueLeafIndicesForDocument = new LongHashSet(); + + private final TextMultiValuesSource valuesSource; + private final LongObjectHashMap hashValues = new LongObjectHashMap<>(); + private final LongObjectHashMap segmentValues = new LongObjectHashMap<>(); + private final String field; + private SortedSetDocValues sortedSetValues; + + private final CollectorKey[] keys; + private final CollectorManager[] managers; + + private TextMultiValues values; + private LeafReaderContext leafReaderContext; + + public TextTermsCollector(String field, TextMultiValuesSource valuesSource, + CollectorKey[] keys, CollectorManager[] managers) { + this.field = field; + this.valuesSource = valuesSource; + this.keys = keys; + this.managers = managers; + } + + @Override + public void collect(int doc) throws IOException { + if ( values.advanceExact( doc ) ) { + // or just recreate it on each document? 
+ uniqueLeafIndicesForDocument.clear(); + + while ( values.hasNextValue() ) { + // Each document must be counted only once per range. + long value = values.nextOrd(); + if ( uniqueLeafIndicesForDocument.add( value ) ) { + TermCollectorSegmentValue segmentValue = segmentValues.get( value ); + if ( segmentValue == null ) { + segmentValue = new TermCollectorSegmentValue( managers, leafReaderContext ); + segmentValues.put( value, segmentValue ); + } + segmentValue.collect( doc ); + } + } + } + } + + @Override + public CollectorKey[] keys() { + return keys; + } + + @Override + public CollectorManager[] managers() { + return managers; + } + + LongObjectHashMap segmentValues() { + return hashValues; + } + + @Override + public ScoreMode scoreMode() { + return ScoreMode.COMPLETE_NO_SCORES; + } + + @Override + protected void doSetNextReader(LeafReaderContext context) throws IOException { + initRootSortedSetDocValues( context ); + this.values = valuesSource.getValues( context ); + leafReaderContext = context; + } + + @Override + public void finish() throws IOException { + for ( LongObjectCursor value : segmentValues ) { + long globalOrd = sortedSetValues.lookupTerm( values.lookupOrd( value.key ) ); + LongBucket bucket = hashValues.get( globalOrd ); + if ( bucket == null ) { + bucket = new LongBucket( globalOrd, value.value.collectors, value.value.count ); + hashValues.put( globalOrd, bucket ); + } + else { + bucket.count += value.value.count; + for ( int i = 0; i < bucket.collectors.length; i++ ) { + bucket.collectors[i].add( value.value.collectors[i] ); + } + } + } + this.values = null; + this.segmentValues.clear(); + } + + private void initRootSortedSetDocValues(IndexReaderContext ctx) throws IOException { + if ( sortedSetValues != null || ctx == null ) { + return; + } + if ( ctx.isTopLevel ) { + this.sortedSetValues = MultiDocValues.getSortedSetValues( ctx.reader(), field ); + } + initRootSortedSetDocValues( ctx.parent ); + } +} diff --git 
a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java new file mode 100644 index 00000000000..7e95c8e270b --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorFactory.java @@ -0,0 +1,53 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.io.IOException; +import java.util.List; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; + +import org.apache.lucene.search.CollectorManager; + +public class TextTermsCollectorFactory + implements CollectorFactory { + + public static CollectorFactory instance( + String field, TextMultiValuesSource valuesSource, List> collectorFactories) { + return new TextTermsCollectorFactory( field, valuesSource, collectorFactories ); + } + + public final CollectorKey key = CollectorKey.create(); + private final TextMultiValuesSource valuesSource; + private final String field; + private final List> collectorFactories; + + public TextTermsCollectorFactory(String field, TextMultiValuesSource valuesSource, + List> collectorFactories) { + this.field = field; + this.valuesSource = valuesSource; + this.collectorFactories = collectorFactories; + } + + @SuppressWarnings({ "unchecked" }) + @Override + public TextTermsCollectorManager createCollectorManager(CollectorExecutionContext context) throws IOException { + CollectorKey[] keys = new CollectorKey[collectorFactories.size()]; + var managers = new CollectorManager[collectorFactories.size()]; + int index = 0; + for ( CollectorFactory factory : collectorFactories ) { + keys[index] = factory.getCollectorKey(); + CollectorManager 
collectorManager = factory.createCollectorManager( context ); + managers[index] = collectorManager; + index++; + } + return new TextTermsCollectorManager( field, valuesSource, keys, managers ); + } + + @Override + public CollectorKey getCollectorKey() { + return key; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java new file mode 100644 index 00000000000..37115c1d145 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/collector/impl/TextTermsCollectorManager.java @@ -0,0 +1,46 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.lowlevel.collector.impl; + +import java.util.Collection; + +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; + +public class TextTermsCollectorManager + implements CollectorManager { + + private final TextMultiValuesSource valuesSource; + private final String field; + private final CollectorKey[] keys; + private final CollectorManager[] managers; + + public TextTermsCollectorManager(String field, TextMultiValuesSource valuesSource, + CollectorKey[] keys, CollectorManager[] managers) { + this.field = field; + this.valuesSource = valuesSource; + this.keys = keys; + this.managers = managers; + } + + @Override + public TextTermsCollector newCollector() { + return new TextTermsCollector( field, valuesSource, keys, managers ); + } + + @Override + public TermResults reduce(Collection collection) { + if ( collection.isEmpty() ) { + return TermResults.EMPTY; + } + TermResults results = new TermResults( keys, managers ); + for ( 
TextTermsCollector collector : collection ) { + results.merge( collector.segmentValues() ); + } + return results; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/LongMultiValuesToSingleValuesSource.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/LongMultiValuesToSingleValuesSource.java index 1414e8d59a5..0f652963463 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/LongMultiValuesToSingleValuesSource.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/LongMultiValuesToSingleValuesSource.java @@ -106,7 +106,7 @@ public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws I * @param ctx the ctx * @param scores the scores * @return NumericDocValues - * @throws java.io.IOException + * @throws IOException */ public NumericDocValues getRawNumericDocValues(LeafReaderContext ctx, DoubleValues scores) throws IOException { return new RawNumericDocValues( getValues( ctx, scores ) ); diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/SortedNumericDoubleDocValues.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/SortedNumericDoubleDocValues.java index d4087aaf4f1..8d2e4086645 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/SortedNumericDoubleDocValues.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/SortedNumericDoubleDocValues.java @@ -37,7 +37,7 @@ protected SortedNumericDoubleDocValues() { * * @param target the target * @return the next value - * @throws java.io.IOException + * @throws IOException */ public abstract boolean advanceExact(int target) throws 
IOException; @@ -46,7 +46,7 @@ protected SortedNumericDoubleDocValues() { * {@link #docValueCount} times for the document. * * @return next value - * @throws java.io.IOException + * @throws IOException */ public abstract double nextValue() throws IOException; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValues.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValues.java index 0c50566780d..0749fff4f99 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValues.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValues.java @@ -7,6 +7,7 @@ import java.io.IOException; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.BytesRef; /** * A per-document, unordered sequence of text ordinals. @@ -46,6 +47,8 @@ protected TextMultiValues() { */ public abstract long nextOrd() throws IOException; + public abstract BytesRef lookupOrd(long ord) throws IOException; + /** * Returns the number of unique values. * @return number of unique values in this SortedDocValues. 
This is @@ -77,6 +80,11 @@ public long nextOrd() { throw new UnsupportedOperationException(); } + @Override + public BytesRef lookupOrd(long ord) { + throw new UnsupportedOperationException(); + } + @Override public long getValueCount() { return 0; @@ -113,6 +121,11 @@ public long nextOrd() throws IOException { return values.nextOrd(); } + @Override + public BytesRef lookupOrd(long ord) throws IOException { + return values.lookupOrd( ord ); + } + @Override public long getValueCount() { return values.getValueCount(); diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValuesToSingleValuesSource.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValuesToSingleValuesSource.java index 2fc3bfc90f6..b75253df044 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValuesToSingleValuesSource.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/docvalues/impl/TextMultiValuesToSingleValuesSource.java @@ -17,7 +17,7 @@ import org.apache.lucene.util.BytesRef; /** - * A source of {@link org.apache.lucene.index.SortedDocValues} (text doc values) with multiple values per document, + * A source of {@link SortedDocValues} (text doc values) with multiple values per document, * where multiple values are "aggregated" into a single value * according to a given {@link MultiValueMode}. *

@@ -136,15 +136,14 @@ public boolean advanceExact(int parentDoc) throws IOException { if ( parentDoc == lastSeenParentDoc ) { return result; } - lastSeenParentDoc = parentDoc; + if ( !childDocsWithValues.advanceExactParent( parentDoc ) ) { // No child of this parent has a value result = false; return false; } - lastSeenParentDoc = parentDoc; lastEmittedOrd = (int) mode.pick( values, childDocsWithValues ); result = true; return true; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java deleted file mode 100644 index 1a62ef39013..00000000000 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/FacetCountsUtils.java +++ /dev/null @@ -1,56 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.util.Collection; -import java.util.function.ToLongFunction; - -import org.hibernate.search.util.common.data.Range; -import org.hibernate.search.util.common.data.RangeBoundInclusion; - -import org.apache.lucene.facet.range.LongRange; - -public class FacetCountsUtils { - - private FacetCountsUtils() { - } - - public static < - T extends Number> LongRange[] createLongRangesForIntegralValues(Collection> ranges) { - return createLongRanges( ranges, Number::longValue, Long.MIN_VALUE, Long.MAX_VALUE, false ); - } - - public static LongRange[] createLongRangesForFloatingPointValues(Collection> ranges, - ToLongFunction encoder, T negativeInfinity, T positiveInfinity) { - return createLongRanges( ranges, encoder, negativeInfinity, positiveInfinity, true ); - } - - private static LongRange[] createLongRanges(Collection> ranges, - ToLongFunction encoder, - T lowestPossibleValue, T highestPossibleValue, boolean 
extremaAreInfinity) { - LongRange[] longRanges = new LongRange[ranges.size()]; - int i = 0; - for ( Range range : ranges ) { - T lowerBoundValue = range.lowerBoundValue().orElse( null ); - T upperBoundValue = range.upperBoundValue().orElse( null ); - longRanges[i] = new LongRange( - String.valueOf( i ), - encoder.applyAsLong( lowerBoundValue == null ? lowestPossibleValue : lowerBoundValue ), - // The lower bound is included if it is explicitly included - RangeBoundInclusion.INCLUDED.equals( range.lowerBoundInclusion() ) - // ... or if it is infinity but infinity cannot be represented - || !extremaAreInfinity && lowerBoundValue == null, - encoder.applyAsLong( upperBoundValue == null ? highestPossibleValue : upperBoundValue ), - // The upper bound is included if it is explicitly included - RangeBoundInclusion.INCLUDED.equals( range.upperBoundInclusion() ) - // ... or if it is infinity but infinity cannot be represented - || !extremaAreInfinity && upperBoundValue == null - ); - ++i; - } - return longRanges; - } - -} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueFacetCounts.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueFacetCounts.java deleted file mode 100644 index f6b1ca94c25..00000000000 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueFacetCounts.java +++ /dev/null @@ -1,194 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. 
and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.io.IOException; -import java.util.Collections; -import java.util.List; - -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; - -import com.carrotsearch.hppc.LongHashSet; -import com.carrotsearch.hppc.LongIntHashMap; -import com.carrotsearch.hppc.LongIntMap; -import com.carrotsearch.hppc.cursors.LongIntCursor; -import com.carrotsearch.hppc.procedures.LongProcedure; - -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.LabelAndValue; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.PriorityQueue; - -/** - *

- * Copied with some changes from {@code org.apache.lucene.facet.LongValueFacetCounts} - * of Apache Lucene project. - */ -public class LongMultiValueFacetCounts extends Facets { - - private final int[] counts = new int[1024]; - - private final LongIntMap hashCounts = new LongIntHashMap(); - - private final String field; - - private int totCount; - - public LongMultiValueFacetCounts(String field, LongMultiValuesSource valueSource, FacetsCollector hits) throws IOException { - this.field = field; - count( valueSource, hits.getMatchingDocs() ); - } - - private void count(LongMultiValuesSource valueSource, List matchingDocs) throws IOException { - LongHashSet uniqueValuesForDocument = new LongHashSet(); - LongProcedure incrementCountForDocumentId = this::increment; - - for ( FacetsCollector.MatchingDocs hits : matchingDocs ) { - LongMultiValues fv = valueSource.getValues( hits.context() ); - - DocIdSetIterator docs = hits.bits().iterator(); - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc() ) { - if ( fv.advanceExact( doc ) ) { - totCount++; - while ( fv.hasNextValue() ) { - // Each document must be counted only once per value. - uniqueValuesForDocument.add( fv.nextValue() ); - } - - uniqueValuesForDocument.forEach( incrementCountForDocumentId ); - uniqueValuesForDocument.clear(); - } - } - } - } - - private void increment(long value) { - if ( value >= 0 && value < counts.length ) { - counts[(int) value]++; - } - else { - hashCounts.addTo( value, 1 ); - } - } - - @Override - public FacetResult getAllChildren(String dim, String... path) { - throw new UnsupportedOperationException( - "Getting all children is not supported by " + this.getClass().getSimpleName() ); - } - - @Override - public FacetResult getTopChildren(int topN, String dim, String... 
path) { - if ( !dim.equals( field ) ) { - throw new IllegalArgumentException( "invalid dim \"" + dim + "\"; should be \"" + field + "\"" ); - } - if ( path.length != 0 ) { - throw new IllegalArgumentException( "path.length should be 0" ); - } - return getTopChildrenSortByCount( topN ); - } - - private static class Entry { - int count; - long value; - } - - public FacetResult getTopChildrenSortByCount(int topN) { - PriorityQueue pq = new PriorityQueue( Math.min( topN, counts.length + hashCounts.size() ) ) { - @Override - protected boolean lessThan(Entry a, Entry b) { - // sort by count descending, breaking ties by value ascending: - return a.count < b.count || ( a.count == b.count && a.value > b.value ); - } - }; - - int childCount = 0; - Entry e = null; - for ( int i = 0; i < counts.length; i++ ) { - if ( counts[i] != 0 ) { - childCount++; - if ( e == null ) { - e = new Entry(); - } - e.value = i; - e.count = counts[i]; - e = pq.insertWithOverflow( e ); - } - } - - if ( hashCounts.size() != 0 ) { - childCount += hashCounts.size(); - for ( LongIntCursor c : hashCounts ) { - int count = c.value; - if ( count != 0 ) { - e = insertEntry( pq, e, c, count ); - } - } - } - - LabelAndValue[] results = new LabelAndValue[pq.size()]; - while ( pq.size() != 0 ) { - Entry entry = pq.pop(); - results[pq.size()] = new LabelAndValue( Long.toString( entry.value ), entry.count ); - } - - return new FacetResult( field, new String[0], totCount, results, childCount ); - } - - private Entry insertEntry(PriorityQueue pq, - Entry e, LongIntCursor c, int count) { - if ( e == null ) { - e = new Entry(); - } - e.value = c.key; - e.count = count; - e = pq.insertWithOverflow( e ); - return e; - } - - @Override - public Number getSpecificValue(String dim, String... 
path) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public List getAllDims(int topN) throws IOException { - return Collections.singletonList( getTopChildren( topN, field ) ); - } - - @Override - public String toString() { - StringBuilder b = new StringBuilder(); - b.append( "LongValueFacetCounts totCount=" ); - b.append( totCount ); - b.append( ":\n" ); - for ( int i = 0; i < counts.length; i++ ) { - if ( counts[i] != 0 ) { - b.append( " " ); - b.append( i ); - b.append( " -> count=" ); - b.append( counts[i] ); - b.append( '\n' ); - } - } - - if ( hashCounts.size() != 0 ) { - for ( LongIntCursor c : hashCounts ) { - if ( c.value != 0 ) { - b.append( " " ); - b.append( c.key ); - b.append( " -> count=" ); - b.append( c.value ); - b.append( '\n' ); - } - } - } - - return b.toString(); - } -} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeCounter.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeCounter.java deleted file mode 100644 index b5f8673b46f..00000000000 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeCounter.java +++ /dev/null @@ -1,293 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - -import com.carrotsearch.hppc.IntArrayList; -import com.carrotsearch.hppc.LongArrayList; -import com.carrotsearch.hppc.LongIntHashMap; -import com.carrotsearch.hppc.LongIntMap; -import com.carrotsearch.hppc.cursors.IntCursor; - -import org.apache.lucene.facet.range.LongRange; - -/** - *

- * Copied with some changes from {@code org.apache.lucene.facet.range.LongRangeCounter} - * of Apache Lucene project. - */ -class LongMultiValueRangeCounter { - - final LongRangeNode root; - final long[] boundaries; - final int[] leafCounts; - - // Used during rollup - private int leafUpto; - private int missingCount; - - public LongMultiValueRangeCounter(LongRange[] ranges) { - // Maps all range inclusive endpoints to int flags; 1 - // = start of interval, 2 = end of interval. We need to - // track the start vs end case separately because if a - // given point is both, then it must be its own - // elementary interval: - LongIntMap endsMap = new LongIntHashMap(); - - endsMap.put( Long.MIN_VALUE, 1 ); - endsMap.put( Long.MAX_VALUE, 2 ); - - for ( LongRange range : ranges ) { - int cur = endsMap.get( range.min ); - if ( cur == 0 ) { - endsMap.put( range.min, 1 ); - } - else { - endsMap.put( range.min, cur | 1 ); - } - cur = endsMap.get( range.max ); - if ( cur == 0 ) { - endsMap.put( range.max, 2 ); - } - else { - endsMap.put( range.max, cur | 2 ); - } - } - - LongArrayList endsList = new LongArrayList( endsMap.keys() ); - Arrays.sort( endsList.buffer, 0, endsList.elementsCount ); - - // Build elementaryIntervals (a 1D Venn diagram): - List elementaryIntervals = new ArrayList<>(); - int upto0 = 1; - long v = endsList.get( 0 ); - long prev; - if ( endsMap.get( v ) == 3 ) { - elementaryIntervals.add( new InclusiveRange( v, v ) ); - prev = v + 1; - } - else { - prev = v; - } - - while ( upto0 < endsList.size() ) { - v = endsList.get( upto0 ); - int flags = endsMap.get( v ); - if ( flags == 3 ) { - // This point is both an end and a start; we need to - // separate it: - if ( v > prev ) { - elementaryIntervals.add( new InclusiveRange( prev, v - 1 ) ); - } - elementaryIntervals.add( new InclusiveRange( v, v ) ); - prev = v + 1; - } - else if ( flags == 1 ) { - // This point is only the start of an interval; - // attach it to next interval: - if ( v > prev ) { - 
elementaryIntervals.add( new InclusiveRange( prev, v - 1 ) ); - } - prev = v; - } - else { - assert flags == 2; - // This point is only the end of an interval; attach - // it to last interval: - elementaryIntervals.add( new InclusiveRange( prev, v ) ); - prev = v + 1; - } - upto0++; - } - - // Build binary tree on top of intervals: - root = split( 0, elementaryIntervals.size(), elementaryIntervals ); - - // Set outputs, so we know which range to output for - // each node in the tree: - for ( int i = 0; i < ranges.length; i++ ) { - root.addOutputs( i, ranges[i] ); - } - - // Set boundaries (ends of each elementary interval): - boundaries = new long[elementaryIntervals.size()]; - for ( int i = 0; i < boundaries.length; i++ ) { - boundaries[i] = elementaryIntervals.get( i ).end; - } - - leafCounts = new int[boundaries.length]; - } - - public void incrementCountForLeafWithIndex(int index) { - leafCounts[index]++; - } - - public int findLeafIndex(long v) { - // Binary search to find matched elementary range; we - // are guaranteed to find a match because the last - // boundary is Long.MAX_VALUE: - int lo = 0; - int hi = boundaries.length - 1; - while ( true ) { - int mid = ( lo + hi ) >>> 1; - if ( v <= boundaries[mid] ) { - if ( mid == 0 ) { - return 0; - } - else { - hi = mid - 1; - } - } - else if ( v > boundaries[mid + 1] ) { - lo = mid + 1; - } - else { - return mid + 1; - } - } - } - - /** Fills counts corresponding to the original input - * ranges, returning the missing count (how many hits - * didn't match any ranges). 
*/ - public int fillCounts(int[] counts) { - missingCount = 0; - leafUpto = 0; - rollup( root, counts, false ); - return missingCount; - } - - private int rollup(LongRangeNode node, int[] counts, boolean sawOutputs) { - int count; - sawOutputs |= node.outputs != null; - if ( node.left != null ) { - count = rollup( node.left, counts, sawOutputs ); - count += rollup( node.right, counts, sawOutputs ); - } - else { - // Leaf: - count = leafCounts[leafUpto]; - leafUpto++; - if ( !sawOutputs ) { - // This is a missing count (no output ranges were - // seen "above" us): - missingCount += count; - } - } - if ( node.outputs != null ) { - for ( IntCursor rangeIndexCursor : node.outputs ) { - counts[rangeIndexCursor.value] += count; - } - } - return count; - } - - private static LongRangeNode split(int start, int end, List elementaryIntervals) { - if ( start == end - 1 ) { - // leaf - InclusiveRange range = elementaryIntervals.get( start ); - return new LongRangeNode( range.start, range.end, null, null ); - } - else { - int mid = ( start + end ) >>> 1; - LongRangeNode left = split( start, mid, elementaryIntervals ); - LongRangeNode right = split( mid, end, elementaryIntervals ); - return new LongRangeNode( left.start, right.end, left, right ); - } - } - - private static final class InclusiveRange { - public final long start; - public final long end; - - public InclusiveRange(long start, long end) { - assert end >= start; - this.start = start; - this.end = end; - } - - @Override - public String toString() { - return start + " to " + end; - } - } - - /** Holds one node of the segment tree. 
*/ - public static final class LongRangeNode { - final LongRangeNode left; - final LongRangeNode right; - - // Our range, inclusive: - final long start; - final long end; - - // Which range indices to output when a query goes - // through this node: - IntArrayList outputs; - - public LongRangeNode(long start, long end, LongRangeNode left, LongRangeNode right) { - this.start = start; - this.end = end; - this.left = left; - this.right = right; - } - - @Override - public String toString() { - StringBuilder sb = new StringBuilder(); - toString( sb, 0 ); - return sb.toString(); - } - - static void indent(StringBuilder sb, int depth) { - for ( int i = 0; i < depth; i++ ) { - sb.append( " " ); - } - } - - /** Recursively assigns range outputs to each node. */ - void addOutputs(int index, LongRange range) { - if ( start >= range.min && end <= range.max ) { - // Our range is fully included in the incoming - // range; add to our output list: - if ( outputs == null ) { - outputs = new IntArrayList(); - } - outputs.add( index ); - } - else if ( left != null ) { - assert right != null; - // Recurse: - left.addOutputs( index, range ); - right.addOutputs( index, range ); - } - } - - void toString(StringBuilder sb, int depth) { - indent( sb, depth ); - if ( left == null ) { - assert right == null; - sb.append( "leaf: " ).append( start ).append( " to " ).append( end ); - } - else { - sb.append( "node: " ).append( start ).append( " to " ).append( end ); - } - if ( outputs != null ) { - sb.append( " outputs=" ); - sb.append( outputs ); - } - sb.append( '\n' ); - - if ( left != null ) { - assert right != null; - left.toString( sb, depth + 1 ); - right.toString( sb, depth + 1 ); - } - } - } -} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeFacetCounts.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeFacetCounts.java deleted file 
mode 100644 index a1f34798ab8..00000000000 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/LongMultiValueRangeFacetCounts.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.io.IOException; -import java.util.List; - -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValues; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesSource; - -import com.carrotsearch.hppc.IntHashSet; -import com.carrotsearch.hppc.procedures.IntProcedure; - -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.range.LongRange; -import org.apache.lucene.index.IndexReaderContext; -import org.apache.lucene.index.ReaderUtil; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Weight; - -/** - *

- * Copied with some changes from {@code org.apache.lucene.facet.range.LongRangeFacetCounts} - * of Apache Lucene project. - */ -public class LongMultiValueRangeFacetCounts extends MultiValueRangeFacetCounts { - - public LongMultiValueRangeFacetCounts(String field, LongMultiValuesSource valueSource, FacetsCollector hits, - LongRange... ranges) - throws IOException { - this( field, valueSource, hits, null, ranges ); - } - - public LongMultiValueRangeFacetCounts(String field, LongMultiValuesSource valueSource, FacetsCollector hits, - Query fastMatchQuery, LongRange... ranges) - throws IOException { - super( field, ranges, fastMatchQuery ); - count( valueSource, hits.getMatchingDocs() ); - } - - private void count(LongMultiValuesSource valueSource, List matchingDocs) throws IOException { - LongRange[] longRanges = (LongRange[]) this.ranges; - - IntHashSet uniqueLeafIndicesForDocument = new IntHashSet(); - LongMultiValueRangeCounter counter = new LongMultiValueRangeCounter( longRanges ); - IntProcedure incrementCountForLeafWithIndex = counter::incrementCountForLeafWithIndex; - - int missingCount = 0; - for ( FacetsCollector.MatchingDocs hits : matchingDocs ) { - LongMultiValues fv = valueSource.getValues( hits.context() ); - - totCount += hits.totalHits(); - final DocIdSetIterator fastMatchDocs; - if ( fastMatchQuery != null ) { - final IndexReaderContext topLevelContext = ReaderUtil.getTopLevelContext( hits.context() ); - final IndexSearcher searcher = new IndexSearcher( topLevelContext ); - searcher.setQueryCache( null ); - final Weight fastMatchWeight = - searcher.createWeight( searcher.rewrite( fastMatchQuery ), ScoreMode.COMPLETE_NO_SCORES, 1 ); - Scorer s = fastMatchWeight.scorer( hits.context() ); - if ( s == null ) { - continue; - } - fastMatchDocs = s.iterator(); - } - else { - fastMatchDocs = null; - } - - DocIdSetIterator docs = hits.bits().iterator(); - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; ) { - if ( fastMatchDocs != null ) 
{ - int fastMatchDoc = fastMatchDocs.docID(); - if ( fastMatchDoc < doc ) { - fastMatchDoc = fastMatchDocs.advance( doc ); - } - - if ( doc != fastMatchDoc ) { - doc = docs.advance( fastMatchDoc ); - continue; - } - } - - if ( fv.advanceExact( doc ) ) { - while ( fv.hasNextValue() ) { - // Each document must be counted only once per range. - int leafIndex = counter.findLeafIndex( fv.nextValue() ); - uniqueLeafIndicesForDocument.add( leafIndex ); - } - - uniqueLeafIndicesForDocument.forEach( incrementCountForLeafWithIndex ); - uniqueLeafIndicesForDocument.clear(); - } - else { - missingCount++; - } - - doc = docs.nextDoc(); - } - } - - int x = counter.fillCounts( counts ); - - missingCount += x; - - totCount -= missingCount; - } -} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/MultiValueRangeFacetCounts.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/MultiValueRangeFacetCounts.java deleted file mode 100644 index 43cf7269294..00000000000 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/MultiValueRangeFacetCounts.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.io.IOException; -import java.util.Collections; -import java.util.List; - -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.LabelAndValue; -import org.apache.lucene.facet.range.Range; -import org.apache.lucene.search.Query; - -/** - *

- * Copied with some changes from {@code org.apache.lucene.facet.range.RangeFacetCounts} - * of Apache Lucene project. - */ -public class MultiValueRangeFacetCounts extends Facets { - - protected final Range[] ranges; - protected final int[] counts; - protected final Query fastMatchQuery; - protected final String field; - protected int totCount; - - protected MultiValueRangeFacetCounts(String field, Range[] ranges, Query fastMatchQuery) { - this.field = field; - this.ranges = ranges; - this.fastMatchQuery = fastMatchQuery; - counts = new int[ranges.length]; - } - - @Override - public FacetResult getAllChildren(String dim, String... path) { - throw new UnsupportedOperationException( - "Getting all children is not supported by " + this.getClass().getSimpleName() ); - } - - @Override - public FacetResult getTopChildren(int topN, String dim, String... path) { - if ( !dim.equals( field ) ) { - throw new IllegalArgumentException( "invalid dim \"" + dim + "\"; should be \"" + field + "\"" ); - } - if ( path.length != 0 ) { - throw new IllegalArgumentException( "path.length should be 0" ); - } - LabelAndValue[] labelValues = new LabelAndValue[counts.length]; - for ( int i = 0; i < counts.length; i++ ) { - labelValues[i] = new LabelAndValue( ranges[i].label, counts[i] ); - } - return new FacetResult( dim, path, totCount, labelValues, labelValues.length ); - } - - @Override - public Number getSpecificValue(String dim, String... 
path) throws IOException { - throw new UnsupportedOperationException(); - } - - @Override - public List getAllDims(int topN) throws IOException { - return Collections.singletonList( getTopChildren( topN, field ) ); - } - - @Override - public String toString() { - StringBuilder b = new StringBuilder(); - b.append( "MultiValueRangeFacetCounts totCount=" ); - b.append( totCount ); - b.append( ":\n" ); - for ( int i = 0; i < ranges.length; i++ ) { - b.append( " " ); - b.append( ranges[i].label ); - b.append( " -> count=" ); - b.append( counts[i] ); - b.append( '\n' ); - } - return b.toString(); - } -} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/TextMultiValueFacetCounts.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/TextMultiValueFacetCounts.java deleted file mode 100644 index 89db37ffcd3..00000000000 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/lowlevel/facet/impl/TextMultiValueFacetCounts.java +++ /dev/null @@ -1,275 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. 
and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.lowlevel.facet.impl; - -import java.io.IOException; -import java.util.Collections; -import java.util.List; - -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValues; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.TextMultiValuesSource; - -import com.carrotsearch.hppc.IntHashSet; -import com.carrotsearch.hppc.procedures.IntProcedure; - -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.FacetsCollector.MatchingDocs; -import org.apache.lucene.facet.LabelAndValue; -import org.apache.lucene.facet.TopOrdAndIntQueue; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.MultiDocValues; -import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; -import org.apache.lucene.index.OrdinalMap; -import org.apache.lucene.index.ReaderUtil; -import org.apache.lucene.index.SortedSetDocValues; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.LongValues; -import org.apache.lucene.util.PriorityQueue; - -/** - * Copied with some changes from {@code org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts} - * of Apache Lucene project. - */ -public class TextMultiValueFacetCounts extends Facets { - - final SortedSetDocValues dv; - final String field; - final int ordCount; - final int[] counts; - - public TextMultiValueFacetCounts(IndexReader reader, String field, TextMultiValuesSource valuesSource, FacetsCollector hits) - throws IOException { - this.field = field; - dv = MultiDocValues.getSortedSetValues( reader, field ); - if ( dv != null && dv.getValueCount() > Integer.MAX_VALUE ) { - // We may want to remove this limitation? 
- // Note that DefaultSortedSetDocValuesReaderState has the same limitation, - // so this is no worse than the "legacy" facets from Search 5. - throw new IllegalStateException( - "Cannot aggregate when more than " + Integer.MAX_VALUE + " terms are indexed" ); - } - ordCount = dv == null ? 0 : (int) dv.getValueCount(); - counts = new int[ordCount]; - count( reader, valuesSource, hits.getMatchingDocs() ); - } - - @Override - public FacetResult getAllChildren(String dim, String... path) { - throw new UnsupportedOperationException( - "Getting all children is not supported by " + this.getClass().getSimpleName() ); - } - - @Override - public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { - if ( topN <= 0 ) { - throw new IllegalArgumentException( "topN must be > 0 (got: " + topN + ")" ); - } - if ( !dim.equals( field ) ) { - throw new IllegalArgumentException( "invalid dim \"" + dim + "\"; should be \"" + field + "\"" ); - } - if ( path.length != 0 ) { - throw new IllegalArgumentException( "path.length should be 0" ); - } - return getTopChildrenSortByCount( topN ); - } - - private FacetResult getTopChildrenSortByCount(int topN) throws IOException { - if ( topN > ordCount ) { - // HSEARCH-4544 Avoid OutOfMemoryError when passing crazy high topN values - // We know there will never be more than "ordCount" values anyway. 
- topN = ordCount; - } - - HibernateSearchTopOrdAndIntQueue q = null; - - int bottomCount = 0; - - int totCount = 0; - int childCount = 0; - - TopOrdAndIntQueue.OrdAndInt reuse = null; - - for ( int ord = 0; ord < ordCount; ord++ ) { - if ( counts[ord] > 0 ) { - totCount += counts[ord]; - childCount++; - if ( counts[ord] > bottomCount ) { - if ( reuse == null ) { - reuse = new TopOrdAndIntQueue.OrdAndInt(); - } - reuse.ord = ord; - reuse.value = counts[ord]; - if ( q == null ) { - // Lazy init, so we don't create this for the - // sparse case unnecessarily - q = new HibernateSearchTopOrdAndIntQueue( topN ); - } - reuse = q.insertWithOverflow( reuse ); - if ( q.size() == topN ) { - bottomCount = ( q.top() ).value; - } - } - } - } - - if ( q == null ) { - return null; - } - - LabelAndValue[] labelValues = new LabelAndValue[q.size()]; - for ( int i = labelValues.length - 1; i >= 0; i-- ) { - TopOrdAndIntQueue.OrdAndInt ordAndValue = q.pop(); - final BytesRef term = dv.lookupOrd( ordAndValue.ord ); - labelValues[i] = new LabelAndValue( term.utf8ToString(), ordAndValue.value ); - } - - return new FacetResult( field, new String[0], totCount, labelValues, childCount ); - } - - private void countOneSegment(OrdinalMap ordinalMap, TextMultiValues segValues, int segOrd, MatchingDocs hits) - throws IOException { - if ( segValues == null ) { - // nothing to count - return; - } - IntHashSet uniqueOrdinalsForDocument = new IntHashSet(); - - DocIdSetIterator docs = hits.bits().iterator(); - - // TODO: yet another option is to count all segs - // first, only in seg-ord space, and then do a - // merge-sort-PQ in the end to only "resolve to - // global" those seg ords that can compete, if we know - // we just want top K? ie, this is the same algo - // that'd be used for merging facets across shards - // (distributed faceting). 
but this has much higher - // temp ram req'ts (sum of number of ords across all - // segs) - if ( ordinalMap != null ) { - final LongValues ordMap = ordinalMap.getGlobalOrds( segOrd ); - - int numSegOrds = (int) segValues.getValueCount(); - - if ( hits.totalHits() < numSegOrds / 10 ) { - IntProcedure incrementCountForOrdinal = ord -> counts[ord]++; - // Remap every ord to global ord as we iterate: - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc() ) { - if ( !segValues.advanceExact( doc ) ) { - continue; // No value for this doc - } - while ( segValues.hasNextValue() ) { - int term = (int) segValues.nextOrd(); - int globalOrd = (int) ordMap.get( term ); - uniqueOrdinalsForDocument.add( globalOrd ); - } - uniqueOrdinalsForDocument.forEach( incrementCountForOrdinal ); - uniqueOrdinalsForDocument.clear(); - } - } - else { - // First count in seg-ord space: - final int[] segCounts = new int[numSegOrds]; - IntProcedure incrementCountForOrdinal = ord -> segCounts[ord]++; - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc() ) { - if ( !segValues.advanceExact( doc ) ) { - continue; // No value for this doc - } - while ( segValues.hasNextValue() ) { - int term = (int) segValues.nextOrd(); - uniqueOrdinalsForDocument.add( term ); - } - uniqueOrdinalsForDocument.forEach( incrementCountForOrdinal ); - uniqueOrdinalsForDocument.clear(); - } - - // Then, migrate to global ords: - for ( int ord = 0; ord < numSegOrds; ord++ ) { - int count = segCounts[ord]; - if ( count != 0 ) { - counts[(int) ordMap.get( ord )] += count; - } - } - } - } - else { - // No ord mapping (e.g., single segment index): - // just aggregate directly into counts. 
- IntProcedure incrementCountForOrdinal = ord -> counts[ord]++; - for ( int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc() ) { - if ( !segValues.advanceExact( doc ) ) { - continue; // No value for this doc - } - while ( segValues.hasNextValue() ) { - int term = (int) segValues.nextOrd(); - uniqueOrdinalsForDocument.add( term ); - } - uniqueOrdinalsForDocument.forEach( incrementCountForOrdinal ); - uniqueOrdinalsForDocument.clear(); - } - } - } - - /** - * Does all the "real work" of tallying up the counts. - */ - private void count(IndexReader reader, TextMultiValuesSource valuesSource, List matchingDocs) - throws IOException { - OrdinalMap ordinalMap; - - // TODO: is this right? really, we need a way to - // verify that this ordinalMap "matches" the leaves in - // matchingDocs... - if ( dv instanceof MultiSortedSetDocValues && matchingDocs.size() > 1 ) { - ordinalMap = ( (MultiSortedSetDocValues) dv ).mapping; - } - else { - ordinalMap = null; - } - - for ( MatchingDocs hits : matchingDocs ) { - - // LUCENE-5090: make sure the provided reader context "matches" - // the top-level reader passed to the - // SortedSetDocValuesReaderState, else cryptic - // AIOOBE can happen: - if ( ReaderUtil.getTopLevelContext( hits.context() ).reader() != reader ) { - throw new IllegalStateException( - "the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader" ); - } - - countOneSegment( ordinalMap, valuesSource.getValues( hits.context() ), hits.context().ord, hits ); - } - } - - @Override - public Number getSpecificValue(String dim, String... 
path) { - throw new UnsupportedOperationException(); - } - - @Override - public List getAllDims(int topN) throws IOException { - return Collections.singletonList( getTopChildren( topN, field ) ); - } - - /** - * While there is a `TopOrdAndIntQueue` in Lucene, unfortunately it works with OrdAndValue objects (in API). - * And there's no access to the value, leading to casting any type value has to be accessed. Hence, this impl: - */ - private static class HibernateSearchTopOrdAndIntQueue extends PriorityQueue { - - public HibernateSearchTopOrdAndIntQueue(int maxSize) { - super( maxSize ); - } - - @Override - protected boolean lessThan(TopOrdAndIntQueue.OrdAndInt a, TopOrdAndIntQueue.OrdAndInt b) { - return a.lessThan( b ); - } - } - -} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java index f8ad0b4fca4..edb3f90d4f3 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationExtractContext.java @@ -4,63 +4,23 @@ */ package org.hibernate.search.backend.lucene.search.aggregation.impl; -import java.util.Set; - import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; -import org.hibernate.search.backend.lucene.search.extraction.impl.HibernateSearchMultiCollectorManager; import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; -import org.hibernate.search.backend.lucene.search.query.impl.LuceneSearchQueryIndexScope; -import org.hibernate.search.engine.backend.session.spi.BackendSessionContext; import 
org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; -import org.hibernate.search.engine.search.query.spi.QueryParameters; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Collector; import org.apache.lucene.search.Query; -public class AggregationExtractContext { - - private final LuceneSearchQueryIndexScope queryIndexScope; - private final BackendSessionContext sessionContext; - private final IndexReader indexReader; - private final FromDocumentValueConvertContext fromDocumentValueConvertContext; - private final HibernateSearchMultiCollectorManager.MultiCollectedResults multiCollectedResults; - private final Set routingKeys; - private final QueryParameters parameters; - - public AggregationExtractContext(LuceneSearchQueryIndexScope queryIndexScope, BackendSessionContext sessionContext, - IndexReader indexReader, - FromDocumentValueConvertContext fromDocumentValueConvertContext, - HibernateSearchMultiCollectorManager.MultiCollectedResults multiCollectedResults, Set routingKeys, - QueryParameters parameters) { - this.queryIndexScope = queryIndexScope; - this.sessionContext = sessionContext; - this.indexReader = indexReader; - this.fromDocumentValueConvertContext = fromDocumentValueConvertContext; - this.multiCollectedResults = multiCollectedResults; - this.routingKeys = routingKeys; - this.parameters = parameters; - } - - public PredicateRequestContext toPredicateRequestContext(String absolutePath) { - return PredicateRequestContext.withSession( queryIndexScope, sessionContext, routingKeys, parameters ) - .withNestedPath( absolutePath ); - } +public interface AggregationExtractContext { + PredicateRequestContext toPredicateRequestContext(String absolutePath); - public IndexReader getIndexReader() { - return indexReader; - } + IndexReader getIndexReader(); - public FromDocumentValueConvertContext fromDocumentValueConvertContext() { - return fromDocumentValueConvertContext; - } + 
FromDocumentValueConvertContext fromDocumentValueConvertContext(); - public T getFacets(CollectorKey key) { - return multiCollectedResults.get( key ); - } + T getCollectorResults(CollectorKey key); - public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { - return new NestedDocsProvider( nestedDocumentPath, nestedFilter ); - } + NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContext.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContext.java index b7b87fc573a..fbd865f17b7 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContext.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/AggregationRequestContext.java @@ -4,54 +4,23 @@ */ package org.hibernate.search.backend.lucene.search.aggregation.impl; -import java.util.Set; - import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; -import org.hibernate.search.backend.lucene.search.extraction.impl.ExtractionRequirements; import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; -import org.hibernate.search.backend.lucene.search.query.impl.LuceneSearchQueryIndexScope; -import org.hibernate.search.engine.backend.session.spi.BackendSessionContext; import org.hibernate.search.engine.search.common.NamedValues; -import org.hibernate.search.engine.search.query.spi.QueryParameters; import org.apache.lucene.search.Collector; import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.Query; -public final class 
AggregationRequestContext { - - private final LuceneSearchQueryIndexScope queryIndexScope; - private final BackendSessionContext sessionContext; - private final Set routingKeys; - private final ExtractionRequirements.Builder extractionRequirementsBuilder; - private final QueryParameters parameters; - - public AggregationRequestContext(LuceneSearchQueryIndexScope queryIndexScope, BackendSessionContext sessionContext, - Set routingKeys, ExtractionRequirements.Builder extractionRequirementsBuilder, - QueryParameters parameters) { - this.queryIndexScope = queryIndexScope; - this.sessionContext = sessionContext; - this.routingKeys = routingKeys; - this.extractionRequirementsBuilder = extractionRequirementsBuilder; - this.parameters = parameters; - } - - public > void requireCollector( - CollectorFactory collectorFactory) { - extractionRequirementsBuilder.requireCollectorForAllMatchingDocs( collectorFactory ); - } +public interface AggregationRequestContext { + > void requireCollector( + CollectorFactory collectorFactory + ); - public NamedValues queryParameters() { - return parameters; - } + NamedValues queryParameters(); - public PredicateRequestContext toPredicateRequestContext(String absolutePath) { - return PredicateRequestContext.withSession( queryIndexScope, sessionContext, routingKeys, parameters ) - .withNestedPath( absolutePath ); - } + PredicateRequestContext toPredicateRequestContext(String absolutePath); - public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { - return new NestedDocsProvider( nestedDocumentPath, nestedFilter ); - } + NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationExtractContext.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationExtractContext.java new 
file mode 100644 index 00000000000..c285af570d4 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationExtractContext.java @@ -0,0 +1,72 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.search.aggregation.impl; + +import java.util.Set; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.extraction.impl.HibernateSearchMultiCollectorManager; +import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; +import org.hibernate.search.backend.lucene.search.query.impl.LuceneSearchQueryIndexScope; +import org.hibernate.search.engine.backend.session.spi.BackendSessionContext; +import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; +import org.hibernate.search.engine.search.query.spi.QueryParameters; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Query; + +public class RootAggregationExtractContext implements AggregationExtractContext { + + private final LuceneSearchQueryIndexScope queryIndexScope; + private final BackendSessionContext sessionContext; + private final IndexReader indexReader; + private final FromDocumentValueConvertContext fromDocumentValueConvertContext; + private final HibernateSearchMultiCollectorManager.MultiCollectedResults multiCollectedResults; + private final Set routingKeys; + private final QueryParameters parameters; + + public RootAggregationExtractContext(LuceneSearchQueryIndexScope queryIndexScope, + BackendSessionContext sessionContext, + IndexReader indexReader, + FromDocumentValueConvertContext fromDocumentValueConvertContext, + 
HibernateSearchMultiCollectorManager.MultiCollectedResults multiCollectedResults, Set routingKeys, + QueryParameters parameters) { + this.queryIndexScope = queryIndexScope; + this.sessionContext = sessionContext; + this.indexReader = indexReader; + this.fromDocumentValueConvertContext = fromDocumentValueConvertContext; + this.multiCollectedResults = multiCollectedResults; + this.routingKeys = routingKeys; + this.parameters = parameters; + } + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return PredicateRequestContext.withSession( queryIndexScope, sessionContext, routingKeys, parameters ) + .withNestedPath( absolutePath ); + } + + @Override + public IndexReader getIndexReader() { + return indexReader; + } + + @Override + public FromDocumentValueConvertContext fromDocumentValueConvertContext() { + return fromDocumentValueConvertContext; + } + + @Override + public T getCollectorResults(CollectorKey key) { + return multiCollectedResults.get( key ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return new NestedDocsProvider( nestedDocumentPath, nestedFilter ); + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationRequestContext.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationRequestContext.java new file mode 100644 index 00000000000..39e8fcd3bb6 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/aggregation/impl/RootAggregationRequestContext.java @@ -0,0 +1,63 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.search.aggregation.impl; + +import java.util.Set; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.extraction.impl.ExtractionRequirements; +import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; +import org.hibernate.search.backend.lucene.search.query.impl.LuceneSearchQueryIndexScope; +import org.hibernate.search.engine.backend.session.spi.BackendSessionContext; +import org.hibernate.search.engine.search.common.NamedValues; +import org.hibernate.search.engine.search.query.spi.QueryParameters; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.Query; + +public final class RootAggregationRequestContext implements AggregationRequestContext { + + private final LuceneSearchQueryIndexScope queryIndexScope; + private final BackendSessionContext sessionContext; + private final Set routingKeys; + private final ExtractionRequirements.Builder extractionRequirementsBuilder; + private final QueryParameters parameters; + + public RootAggregationRequestContext(LuceneSearchQueryIndexScope queryIndexScope, + BackendSessionContext sessionContext, + Set routingKeys, ExtractionRequirements.Builder extractionRequirementsBuilder, + QueryParameters parameters) { + this.queryIndexScope = queryIndexScope; + this.sessionContext = sessionContext; + this.routingKeys = routingKeys; + this.extractionRequirementsBuilder = extractionRequirementsBuilder; + this.parameters = parameters; + } + + @Override + public > void requireCollector( + CollectorFactory collectorFactory + ) { + extractionRequirementsBuilder.requireCollectorForAllMatchingDocs( collectorFactory ); + } + + @Override + public NamedValues queryParameters() { + return parameters; + 
} + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return PredicateRequestContext.withSession( queryIndexScope, sessionContext, routingKeys, parameters ) + .withNestedPath( absolutePath ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return new NestedDocsProvider( nestedDocumentPath, nestedFilter ); + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/highlighter/impl/LuceneUnifiedSearchHighlighter.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/highlighter/impl/LuceneUnifiedSearchHighlighter.java index a4a601f9488..220ca48d8cc 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/highlighter/impl/LuceneUnifiedSearchHighlighter.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/highlighter/impl/LuceneUnifiedSearchHighlighter.java @@ -166,7 +166,7 @@ public List highlight(int doc) throws IOException { private static class MultiValueUnifiedHighlighter extends UnifiedHighlighter { - private MultiValueUnifiedHighlighter(MultiValueUnifiedHighlighter.Builder builder) { + private MultiValueUnifiedHighlighter(Builder builder) { super( builder ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/predicate/impl/LuceneNamedPredicate.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/predicate/impl/LuceneNamedPredicate.java index 570cc97fb71..61a8ba5850b 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/predicate/impl/LuceneNamedPredicate.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/predicate/impl/LuceneNamedPredicate.java @@ -131,7 +131,6 @@ private static class BasicBuilder extends Builder { 
this.definition = definition; } - @SuppressWarnings("unchecked") @Override public void factory(ExtendedSearchPredicateFactory factory) { this.factory = factory.withScopeRoot( NonStaticMetamodelScope.class ); diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/projection/impl/LuceneDistanceToFieldProjection.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/projection/impl/LuceneDistanceToFieldProjection.java index c0ab5665c16..ac91547cefe 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/projection/impl/LuceneDistanceToFieldProjection.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/projection/impl/LuceneDistanceToFieldProjection.java @@ -196,7 +196,7 @@ public Builder create(LuceneSearchIndexScope scope, LuceneSearchIndexValueFie } } - public static class Builder extends AbstractLuceneProjection.AbstractBuilder + public static class Builder extends AbstractBuilder implements DistanceToFieldProjectionBuilder { private final LuceneFieldCodec codec; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/projection/impl/LuceneFieldProjection.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/projection/impl/LuceneFieldProjection.java index 3cb1e06b5ea..373d86d1d7f 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/projection/impl/LuceneFieldProjection.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/projection/impl/LuceneFieldProjection.java @@ -86,7 +86,7 @@ public ValueFieldExtractor request(ProjectionRequestContext context) { /** * @param The type of the temporary storage for accumulated values, before and after being transformed. 
*/ - private class ValueFieldExtractor implements LuceneSearchProjection.Extractor { + private class ValueFieldExtractor implements Extractor { private final String contextAbsoluteFieldPath; private final ProjectionCollector collector; @@ -193,7 +193,7 @@ private TypeSelector(LuceneFieldCodec codec, } } - private static class Builder extends AbstractLuceneProjection.AbstractBuilder + private static class Builder extends AbstractBuilder implements FieldProjectionBuilder { private final Function decodeFunction; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/LuceneSearchQuery.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/LuceneSearchQuery.java index d3cafdfbc97..35eca043321 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/LuceneSearchQuery.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/LuceneSearchQuery.java @@ -20,7 +20,7 @@ public interface LuceneSearchQuery * @param id The id of the entity whose score should be explained. * This is the entity ID, which may be of any type ({@code long}, ...), * not the document ID which is always a string. - * @return An {@link org.apache.lucene.search.Explanation} describing the score computation for the hit. + * @return An {@link Explanation} describing the score computation for the hit. * @throws org.hibernate.search.util.common.SearchException If the query targets multiple mapped types, * or if the explain request fails. */ @@ -35,14 +35,14 @@ public interface LuceneSearchQuery * @param id The id of the entity whose score should be explained. * This is the entity ID, which may be of any type ({@code long}, ...), * not the document ID which is always a string. - * @return An {@link org.apache.lucene.search.Explanation} describing the score computation for the hit. 
+ * @return An {@link Explanation} describing the score computation for the hit. * @throws org.hibernate.search.util.common.SearchException If the given index name does not refer to a mapped name targeted by this query, * or if the explain request fails. */ Explanation explain(String typeName, Object id); /** - * @return The Lucene {@link org.apache.lucene.search.Sort} this query relies on. + * @return The Lucene {@link Sort} this query relies on. */ Sort luceneSort(); diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java index 9b84c33d833..9a90f34eaba 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneExtractableSearchResult.java @@ -15,6 +15,7 @@ import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TopDocsDataCollectorExecutionContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; +import org.hibernate.search.backend.lucene.search.aggregation.impl.RootAggregationExtractContext; import org.hibernate.search.backend.lucene.search.extraction.impl.LuceneCollectors; import org.hibernate.search.backend.lucene.search.projection.impl.LuceneSearchProjection; import org.hibernate.search.backend.lucene.search.projection.impl.ProjectionExtractContext; @@ -112,7 +113,7 @@ private List extractHits(ProjectionHitMapper projectionHitMapper, int } private Map, ?> extractAggregations() throws IOException { - AggregationExtractContext aggregationExtractContext = new AggregationExtractContext( + AggregationExtractContext 
aggregationExtractContext = new RootAggregationExtractContext( requestContext.getQueryIndexScope(), requestContext.getSessionContext(), indexSearcher.getIndexReader(), diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java index 6bcf85e9ac6..c5eaaee583f 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/query/impl/LuceneSearchQueryBuilder.java @@ -20,6 +20,7 @@ import org.hibernate.search.backend.lucene.orchestration.impl.LuceneSyncWorkOrchestrator; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; +import org.hibernate.search.backend.lucene.search.aggregation.impl.RootAggregationRequestContext; import org.hibernate.search.backend.lucene.search.extraction.impl.ExtractionRequirements; import org.hibernate.search.backend.lucene.search.highlighter.impl.LuceneAbstractSearchHighlighter; import org.hibernate.search.backend.lucene.search.predicate.impl.LuceneSearchPredicate; @@ -42,7 +43,6 @@ import org.hibernate.search.engine.search.sort.SearchSort; import org.hibernate.search.engine.search.timeout.spi.TimeoutManager; -import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; @@ -214,7 +214,7 @@ public LuceneSearchQuery build() { Query filter = scope.filterOrNull( sessionContext.tenantIdentifier() ); if ( filter != null ) { - luceneQueryBuilder.add( filter, BooleanClause.Occur.FILTER ); + luceneQueryBuilder.add( filter, Occur.FILTER 
); } Query definitiveLuceneQuery = luceneQueryBuilder.build(); @@ -266,7 +266,7 @@ public LuceneSearchQuery build() { if ( aggregations != null ) { aggregationExtractors = new LinkedHashMap<>(); AggregationRequestContext aggregationRequestContext = - new AggregationRequestContext( scope, sessionContext, routingKeys, extractionRequirementsBuilder, + new RootAggregationRequestContext( scope, sessionContext, routingKeys, extractionRequirementsBuilder, parameters ); for ( Map.Entry, LuceneSearchAggregation> entry : aggregations.entrySet() ) { aggregationExtractors.put( entry.getKey(), entry.getValue().request( aggregationRequestContext ) ); diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/sort/impl/LuceneSearchSortCollector.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/sort/impl/LuceneSearchSortCollector.java index c04a9ef22e6..7a9e2adac8b 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/sort/impl/LuceneSearchSortCollector.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/search/sort/impl/LuceneSearchSortCollector.java @@ -11,7 +11,7 @@ *

* Used by Lucene-specific sort contributors. * - * @see LuceneSearchSort#toSortFields(org.hibernate.search.backend.lucene.search.sort.impl.LuceneSearchSortCollector) + * @see LuceneSearchSort#toSortFields(LuceneSearchSortCollector) */ public interface LuceneSearchSortCollector extends SortRequestContext { diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java deleted file mode 100644 index 566a92e567f..00000000000 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneFacetsBasedTermsAggregation.java +++ /dev/null @@ -1,223 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * Copyright Red Hat Inc. and Hibernate Authors - */ -package org.hibernate.search.backend.lucene.types.aggregation.impl; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Comparator; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import org.hibernate.search.backend.lucene.lowlevel.collector.impl.FacetsCollectorFactory; -import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; -import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; -import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; -import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; -import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; -import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; -import org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; -import 
org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; -import org.hibernate.search.engine.search.common.ValueModel; - -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.FacetsCollector; -import org.apache.lucene.facet.LabelAndValue; -import org.apache.lucene.index.IndexReader; - -/** - * @param The type of field values exposed to the mapper. - * @param The type of terms returned by the Lucene Facets. - * @param The type of keys in the returned map. It can be {@code F} - * or a different type if value converters are used. - */ -public abstract class AbstractLuceneFacetsBasedTermsAggregation - extends AbstractLuceneBucketAggregation { - - private final ProjectionConverter fromFieldValueConverter; - - private final BucketOrder order; - private final int maxTermCount; - private final int minDocCount; - - AbstractLuceneFacetsBasedTermsAggregation(AbstractBuilder builder) { - super( builder ); - this.fromFieldValueConverter = builder.fromFieldValueConverter; - this.order = builder.order; - this.maxTermCount = builder.maxTermCount; - this.minDocCount = builder.minDocCount; - } - - @Override - public Extractor> request(AggregationRequestContext context) { - context.requireCollector( FacetsCollectorFactory.INSTANCE ); - - return extractor( context ); - } - - protected abstract Extractor> extractor(AggregationRequestContext context); - - protected abstract class AbstractExtractor implements Extractor> { - @Override - public final Map extract(AggregationExtractContext context) throws IOException { - FromDocumentValueConvertContext convertContext = context.fromDocumentValueConvertContext(); - - List> buckets = getTopBuckets( context ); - - if ( BucketOrder.COUNT_DESC.equals( order ) && ( minDocCount > 0 || buckets.size() >= maxTermCount ) ) { - /* - * Optimization: in this case, minDocCount and sorting can be safely ignored. - * We already have all the buckets we need, and they are already sorted. 
- */ - return toMap( convertContext, buckets ); - } - - if ( minDocCount <= 0 ) { - Set firstTerms = collectFirstTerms( context.getIndexReader(), order.isTermOrderDescending(), maxTermCount ); - // If some of the first terms are already in non-zero buckets, ignore them in the next step - for ( Bucket bucket : buckets ) { - firstTerms.remove( bucket.term ); - } - // Complete the list of buckets with zero-count terms - for ( T term : firstTerms ) { - buckets.add( new Bucket<>( term, 0L ) ); - } - } - - // Sort the list of buckets and trim it if necessary (there may be more buckets than we want in some cases) - buckets.sort( order.toBucketComparator( getAscendingTermComparator() ) ); - if ( buckets.size() > maxTermCount ) { - buckets.subList( maxTermCount, buckets.size() ).clear(); - } - - return toMap( convertContext, buckets ); - } - - abstract FacetResult getTopChildren(IndexReader reader, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider, int limit) - throws IOException; - - abstract Set collectFirstTerms(IndexReader reader, boolean descending, int limit) - throws IOException; - - abstract Comparator getAscendingTermComparator(); - - abstract T labelToTerm(String label); - - abstract V termToFieldValue(T key); - - private List> getTopBuckets(AggregationExtractContext context) throws IOException { - FacetsCollector facetsCollector = context.getFacets( FacetsCollectorFactory.KEY ); - - NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); - - /* - * TODO HSEARCH-3666 What if the sort order is by term value? - * Lucene returns facets in descending count order. - * If that's what we need, then we can ask Lucene to apply the "maxTermCount" limit directly. - * This is what we do here. 
- * But if we need a different sort, then having to retrieve the "top N" facets by document count - * becomes clearly sub-optimal: to properly implement this, we would need to retrieve - * *all* facets, and Lucene would allocate an array of Integer.MAX_VALUE elements. - * To improve on this, we would need to re-implement the facet collections. - */ - int limit = maxTermCount; - FacetResult facetResult = getTopChildren( context.getIndexReader(), facetsCollector, nestedDocsProvider, limit ); - - List> buckets = new ArrayList<>(); - - if ( facetResult != null ) { - // Add results for matching documents - for ( LabelAndValue labelAndValue : facetResult.labelValues ) { - long count = (Integer) labelAndValue.value; - if ( count >= minDocCount ) { - buckets.add( new Bucket<>( labelToTerm( labelAndValue.label ), count ) ); - } - } - } - - return buckets; - } - - private Map toMap(FromDocumentValueConvertContext convertContext, List> buckets) { - Map result = new LinkedHashMap<>(); // LinkedHashMap to preserve ordering - for ( Bucket bucket : buckets ) { - V decoded = termToFieldValue( bucket.term ); - K key = fromFieldValueConverter.fromDocumentValue( decoded, convertContext ); - result.put( key, bucket.count ); - } - return result; - } - } - - abstract static class AbstractTypeSelector implements TermsAggregationBuilder.TypeSelector { - protected final LuceneSearchIndexScope scope; - protected final LuceneSearchIndexValueFieldContext field; - - protected AbstractTypeSelector(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field) { - this.scope = scope; - this.field = field; - } - - @Override - public abstract AbstractBuilder type(Class expectedType, ValueModel valueModel); - } - - abstract static class AbstractBuilder - extends AbstractLuceneBucketAggregation.AbstractBuilder - implements TermsAggregationBuilder { - - private final ProjectionConverter fromFieldValueConverter; - - private BucketOrder order = BucketOrder.COUNT_DESC; - private int minDocCount 
= 1; - private int maxTermCount = 100; - - AbstractBuilder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, - ProjectionConverter fromFieldValueConverter) { - super( scope, field ); - this.fromFieldValueConverter = fromFieldValueConverter; - } - - @Override - public void orderByCountDescending() { - order( BucketOrder.COUNT_DESC ); - } - - @Override - public void orderByCountAscending() { - order( BucketOrder.COUNT_ASC ); - } - - @Override - public void orderByTermAscending() { - order( BucketOrder.TERM_ASC ); - } - - @Override - public void orderByTermDescending() { - order( BucketOrder.TERM_DESC ); - } - - @Override - public void minDocumentCount(int minDocumentCount) { - this.minDocCount = minDocumentCount; - } - - @Override - public void maxTermCount(int maxTermCount) { - this.maxTermCount = maxTermCount; - } - - @Override - public abstract AbstractLuceneFacetsBasedTermsAggregation build(); - - protected final void order(BucketOrder order) { - this.order = order; - } - } - -} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericFieldAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericFieldAggregation.java index bcdfae57cf0..5d0a866d193 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericFieldAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericFieldAggregation.java @@ -73,7 +73,7 @@ private LuceneNumericMetricFieldAggregationExtraction(CollectorKey coll @Override public K extract(AggregationExtractContext context) { - Long collector = context.getFacets( collectorKey ); + Long collector = context.getCollectorResults( collectorKey ); E e = codec.getDomain().sortedDocValueToTerm( 
collector ); F decode = codec.decode( e ); @@ -111,7 +111,7 @@ private LuceneNumericMetricFieldAggregationDoubleExtraction(CollectorKey c @SuppressWarnings("unchecked") @Override public K extract(AggregationExtractContext context) { - Long collector = context.getFacets( collectorKey ); + Long collector = context.getCollectorResults( collectorKey ); return (K) numericDomain.sortedDocValueToTerm( collector ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericLongAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericLongAggregation.java index 0e0c326fe22..67c9fe5da43 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericLongAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMetricNumericLongAggregation.java @@ -44,7 +44,7 @@ public Set indexNames() { private class LuceneNumericMetricLongAggregationExtraction implements Extractor { @Override public Long extract(AggregationExtractContext context) { - return context.getFacets( collectorKey ); + return context.getCollectorResults( collectorKey ); } } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java new file mode 100644 index 00000000000..306f6c64bbd --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/AbstractLuceneMultivaluedTermsAggregation.java @@ -0,0 +1,203 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +import java.io.IOException; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TermResults; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; +import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; +import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; +import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; +import org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; +import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; +import org.hibernate.search.engine.search.common.ValueModel; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; + +/** + * @param The type of field values exposed to the mapper. + * @param The type of terms returned by the Lucene Facets. + * @param The type of keys in the returned map. It can be {@code F} + * or a different type if value converters are used. 
+ */ +public abstract class AbstractLuceneMultivaluedTermsAggregation + extends AbstractLuceneBucketAggregation { + + protected final ProjectionConverter fromFieldValueConverter; + + protected final BucketOrder order; + protected final int maxTermCount; + protected final int minDocCount; + protected final LuceneSearchAggregation aggregation; + + AbstractLuceneMultivaluedTermsAggregation(AbstractBuilder builder) { + super( builder ); + this.fromFieldValueConverter = builder.fromFieldValueConverter; + this.order = builder.order; + this.maxTermCount = builder.maxTermCount; + this.minDocCount = builder.minDocCount; + this.aggregation = builder.aggregation; + } + + protected abstract class AbstractExtractor implements Extractor> { + protected final Extractor extractor; + + protected AbstractExtractor(Extractor extractor) { + this.extractor = extractor; + } + + @Override + public final Map extract(AggregationExtractContext context) throws IOException { + List> buckets = getTopBuckets( context ); + + if ( minDocCount == 0 && buckets.size() < maxTermCount ) { + Set firstTerms = collectFirstTerms( context.getIndexReader(), order.isTermOrderDescending(), maxTermCount ); + for ( Bucket bucket : buckets ) { + firstTerms.remove( bucket.term() ); + } + R zeroValue = createZeroValue( context ); + firstTerms.forEach( term -> buckets.add( new Bucket<>( term, 0, zeroValue ) ) ); + buckets.sort( order.toBucketComparator( getAscendingTermComparator() ) ); + } + + return toMap( context.fromDocumentValueConvertContext(), buckets ); + } + + protected abstract TermResults termResults(AggregationExtractContext context) throws IOException; + + protected R createZeroValue(AggregationExtractContext context) throws IOException { + LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); + var termResults = termResults( context ); + CollectorManager[] managers = termResults.collectorManagers(); + CollectorKey[] keys = termResults.collectorKeys(); + HashMap, 
Object> results = new HashMap<>(); + for ( int i = 0; i < keys.length; i++ ) { + results.put( keys[i], managers[i].reduce( List.of( managers[i].newCollector() ) ) ); + } + localContext.setResults( results ); + return extractor.extract( localContext ); + } + + abstract Set collectFirstTerms(IndexReader reader, boolean descending, int limit) + throws IOException; + + abstract Comparator getAscendingTermComparator(); + + abstract V termToFieldValue(T key); + + abstract List> getTopBuckets(AggregationExtractContext context) throws IOException; + + private Map toMap(FromDocumentValueConvertContext convertContext, List> buckets) { + Map result = new LinkedHashMap<>(); // LinkedHashMap to preserve ordering + for ( Bucket bucket : buckets ) { + V decoded = termToFieldValue( bucket.term() ); + K key = fromFieldValueConverter.fromDocumentValue( decoded, convertContext ); + result.put( key, bucket.value() ); + } + return result; + } + + protected Map, Object> prepareResults(LongBucket bucket, TermResults termResults) + throws IOException { + Map, Object> result = new HashMap<>(); + List[] collectors = bucket.collectors; + CollectorKey[] collectorKeys = termResults.collectorKeys(); + CollectorManager[] managers = termResults.collectorManagers(); + for ( int i = 0; i < collectorKeys.length; i++ ) { + result.put( collectorKeys[i], managers[i].reduce( collectors[i] ) ); + } + return result; + } + } + + abstract static class AbstractTypeSelector implements TermsAggregationBuilder.TypeSelector { + protected final LuceneSearchIndexScope scope; + protected final LuceneSearchIndexValueFieldContext field; + + protected AbstractTypeSelector(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field) { + this.scope = scope; + this.field = field; + } + + @Override + public abstract AbstractBuilder type(Class expectedType, ValueModel valueModel); + } + + abstract static class AbstractBuilder + extends AbstractLuceneBucketAggregation.AbstractBuilder + implements 
TermsAggregationBuilder { + + protected final LuceneSearchAggregation aggregation; + protected final ProjectionConverter fromFieldValueConverter; + protected BucketOrder order; + protected int minDocCount; + protected int maxTermCount; + + AbstractBuilder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter) { + this( scope, field, aggregation, fromFieldValueConverter, BucketOrder.COUNT_DESC, 1, 100 ); + } + + AbstractBuilder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter, + BucketOrder order, int minDocCount, int maxTermCount) { + super( scope, field ); + this.aggregation = aggregation; + this.fromFieldValueConverter = fromFieldValueConverter; + this.order = order; + this.minDocCount = minDocCount; + this.maxTermCount = maxTermCount; + } + + @Override + public void orderByCountDescending() { + order( BucketOrder.COUNT_DESC ); + } + + @Override + public void orderByCountAscending() { + order( BucketOrder.COUNT_ASC ); + } + + @Override + public void orderByTermAscending() { + order( BucketOrder.TERM_ASC ); + } + + @Override + public void orderByTermDescending() { + order( BucketOrder.TERM_DESC ); + } + + @Override + public void minDocumentCount(int minDocumentCount) { + this.minDocCount = minDocumentCount; + } + + @Override + public void maxTermCount(int maxTermCount) { + this.maxTermCount = maxTermCount; + } + + @Override + public abstract AbstractLuceneMultivaluedTermsAggregation build(); + + protected final void order(BucketOrder order) { + this.order = order; + } + } + +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java index 6c1c27c662a..7f01fc781ba 100644 --- 
a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/Bucket.java @@ -4,12 +4,5 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; -final class Bucket { - final F term; - final long count; - - Bucket(F term, long count) { - this.term = term; - this.count = count; - } +public record Bucket(F term, long count, R value) { } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java index 4767879901c..2f9ab4292ad 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/BucketOrder.java @@ -6,37 +6,59 @@ import java.util.Comparator; -enum BucketOrder { +public enum BucketOrder { COUNT_ASC { @Override - Comparator> toBucketComparatorInternal(Comparator termComparator) { + Comparator> toBucketComparatorInternal(Comparator termComparator) { return (left, right) -> { - int order = Long.compare( left.count, right.count ); + int order = Long.compare( left.count(), right.count() ); if ( order != 0 ) { return order; } - order = termComparator.compare( left.term, right.term ); + order = termComparator.compare( left.term(), right.term() ); return order; }; } + + @Override + Comparator toLongBucketComparatorInternal() { + return Comparator.comparingLong( LongBucket::count ).thenComparingLong( LongBucket::termOrd ); + } }, COUNT_DESC { @Override - Comparator> toBucketComparatorInternal(Comparator termComparator) { + Comparator> toBucketComparatorInternal(Comparator termComparator) { return (left, right) -> { - int order = 
Long.compare( right.count, left.count ); // reversed, because desc + int order = Long.compare( right.count(), left.count() ); // reversed, because desc if ( order != 0 ) { return order; } - order = termComparator.compare( left.term, right.term ); + order = termComparator.compare( left.term(), right.term() ); + return order; + }; + } + + @Override + Comparator toLongBucketComparatorInternal() { + return (left, right) -> { + int order = Long.compare( right.count(), left.count() ); // reversed, because desc + if ( order != 0 ) { + return order; + } + order = Long.compare( left.termOrd(), right.termOrd() ); return order; }; } }, TERM_ASC { @Override - Comparator> toBucketComparatorInternal(Comparator termComparator) { - return (left, right) -> termComparator.compare( left.term, right.term ); + Comparator> toBucketComparatorInternal(Comparator termComparator) { + return (left, right) -> termComparator.compare( left.term(), right.term() ); + } + + @Override + Comparator toLongBucketComparatorInternal() { + return Comparator.comparingLong( LongBucket::termOrd ); } }, TERM_DESC { @@ -46,17 +68,28 @@ boolean isTermOrderDescending() { } @Override - Comparator> toBucketComparatorInternal(Comparator termComparator) { - return (left, right) -> termComparator.compare( left.term, right.term ); + Comparator> toBucketComparatorInternal(Comparator termComparator) { + return (left, right) -> termComparator.compare( left.term(), right.term() ); + } + + @Override + Comparator toLongBucketComparatorInternal() { + return Comparator.comparingLong( LongBucket::termOrd ).reversed(); } }; - Comparator> toBucketComparator(Comparator termAscendingComparator) { + public Comparator> toBucketComparator(Comparator termAscendingComparator) { return toBucketComparatorInternal( isTermOrderDescending() ? 
termAscendingComparator.reversed() : termAscendingComparator ); } - abstract Comparator> toBucketComparatorInternal(Comparator termComparator); + public Comparator toLongBucketComparator() { + return toLongBucketComparatorInternal(); + } + + abstract Comparator> toBucketComparatorInternal(Comparator termComparator); + + abstract Comparator toLongBucketComparatorInternal(); boolean isTermOrderDescending() { return false; diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationExtractContext.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationExtractContext.java new file mode 100644 index 00000000000..2882036b99c --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationExtractContext.java @@ -0,0 +1,58 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +import java.util.Map; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; +import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; +import org.hibernate.search.engine.backend.types.converter.runtime.FromDocumentValueConvertContext; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Query; + +class LocalAggregationExtractContext implements AggregationExtractContext { + + private final AggregationExtractContext delegate; + + private Map, Object> results; + + LocalAggregationExtractContext(AggregationExtractContext delegate) { + this.delegate = delegate; + } + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return delegate.toPredicateRequestContext( absolutePath ); + } + + @Override + public IndexReader getIndexReader() { + return delegate.getIndexReader(); + } + + @Override + public FromDocumentValueConvertContext fromDocumentValueConvertContext() { + return delegate.fromDocumentValueConvertContext(); + } + + @SuppressWarnings("unchecked") + @Override + public T getCollectorResults(CollectorKey key) { + return (T) results.get( key ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String nestedDocumentPath, Query nestedFilter) { + return delegate.createNestedDocsProvider( nestedDocumentPath, nestedFilter ); + } + + public void setResults(Map, Object> results) { + this.results = results; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationRequestContext.java 
b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationRequestContext.java new file mode 100644 index 00000000000..97dc028f22a --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LocalAggregationRequestContext.java @@ -0,0 +1,60 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +import java.util.ArrayList; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; + +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.predicate.impl.PredicateRequestContext; +import org.hibernate.search.engine.search.common.NamedValues; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.Query; + +class LocalAggregationRequestContext implements AggregationRequestContext { + + private final AggregationRequestContext delegate; + private final Set> localCollectorFactories = new LinkedHashSet<>(); + + LocalAggregationRequestContext(AggregationRequestContext delegate) { + this.delegate = delegate; + } + + @Override + public > void requireCollector( + CollectorFactory collectorFactory + ) { + localCollectorFactories.add( collectorFactory ); + } + + @Override + public NamedValues queryParameters() { + return delegate.queryParameters(); + } + + @Override + public PredicateRequestContext toPredicateRequestContext(String absolutePath) { + return delegate.toPredicateRequestContext( absolutePath ); + } + + @Override + public NestedDocsProvider createNestedDocsProvider(String 
nestedDocumentPath, Query nestedFilter) { + return delegate.createNestedDocsProvider( nestedDocumentPath, nestedFilter ); + } + + public List> localCollectorFactories() { + return new ArrayList<>( localCollectorFactories ); + } + + public AggregationRequestContext actualContext() { + return delegate; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java new file mode 100644 index 00000000000..aad664b24b4 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LongBucket.java @@ -0,0 +1,65 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.lucene.search.Collector; + +public class LongBucket { + public final long termOrd; + public final List[] collectors; + public long count; + + @SuppressWarnings("unchecked") + public LongBucket(long termOrd, Collector[] collectors, long count) { + this.termOrd = termOrd; + this.collectors = new List[collectors.length]; + for ( int i = 0; i < collectors.length; i++ ) { + this.collectors[i] = new ArrayList<>(); + this.collectors[i].add( collectors[i] ); + } + this.count = count; + } + + public LongBucket(long termOrd, List[] collectors, long count) { + this.termOrd = termOrd; + this.collectors = collectors; + this.count = count; + } + + public void add(Collector[] collectors, long count) { + this.count += count; + for ( int i = 0; i < collectors.length; i++ ) { + this.collectors[i].add( collectors[i] ); + } + } + + public void add(LongBucket bucket) { + this.count += bucket.count; + for ( int i = 0; i < collectors.length; i++ ) { + this.collectors[i].addAll( 
bucket.collectors[i] ); + } + } + + public long count() { + return count; + } + + public long termOrd() { + return termOrd; + } + + @Override + public String toString() { + return "LongBucket{" + + "termOrd=" + termOrd + + ", count=" + count + + ", collectors=" + Arrays.toString( collectors ) + + '}'; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java index c392eee44b8..964419b735e 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgCompensatedSumAggregation.java @@ -5,7 +5,7 @@ package org.hibernate.search.backend.lucene.types.aggregation.impl; import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CompensatedSumCollectorFactory; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountValuesCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; @@ -34,15 +34,15 @@ void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestConte compensatedSumCollectorKey = sumCollectorFactory.getCollectorKey(); context.requireCollector( sumCollectorFactory ); - CountCollectorFactory countCollectorFactory = new CountCollectorFactory( source ); - collectorKey = countCollectorFactory.getCollectorKey(); - 
context.requireCollector( countCollectorFactory ); + CountValuesCollectorFactory countValuesCollectorFactory = new CountValuesCollectorFactory( source ); + collectorKey = countValuesCollectorFactory.getCollectorKey(); + context.requireCollector( countValuesCollectorFactory ); } @Override E extractEncoded(AggregationExtractContext context, LuceneNumericDomain numericDomain) { - Double sum = context.getFacets( compensatedSumCollectorKey ); - Long counts = context.getFacets( collectorKey ); + Double sum = context.getCollectorResults( compensatedSumCollectorKey ); + Long counts = context.getCollectorResults( collectorKey ); double avg = ( sum / counts ); return numericDomain.doubleToTerm( avg ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java index 4ab59041635..597f8d57f9f 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneAvgNumericFieldAggregation.java @@ -5,8 +5,8 @@ package org.hibernate.search.backend.lucene.types.aggregation.impl; import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.AggregationFunctionCollector; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.Count; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountValues; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountValuesCollectorFactory; import 
org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.SumCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; @@ -28,7 +28,7 @@ public static Factory factory(AbstractLuceneNumericFieldCodec codec } // Supplementary collector used by the avg function - protected CollectorKey, Long> countCollectorKey; + protected CollectorKey, Long> countCollectorKey; LuceneAvgNumericFieldAggregation(Builder builder) { super( builder ); @@ -37,11 +37,11 @@ public static Factory factory(AbstractLuceneNumericFieldCodec codec @Override void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestContext context) { SumCollectorFactory sumCollectorFactory = new SumCollectorFactory( source ); - CountCollectorFactory countCollectorFactory = new CountCollectorFactory( source ); + CountValuesCollectorFactory countValuesCollectorFactory = new CountValuesCollectorFactory( source ); collectorKey = sumCollectorFactory.getCollectorKey(); - countCollectorKey = countCollectorFactory.getCollectorKey(); + countCollectorKey = countValuesCollectorFactory.getCollectorKey(); context.requireCollector( sumCollectorFactory ); - context.requireCollector( countCollectorFactory ); + context.requireCollector( countValuesCollectorFactory ); } private static class LuceneNumericMetricFieldAggregationExtraction implements Extractor { @@ -61,8 +61,8 @@ private LuceneNumericMetricFieldAggregationExtraction(CollectorKey coll @Override public K extract(AggregationExtractContext context) { - Long collector = context.getFacets( collectorKey ); - Long counts = context.getFacets( countCollectorKey ); + Long collector = context.getCollectorResults( collectorKey ); + Long counts = context.getCollectorResults( countCollectorKey ); Double avg = ( (double) collector / counts ); collector = NumberUtils.toLong( avg ); @@ -106,8 +106,8 @@ private 
LuceneNumericMetricFieldAggregationDoubleExtraction(CollectorKey c @Override public E extract(AggregationExtractContext context) { - Long collector = context.getFacets( collectorKey ); - Long counts = context.getFacets( countCollectorKey ); + Long collector = context.getCollectorResults( collectorKey ); + Long counts = context.getCollectorResults( countCollectorKey ); Double avg = ( (double) collector / counts ); collector = NumberUtils.toLong( avg ); return codec.getDomain().sortedDocValueToTerm( collector ); diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDistinctNumericLongAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDistinctNumericLongAggregation.java index 7e055d18150..f83e3d09acc 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDistinctNumericLongAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDistinctNumericLongAggregation.java @@ -4,7 +4,7 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountDistinctCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountDistinctValuesCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; @@ -25,7 +25,7 @@ public static Factory factory(AbstractLuceneNumericFieldCodec codec @Override void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestContext context) { - 
CountDistinctCollectorFactory collectorFactory = new CountDistinctCollectorFactory( source ); + CountDistinctValuesCollectorFactory collectorFactory = new CountDistinctValuesCollectorFactory( source ); collectorKey = collectorFactory.getCollectorKey(); context.requireCollector( collectorFactory ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java new file mode 100644 index 00000000000..b35fd6dc169 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountDocumentAggregation.java @@ -0,0 +1,99 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.aggregation.impl; + +import java.util.Set; + +import org.hibernate.search.backend.lucene.logging.impl.QueryLog; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountDocuemntsCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; +import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexCompositeNodeContext; +import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; +import org.hibernate.search.engine.search.aggregation.spi.CountDocumentAggregationBuilder; +import org.hibernate.search.engine.search.common.spi.SearchQueryElementFactory; + +public class LuceneCountDocumentAggregation implements LuceneSearchAggregation { + 
+ public static Factory factory() { + return Factory.INSTANCE; + } + + private final Set indexNames; + + LuceneCountDocumentAggregation(Builder builder) { + this.indexNames = builder.scope.hibernateSearchIndexNames(); + } + + @Override + public Extractor request(AggregationRequestContext context) { + CountDocuemntsCollectorFactory collectorFactory = CountDocuemntsCollectorFactory.instance(); + var collectorKey = collectorFactory.getCollectorKey(); + + context.requireCollector( collectorFactory ); + return new CountDocumentsExtractor( collectorKey ); + } + + private record CountDocumentsExtractor(CollectorKey collectorKey) implements Extractor { + + @Override + public Long extract(AggregationExtractContext context) { + return context.getCollectorResults( collectorKey ); + } + } + + @Override + public Set indexNames() { + return indexNames; + } + + protected static class Factory + implements + SearchQueryElementFactory, + LuceneSearchIndexCompositeNodeContext> { + + private static final Factory INSTANCE = new Factory(); + + private Factory() { + } + + @Override + public CountDocumentAggregationBuilder.TypeSelector create(LuceneSearchIndexScope scope, + LuceneSearchIndexCompositeNodeContext node) { + return new TypeSelector( scope ); + } + + @Override + public void checkCompatibleWith(SearchQueryElementFactory other) { + if ( !getClass().equals( other.getClass() ) ) { + throw QueryLog.INSTANCE.differentImplementationClassForQueryElement( getClass(), other.getClass() ); + } + } + } + + protected record TypeSelector(LuceneSearchIndexScope scope) implements CountDocumentAggregationBuilder.TypeSelector { + @Override + public CountDocumentAggregationBuilder type() { + return new Builder( scope ); + } + } + + public static class Builder implements CountDocumentAggregationBuilder { + + protected final LuceneSearchIndexScope scope; + + public Builder(LuceneSearchIndexScope scope) { + this.scope = scope; + } + + @Override + public LuceneCountDocumentAggregation build() { + 
return new LuceneCountDocumentAggregation( this ); + } + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountNumericLongAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountNumericLongAggregation.java index 7369a8fd21c..b9128f0eae4 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountNumericLongAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneCountNumericLongAggregation.java @@ -4,7 +4,7 @@ */ package org.hibernate.search.backend.lucene.types.aggregation.impl; -import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.aggregation.collector.impl.CountValuesCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; @@ -25,7 +25,7 @@ public static Factory factory(AbstractLuceneNumericFieldCodec codec @Override void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestContext context) { - CountCollectorFactory collectorFactory = new CountCollectorFactory( source ); + CountValuesCollectorFactory collectorFactory = new CountValuesCollectorFactory( source ); collectorKey = collectorFactory.getCollectorKey(); context.requireCollector( collectorFactory ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java 
b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java index a60a074113a..19cd2204aab 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericRangeAggregation.java @@ -6,54 +6,77 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.function.Function; -import org.hibernate.search.backend.lucene.lowlevel.collector.impl.FacetsCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollector; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.RangeResults; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; import org.hibernate.search.backend.lucene.types.codec.impl.AbstractLuceneNumericFieldCodec; -import 
org.hibernate.search.backend.lucene.types.lowlevel.impl.LuceneNumericDomain; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.spi.RangeAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; import org.hibernate.search.util.common.data.Range; -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; /** * @param The type of field values. * @param The type of encoded field values. * @param The type of keys in the returned map. It can be {@code F} + * @param The type of aggregated values. * or a different type if value converters are used. */ -public class LuceneNumericRangeAggregation - extends AbstractLuceneBucketAggregation, Long> { +public class LuceneNumericRangeAggregation + extends AbstractLuceneBucketAggregation, V> { + private final LuceneSearchAggregation aggregation; private final AbstractLuceneNumericFieldCodec codec; private final List> rangesInOrder; private final List> encodedRangesInOrder; - private LuceneNumericRangeAggregation(Builder builder) { + private CollectorKey collectorKey; + + private LuceneNumericRangeAggregation(Builder builder) { super( builder ); + this.aggregation = builder.aggregation; this.codec = builder.codec; this.rangesInOrder = builder.rangesInOrder; this.encodedRangesInOrder = builder.encodedRangesInOrder; } @Override - public Extractor, Long>> request(AggregationRequestContext context) { - context.requireCollector( FacetsCollectorFactory.INSTANCE ); + public Extractor, V>> request(AggregationRequestContext context) { + NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); + JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( + absoluteFieldPath, nestedDocsProvider + ); + + LocalAggregationRequestContext 
localAggregationContext = new LocalAggregationRequestContext( context ); + Extractor extractor = aggregation.request( localAggregationContext ); + + var rangeFactory = RangeCollectorFactory.instance( source, + codec.getDomain().createEffectiveRanges( encodedRangesInOrder ), + localAggregationContext.localCollectorFactories() ); - return new LuceneNumericRangeAggregationExtractor(); + collectorKey = rangeFactory.getCollectorKey(); + context.requireCollector( rangeFactory ); + + return new LuceneNumericRangeAggregationExtractor( extractor ); } public static class Factory @@ -71,30 +94,38 @@ public Factory(AbstractLuceneNumericFieldCodec codec) { } } - private class LuceneNumericRangeAggregationExtractor implements Extractor, Long>> { - - @Override - public Map, Long> extract(AggregationExtractContext context) throws IOException { - LuceneNumericDomain numericDomain = codec.getDomain(); - - FacetsCollector facetsCollector = context.getFacets( FacetsCollectorFactory.KEY ); + private class LuceneNumericRangeAggregationExtractor implements Extractor, V>> { + private final Extractor extractor; - NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); + public LuceneNumericRangeAggregationExtractor(Extractor extractor) { + this.extractor = extractor; + } - Facets facetsCount = numericDomain.createRangeFacetCounts( - absoluteFieldPath, facetsCollector, encodedRangesInOrder, - nestedDocsProvider - ); + @Override + public Map, V> extract(AggregationExtractContext context) throws IOException { + RangeResults rangeResults = context.getCollectorResults( collectorKey ); - FacetResult facetResult = facetsCount.getTopChildren( rangesInOrder.size(), absoluteFieldPath ); + LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); - Map, Long> result = new LinkedHashMap<>(); + Map, V> result = new LinkedHashMap<>(); for ( int i = 0; i < rangesInOrder.size(); i++ ) { - result.put( rangesInOrder.get( i ), (long) (Integer) 
facetResult.labelValues[i].value ); + localContext.setResults( prepareResults( i, rangeResults ) ); + result.put( rangesInOrder.get( i ), extractor.extract( localContext ) ); } return result; } + + private Map, Object> prepareResults(int index, RangeResults rangeResults) throws IOException { + Map, Object> result = new HashMap<>(); + List[][] collectors = rangeResults.buckets(); + CollectorKey[] collectorKeys = rangeResults.collectorKeys(); + CollectorManager[] managers = rangeResults.collectorManagers(); + for ( int i = 0; i < collectorKeys.length; i++ ) { + result.put( collectorKeys[i], managers[i].reduce( collectors[i][index] ) ); + } + return result; + } } public static class TypeSelector implements RangeAggregationBuilder.TypeSelector { @@ -110,28 +141,34 @@ private TypeSelector(AbstractLuceneNumericFieldCodec codec, } @Override - public Builder type(Class expectedType, ValueModel valueModel) { - return new Builder<>( codec, - field.encodingContext().encoder( scope, field, codec, expectedType, valueModel ), - scope, field ); + public Builder type(Class expectedType, ValueModel valueModel) { + return new CountBuilder<>( + codec, field.encodingContext().encoder( scope, field, codec, expectedType, valueModel ), + scope, field + ); } } - public static class Builder - extends AbstractLuceneBucketAggregation.AbstractBuilder, Long> - implements RangeAggregationBuilder { + public static class Builder + extends AbstractLuceneBucketAggregation.AbstractBuilder, V> + implements RangeAggregationBuilder { private final AbstractLuceneNumericFieldCodec codec; private final Function convertAndEncode; - private final List> rangesInOrder = new ArrayList<>(); - private final List> encodedRangesInOrder = new ArrayList<>(); + private final LuceneSearchAggregation aggregation; + private final List> rangesInOrder; + private final List> encodedRangesInOrder; - public Builder(AbstractLuceneNumericFieldCodec codec, Function convertAndEncode, - LuceneSearchIndexScope scope, 
LuceneSearchIndexValueFieldContext field) { + protected Builder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + AbstractLuceneNumericFieldCodec codec, Function convertAndEncode, + LuceneSearchAggregation aggregation, List> rangesInOrder, List> encodedRangesInOrder) { super( scope, field ); this.codec = codec; this.convertAndEncode = convertAndEncode; + this.aggregation = aggregation; + this.rangesInOrder = rangesInOrder; + this.encodedRangesInOrder = encodedRangesInOrder; } @Override @@ -141,8 +178,26 @@ public void range(Range range) { } @Override - public LuceneNumericRangeAggregation build() { + public RangeAggregationBuilder withValue(SearchAggregation aggregation) { + return new Builder<>( scope, field, codec, convertAndEncode, LuceneSearchAggregation.from( scope, aggregation ), + new ArrayList<>( rangesInOrder ), new ArrayList<>( encodedRangesInOrder ) ); + } + + @Override + public LuceneNumericRangeAggregation build() { return new LuceneNumericRangeAggregation<>( this ); } } + + public static class CountBuilder extends Builder { + + protected CountBuilder(AbstractLuceneNumericFieldCodec codec, Function convertAndEncode, + LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field) { + super( scope, field, codec, convertAndEncode, + LuceneSearchAggregation.from( scope, + LuceneCountDocumentAggregation.factory().create( scope, null ).type().build() ), + new ArrayList<>(), new ArrayList<>() ); + } + } + } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java index e32d2da2d29..b070e7b7977 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java +++ 
b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneNumericTermsAggregation.java @@ -5,26 +5,34 @@ package org.hibernate.search.backend.lucene.types.aggregation.impl; import java.io.IOException; +import java.util.ArrayList; import java.util.Comparator; +import java.util.List; import java.util.Map; +import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import java.util.function.Function; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.NumericTermsCollector; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.NumericTermsCollectorFactory; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TermResults; +import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneCodecAwareSearchQueryElementFactory; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; import org.hibernate.search.backend.lucene.types.codec.impl.AbstractLuceneNumericFieldCodec; import org.hibernate.search.backend.lucene.types.lowlevel.impl.LuceneNumericDomain; import org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; 
import org.hibernate.search.engine.search.common.ValueModel; -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; @@ -36,15 +44,16 @@ * @param The type of keys in the returned map. It can be {@code F} * or a different type if value converters are used. */ -public class LuceneNumericTermsAggregation - extends AbstractLuceneFacetsBasedTermsAggregation { +public class LuceneNumericTermsAggregation + extends AbstractLuceneMultivaluedTermsAggregation { private final LuceneNumericDomain numericDomain; private final Comparator termComparator; private final Function decoder; + private CollectorKey collectorKey; - private LuceneNumericTermsAggregation(Builder builder) { + private LuceneNumericTermsAggregation(Builder builder) { super( builder ); this.numericDomain = builder.codec.getDomain(); this.termComparator = numericDomain.createComparator(); @@ -52,8 +61,20 @@ private LuceneNumericTermsAggregation(Builder builder) { } @Override - protected Extractor> extractor(AggregationRequestContext context) { - return new LuceneNumericTermsAggregationExtractor(); + public Extractor> request(AggregationRequestContext context) { + NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); + JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( + absoluteFieldPath, nestedDocsProvider + ); + LocalAggregationRequestContext localAggregationContext = new LocalAggregationRequestContext( context ); + Extractor extractor = aggregation.request( localAggregationContext ); + + var termsCollectorFactory = + NumericTermsCollectorFactory.instance( source, localAggregationContext.localCollectorFactories() ); + context.requireCollector( termsCollectorFactory ); + collectorKey = termsCollectorFactory.getCollectorKey(); + + return new 
LuceneNumericTermsAggregationExtractor( extractor ); } public static class Factory @@ -73,20 +94,51 @@ public TermsAggregationBuilder.TypeSelector create(LuceneSearchIndexScope sco } private class LuceneNumericTermsAggregationExtractor extends AbstractExtractor { + + private LuceneNumericTermsAggregationExtractor(Extractor extractor) { + super( extractor ); + } + @Override - FacetResult getTopChildren(IndexReader reader, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider, int limit) - throws IOException { - Facets facetCounts = numericDomain.createTermsFacetCounts( - absoluteFieldPath, facetsCollector, nestedDocsProvider - ); - return facetCounts.getTopChildren( limit, absoluteFieldPath ); + protected TermResults termResults(AggregationExtractContext context) throws IOException { + return context.getCollectorResults( collectorKey ); + } + + @Override + Comparator getAscendingTermComparator() { + return termComparator; + } + + @Override + V termToFieldValue(E key) { + return decoder.apply( key ); + } + + @Override + List> getTopBuckets(AggregationExtractContext context) throws IOException { + var termResults = context.getCollectorResults( collectorKey ); + + LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); + + List counts = termResults.counts( order, maxTermCount, minDocCount ); + List> buckets = new ArrayList<>(); + for ( LongBucket bucket : counts ) { + localContext.setResults( prepareResults( bucket, termResults ) ); + buckets.add( + new Bucket<>( + numericDomain.sortedDocValueToTerm( bucket.termOrd() ), + bucket.count(), + extractor.extract( localContext ) + ) + ); + } + return buckets; } @Override - SortedSet collectFirstTerms(IndexReader reader, boolean descending, int limit) + Set collectFirstTerms(IndexReader reader, boolean descending, int limit) throws IOException { - TreeSet collectedTerms = new TreeSet<>( descending ? 
termComparator.reversed() : termComparator ); + SortedSet collectedTerms = new TreeSet<>( descending ? termComparator.reversed() : termComparator ); for ( LeafReaderContext leaf : reader.leaves() ) { final LeafReader atomicReader = leaf.reader(); SortedNumericDocValues docValues = atomicReader.getSortedNumericDocValues( absoluteFieldPath ); @@ -107,23 +159,9 @@ SortedSet collectFirstTerms(IndexReader reader, boolean descending, int limit return collectedTerms; } - @Override - Comparator getAscendingTermComparator() { - return termComparator; - } - - @Override - E labelToTerm(String termAsString) { - return numericDomain.sortedDocValueToTerm( Long.parseLong( termAsString ) ); - } - - @Override - V termToFieldValue(E term) { - return decoder.apply( term ); - } } - private static class TypeSelector extends AbstractTypeSelector { + private static class TypeSelector extends AbstractTypeSelector { private final AbstractLuceneNumericFieldCodec codec; private TypeSelector(AbstractLuceneNumericFieldCodec codec, @@ -134,16 +172,16 @@ private TypeSelector(AbstractLuceneNumericFieldCodec codec, @SuppressWarnings("unchecked") @Override - public Builder type(Class expectedType, ValueModel valueModel) { + public Builder type(Class expectedType, ValueModel valueModel) { if ( ValueModel.RAW.equals( valueModel ) ) { - return new Builder<>( codec, scope, field, + return new CountBuilder<>( codec, scope, field, ( (ProjectionConverter) field.type().rawProjectionConverter() ) .withConvertedType( expectedType, field ), Function.identity() ); } else { - return new Builder<>( codec, scope, field, + return new CountBuilder<>( codec, scope, field, field.type().projectionConverter( valueModel ).withConvertedType( expectedType, field ), codec::decode ); @@ -151,24 +189,52 @@ private TypeSelector(AbstractLuceneNumericFieldCodec codec, } } - private static class Builder - extends AbstractBuilder { + private static class CountBuilder extends Builder { + + private 
CountBuilder(AbstractLuceneNumericFieldCodec codec, LuceneSearchIndexScope scope, + LuceneSearchIndexValueFieldContext field, + ProjectionConverter fromFieldValueConverter, + Function decoder) { + super( codec, scope, field, LuceneSearchAggregation.from( scope, + LuceneCountDocumentAggregation.factory().create( scope, null ).type().build() ), fromFieldValueConverter, + decoder ); + } + } + + private static class Builder + extends AbstractBuilder { private final AbstractLuceneNumericFieldCodec codec; private final Function decoder; - public Builder(AbstractLuceneNumericFieldCodec codec, LuceneSearchIndexScope scope, - LuceneSearchIndexValueFieldContext field, ProjectionConverter fromFieldValueConverter, + private Builder(AbstractLuceneNumericFieldCodec codec, LuceneSearchIndexScope scope, + LuceneSearchIndexValueFieldContext field, LuceneSearchAggregation aggregation, + ProjectionConverter fromFieldValueConverter, Function decoder) { - super( scope, field, fromFieldValueConverter ); + super( scope, field, aggregation, fromFieldValueConverter ); + this.codec = codec; + this.decoder = decoder; + } + + private Builder(AbstractLuceneNumericFieldCodec codec, LuceneSearchIndexScope scope, + LuceneSearchIndexValueFieldContext field, + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter, + Function decoder, + BucketOrder order, int minDocCount, int maxTermCount) { + super( scope, field, aggregation, fromFieldValueConverter, order, minDocCount, maxTermCount ); this.codec = codec; this.decoder = decoder; } @Override - public LuceneNumericTermsAggregation build() { + public LuceneNumericTermsAggregation build() { return new LuceneNumericTermsAggregation<>( this ); } - } + @Override + public TermsAggregationBuilder withValue(SearchAggregation aggregation) { + return new Builder<>( codec, scope, field, LuceneSearchAggregation.from( scope, aggregation ), + fromFieldValueConverter, decoder, order, minDocCount, maxTermCount ); + } + } } diff --git 
a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneSumCompensatedSumAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneSumCompensatedSumAggregation.java index b91a81ca70f..2eee45d884b 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneSumCompensatedSumAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneSumCompensatedSumAggregation.java @@ -36,7 +36,7 @@ void fillCollectors(JoiningLongMultiValuesSource source, AggregationRequestConte @Override E extractEncoded(AggregationExtractContext context, LuceneNumericDomain numericDomain) { - Double sum = context.getFacets( compensatedSumCollectorKey ); + Double sum = context.getCollectorResults( compensatedSumCollectorKey ); return numericDomain.doubleToTerm( sum ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java index f8428ade788..d0980d10457 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/aggregation/impl/LuceneTextTermsAggregation.java @@ -5,60 +5,78 @@ package org.hibernate.search.backend.lucene.types.aggregation.impl; import java.io.IOException; +import java.util.ArrayList; import java.util.Comparator; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.CollectorKey; +import 
org.hibernate.search.backend.lucene.lowlevel.collector.impl.TermResults; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TextTermsCollector; +import org.hibernate.search.backend.lucene.lowlevel.collector.impl.TextTermsCollectorFactory; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningTextMultiValuesSource; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.TextMultiValueFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; +import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationExtractContext; import org.hibernate.search.backend.lucene.search.aggregation.impl.AggregationRequestContext; +import org.hibernate.search.backend.lucene.search.aggregation.impl.LuceneSearchAggregation; import org.hibernate.search.backend.lucene.search.common.impl.AbstractLuceneValueFieldSearchQueryElementFactory; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexScope; import org.hibernate.search.backend.lucene.search.common.impl.LuceneSearchIndexValueFieldContext; import org.hibernate.search.engine.backend.types.converter.spi.ProjectionConverter; +import org.hibernate.search.engine.search.aggregation.SearchAggregation; import org.hibernate.search.engine.search.aggregation.spi.TermsAggregationBuilder; import org.hibernate.search.engine.search.common.ValueModel; -import org.apache.lucene.facet.FacetResult; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.SortedSetDocValues; /** * @param The type of keys in the returned map. It can be {@code String} * or a different type if value converters are used. 
*/ -public class LuceneTextTermsAggregation - extends AbstractLuceneFacetsBasedTermsAggregation { +public class LuceneTextTermsAggregation + extends AbstractLuceneMultivaluedTermsAggregation { private static final Comparator STRING_COMPARATOR = Comparator.naturalOrder(); - private LuceneTextTermsAggregation(Builder builder) { + private CollectorKey collectorKey; + + private LuceneTextTermsAggregation(Builder builder) { super( builder ); } @Override - protected Extractor> extractor(AggregationRequestContext context) { - return new LuceneTextTermsAggregationExtractor(); + public Extractor> request(AggregationRequestContext context) { + NestedDocsProvider nestedDocsProvider = createNestedDocsProvider( context ); + JoiningTextMultiValuesSource source = JoiningTextMultiValuesSource.fromField( + absoluteFieldPath, nestedDocsProvider + ); + + LocalAggregationRequestContext localAggregationContext = new LocalAggregationRequestContext( context ); + Extractor extractor = aggregation.request( localAggregationContext ); + + var termsCollectorFactory = TextTermsCollectorFactory.instance( absoluteFieldPath, source, + localAggregationContext.localCollectorFactories() ); + context.requireCollector( termsCollectorFactory ); + collectorKey = termsCollectorFactory.getCollectorKey(); + + return new LuceneTextTermsAggregationExtractor( extractor ); } private class LuceneTextTermsAggregationExtractor extends AbstractExtractor { + + private LuceneTextTermsAggregationExtractor(Extractor extractor) { + super( extractor ); + } + @Override - FacetResult getTopChildren(IndexReader reader, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider, int limit) - throws IOException { - JoiningTextMultiValuesSource valueSource = JoiningTextMultiValuesSource.fromField( - absoluteFieldPath, nestedDocsProvider - ); - TextMultiValueFacetCounts facetCounts = new TextMultiValueFacetCounts( - reader, absoluteFieldPath, valueSource, facetsCollector - ); - - return 
facetCounts.getTopChildren( limit, absoluteFieldPath ); + protected TermResults termResults(AggregationExtractContext context) throws IOException { + return context.getCollectorResults( collectorKey ); } @Override @@ -95,13 +113,31 @@ Comparator getAscendingTermComparator() { } @Override - String labelToTerm(String label) { - return label; + String termToFieldValue(String key) { + return key; } @Override - String termToFieldValue(String key) { - return key; + List> getTopBuckets(AggregationExtractContext context) throws IOException { + var termResults = context.getCollectorResults( collectorKey ); + + LocalAggregationExtractContext localContext = new LocalAggregationExtractContext( context ); + + List results = termResults.counts( order, maxTermCount, minDocCount ); + + var dv = MultiDocValues.getSortedSetValues( context.getIndexReader(), absoluteFieldPath ); + List> buckets = new ArrayList<>(); + for ( LongBucket bucket : results ) { + localContext.setResults( prepareResults( bucket, termResults ) ); + buckets.add( + new Bucket<>( + dv.lookupOrd( bucket.termOrd() ).utf8ToString(), + bucket.count(), + extractor.extract( localContext ) + ) + ); + } + return buckets; } } @@ -113,40 +149,62 @@ public TypeSelector create(LuceneSearchIndexScope scope, LuceneSearchIndexVal } } - private static class TypeSelector extends AbstractTypeSelector { + private static class TypeSelector extends AbstractTypeSelector { private TypeSelector(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field) { super( scope, field ); } @SuppressWarnings("unchecked") @Override - public Builder type(Class expectedType, ValueModel valueModel) { + public Builder type(Class expectedType, ValueModel valueModel) { if ( ValueModel.RAW.equals( valueModel ) ) { - return new Builder<>( scope, field, + return new CountBuilder<>( scope, field, ( (ProjectionConverter) field.type().rawProjectionConverter() ) .withConvertedType( expectedType, field ) ); } else { - return new Builder<>( scope, 
field, + return new CountBuilder<>( scope, field, field.type().projectionConverter( valueModel ).withConvertedType( expectedType, field ) ); } } } - private static class Builder - extends AbstractBuilder { + private static class CountBuilder extends Builder { - private Builder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + private CountBuilder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, ProjectionConverter fromFieldValueConverter) { - super( scope, field, fromFieldValueConverter ); + super( scope, field, + LuceneSearchAggregation.from( scope, + LuceneCountDocumentAggregation.factory().create( scope, null ).type().build() ), + fromFieldValueConverter ); + } + } + + private static class Builder + extends AbstractBuilder { + + private Builder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter) { + super( scope, field, aggregation, fromFieldValueConverter ); + } + + private Builder(LuceneSearchIndexScope scope, LuceneSearchIndexValueFieldContext field, + LuceneSearchAggregation aggregation, ProjectionConverter fromFieldValueConverter, + BucketOrder order, int minDocCount, int maxTermCount) { + super( scope, field, aggregation, fromFieldValueConverter, order, minDocCount, maxTermCount ); } @Override - public LuceneTextTermsAggregation build() { + public LuceneTextTermsAggregation build() { return new LuceneTextTermsAggregation<>( this ); } + @Override + public TermsAggregationBuilder withValue(SearchAggregation aggregation) { + return new Builder<>( scope, field, LuceneSearchAggregation.from( scope, aggregation ), fromFieldValueConverter, + order, minDocCount, maxTermCount ); + } } } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java 
b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java index a5e8dfa4b42..250387a6fba 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneNumericIndexFieldTypeOptionsStep.java @@ -96,8 +96,8 @@ public LuceneIndexValueFieldType toIndexFieldType() { builder.queryElementFactory( AggregationTypeKeys.SUM, sumMetricAggregationFactory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.MIN, LuceneMinNumericFieldAggregation.factory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.MAX, LuceneMaxNumericFieldAggregation.factory( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT, LuceneCountNumericLongAggregation.factory( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT, + builder.queryElementFactory( AggregationTypeKeys.COUNT_VALUES, LuceneCountNumericLongAggregation.factory( codec ) ); + builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT_VALUES, LuceneCountDistinctNumericLongAggregation.factory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.AVG, avgMetricAggregationFactory( codec ) ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneTemporalIndexFieldTypeOptionsStep.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneTemporalIndexFieldTypeOptionsStep.java index c4a777fa8b9..08e8311752f 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneTemporalIndexFieldTypeOptionsStep.java +++ 
b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/dsl/impl/AbstractLuceneTemporalIndexFieldTypeOptionsStep.java @@ -102,8 +102,8 @@ public LuceneIndexValueFieldType toIndexFieldType() { } builder.queryElementFactory( AggregationTypeKeys.MIN, LuceneMinNumericFieldAggregation.factory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.MAX, LuceneMaxNumericFieldAggregation.factory( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT, LuceneCountNumericLongAggregation.factory( codec ) ); - builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT, + builder.queryElementFactory( AggregationTypeKeys.COUNT_VALUES, LuceneCountNumericLongAggregation.factory( codec ) ); + builder.queryElementFactory( AggregationTypeKeys.COUNT_DISTINCT_VALUES, LuceneCountDistinctNumericLongAggregation.factory( codec ) ); builder.queryElementFactory( AggregationTypeKeys.AVG, LuceneAvgNumericFieldAggregation.factory( codec ) ); } diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java new file mode 100644 index 00000000000..f04c116c3d5 --- /dev/null +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/EffectiveRange.java @@ -0,0 +1,64 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * Copyright Red Hat Inc. 
and Hibernate Authors + */ +package org.hibernate.search.backend.lucene.types.lowlevel.impl; + +import java.util.Collection; +import java.util.function.ToLongFunction; + +import org.hibernate.search.util.common.data.Range; +import org.hibernate.search.util.common.data.RangeBoundInclusion; + +public record EffectiveRange(long min, long max) { + public static EffectiveRange[] createEffectiveRangesForIntegralValues( + Collection> ranges) { + return createEffectiveRangesForIntegralValues( ranges, Number::longValue, Long.MIN_VALUE, Long.MAX_VALUE, false ); + } + + public static EffectiveRange[] createEffectiveRangesForIntegralValues( + Collection> ranges, + ToLongFunction encoder, T negativeInfinity, T positiveInfinity) { + return createEffectiveRangesForIntegralValues( ranges, encoder, negativeInfinity, positiveInfinity, true ); + } + + private static EffectiveRange[] createEffectiveRangesForIntegralValues(Collection> ranges, + ToLongFunction encoder, + T lowestPossibleValue, T highestPossibleValue, boolean extremaAreInfinity) { + EffectiveRange[] effectiveRanges = new EffectiveRange[ranges.size()]; + int i = 0; + for ( Range range : ranges ) { + final T lowerBoundValue = range.lowerBoundValue().orElse( null ); + final T upperBoundValue = range.upperBoundValue().orElse( null ); + + + long min = encoder.applyAsLong( lowerBoundValue == null ? lowestPossibleValue : lowerBoundValue ); + long max = encoder.applyAsLong( upperBoundValue == null ? highestPossibleValue : upperBoundValue ); + + // The lower bound is included if it is explicitly included + // ... or if it is infinity but infinity cannot be represented + // so if it's none of the above we exclude the boundary by ++ it. + if ( + RangeBoundInclusion.EXCLUDED.equals( range.lowerBoundInclusion() ) + && ( extremaAreInfinity || lowerBoundValue != null ) ) { + ++min; + } + + // The upper bound is included if it is explicitly included + // ... 
or if it is infinity but infinity cannot be represented + // so if it's none of the above we exclude the boundary by -- it. + if ( + RangeBoundInclusion.EXCLUDED.equals( range.upperBoundInclusion() ) + && ( extremaAreInfinity || upperBoundValue != null ) ) { + --max; + } + + effectiveRanges[i] = new EffectiveRange( + min, + max + ); + ++i; + } + return effectiveRanges; + } +} diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java index d4ed1241e9a..12f319907b2 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneDoubleDomain.java @@ -4,24 +4,17 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; import org.hibernate.search.backend.lucene.lowlevel.comparator.impl.DoubleValuesSourceComparator; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.DoubleMultiValuesToSingleValuesSource; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueRangeFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.document.DoublePoint; import org.apache.lucene.document.SortedNumericDocValuesField; 
-import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -93,37 +86,9 @@ public Double doubleToTerm(double doubleValue) { } @Override - public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException { - // As we don't need to apply any operation to terms except sometimes a sort, - // we can simply rely on raw, long values, whose order is the same as their corresponding double value. - // Values are ultimately converted back to the Double equivalent by calling sortedDocValueToTerm. - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueFacetCounts( absoluteFieldPath, source, facetsCollector ); - } - - @Override - public Facets createRangeFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException { - // As we don't need to apply any operation to terms except sometimes a sort, - // we can simply rely on raw, long values, whose order is the same as their corresponding double value. - // Values are ultimately converted back to the Double equivalent by calling sortedDocValueToTerm. 
- JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueRangeFacetCounts( - absoluteFieldPath, source, - facetsCollector, - FacetCountsUtils.createLongRangesForFloatingPointValues( - ranges, NumericUtils::doubleToSortableLong, - Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY - ) - ); + public EffectiveRange[] createEffectiveRanges(Collection> ranges) { + return EffectiveRange.createEffectiveRangesForIntegralValues( ranges, NumericUtils::doubleToSortableLong, + Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java index 48a7218ac16..333f1799939 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneFloatDomain.java @@ -4,25 +4,18 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; import org.hibernate.search.backend.lucene.lowlevel.comparator.impl.FloatValuesSourceComparator; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.DoubleMultiValuesToSingleValuesSource; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; -import 
org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueRangeFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.document.FloatPoint; import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -94,37 +87,9 @@ public Float doubleToTerm(double doubleValue) { } @Override - public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException { - // As we don't need to apply any operation to terms except sometimes a sort, - // we can simply rely on raw, int values, whose order is the same as their corresponding float value. - // Values are ultimately converted back to the Float equivalent by calling sortedDocValueToTerm. - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromIntField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueFacetCounts( absoluteFieldPath, source, facetsCollector ); - } - - @Override - public Facets createRangeFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException { - // As we don't need to apply any operation to terms except sometimes a sort, - // we can simply rely on raw, int values, whose order is the same as their corresponding float value. - // Values are ultimately converted back to the Float equivalent by calling sortedDocValueToTerm. 
- JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromIntField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueRangeFacetCounts( - absoluteFieldPath, source, - facetsCollector, - FacetCountsUtils.createLongRangesForFloatingPointValues( - ranges, value -> (long) NumericUtils.floatToSortableInt( value ), - Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY - ) - ); + public EffectiveRange[] createEffectiveRanges(Collection> ranges) { + return EffectiveRange.createEffectiveRangesForIntegralValues( ranges, NumericUtils::floatToSortableInt, + Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java index db1a866a90b..e57cb0f0ae0 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneIntegerDomain.java @@ -4,25 +4,18 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; import org.hibernate.search.backend.lucene.lowlevel.comparator.impl.IntValuesSourceComparator; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesToSingleValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; -import 
org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueRangeFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -93,31 +86,8 @@ public double sortedDocValueToDouble(long longValue) { } @Override - public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException { - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromIntField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueFacetCounts( - absoluteFieldPath, source, - facetsCollector - ); - } - - @Override - public Facets createRangeFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException { - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromIntField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueRangeFacetCounts( - absoluteFieldPath, source, - facetsCollector, - FacetCountsUtils.createLongRangesForIntegralValues( ranges ) - ); + public EffectiveRange[] createEffectiveRanges(Collection> ranges) { + return EffectiveRange.createEffectiveRangesForIntegralValues( ranges ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java index 
b8df3919af0..6aab8e3a5d5 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneLongDomain.java @@ -4,25 +4,18 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; import org.hibernate.search.backend.lucene.lowlevel.comparator.impl.LongValuesSourceComparator; -import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.JoiningLongMultiValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.LongMultiValuesToSingleValuesSource; import org.hibernate.search.backend.lucene.lowlevel.docvalues.impl.MultiValueMode; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.FacetCountsUtils; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueFacetCounts; -import org.hibernate.search.backend.lucene.lowlevel.facet.impl.LongMultiValueRangeFacetCounts; import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.engine.cfg.spi.NumberUtils; import org.hibernate.search.util.common.data.Range; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.SortedNumericDocValuesField; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -93,31 +86,8 @@ public Long doubleToTerm(double doubleValue) { } @Override - public Facets createTermsFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException { - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( - absoluteFieldPath, nestedDocsProvider - ); - 
return new LongMultiValueFacetCounts( - absoluteFieldPath, source, - facetsCollector - ); - } - - @Override - public Facets createRangeFacetCounts(String absoluteFieldPath, FacetsCollector facetsCollector, - Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException { - JoiningLongMultiValuesSource source = JoiningLongMultiValuesSource.fromLongField( - absoluteFieldPath, nestedDocsProvider - ); - return new LongMultiValueRangeFacetCounts( - absoluteFieldPath, source, - facetsCollector, - FacetCountsUtils.createLongRangesForIntegralValues( ranges ) - ); + public EffectiveRange[] createEffectiveRanges(Collection> ranges) { + return EffectiveRange.createEffectiveRangesForIntegralValues( ranges ); } @Override diff --git a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java index 2b82c7a9807..a25ade3387a 100644 --- a/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java +++ b/lucene-next/backend/lucene/src/main/java/org/hibernate/search/backend/lucene/types/lowlevel/impl/LuceneNumericDomain.java @@ -4,7 +4,6 @@ */ package org.hibernate.search.backend.lucene.types.lowlevel.impl; -import java.io.IOException; import java.util.Collection; import java.util.Comparator; @@ -12,8 +11,6 @@ import org.hibernate.search.backend.lucene.lowlevel.join.impl.NestedDocsProvider; import org.hibernate.search.util.common.data.Range; -import org.apache.lucene.facet.Facets; -import org.apache.lucene.facet.FacetsCollector; import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Pruning; @@ -43,14 +40,7 @@ public interface LuceneNumericDomain { E doubleToTerm(double doubleValue); - Facets createTermsFacetCounts(String absoluteFieldPath, 
FacetsCollector facetsCollector, - NestedDocsProvider nestedDocsProvider) - throws IOException; - - Facets createRangeFacetCounts(String absoluteFieldPath, - FacetsCollector facetsCollector, Collection> ranges, - NestedDocsProvider nestedDocsProvider) - throws IOException; + EffectiveRange[] createEffectiveRanges(Collection> ranges); IndexableField createIndexField(String absoluteFieldPath, E numericValue); diff --git a/lucene-next/build/parents/integrationtest/pom.xml b/lucene-next/build/parents/integrationtest/pom.xml index 893e99dbf36..bab17b24ea3 100644 --- a/lucene-next/build/parents/integrationtest/pom.xml +++ b/lucene-next/build/parents/integrationtest/pom.xml @@ -61,11 +61,6 @@ lucene-join ${version.org.apache.lucene.next.updatable} - - org.apache.lucene - lucene-facet - ${version.org.apache.lucene.next.updatable} - org.apache.lucene lucene-highlighter diff --git a/lucene-next/build/parents/internal/pom.xml b/lucene-next/build/parents/internal/pom.xml index 21b7292c444..24564511e08 100644 --- a/lucene-next/build/parents/internal/pom.xml +++ b/lucene-next/build/parents/internal/pom.xml @@ -49,11 +49,6 @@ lucene-join ${version.org.apache.lucene.next.updatable} - - org.apache.lucene - lucene-facet - ${version.org.apache.lucene.next.updatable} - org.apache.lucene lucene-highlighter diff --git a/lucene-next/build/parents/public/pom.xml b/lucene-next/build/parents/public/pom.xml index fee8d321c27..4783c83132c 100644 --- a/lucene-next/build/parents/public/pom.xml +++ b/lucene-next/build/parents/public/pom.xml @@ -49,11 +49,6 @@ lucene-join ${version.org.apache.lucene.next.updatable} - - org.apache.lucene - lucene-facet - ${version.org.apache.lucene.next.updatable} - org.apache.lucene lucene-highlighter diff --git a/lucene-next/build/parents/springtest/pom.xml b/lucene-next/build/parents/springtest/pom.xml index 15df0a75b5a..cd1be1c3e31 100644 --- a/lucene-next/build/parents/springtest/pom.xml +++ b/lucene-next/build/parents/springtest/pom.xml @@ -60,11 +60,6 @@ 
lucene-join ${version.org.apache.lucene.next.updatable} - - org.apache.lucene - lucene-facet - ${version.org.apache.lucene.next.updatable} - org.apache.lucene lucene-highlighter diff --git a/metamodel/metamodel-processor/src/main/java/org/hibernate/search/processor/writer/impl/TraitReferenceMapping.java b/metamodel/metamodel-processor/src/main/java/org/hibernate/search/processor/writer/impl/TraitReferenceMapping.java index 8126cbfe5c7..474e09af463 100644 --- a/metamodel/metamodel-processor/src/main/java/org/hibernate/search/processor/writer/impl/TraitReferenceMapping.java +++ b/metamodel/metamodel-processor/src/main/java/org/hibernate/search/processor/writer/impl/TraitReferenceMapping.java @@ -10,8 +10,8 @@ import org.hibernate.search.engine.backend.types.IndexFieldTraits; import org.hibernate.search.engine.search.reference.aggregation.AvgAggregationFieldReference; -import org.hibernate.search.engine.search.reference.aggregation.CountAggregationFieldReference; -import org.hibernate.search.engine.search.reference.aggregation.CountDistinctAggregationFieldReference; +import org.hibernate.search.engine.search.reference.aggregation.CountDistinctValuesAggregationFieldReference; +import org.hibernate.search.engine.search.reference.aggregation.CountValuesAggregationFieldReference; import org.hibernate.search.engine.search.reference.aggregation.MaxAggregationFieldReference; import org.hibernate.search.engine.search.reference.aggregation.MinAggregationFieldReference; import org.hibernate.search.engine.search.reference.aggregation.RangeAggregationFieldReference; @@ -116,10 +116,10 @@ private TraitReferenceMapping() { traits.put( IndexFieldTraits.Aggregations.MAX, new TraitReferenceDetails( MaxAggregationFieldReference.class, "A4", TraitKind.TYPED_OUTPUT, EXTRA_PROPERTY_AGGREGATION_TYPE ) ); - traits.put( IndexFieldTraits.Aggregations.COUNT, - new TraitReferenceDetails( CountAggregationFieldReference.class, "A5", TraitKind.UNTYPED ) ); - traits.put( 
IndexFieldTraits.Aggregations.COUNT_DISTINCT, - new TraitReferenceDetails( CountDistinctAggregationFieldReference.class, "A6", TraitKind.UNTYPED ) ); + traits.put( IndexFieldTraits.Aggregations.COUNT_VALUES, + new TraitReferenceDetails( CountValuesAggregationFieldReference.class, "A5", TraitKind.UNTYPED ) ); + traits.put( IndexFieldTraits.Aggregations.COUNT_DISTINCT_VALUES, + new TraitReferenceDetails( CountDistinctValuesAggregationFieldReference.class, "A6", TraitKind.UNTYPED ) ); traits.put( IndexFieldTraits.Aggregations.AVG, new TraitReferenceDetails( AvgAggregationFieldReference.class, "A7", TraitKind.TYPED_OUTPUT, EXTRA_PROPERTY_AGGREGATION_TYPE ) ); diff --git a/metamodel/metamodel-processor/src/test/java/org/hibernate/search/processor/writer/impl/TraitReferenceMappingTest.java b/metamodel/metamodel-processor/src/test/java/org/hibernate/search/processor/writer/impl/TraitReferenceMappingTest.java index eefbcbc9234..5c1523049c7 100644 --- a/metamodel/metamodel-processor/src/test/java/org/hibernate/search/processor/writer/impl/TraitReferenceMappingTest.java +++ b/metamodel/metamodel-processor/src/test/java/org/hibernate/search/processor/writer/impl/TraitReferenceMappingTest.java @@ -45,6 +45,10 @@ private static Stream traitNames() { traitNames.addAll( traitNames( IndexFieldTraits.Projections.class ) ); traitNames.addAll( traitNames( IndexFieldTraits.Sorts.class ) ); traitNames.addAll( traitNames( IndexFieldTraits.Aggregations.class ) ); + + // count documents is an aggregation that does not require a field and as a result does not require the field reference: + traitNames.remove( IndexFieldTraits.Aggregations.COUNT_DOCUMENTS ); + return traitNames.stream().map( Arguments::of ); } diff --git a/util/internal/integrationtest/common/src/main/java/org/hibernate/search/util/impl/integrationtest/common/stub/backend/search/aggregation/impl/StubSearchAggregation.java 
b/util/internal/integrationtest/common/src/main/java/org/hibernate/search/util/impl/integrationtest/common/stub/backend/search/aggregation/impl/StubSearchAggregation.java index 61df7b21096..9666fdd1453 100644 --- a/util/internal/integrationtest/common/src/main/java/org/hibernate/search/util/impl/integrationtest/common/stub/backend/search/aggregation/impl/StubSearchAggregation.java +++ b/util/internal/integrationtest/common/src/main/java/org/hibernate/search/util/impl/integrationtest/common/stub/backend/search/aggregation/impl/StubSearchAggregation.java @@ -47,19 +47,19 @@ public RangeAggregationBuilder.TypeSelector create(StubSearchIndexScope scope public static class TermsTypeSelector implements TermsAggregationBuilder.TypeSelector { @Override - public TermsBuilder type(Class expectedType, ValueModel valueModel) { + public TermsBuilder type(Class expectedType, ValueModel valueModel) { return new TermsBuilder<>(); } } public static class RangeTypeSelector implements RangeAggregationBuilder.TypeSelector { @Override - public RangeBuilder type(Class expectedType, ValueModel valueModel) { + public RangeBuilder type(Class expectedType, ValueModel valueModel) { return new RangeBuilder<>(); } } - static class TermsBuilder implements TermsAggregationBuilder { + static class TermsBuilder implements TermsAggregationBuilder { @Override public void orderByCountDescending() { @@ -91,18 +91,23 @@ public void maxTermCount(int maxTermCount) { // No-op } + @Override + public TermsAggregationBuilder withValue(SearchAggregation aggregation) { + return new TermsBuilder<>(); + } + @Override public void filter(SearchPredicate filter) { // No-op } @Override - public SearchAggregation> build() { + public SearchAggregation> build() { return new StubSearchAggregation<>(); } } - static class RangeBuilder implements RangeAggregationBuilder { + static class RangeBuilder implements RangeAggregationBuilder { @Override public void range(Range range) { @@ -115,7 +120,12 @@ public void 
filter(SearchPredicate filter) { } @Override - public SearchAggregation, Long>> build() { + public RangeAggregationBuilder withValue(SearchAggregation aggregation) { + return new RangeBuilder<>(); + } + + @Override + public SearchAggregation, A>> build() { return new StubSearchAggregation<>(); } } diff --git a/v5migrationhelper/engine/src/main/java/org/hibernate/search/query/dsl/impl/RangeFacetRequest.java b/v5migrationhelper/engine/src/main/java/org/hibernate/search/query/dsl/impl/RangeFacetRequest.java index c1594b1d59c..61e22cb2d86 100644 --- a/v5migrationhelper/engine/src/main/java/org/hibernate/search/query/dsl/impl/RangeFacetRequest.java +++ b/v5migrationhelper/engine/src/main/java/org/hibernate/search/query/dsl/impl/RangeFacetRequest.java @@ -33,9 +33,9 @@ public class RangeFacetRequest extends FacetingRequestImpl, Long @Override public AggregationFinalStep, Long>> requestAggregation(TypedSearchAggregationFactory factory) { - RangeAggregationRangeStep rangeStep = factory + RangeAggregationRangeStep rangeStep = factory .range().field( getFieldName(), getFacetValueType() ); - RangeAggregationRangeMoreStep rangeMoreStep = null; + RangeAggregationRangeMoreStep rangeMoreStep = null; for ( FacetRange facetRange : facetRangeList ) { rangeMoreStep = rangeStep.range( facetRange.range() ); rangeStep = rangeMoreStep;