Skip to content

HSEARCH-3661 Setting the value of bucket aggregations to something more than just the document count #4697

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
30969c2
HSEARCH-3661 Rename collector methods
marko-bekhta Jun 25, 2025
ee81874
HSEARCH-3661 Do not rely on facets collector for range aggregations
marko-bekhta Jun 25, 2025
2fb2212
HSEARCH-3661 Do not rely on facets collector for terms aggregations
marko-bekhta Jun 26, 2025
6703b18
HSEARCH-3661 Remove more code dependent on Lucene facets
marko-bekhta Jun 27, 2025
e411cea
HSEARCH-3661 Do not do rollup for range aggregation
marko-bekhta Jun 27, 2025
75be8ac
HSEARCH-3661 WIP making aggregation value work
marko-bekhta Jun 30, 2025
ed874dc
HSEARCH-3661 Make it more clear that count aggregations are targeting…
marko-bekhta Jul 1, 2025
cd4f113
HSEARCH-3661 Introduce count documents aggregation
marko-bekhta Jul 1, 2025
0d71e9f
HSEARCH-3661 Add count documents aggregation
marko-bekhta Jul 2, 2025
44c7188
HSEARCH-3661 Test countValues aggregations on multivalued fields
marko-bekhta Jul 2, 2025
0b70347
HSEARCH-3661 Update the Elasticsearch backend with "new aggregations"
marko-bekhta Jul 3, 2025
876fbf3
HSEARCH-3661 Make necessary adjustments to the terms aggregations to …
marko-bekhta Jul 3, 2025
916fd41
HSEARCH-3661 Make Elasticsearch's terms aggregation accept "value"
marko-bekhta Jul 4, 2025
9291b9b
HSEARCH-3661 Do not collect counts in Lucene's range collectors
marko-bekhta Jul 4, 2025
3a0dacd
HSEARCH-3661 Use SimpleCollectors to not recreate leaf collectors
marko-bekhta Jul 4, 2025
87c5f21
HSEARCH-3661 Adjust how extractors are built for bucket Elasticsearch…
marko-bekhta Jul 4, 2025
9eed71f
HSEARCH-3661 Use reducible results in terms collectors
marko-bekhta Jul 7, 2025
36f4ea8
HSEARCH-3661 Use reducible results in range collectors
marko-bekhta Jul 7, 2025
170ac56
HSEARCH-3661 Add more tests around aggregations to the backend TCK
marko-bekhta Jul 8, 2025
3b120f9
HSEARCH-3661 Document range/terms .values(..)
marko-bekhta Jul 8, 2025
572e27d
HSEARCH-3666 "Enable" "non-default" sorts in Lucene's terms aggregati…
marko-bekhta Jul 8, 2025
bdad81d
HSEARCH-3661 Fix the interface names
marko-bekhta Jul 10, 2025
73371d9
HSEARCH-3661 Make eclipse compiler happier with generics in more comp…
marko-bekhta Jul 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.hibernate.search.backend.elasticsearch.lowlevel.index.mapping.impl.RoutingType;
import org.hibernate.search.backend.elasticsearch.lowlevel.index.settings.impl.IndexSettings;
import org.hibernate.search.backend.elasticsearch.lowlevel.index.settings.impl.PropertyMappingIndexSettingsContributor;
import org.hibernate.search.backend.elasticsearch.search.aggregation.impl.ElasticsearchCountDocumentAggregation;
import org.hibernate.search.backend.elasticsearch.types.dsl.ElasticsearchIndexFieldTypeFactory;
import org.hibernate.search.backend.elasticsearch.types.dsl.provider.impl.ElasticsearchIndexFieldTypeFactoryProvider;
import org.hibernate.search.backend.elasticsearch.types.impl.ElasticsearchIndexCompositeNodeType;
Expand All @@ -45,6 +46,7 @@
import org.hibernate.search.engine.common.tree.spi.TreeNodeInclusion;
import org.hibernate.search.engine.mapper.mapping.building.spi.IndexFieldTypeDefaultsProvider;
import org.hibernate.search.engine.reporting.spi.EventContexts;
import org.hibernate.search.engine.search.aggregation.spi.AggregationTypeKeys;
import org.hibernate.search.util.common.reporting.EventContext;

public class ElasticsearchIndexRootBuilder extends AbstractElasticsearchIndexCompositeNodeBuilder
Expand Down Expand Up @@ -84,6 +86,8 @@ public ElasticsearchIndexRootBuilder(ElasticsearchIndexFieldTypeFactoryProvider
this.customIndexMapping = customIndexMapping;
this.defaultDynamicType = DynamicType.create( dynamicMapping );

this.typeBuilder.queryElementFactory( AggregationTypeKeys.COUNT_DOCUMENTS,
ElasticsearchCountDocumentAggregation.factory( false ) );
this.addDefaultImplicitFields();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import java.util.Map;

import org.hibernate.search.backend.elasticsearch.gson.impl.JsonAccessor;
import org.hibernate.search.backend.elasticsearch.logging.impl.ElasticsearchClientLog;
import org.hibernate.search.backend.elasticsearch.search.common.impl.ElasticsearchSearchIndexScope;
import org.hibernate.search.backend.elasticsearch.search.common.impl.ElasticsearchSearchIndexValueFieldContext;
import org.hibernate.search.backend.elasticsearch.search.predicate.impl.ElasticsearchSearchPredicate;
Expand All @@ -29,21 +28,20 @@ public abstract class AbstractElasticsearchBucketAggregation<K, V>
private static final String ROOT_DOC_COUNT_NAME = "root_doc_count";
private static final JsonAccessor<JsonObject> REQUEST_AGGREGATIONS_ROOT_DOC_COUNT_ACCESSOR =
JsonAccessor.root().property( "aggregations" ).property( ROOT_DOC_COUNT_NAME ).asObject();
private static final JsonAccessor<Long> RESPONSE_DOC_COUNT_ACCESSOR =
JsonAccessor.root().property( "doc_count" ).asLong();
private static final JsonAccessor<Long> RESPONSE_ROOT_DOC_COUNT_ACCESSOR =
JsonAccessor.root().property( ROOT_DOC_COUNT_NAME ).property( "doc_count" ).asLong();

protected static final String INNER_EXTRACTOR_KEY = "innerExtractorKey";
protected static final String INNER_EXTRACTOR = "innerExtractor";

AbstractElasticsearchBucketAggregation(AbstractBuilder<K, V> builder) {
super( builder );
}

@Override
protected final JsonObject doRequest(AggregationRequestContext context) {
protected final JsonObject doRequest(AggregationRequestBuildingContextContext context) {
JsonObject outerObject = new JsonObject();
JsonObject innerObject = new JsonObject();

doRequest( outerObject, innerObject );
doRequest( outerObject, innerObject, context );

if ( isNested() ) {
JsonObject rootDocCountSubAggregationOuterObject = new JsonObject();
Expand All @@ -56,20 +54,8 @@ protected final JsonObject doRequest(AggregationRequestContext context) {
return outerObject;
}

protected abstract void doRequest(JsonObject outerObject, JsonObject innerObject);

protected final long getBucketDocCount(JsonObject bucket) {
if ( isNested() ) {
// We must return the number of root documents,
// not the number of leaf documents that Elasticsearch returns by default.
return RESPONSE_ROOT_DOC_COUNT_ACCESSOR.get( bucket )
.orElseThrow( ElasticsearchClientLog.INSTANCE::elasticsearchResponseMissingData );
}
else {
return RESPONSE_DOC_COUNT_ACCESSOR.get( bucket )
.orElseThrow( ElasticsearchClientLog.INSTANCE::elasticsearchResponseMissingData );
}
}
protected abstract void doRequest(JsonObject outerObject, JsonObject innerObject,
AggregationRequestBuildingContextContext context);

protected abstract class AbstractBucketExtractor<A, B> extends AbstractExtractor<Map<A, B>> {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,12 @@ public abstract class AbstractElasticsearchNestableAggregation<A> extends Abstra

@Override
public final Extractor<A> request(AggregationRequestContext context, AggregationKey<?> key, JsonObject jsonAggregations) {
jsonAggregations.add( key.name(), request( context ) );
return extractor( context );
AggregationRequestBuildingContextContext buildingContext = new AggregationRequestBuildingContextContext( context );
jsonAggregations.add( key.name(), request( buildingContext ) );
return extractor( buildingContext );
}

private JsonObject request(AggregationRequestContext context) {
private JsonObject request(AggregationRequestBuildingContextContext context) {
JsonObject result = doRequest( context );

if ( nestedPathHierarchy.isEmpty() ) {
Expand Down Expand Up @@ -90,9 +91,9 @@ private JsonObject request(AggregationRequestContext context) {
return result;
}

protected abstract JsonObject doRequest(AggregationRequestContext context);
protected abstract JsonObject doRequest(AggregationRequestBuildingContextContext context);

protected abstract Extractor<A> extractor(AggregationRequestContext context);
protected abstract Extractor<A> extractor(AggregationRequestBuildingContextContext context);

protected abstract static class AbstractExtractor<T> implements Extractor<T> {

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* SPDX-License-Identifier: Apache-2.0
* Copyright Red Hat Inc. and Hibernate Authors
*/
package org.hibernate.search.backend.elasticsearch.search.aggregation.impl;

import java.util.HashMap;
import java.util.Map;
import java.util.Objects;

import org.hibernate.search.backend.elasticsearch.search.predicate.impl.PredicateRequestContext;
import org.hibernate.search.util.common.annotation.Incubating;

/**
 * An {@link AggregationRequestContext} that wraps another request context and additionally
 * carries values created while the aggregation JSON is being built (in one of the "doRequest" methods),
 * so that "later" steps, e.g. the creation of the extractor, can retrieve them.
 * <p>
 * Values are stored under {@link Key}s; two keys are considered the same entry when their names are equal.
 */
@Incubating
public final class AggregationRequestBuildingContextContext implements AggregationRequestContext {
	private final Map<Key<?>, Object> buildingContext = new HashMap<>();
	private final AggregationRequestContext aggregationRequestContext;

	/**
	 * @param aggregationRequestContext The request context to wrap and delegate to.
	 */
	public AggregationRequestBuildingContextContext(AggregationRequestContext aggregationRequestContext) {
		this.aggregationRequestContext = aggregationRequestContext;
	}

	/**
	 * Creates a key that can be used to store/retrieve a value in a building context.
	 *
	 * @param name The name identifying the key.
	 * @param <V> The type of value the caller expects to find under this key.
	 * @return A new key with the given name.
	 */
	public static <V> Key<V> buildingContextKey(String name) {
		return new Key<>( name );
	}

	/**
	 * Stores a value under the given key, replacing any value previously stored under an equal key.
	 *
	 * @param key The key to store the value under.
	 * @param value The value to store.
	 */
	public void add(Key<?> key, Object value) {
		buildingContext.put( key, value );
	}

	/**
	 * @param key The key to look up.
	 * @param <T> The type of value expected by the caller.
	 * @return The value stored under the given key, or {@code null} if there is none.
	 * The value is cast without any runtime check.
	 */
	public <T> T get(Key<T> key) {
		return key.cast( buildingContext.get( key ) );
	}

	/**
	 * @return The wrapped request context, as passed to the constructor.
	 */
	public AggregationRequestContext rootAggregationRequestContext() {
		return aggregationRequestContext;
	}

	@Override
	public PredicateRequestContext getRootPredicateContext() {
		// Simply delegate to the wrapped context.
		return aggregationRequestContext.getRootPredicateContext();
	}

	/**
	 * A name-based key for values held in the building context.
	 *
	 * @param <V> The type of value the key is expected to point to.
	 */
	public static class Key<V> {

		private final String name;

		private Key(String name) {
			this.name = name;
		}

		// The type parameter is not checked at runtime:
		// it is up to the callers to use consistent types for a given key name.
		@SuppressWarnings("unchecked")
		private V cast(Object value) {
			return (V) value;
		}

		@Override
		public int hashCode() {
			return name == null ? 0 : name.hashCode();
		}

		@Override
		public boolean equals(Object o) {
			if ( this == o ) {
				return true;
			}
			if ( o == null || !getClass().equals( o.getClass() ) ) {
				return false;
			}
			Key<?> other = (Key<?>) o;
			return Objects.equals( name, other.name );
		}
	}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/*
* SPDX-License-Identifier: Apache-2.0
* Copyright Red Hat Inc. and Hibernate Authors
*/
package org.hibernate.search.backend.elasticsearch.search.aggregation.impl;

import org.hibernate.search.backend.elasticsearch.gson.impl.JsonAccessor;
import org.hibernate.search.backend.elasticsearch.logging.impl.ElasticsearchClientLog;
import org.hibernate.search.backend.elasticsearch.logging.impl.QueryLog;
import org.hibernate.search.backend.elasticsearch.search.common.impl.ElasticsearchSearchIndexCompositeNodeContext;
import org.hibernate.search.backend.elasticsearch.search.common.impl.ElasticsearchSearchIndexScope;
import org.hibernate.search.backend.elasticsearch.search.query.impl.ElasticsearchSearchQueryExtractContext;
import org.hibernate.search.engine.search.aggregation.AggregationKey;
import org.hibernate.search.engine.search.aggregation.spi.CountDocumentAggregationBuilder;
import org.hibernate.search.engine.search.common.spi.SearchQueryElementFactory;

import com.google.gson.JsonObject;

/**
 * An aggregation producing a document count as a {@link Long}.
 * <p>
 * This aggregation does not add anything to the JSON of the request;
 * the count is read either from the aggregation result handed over to the extractor,
 * or, when there is no such result, from the total hit count of the response body.
 */
public class ElasticsearchCountDocumentAggregation extends AbstractElasticsearchAggregation<Long> {

	private static final JsonAccessor<Long> RESPONSE_DOC_COUNT_ACCESSOR =
			JsonAccessor.root().property( "doc_count" ).asLong();
	private static final JsonAccessor<Long> RESPONSE_ROOT_DOC_COUNT_ACCESSOR =
			JsonAccessor.root().property( "root_doc_count" ).property( "doc_count" ).asLong();

	private static final JsonAccessor<Long> TOTAL_HITS_VALUE_PROPERTY_ACCESSOR =
			JsonAccessor.root().property( "hits" )
					.property( "total" )
					.property( "value" ).asLong();

	/**
	 * @param isNested Whether the created aggregations should read the count
	 * from the "root_doc_count" sub-object rather than directly from "doc_count".
	 * @return A factory creating type selectors for this aggregation.
	 */
	public static SearchQueryElementFactory<CountDocumentAggregationBuilder.TypeSelector,
			ElasticsearchSearchIndexScope<?>,
			ElasticsearchSearchIndexCompositeNodeContext> factory(boolean isNested) {
		return new Factory( isNested );
	}

	private final boolean isNested;

	private ElasticsearchCountDocumentAggregation(Builder builder) {
		super( builder );
		this.isNested = builder.isNested;
	}

	@Override
	public Extractor<Long> request(AggregationRequestContext context, AggregationKey<?> key, JsonObject jsonAggregations) {
		// Nothing is contributed to jsonAggregations: only an extractor is returned.
		return new CountDocumentsExtractor( isNested );
	}

	/**
	 * Extracts the count from an aggregation result, or from the response body if there is no such result.
	 */
	private record CountDocumentsExtractor(boolean isNested) implements Extractor<Long> {

		@Override
		public Long extract(JsonObject aggregationResult, AggregationExtractContext context) {
			if ( aggregationResult == null ) {
				// No aggregation result to read from: the only other source is the total hit count
				// of the response body, which is reachable through a query extract context only.
				if ( !( context instanceof ElasticsearchSearchQueryExtractContext queryContext ) ) {
					throw ElasticsearchClientLog.INSTANCE.elasticsearchResponseMissingData();
				}
				return TOTAL_HITS_VALUE_PROPERTY_ACCESSOR.get( queryContext.getResponseBody() )
						.orElseThrow( ElasticsearchClientLog.INSTANCE::elasticsearchResponseMissingData );
			}

			// When nested, we must return the number of root documents,
			// not the number of leaf documents that Elasticsearch returns by default.
			JsonAccessor<Long> countAccessor = isNested
					? RESPONSE_ROOT_DOC_COUNT_ACCESSOR
					: RESPONSE_DOC_COUNT_ACCESSOR;
			return countAccessor.get( aggregationResult )
					.orElseThrow( ElasticsearchClientLog.INSTANCE::elasticsearchResponseMissingData );
		}
	}

	/**
	 * Creates type selectors that carry over the {@code isNested} flag given to the factory.
	 */
	private static class Factory
			implements
			SearchQueryElementFactory<CountDocumentAggregationBuilder.TypeSelector,
					ElasticsearchSearchIndexScope<?>,
					ElasticsearchSearchIndexCompositeNodeContext> {
		private final boolean isNested;

		public Factory(boolean isNested) {
			this.isNested = isNested;
		}

		@Override
		public CountDocumentAggregationBuilder.TypeSelector create(ElasticsearchSearchIndexScope<?> scope,
				ElasticsearchSearchIndexCompositeNodeContext node) {
			return new ElasticsearchCountDocumentAggregation.TypeSelector( scope, isNested );
		}

		@Override
		public void checkCompatibleWith(SearchQueryElementFactory<?, ?, ?> other) {
			// Factories are only considered compatible when they are of the exact same class.
			Class<?> otherClass = other.getClass();
			if ( getClass() != otherClass ) {
				throw QueryLog.INSTANCE.differentImplementationClassForQueryElement( getClass(), otherClass );
			}
		}
	}

	/**
	 * Type selector handing out builders for the given scope.
	 */
	private record TypeSelector(ElasticsearchSearchIndexScope<?> scope, boolean isNested)
			implements CountDocumentAggregationBuilder.TypeSelector {

		@Override
		public CountDocumentAggregationBuilder type() {
			return new Builder( scope, isNested );
		}
	}

	/**
	 * Builder of {@link ElasticsearchCountDocumentAggregation} instances.
	 */
	private static class Builder extends AbstractBuilder<Long>
			implements CountDocumentAggregationBuilder {
		private final boolean isNested;

		private Builder(ElasticsearchSearchIndexScope<?> scope, boolean isNested) {
			super( scope );
			this.isNested = isNested;
		}

		@Override
		public ElasticsearchCountDocumentAggregation build() {
			return new ElasticsearchCountDocumentAggregation( this );
		}
	}
}
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ private ElasticsearchMetricFieldAggregation(Builder<F, K> builder) {
}

@Override
protected final JsonObject doRequest(AggregationRequestContext context) {
protected final JsonObject doRequest(AggregationRequestBuildingContextContext context) {
JsonObject outerObject = new JsonObject();
JsonObject innerObject = new JsonObject();

Expand All @@ -84,7 +84,7 @@ protected final JsonObject doRequest(AggregationRequestContext context) {
}

@Override
protected Extractor<K> extractor(AggregationRequestContext context) {
protected Extractor<K> extractor(AggregationRequestBuildingContextContext context) {
return metricFieldExtractorCreator.extractor( filter );
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ private ElasticsearchMetricLongAggregation(Builder builder) {
}

@Override
protected final JsonObject doRequest(AggregationRequestContext context) {
protected final JsonObject doRequest(AggregationRequestBuildingContextContext context) {
JsonObject outerObject = new JsonObject();
JsonObject innerObject = new JsonObject();

Expand All @@ -57,7 +57,7 @@ protected final JsonObject doRequest(AggregationRequestContext context) {
}

@Override
protected Extractor<Long> extractor(AggregationRequestContext context) {
protected Extractor<Long> extractor(AggregationRequestBuildingContextContext context) {
return new MetricLongExtractor( nestedPathHierarchy, filter );
}

Expand Down
Loading