Skip to content

Commit 4b34fbe

Browse files
committed
query optimizer
1 parent 3b645b1 commit 4b34fbe

File tree

8 files changed

+668
-9
lines changed

8 files changed

+668
-9
lines changed

metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import static com.linkedin.metadata.aspect.models.graph.Edge.*;
44
import static com.linkedin.metadata.graph.elastic.ElasticSearchGraphService.*;
55
import static com.linkedin.metadata.search.utils.ESUtils.applyResultLimit;
6+
import static com.linkedin.metadata.search.utils.ESUtils.queryOptimize;
67

78
import com.datahub.util.exception.ESQueryException;
89
import com.google.common.annotations.VisibleForTesting;
@@ -178,7 +179,7 @@ private static BoolQueryBuilder getAggregationFilter(
178179
BoolQueryBuilder subFilter = QueryBuilders.boolQuery();
179180
TermQueryBuilder relationshipTypeTerm =
180181
QueryBuilders.termQuery(RELATIONSHIP_TYPE, pair.getValue().getType()).caseInsensitive(true);
181-
subFilter.must(relationshipTypeTerm);
182+
subFilter.filter(relationshipTypeTerm);
182183

183184
String sourceType;
184185
String destinationType;
@@ -192,10 +193,10 @@ private static BoolQueryBuilder getAggregationFilter(
192193

193194
TermQueryBuilder sourceTypeTerm =
194195
QueryBuilders.termQuery(SOURCE_TYPE, sourceType).caseInsensitive(true);
195-
subFilter.must(sourceTypeTerm);
196+
subFilter.filter(sourceTypeTerm);
196197
TermQueryBuilder destinationTypeTerm =
197198
QueryBuilders.termQuery(DESTINATION_TYPE, destinationType).caseInsensitive(true);
198-
subFilter.must(destinationTypeTerm);
199+
subFilter.filter(destinationTypeTerm);
199200
return subFilter;
200201
}
201202

@@ -261,6 +262,7 @@ public static BoolQueryBuilder buildQuery(
261262
relationshipQuery.should(
262263
QueryBuilders.termQuery(RELATIONSHIP_TYPE, relationshipType)));
263264
relationshipQuery.minimumShouldMatch(1);
265+
264266
finalQuery.filter(relationshipQuery);
265267
}
266268

@@ -1140,6 +1142,10 @@ private List<LineageRelationship> relationshipsGroupQuery(
11401142

11411143
Set<Urn> entityUrnSet = new HashSet<>(entityUrns);
11421144

1145+
if (config.getSearch().getGraph().isQueryOptimization()) {
1146+
queryOptimize(baseQuery, false);
1147+
}
1148+
11431149
// Get search responses as a stream with pagination
11441150
Stream<SearchResponse> responseStream =
11451151
executeGroupByLineageSearchQuery(
@@ -1167,16 +1173,16 @@ private List<LineageRelationship> relationshipsGroupQuery(
11671173
private static BoolQueryBuilder getOutGoingEdgeQuery(
11681174
@Nonnull Set<Urn> urns, @Nonnull Set<EdgeInfo> outgoingEdges) {
11691175
BoolQueryBuilder outgoingEdgeQuery = QueryBuilders.boolQuery();
1170-
outgoingEdgeQuery.must(buildUrnFilters(urns, SOURCE));
1171-
outgoingEdgeQuery.must(buildEdgeFilters(outgoingEdges));
1176+
outgoingEdgeQuery.filter(buildUrnFilters(urns, SOURCE));
1177+
outgoingEdgeQuery.filter(buildEdgeFilters(outgoingEdges));
11721178
return outgoingEdgeQuery;
11731179
}
11741180

11751181
private static BoolQueryBuilder getIncomingEdgeQuery(
11761182
@Nonnull Set<Urn> urns, Set<EdgeInfo> incomingEdges) {
11771183
BoolQueryBuilder incomingEdgeQuery = QueryBuilders.boolQuery();
1178-
incomingEdgeQuery.must(buildUrnFilters(urns, DESTINATION));
1179-
incomingEdgeQuery.must(buildEdgeFilters(incomingEdges));
1184+
incomingEdgeQuery.filter(buildUrnFilters(urns, DESTINATION));
1185+
incomingEdgeQuery.filter(buildEdgeFilters(incomingEdges));
11801186
return incomingEdgeQuery;
11811187
}
11821188

metadata-io/src/main/java/com/linkedin/metadata/search/utils/ESUtils.java

Lines changed: 314 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterContext;
2929
import com.linkedin.metadata.utils.CriterionUtils;
3030
import io.datahubproject.metadata.context.OperationContext;
31+
import java.util.ArrayList;
3132
import java.util.Collections;
3233
import java.util.HashMap;
3334
import java.util.List;
@@ -935,4 +936,317 @@ public static int applyResultLimit(@Nonnull ElasticSearchConfiguration config, i
935936
}
936937
return count;
937938
}
939+
940+
/**
941+
* Performs a single optimization pass on the query tree.
942+
*
943+
* @param query The QueryBuilder to optimize
944+
* @param considerScore If true, moves to must() for scoring; if false, moves to filter()
945+
* @return true if any optimization was performed, false otherwise
946+
*/
947+
static boolean optimizePass(QueryBuilder query, boolean considerScore) {
948+
if (!(query instanceof BoolQueryBuilder)) {
949+
return false;
950+
}
951+
952+
BoolQueryBuilder boolQuery = (BoolQueryBuilder) query;
953+
boolean changed = false;
954+
955+
// First, recursively optimize all nested queries
956+
// Must clauses
957+
for (int i = 0; i < boolQuery.must().size(); i++) {
958+
changed |= optimizePass(boolQuery.must().get(i), considerScore);
959+
}
960+
961+
// Filter clauses - always use considerScore=false for filters
962+
for (int i = 0; i < boolQuery.filter().size(); i++) {
963+
changed |= optimizePass(boolQuery.filter().get(i), false);
964+
}
965+
966+
// Should clauses
967+
for (int i = 0; i < boolQuery.should().size(); i++) {
968+
changed |= optimizePass(boolQuery.should().get(i), considerScore);
969+
}
970+
971+
// MustNot clauses - always use considerScore=false
972+
for (int i = 0; i < boolQuery.mustNot().size(); i++) {
973+
changed |= optimizePass(boolQuery.mustNot().get(i), false);
974+
}
975+
976+
// After optimizing children, check if this query itself can be optimized
977+
978+
// Optimization 1: Convert single should clause with minimumShouldMatch=1
979+
if (isOptimizableShould(boolQuery)) {
980+
// Get the should clause
981+
QueryBuilder shouldClause = boolQuery.should().get(0);
982+
983+
// Remove the should clause
984+
boolQuery.should().clear();
985+
986+
// Reset minimumShouldMatch since there are no more should clauses
987+
boolQuery.minimumShouldMatch(null);
988+
989+
// Add to appropriate clause type
990+
if (considerScore) {
991+
boolQuery.must(shouldClause);
992+
} else {
993+
boolQuery.filter(shouldClause);
994+
}
995+
996+
changed = true;
997+
}
998+
999+
// Optimization 2: Flatten nested bool queries with only filter clauses
1000+
if (canFlattenFilters(boolQuery)) {
1001+
changed |= flattenFilters(boolQuery);
1002+
}
1003+
1004+
// Note: We don't handle unwrapping here because it requires replacing the query object
1005+
// which can't be done in-place. Unwrapping is handled at the top level in queryOptimize.
1006+
1007+
return changed;
1008+
}
1009+
1010+
/**
1011+
* Checks if the BoolQueryBuilder has a single should clause with minimumShouldMatch=1
1012+
*
1013+
* @param query The BoolQueryBuilder to check
1014+
* @return true if the query can be optimized, false otherwise
1015+
*/
1016+
static boolean isOptimizableShould(BoolQueryBuilder query) {
1017+
// Check if there's exactly one should clause
1018+
if (query.should().size() != 1) {
1019+
return false;
1020+
}
1021+
1022+
// Check if minimumShouldMatch is set to "1"
1023+
String minShouldMatch = query.minimumShouldMatch();
1024+
if (minShouldMatch == null) {
1025+
return false;
1026+
}
1027+
1028+
// Handle different formats of minimumShouldMatch
1029+
// It could be "1", "1%", or other formats
1030+
if (minShouldMatch.equals("1") || minShouldMatch.equals("100%")) {
1031+
return true;
1032+
}
1033+
1034+
// Check if it's a percentage format like "100%"
1035+
if (minShouldMatch.endsWith("%")) {
1036+
try {
1037+
int percentage = Integer.parseInt(minShouldMatch.substring(0, minShouldMatch.length() - 1));
1038+
// With only 1 should clause, 100% means that 1 clause must match
1039+
return percentage == 100;
1040+
} catch (NumberFormatException e) {
1041+
return false;
1042+
}
1043+
}
1044+
1045+
return false;
1046+
}
1047+
1048+
/**
1049+
* Checks if a bool query can have its filter clauses flattened. This is possible when it has
1050+
* filter clauses containing other bool queries that only have filter clauses.
1051+
*
1052+
* @param query The BoolQueryBuilder to check
1053+
* @return true if filters can be flattened
1054+
*/
1055+
static boolean canFlattenFilters(BoolQueryBuilder query) {
1056+
// Check each filter clause
1057+
for (QueryBuilder filter : query.filter()) {
1058+
if (filter instanceof BoolQueryBuilder) {
1059+
BoolQueryBuilder nestedBool = (BoolQueryBuilder) filter;
1060+
// If the nested bool query has only filter clauses (no must, should, mustNot),
1061+
// then we can flatten it
1062+
if (nestedBool.must().isEmpty()
1063+
&& nestedBool.should().isEmpty()
1064+
&& nestedBool.mustNot().isEmpty()
1065+
&& !nestedBool.filter().isEmpty()) {
1066+
return true;
1067+
}
1068+
}
1069+
}
1070+
return false;
1071+
}
1072+
1073+
/**
1074+
* Flattens nested filter clauses in a bool query. Extracts filter clauses from nested bool
1075+
* queries that only contain filters and adds them directly to the parent.
1076+
*
1077+
* @param query The BoolQueryBuilder to flatten
1078+
* @return true if any flattening occurred
1079+
*/
1080+
private static boolean flattenFilters(BoolQueryBuilder query) {
1081+
boolean changed = false;
1082+
List<QueryBuilder> filtersToAdd = new ArrayList<>();
1083+
List<QueryBuilder> filtersToRemove = new ArrayList<>();
1084+
1085+
// Identify filters that can be flattened
1086+
for (QueryBuilder filter : query.filter()) {
1087+
if (filter instanceof BoolQueryBuilder) {
1088+
BoolQueryBuilder nestedBool = (BoolQueryBuilder) filter;
1089+
// If the nested bool query has only filter clauses, extract them
1090+
if (nestedBool.must().isEmpty()
1091+
&& nestedBool.should().isEmpty()
1092+
&& nestedBool.mustNot().isEmpty()
1093+
&& !nestedBool.filter().isEmpty()) {
1094+
1095+
// Mark for removal
1096+
filtersToRemove.add(filter);
1097+
1098+
// Extract all filters from the nested bool query
1099+
filtersToAdd.addAll(nestedBool.filter());
1100+
1101+
changed = true;
1102+
}
1103+
}
1104+
}
1105+
1106+
// Apply the changes
1107+
if (changed) {
1108+
// Remove the nested bool queries
1109+
for (QueryBuilder toRemove : filtersToRemove) {
1110+
query.filter().remove(toRemove);
1111+
}
1112+
1113+
// Add the extracted filters directly
1114+
for (QueryBuilder toAdd : filtersToAdd) {
1115+
query.filter(toAdd);
1116+
}
1117+
}
1118+
1119+
return changed;
1120+
}
1121+
1122+
/**
1123+
* Checks if a bool query can be unwrapped (replaced by its single clause). This is possible when
1124+
* the bool query has exactly one clause total.
1125+
*
1126+
* @param query The BoolQueryBuilder to check
1127+
* @return true if the query can be unwrapped
1128+
*/
1129+
static boolean canUnwrap(BoolQueryBuilder query) {
1130+
int totalClauses =
1131+
query.must().size()
1132+
+ query.filter().size()
1133+
+ query.should().size()
1134+
+ query.mustNot().size();
1135+
1136+
// Can unwrap if there's exactly one clause and no minimumShouldMatch constraint
1137+
return totalClauses == 1 && query.minimumShouldMatch() == null;
1138+
}
1139+
1140+
/**
1141+
* Fully optimizes a query by running optimization passes until no more changes occur.
1142+
*
1143+
* @param query The QueryBuilder to optimize
1144+
* @param considerScore If true, preserves scoring; if false, optimizes for filtering
1145+
* @return The optimized query (may be a different instance if unwrapping occurred)
1146+
*/
1147+
public static QueryBuilder queryOptimize(QueryBuilder query, boolean considerScore) {
1148+
if (query == null) {
1149+
return null;
1150+
}
1151+
1152+
// For non-bool queries, return as-is
1153+
if (!(query instanceof BoolQueryBuilder)) {
1154+
return query;
1155+
}
1156+
1157+
BoolQueryBuilder boolQuery = (BoolQueryBuilder) query;
1158+
1159+
// Keep optimizing until no more changes
1160+
boolean changed;
1161+
int iterations = 0;
1162+
int maxIterations = 100; // Safety limit to prevent infinite loops
1163+
1164+
do {
1165+
changed = optimizePass(boolQuery, considerScore);
1166+
iterations++;
1167+
1168+
if (iterations >= maxIterations) {
1169+
// Log warning or throw exception in production code
1170+
break;
1171+
}
1172+
} while (changed);
1173+
1174+
// After all optimization passes, check if we can unwrap at the top level
1175+
if (canUnwrap(boolQuery)) {
1176+
// Return the single clause directly
1177+
if (!boolQuery.must().isEmpty()) {
1178+
return queryOptimize(boolQuery.must().get(0), considerScore);
1179+
} else if (!boolQuery.filter().isEmpty()) {
1180+
return queryOptimize(boolQuery.filter().get(0), false);
1181+
} else if (!boolQuery.should().isEmpty()) {
1182+
return queryOptimize(boolQuery.should().get(0), considerScore);
1183+
} else if (!boolQuery.mustNot().isEmpty()) {
1184+
// mustNot can't stand alone, keep it wrapped
1185+
return boolQuery;
1186+
}
1187+
}
1188+
1189+
return boolQuery;
1190+
}
1191+
1192+
/**
1193+
* Recursively optimizes and unwraps nested bool queries where possible. This is a helper method
1194+
* that can be used to handle unwrapping during optimization.
1195+
*
1196+
* @param query The QueryBuilder to process
1197+
* @param parent The parent BoolQueryBuilder (null if this is the root)
1198+
* @param clauseType The type of clause this query is in the parent (must, filter, should,
1199+
* mustNot)
1200+
* @param index The index of this query in the parent's clause list
1201+
* @return true if any changes were made
1202+
*/
1203+
private static boolean optimizeAndUnwrapNested(
1204+
QueryBuilder query, BoolQueryBuilder parent, String clauseType, int index) {
1205+
if (!(query instanceof BoolQueryBuilder)) {
1206+
return false;
1207+
}
1208+
1209+
BoolQueryBuilder boolQuery = (BoolQueryBuilder) query;
1210+
1211+
// First optimize the query
1212+
boolean changed =
1213+
optimizePass(boolQuery, "must".equals(clauseType) || "should".equals(clauseType));
1214+
1215+
// If this query can be unwrapped and we have a parent, unwrap it
1216+
if (parent != null && canUnwrap(boolQuery)) {
1217+
QueryBuilder unwrapped = null;
1218+
1219+
if (!boolQuery.must().isEmpty()) {
1220+
unwrapped = boolQuery.must().get(0);
1221+
} else if (!boolQuery.filter().isEmpty()) {
1222+
unwrapped = boolQuery.filter().get(0);
1223+
} else if (!boolQuery.should().isEmpty()) {
1224+
unwrapped = boolQuery.should().get(0);
1225+
} else if (!boolQuery.mustNot().isEmpty()) {
1226+
// mustNot needs special handling - can't unwrap
1227+
return changed;
1228+
}
1229+
1230+
if (unwrapped != null) {
1231+
// Replace in parent
1232+
switch (clauseType) {
1233+
case "must":
1234+
parent.must().set(index, unwrapped);
1235+
break;
1236+
case "filter":
1237+
parent.filter().set(index, unwrapped);
1238+
break;
1239+
case "should":
1240+
parent.should().set(index, unwrapped);
1241+
break;
1242+
case "mustNot":
1243+
parent.mustNot().set(index, unwrapped);
1244+
break;
1245+
}
1246+
changed = true;
1247+
}
1248+
}
1249+
1250+
return changed;
1251+
}
9381252
}

0 commit comments

Comments
 (0)