|
28 | 28 | import com.linkedin.metadata.search.elasticsearch.query.filter.QueryFilterRewriterContext;
|
29 | 29 | import com.linkedin.metadata.utils.CriterionUtils;
|
30 | 30 | import io.datahubproject.metadata.context.OperationContext;
|
| 31 | +import java.util.ArrayList; |
31 | 32 | import java.util.Collections;
|
32 | 33 | import java.util.HashMap;
|
33 | 34 | import java.util.List;
|
@@ -935,4 +936,317 @@ public static int applyResultLimit(@Nonnull ElasticSearchConfiguration config, i
|
935 | 936 | }
|
936 | 937 | return count;
|
937 | 938 | }
|
| 939 | + |
| 940 | + /** |
| 941 | + * Performs a single optimization pass on the query tree. |
| 942 | + * |
| 943 | + * @param query The QueryBuilder to optimize |
| 944 | + * @param considerScore If true, moves to must() for scoring; if false, moves to filter() |
| 945 | + * @return true if any optimization was performed, false otherwise |
| 946 | + */ |
| 947 | + static boolean optimizePass(QueryBuilder query, boolean considerScore) { |
| 948 | + if (!(query instanceof BoolQueryBuilder)) { |
| 949 | + return false; |
| 950 | + } |
| 951 | + |
| 952 | + BoolQueryBuilder boolQuery = (BoolQueryBuilder) query; |
| 953 | + boolean changed = false; |
| 954 | + |
| 955 | + // First, recursively optimize all nested queries |
| 956 | + // Must clauses |
| 957 | + for (int i = 0; i < boolQuery.must().size(); i++) { |
| 958 | + changed |= optimizePass(boolQuery.must().get(i), considerScore); |
| 959 | + } |
| 960 | + |
| 961 | + // Filter clauses - always use considerScore=false for filters |
| 962 | + for (int i = 0; i < boolQuery.filter().size(); i++) { |
| 963 | + changed |= optimizePass(boolQuery.filter().get(i), false); |
| 964 | + } |
| 965 | + |
| 966 | + // Should clauses |
| 967 | + for (int i = 0; i < boolQuery.should().size(); i++) { |
| 968 | + changed |= optimizePass(boolQuery.should().get(i), considerScore); |
| 969 | + } |
| 970 | + |
| 971 | + // MustNot clauses - always use considerScore=false |
| 972 | + for (int i = 0; i < boolQuery.mustNot().size(); i++) { |
| 973 | + changed |= optimizePass(boolQuery.mustNot().get(i), false); |
| 974 | + } |
| 975 | + |
| 976 | + // After optimizing children, check if this query itself can be optimized |
| 977 | + |
| 978 | + // Optimization 1: Convert single should clause with minimumShouldMatch=1 |
| 979 | + if (isOptimizableShould(boolQuery)) { |
| 980 | + // Get the should clause |
| 981 | + QueryBuilder shouldClause = boolQuery.should().get(0); |
| 982 | + |
| 983 | + // Remove the should clause |
| 984 | + boolQuery.should().clear(); |
| 985 | + |
| 986 | + // Reset minimumShouldMatch since there are no more should clauses |
| 987 | + boolQuery.minimumShouldMatch(null); |
| 988 | + |
| 989 | + // Add to appropriate clause type |
| 990 | + if (considerScore) { |
| 991 | + boolQuery.must(shouldClause); |
| 992 | + } else { |
| 993 | + boolQuery.filter(shouldClause); |
| 994 | + } |
| 995 | + |
| 996 | + changed = true; |
| 997 | + } |
| 998 | + |
| 999 | + // Optimization 2: Flatten nested bool queries with only filter clauses |
| 1000 | + if (canFlattenFilters(boolQuery)) { |
| 1001 | + changed |= flattenFilters(boolQuery); |
| 1002 | + } |
| 1003 | + |
| 1004 | + // Note: We don't handle unwrapping here because it requires replacing the query object |
| 1005 | + // which can't be done in-place. Unwrapping is handled at the top level in queryOptimize. |
| 1006 | + |
| 1007 | + return changed; |
| 1008 | + } |
| 1009 | + |
| 1010 | + /** |
| 1011 | + * Checks if the BoolQueryBuilder has a single should clause with minimumShouldMatch=1 |
| 1012 | + * |
| 1013 | + * @param query The BoolQueryBuilder to check |
| 1014 | + * @return true if the query can be optimized, false otherwise |
| 1015 | + */ |
| 1016 | + static boolean isOptimizableShould(BoolQueryBuilder query) { |
| 1017 | + // Check if there's exactly one should clause |
| 1018 | + if (query.should().size() != 1) { |
| 1019 | + return false; |
| 1020 | + } |
| 1021 | + |
| 1022 | + // Check if minimumShouldMatch is set to "1" |
| 1023 | + String minShouldMatch = query.minimumShouldMatch(); |
| 1024 | + if (minShouldMatch == null) { |
| 1025 | + return false; |
| 1026 | + } |
| 1027 | + |
| 1028 | + // Handle different formats of minimumShouldMatch |
| 1029 | + // It could be "1", "1%", or other formats |
| 1030 | + if (minShouldMatch.equals("1") || minShouldMatch.equals("100%")) { |
| 1031 | + return true; |
| 1032 | + } |
| 1033 | + |
| 1034 | + // Check if it's a percentage format like "100%" |
| 1035 | + if (minShouldMatch.endsWith("%")) { |
| 1036 | + try { |
| 1037 | + int percentage = Integer.parseInt(minShouldMatch.substring(0, minShouldMatch.length() - 1)); |
| 1038 | + // With only 1 should clause, 100% means that 1 clause must match |
| 1039 | + return percentage == 100; |
| 1040 | + } catch (NumberFormatException e) { |
| 1041 | + return false; |
| 1042 | + } |
| 1043 | + } |
| 1044 | + |
| 1045 | + return false; |
| 1046 | + } |
| 1047 | + |
| 1048 | + /** |
| 1049 | + * Checks if a bool query can have its filter clauses flattened. This is possible when it has |
| 1050 | + * filter clauses containing other bool queries that only have filter clauses. |
| 1051 | + * |
| 1052 | + * @param query The BoolQueryBuilder to check |
| 1053 | + * @return true if filters can be flattened |
| 1054 | + */ |
| 1055 | + static boolean canFlattenFilters(BoolQueryBuilder query) { |
| 1056 | + // Check each filter clause |
| 1057 | + for (QueryBuilder filter : query.filter()) { |
| 1058 | + if (filter instanceof BoolQueryBuilder) { |
| 1059 | + BoolQueryBuilder nestedBool = (BoolQueryBuilder) filter; |
| 1060 | + // If the nested bool query has only filter clauses (no must, should, mustNot), |
| 1061 | + // then we can flatten it |
| 1062 | + if (nestedBool.must().isEmpty() |
| 1063 | + && nestedBool.should().isEmpty() |
| 1064 | + && nestedBool.mustNot().isEmpty() |
| 1065 | + && !nestedBool.filter().isEmpty()) { |
| 1066 | + return true; |
| 1067 | + } |
| 1068 | + } |
| 1069 | + } |
| 1070 | + return false; |
| 1071 | + } |
| 1072 | + |
| 1073 | + /** |
| 1074 | + * Flattens nested filter clauses in a bool query. Extracts filter clauses from nested bool |
| 1075 | + * queries that only contain filters and adds them directly to the parent. |
| 1076 | + * |
| 1077 | + * @param query The BoolQueryBuilder to flatten |
| 1078 | + * @return true if any flattening occurred |
| 1079 | + */ |
| 1080 | + private static boolean flattenFilters(BoolQueryBuilder query) { |
| 1081 | + boolean changed = false; |
| 1082 | + List<QueryBuilder> filtersToAdd = new ArrayList<>(); |
| 1083 | + List<QueryBuilder> filtersToRemove = new ArrayList<>(); |
| 1084 | + |
| 1085 | + // Identify filters that can be flattened |
| 1086 | + for (QueryBuilder filter : query.filter()) { |
| 1087 | + if (filter instanceof BoolQueryBuilder) { |
| 1088 | + BoolQueryBuilder nestedBool = (BoolQueryBuilder) filter; |
| 1089 | + // If the nested bool query has only filter clauses, extract them |
| 1090 | + if (nestedBool.must().isEmpty() |
| 1091 | + && nestedBool.should().isEmpty() |
| 1092 | + && nestedBool.mustNot().isEmpty() |
| 1093 | + && !nestedBool.filter().isEmpty()) { |
| 1094 | + |
| 1095 | + // Mark for removal |
| 1096 | + filtersToRemove.add(filter); |
| 1097 | + |
| 1098 | + // Extract all filters from the nested bool query |
| 1099 | + filtersToAdd.addAll(nestedBool.filter()); |
| 1100 | + |
| 1101 | + changed = true; |
| 1102 | + } |
| 1103 | + } |
| 1104 | + } |
| 1105 | + |
| 1106 | + // Apply the changes |
| 1107 | + if (changed) { |
| 1108 | + // Remove the nested bool queries |
| 1109 | + for (QueryBuilder toRemove : filtersToRemove) { |
| 1110 | + query.filter().remove(toRemove); |
| 1111 | + } |
| 1112 | + |
| 1113 | + // Add the extracted filters directly |
| 1114 | + for (QueryBuilder toAdd : filtersToAdd) { |
| 1115 | + query.filter(toAdd); |
| 1116 | + } |
| 1117 | + } |
| 1118 | + |
| 1119 | + return changed; |
| 1120 | + } |
| 1121 | + |
| 1122 | + /** |
| 1123 | + * Checks if a bool query can be unwrapped (replaced by its single clause). This is possible when |
| 1124 | + * the bool query has exactly one clause total. |
| 1125 | + * |
| 1126 | + * @param query The BoolQueryBuilder to check |
| 1127 | + * @return true if the query can be unwrapped |
| 1128 | + */ |
| 1129 | + static boolean canUnwrap(BoolQueryBuilder query) { |
| 1130 | + int totalClauses = |
| 1131 | + query.must().size() |
| 1132 | + + query.filter().size() |
| 1133 | + + query.should().size() |
| 1134 | + + query.mustNot().size(); |
| 1135 | + |
| 1136 | + // Can unwrap if there's exactly one clause and no minimumShouldMatch constraint |
| 1137 | + return totalClauses == 1 && query.minimumShouldMatch() == null; |
| 1138 | + } |
| 1139 | + |
| 1140 | + /** |
| 1141 | + * Fully optimizes a query by running optimization passes until no more changes occur. |
| 1142 | + * |
| 1143 | + * @param query The QueryBuilder to optimize |
| 1144 | + * @param considerScore If true, preserves scoring; if false, optimizes for filtering |
| 1145 | + * @return The optimized query (may be a different instance if unwrapping occurred) |
| 1146 | + */ |
| 1147 | + public static QueryBuilder queryOptimize(QueryBuilder query, boolean considerScore) { |
| 1148 | + if (query == null) { |
| 1149 | + return null; |
| 1150 | + } |
| 1151 | + |
| 1152 | + // For non-bool queries, return as-is |
| 1153 | + if (!(query instanceof BoolQueryBuilder)) { |
| 1154 | + return query; |
| 1155 | + } |
| 1156 | + |
| 1157 | + BoolQueryBuilder boolQuery = (BoolQueryBuilder) query; |
| 1158 | + |
| 1159 | + // Keep optimizing until no more changes |
| 1160 | + boolean changed; |
| 1161 | + int iterations = 0; |
| 1162 | + int maxIterations = 100; // Safety limit to prevent infinite loops |
| 1163 | + |
| 1164 | + do { |
| 1165 | + changed = optimizePass(boolQuery, considerScore); |
| 1166 | + iterations++; |
| 1167 | + |
| 1168 | + if (iterations >= maxIterations) { |
| 1169 | + // Log warning or throw exception in production code |
| 1170 | + break; |
| 1171 | + } |
| 1172 | + } while (changed); |
| 1173 | + |
| 1174 | + // After all optimization passes, check if we can unwrap at the top level |
| 1175 | + if (canUnwrap(boolQuery)) { |
| 1176 | + // Return the single clause directly |
| 1177 | + if (!boolQuery.must().isEmpty()) { |
| 1178 | + return queryOptimize(boolQuery.must().get(0), considerScore); |
| 1179 | + } else if (!boolQuery.filter().isEmpty()) { |
| 1180 | + return queryOptimize(boolQuery.filter().get(0), false); |
| 1181 | + } else if (!boolQuery.should().isEmpty()) { |
| 1182 | + return queryOptimize(boolQuery.should().get(0), considerScore); |
| 1183 | + } else if (!boolQuery.mustNot().isEmpty()) { |
| 1184 | + // mustNot can't stand alone, keep it wrapped |
| 1185 | + return boolQuery; |
| 1186 | + } |
| 1187 | + } |
| 1188 | + |
| 1189 | + return boolQuery; |
| 1190 | + } |
| 1191 | + |
| 1192 | + /** |
| 1193 | + * Recursively optimizes and unwraps nested bool queries where possible. This is a helper method |
| 1194 | + * that can be used to handle unwrapping during optimization. |
| 1195 | + * |
| 1196 | + * @param query The QueryBuilder to process |
| 1197 | + * @param parent The parent BoolQueryBuilder (null if this is the root) |
| 1198 | + * @param clauseType The type of clause this query is in the parent (must, filter, should, |
| 1199 | + * mustNot) |
| 1200 | + * @param index The index of this query in the parent's clause list |
| 1201 | + * @return true if any changes were made |
| 1202 | + */ |
| 1203 | + private static boolean optimizeAndUnwrapNested( |
| 1204 | + QueryBuilder query, BoolQueryBuilder parent, String clauseType, int index) { |
| 1205 | + if (!(query instanceof BoolQueryBuilder)) { |
| 1206 | + return false; |
| 1207 | + } |
| 1208 | + |
| 1209 | + BoolQueryBuilder boolQuery = (BoolQueryBuilder) query; |
| 1210 | + |
| 1211 | + // First optimize the query |
| 1212 | + boolean changed = |
| 1213 | + optimizePass(boolQuery, "must".equals(clauseType) || "should".equals(clauseType)); |
| 1214 | + |
| 1215 | + // If this query can be unwrapped and we have a parent, unwrap it |
| 1216 | + if (parent != null && canUnwrap(boolQuery)) { |
| 1217 | + QueryBuilder unwrapped = null; |
| 1218 | + |
| 1219 | + if (!boolQuery.must().isEmpty()) { |
| 1220 | + unwrapped = boolQuery.must().get(0); |
| 1221 | + } else if (!boolQuery.filter().isEmpty()) { |
| 1222 | + unwrapped = boolQuery.filter().get(0); |
| 1223 | + } else if (!boolQuery.should().isEmpty()) { |
| 1224 | + unwrapped = boolQuery.should().get(0); |
| 1225 | + } else if (!boolQuery.mustNot().isEmpty()) { |
| 1226 | + // mustNot needs special handling - can't unwrap |
| 1227 | + return changed; |
| 1228 | + } |
| 1229 | + |
| 1230 | + if (unwrapped != null) { |
| 1231 | + // Replace in parent |
| 1232 | + switch (clauseType) { |
| 1233 | + case "must": |
| 1234 | + parent.must().set(index, unwrapped); |
| 1235 | + break; |
| 1236 | + case "filter": |
| 1237 | + parent.filter().set(index, unwrapped); |
| 1238 | + break; |
| 1239 | + case "should": |
| 1240 | + parent.should().set(index, unwrapped); |
| 1241 | + break; |
| 1242 | + case "mustNot": |
| 1243 | + parent.mustNot().set(index, unwrapped); |
| 1244 | + break; |
| 1245 | + } |
| 1246 | + changed = true; |
| 1247 | + } |
| 1248 | + } |
| 1249 | + |
| 1250 | + return changed; |
| 1251 | + } |
938 | 1252 | }
|
0 commit comments