|
19 | 19 | import static java.util.stream.Collectors.collectingAndThen;
|
20 | 20 | import static org.jooq.lambda.tuple.Tuple.tuple;
|
21 | 21 |
|
22 |
| -import java.util.ArrayList; |
23 |
| -import java.util.Comparator; |
24 |
| -import java.util.HashSet; |
25 |
| -import java.util.LinkedHashMap; |
26 |
| -import java.util.List; |
| 22 | +import java.util.*; |
27 | 23 | import java.util.Map.Entry;
|
28 |
| -import java.util.Objects; |
29 |
| -import java.util.Optional; |
30 |
| -import java.util.TreeSet; |
31 |
| -import java.util.function.Function; |
32 |
| -import java.util.function.Predicate; |
33 |
| -import java.util.function.ToIntFunction; |
34 |
| -import java.util.function.ToLongFunction; |
| 24 | +import java.util.function.*; |
35 | 25 | import java.util.stream.Collector;
|
36 | 26 | import java.util.stream.Collectors;
|
37 | 27 |
|
@@ -916,32 +906,152 @@ else if (l1[0] == null)
|
916 | 906 | else if (percentile == 1.0)
|
917 | 907 | return collectingAndThen(maxAllBy(function, comparator), s -> s.findLast());
|
918 | 908 | else
|
919 |
| - return Collector.of( |
920 |
| - () -> new ArrayList<Tuple2<T, U>>(), |
921 |
| - (l, v) -> l.add(tuple(v, function.apply(v))), |
922 |
| - (l1, l2) -> { |
923 |
| - l1.addAll(l2); |
924 |
| - return l1; |
925 |
| - }, |
| 909 | + return percentileCollector( |
| 910 | + function, |
| 911 | + comparator, |
| 912 | + Optional::empty, |
| 913 | + Optional::of, |
| 914 | + l -> Optional.of(l.get(percentileIndex(percentile, l.size())).v1) |
| 915 | + ); |
| 916 | + } |
| 917 | + |
| 918 | + /** |
| 919 | + * Get a {@link Collector} that calculates the <code>PERCENTILE_DISC(percentile)</code> function given natural ordering, producing multiple results. |
| 920 | + */ |
| 921 | + public static <T extends Comparable<? super T>> Collector<T, ?, Seq<T>> percentileAll(double percentile) { |
| 922 | + return percentileAllBy(percentile, t -> t, naturalOrder()); |
| 923 | + } |
| 924 | + |
| 925 | + /** |
| 926 | + * Get a {@link Collector} that calculates the <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering, producing multiple results. |
| 927 | + */ |
| 928 | + public static <T> Collector<T, ?, Seq<T>> percentileAll(double percentile, Comparator<? super T> comparator) { |
| 929 | + return percentileAllBy(percentile, t -> t, comparator); |
| 930 | + } |
| 931 | + |
| 932 | + /** |
| 933 | + * Get a {@link Collector} that calculates the derived <code>PERCENTILE_DISC(percentile)</code> function given natural ordering, producing multiple results. |
| 934 | + */ |
| 935 | + public static <T, U extends Comparable<? super U>> Collector<T, ?, Seq<T>> percentileAllBy(double percentile, Function<? super T, ? extends U> function) { |
| 936 | + return percentileAllBy(percentile, function, naturalOrder()); |
| 937 | + } |
| 938 | + |
| 939 | + /** |
| 940 | + * Get a {@link Collector} that calculates the derived <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering, producing multiple results. |
| 941 | + */ |
| 942 | + public static <T, U> Collector<T, ?, Seq<T>> percentileAllBy(double percentile, Function<? super T, ? extends U> function, Comparator<? super U> comparator) { |
| 943 | + if (percentile < 0.0 || percentile > 1.0) |
| 944 | + throw new IllegalArgumentException("Percentile must be between 0.0 and 1.0"); |
| 945 | + |
| 946 | + if (percentile == 0.0) |
| 947 | + return minAllBy(function, comparator); |
| 948 | + else if (percentile == 1.0) |
| 949 | + return maxAllBy(function, comparator); |
| 950 | + else |
| 951 | + return percentileCollector( |
| 952 | + function, |
| 953 | + comparator, |
| 954 | + Seq::empty, |
| 955 | + Seq::of, |
926 | 956 | l -> {
|
927 |
| - int size = l.size(); |
| 957 | + int left = 0; |
| 958 | + int right = percentileIndex(percentile, l.size()); |
| 959 | + int mid; |
| 960 | + |
| 961 | + U value = l.get(right).v2; |
928 | 962 |
|
929 |
| - if (size == 0) |
930 |
| - return Optional.empty(); |
931 |
| - else if (size == 1) |
932 |
| - return Optional.of(l.get(0).v1); |
| 963 | + // Binary-search for the first element whose value equals the value at the percentile index |
| 964 | + while (left < right) { |
| 965 | + mid = left + (right - left) / 2; |
933 | 966 |
|
934 |
| - l.sort(Comparator.comparing(t -> t.v2, comparator)); |
| 967 | + if (comparator.compare(l.get(mid).v2, value) < 0) |
| 968 | + left = mid + 1; |
| 969 | + else |
| 970 | + right = mid; |
| 971 | + } |
935 | 972 |
|
936 |
| - // x.5 should be rounded down |
937 |
| - return Optional.of(l.get((int) -Math.round(-(size * percentile + 0.5)) - 1).v1); |
| 973 | + // We could use Seq.seq(l).skip(left).filter(<comparison>) to prevent the additional allocation |
| 974 | + // but it is very likely that l is quite large and result is quite small. So let's not keep an |
| 975 | + // unnecessary reference to a possibly large list. |
| 976 | + List<T> result = new ArrayList<>(); |
| 977 | + |
| 978 | + for (; left < l.size() && comparator.compare(l.get(left).v2, value) == 0; left++) |
| 979 | + result.add(l.get(left).v1); |
| 980 | + |
| 981 | + return Seq.seq(result); |
938 | 982 | }
|
939 | 983 | );
|
940 | 984 | }
|
941 | 985 |
|
| 986 | + private static <T, U, R> Collector<T, List<Tuple2<T, U>>, R> percentileCollector( |
| 987 | + Function<? super T, ? extends U> function, |
| 988 | + Comparator<? super U> comparator, |
| 989 | + Supplier<R> onEmpty, |
| 990 | + Function<T, R> onSingle, |
| 991 | + Function<List<Tuple2<T, U>>, R> finisher |
| 992 | + ) { |
| 993 | + return Collector.of( |
| 994 | + () -> new ArrayList<>(), |
| 995 | + (l, v) -> l.add(tuple(v, function.apply(v))), |
| 996 | + (l1, l2) -> { |
| 997 | + l1.addAll(l2); |
| 998 | + return l1; |
| 999 | + }, |
| 1000 | + l -> { |
| 1001 | + int size = l.size(); |
| 1002 | + |
| 1003 | + if (size == 0) |
| 1004 | + return onEmpty.get(); |
| 1005 | + else if (size == 1) |
| 1006 | + return onSingle.apply(l.get(0).v1); |
| 1007 | + |
| 1008 | + l.sort(Comparator.comparing(t -> t.v2, comparator)); |
| 1009 | + return finisher.apply(l); |
| 1010 | + } |
| 1011 | + ); |
| 1012 | + } |
| 1013 | + |
| 1014 | + private static int percentileIndex(double percentile, int size) { |
| 1015 | + |
| 1016 | + // x.5 should be rounded down |
| 1017 | + return (int) -Math.round(-(size * percentile + 0.5)) - 1; |
| 1018 | + } |
| 1019 | + |
| 1020 | + /** |
| 1021 | + * CS304 Issue link: https://github.com/jOOQ/jOOL/issues/221 |
| 1022 | + * Get a {@link Collector} that calculates the <code>MEDIAN()</code> function given natural ordering, producing multiple results. |
| 1023 | + */ |
| 1024 | + public static <T extends Comparable<? super T>> Collector<T, ?, Seq<T>> medianAll() { |
| 1025 | + return medianAllBy(t -> t, naturalOrder()); |
| 1026 | + } |
| 1027 | + |
| 1028 | + /** |
| 1029 | + * CS304 Issue link: https://github.com/jOOQ/jOOL/issues/221 |
| 1030 | + * Get a {@link Collector} that calculates the <code>MEDIAN()</code> function given a specific ordering, producing multiple results. |
| 1031 | + */ |
| 1032 | + public static <T> Collector<T, ?, Seq<T>> medianAll(Comparator<? super T> comparator) { |
| 1033 | + return medianAllBy(t -> t, comparator); |
| 1034 | + } |
| 1035 | + |
942 | 1036 | /**
|
943 |
| - * Get a {@link Collector} that calculates the common prefix of a set of strings. |
| 1037 | + * CS304 Issue link: https://github.com/jOOQ/jOOL/issues/221 |
| 1038 | + * Get a {@link Collector} that calculates the derived <code>MEDIAN()</code> function given natural ordering, producing multiple results. |
944 | 1039 | */
|
| 1040 | + public static <T, U extends Comparable<? super U>> Collector<T, ?, Seq<T>> medianAllBy(Function<? super T, ? extends U> function) { |
| 1041 | + return medianAllBy(function, naturalOrder()); |
| 1042 | + } |
| 1043 | + |
| 1044 | + /** |
| 1045 | + * CS304 Issue link: https://github.com/jOOQ/jOOL/issues/221 |
| 1046 | + * Get a {@link Collector} that calculates the derived <code>MEDIAN()</code> function given a specific ordering, producing multiple results. |
| 1047 | + */ |
| 1048 | + public static <T, U> Collector<T, ?, Seq<T>> medianAllBy(Function<? super T, ? extends U> function, Comparator<? super U> comparator) { |
| 1049 | + return percentileAllBy(0.5, function, comparator); |
| 1050 | + } |
| 1051 | + |
| 1052 | + /** |
| 1053 | + * Get a {@link Collector} that calculates the common prefix of a set of strings. |
| 1054 | + */ |
945 | 1055 | public static Collector<CharSequence, ?, String> commonPrefix() {
|
946 | 1056 | return Collectors.collectingAndThen(
|
947 | 1057 | Collectors.reducing((CharSequence s1, CharSequence s2) -> {
|
|
0 commit comments