|
19 | 19 | import static java.util.stream.Collectors.collectingAndThen;
|
20 | 20 | import static org.jooq.lambda.tuple.Tuple.tuple;
|
21 | 21 |
|
22 |
| -import java.util.ArrayList; |
23 |
| -import java.util.Comparator; |
24 |
| -import java.util.HashSet; |
25 |
| -import java.util.LinkedHashMap; |
26 |
| -import java.util.List; |
| 22 | +import java.util.*; |
27 | 23 | import java.util.Map.Entry;
|
28 |
| -import java.util.Objects; |
29 |
| -import java.util.Optional; |
30 |
| -import java.util.TreeSet; |
31 |
| -import java.util.function.Function; |
32 |
| -import java.util.function.Predicate; |
33 |
| -import java.util.function.ToIntFunction; |
34 |
| -import java.util.function.ToLongFunction; |
| 24 | +import java.util.function.*; |
35 | 25 | import java.util.stream.Collector;
|
36 | 26 | import java.util.stream.Collectors;
|
37 | 27 |
|
@@ -916,107 +906,117 @@ else if (l1[0] == null)
|
916 | 906 | else if (percentile == 1.0)
|
917 | 907 | return collectingAndThen(maxAllBy(function, comparator), s -> s.findLast());
|
918 | 908 | else
|
919 |
| - return Collector.of( |
920 |
| - () -> new ArrayList<Tuple2<T, U>>(), |
921 |
| - (l, v) -> l.add(tuple(v, function.apply(v))), |
922 |
| - (l1, l2) -> { |
923 |
| - l1.addAll(l2); |
924 |
| - return l1; |
925 |
| - }, |
926 |
| - l -> { |
927 |
| - int size = l.size(); |
928 |
| - |
929 |
| - if (size == 0) |
930 |
| - return Optional.empty(); |
931 |
| - else if (size == 1) |
932 |
| - return Optional.of(l.get(0).v1); |
933 |
| - |
934 |
| - l.sort(Comparator.comparing(t -> t.v2, comparator)); |
935 |
| - |
936 |
| - // x.5 should be rounded down |
937 |
| - return Optional.of(l.get((int) -Math.round(-(size * percentile + 0.5)) - 1).v1); |
938 |
| - } |
| 909 | + return percentileCollector( |
| 910 | + function, |
| 911 | + comparator, |
| 912 | + Optional::empty, |
| 913 | + Optional::of, |
| 914 | + l -> Optional.of(l.get(percentileIndex(percentile, l.size())).v1) |
939 | 915 | );
|
940 | 916 | }
|
941 | 917 |
|
942 | 918 | /**
|
943 |
| - * CS304 Issue link: https://github.com/jOOQ/jOOL/issues/221 |
944 |
| - * Get a {@link Collector} that calculates the derived <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering, producing multiple results. |
| 919 | + * Get a {@link Collector} that calculates the <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering, producing multiple results. |
945 | 920 | */
|
946 | 921 | public static <T extends Comparable<? super T>> Collector<T, ?, Seq<T>> percentileAll(double percentile) {
|
947 | 922 | return percentileAllBy(percentile, t -> t, naturalOrder());
|
948 | 923 | }
|
949 | 924 |
|
950 | 925 | /**
|
951 |
| - * CS304 Issue link: https://github.com/jOOQ/jOOL/issues/221 |
952 |
| - * Get a {@link Collector} that calculates the derived <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering, producing multiple results. |
| 926 | + * Get a {@link Collector} that calculates the <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering, producing multiple results. |
953 | 927 | */
|
954 | 928 | public static <T> Collector<T, ?, Seq<T>> percentileAll(double percentile, Comparator<? super T> comparator) {
|
955 | 929 | return percentileAllBy(percentile, t -> t, comparator);
|
956 | 930 | }
|
957 | 931 |
|
958 | 932 | /**
|
959 |
| - * CS304 Issue link: https://github.com/jOOQ/jOOL/issues/221 |
960 | 933 | * Get a {@link Collector} that calculates the derived <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering, producing multiple results.
|
961 | 934 | */
|
962 | 935 | public static <T, U extends Comparable<? super U>> Collector<T, ?, Seq<T>> percentileAllBy(double percentile, Function<? super T, ? extends U> function) {
|
963 | 936 | return percentileAllBy(percentile, function, naturalOrder());
|
964 | 937 | }
|
965 | 938 |
|
966 | 939 | /**
|
967 |
| - * CS304 Issue link: https://github.com/jOOQ/jOOL/issues/221 |
968 | 940 | * Get a {@link Collector} that calculates the derived <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering, producing multiple results.
|
969 | 941 | */
|
970 | 942 | public static <T, U> Collector<T, ?, Seq<T>> percentileAllBy(double percentile, Function<? super T, ? extends U> function, Comparator<? super U> comparator) {
|
971 | 943 | if (percentile < 0.0 || percentile > 1.0)
|
972 | 944 | throw new IllegalArgumentException("Percentile must be between 0.0 and 1.0");
|
973 | 945 |
|
974 | 946 | if (percentile == 0.0)
|
975 |
| - // If percentile is 0, this is the same as taking the items with the minimum value. |
976 | 947 | return minAllBy(function, comparator);
|
977 | 948 | else if (percentile == 1.0)
|
978 |
| - // If percentile is 1, this is the same as taking the items with the maximum value. |
979 | 949 | return maxAllBy(function, comparator);
|
980 |
| - |
981 |
| - return Collector.of( |
982 |
| - () -> new ArrayList<Tuple2<? extends T, U>>(), |
983 |
| - (l, v) -> l.add(tuple(v, function.apply(v))), |
984 |
| - (l1, l2) -> { |
985 |
| - l1.addAll(l2); |
986 |
| - return l1; |
987 |
| - }, |
| 950 | + else |
| 951 | + return percentileCollector( |
| 952 | + function, |
| 953 | + comparator, |
| 954 | + Seq::empty, |
| 955 | + Seq::of, |
988 | 956 | l -> {
|
989 |
| - int size = l.size(); |
990 |
| - |
991 |
| - if (size == 0) |
992 |
| - return Seq.empty(); |
993 |
| - else if (size == 1) |
994 |
| - return Seq.of(l.get(0).v1); |
995 |
| - |
996 |
| - l.sort(Comparator.comparing(t -> t.v2, comparator)); |
997 |
| - |
998 | 957 | int left = 0;
|
999 |
| - int right = (int) -Math.round(-(size * percentile + 0.5)) - 1; |
| 958 | + int right = percentileIndex(percentile, l.size()); |
1000 | 959 | int mid;
|
1001 | 960 |
|
1002 |
| - List<T> result = new ArrayList<>(); |
1003 | 961 | U value = l.get(right).v2;
|
| 962 | + |
1004 | 963 | // Binary-search for the first position whose value compares equal to the value at the percentile index
|
1005 | 964 | while (left < right) {
|
1006 | 965 | mid = left + (right - left) / 2;
|
| 966 | + |
1007 | 967 | if (comparator.compare(l.get(mid).v2, value) < 0)
|
1008 | 968 | left = mid + 1;
|
1009 | 969 | else
|
1010 | 970 | right = mid;
|
1011 | 971 | }
|
1012 | 972 |
|
1013 |
| - for (; left < size && comparator.compare(l.get(left).v2, value) == 0; left++) |
| 973 | + // We could use Seq.seq(l).skip(left).filter(<comparison>) to prevent the additional allocation |
| 974 | + // but it is very likely that l is quite large and result is quite small. So let's not keep an |
| 975 | + // unnecessary reference to a possibly large list. |
| 976 | + List<T> result = new ArrayList<>(); |
| 977 | + |
| 978 | + for (; left < l.size() && comparator.compare(l.get(left).v2, value) == 0; left++) |
1014 | 979 | result.add(l.get(left).v1);
|
| 980 | + |
1015 | 981 | return Seq.seq(result);
|
1016 | 982 | }
|
| 983 | + ); |
| 984 | + } |
| 985 | + |
| 986 | + private static <T, U, R> Collector<T, List<Tuple2<T, U>>, R> percentileCollector( |
| 987 | + Function<? super T, ? extends U> function, |
| 988 | + Comparator<? super U> comparator, |
| 989 | + Supplier<R> onEmpty, |
| 990 | + Function<T, R> onSingle, |
| 991 | + Function<List<Tuple2<T, U>>, R> finisher |
| 992 | + ) { |
| 993 | + return Collector.of( |
| 994 | + () -> new ArrayList<>(), |
| 995 | + (l, v) -> l.add(tuple(v, function.apply(v))), |
| 996 | + (l1, l2) -> { |
| 997 | + l1.addAll(l2); |
| 998 | + return l1; |
| 999 | + }, |
| 1000 | + l -> { |
| 1001 | + int size = l.size(); |
| 1002 | + |
| 1003 | + if (size == 0) |
| 1004 | + return onEmpty.get(); |
| 1005 | + else if (size == 1) |
| 1006 | + return onSingle.apply(l.get(0).v1); |
| 1007 | + |
| 1008 | + l.sort(Comparator.comparing(t -> t.v2, comparator)); |
| 1009 | + return finisher.apply(l); |
| 1010 | + } |
1017 | 1011 | );
|
1018 | 1012 | }
|
1019 | 1013 |
|
| 1014 | + private static int percentileIndex(double percentile, int size) { |
| 1015 | + |
| 1016 | + // Zero-based index, i.e. ceil(size * percentile) - 1: Math.round rounds x.5 up, so negating before and after makes x.5 round down |
| 1017 | + return (int) -Math.round(-(size * percentile + 0.5)) - 1; |
| 1018 | + } |
| 1019 | + |
1020 | 1020 | /**
|
1021 | 1021 | * CS304 Issue link: https://github.com/jOOQ/jOOL/issues/221
|
1022 | 1022 | * Get a {@link Collector} that calculates the derived <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering, producing multiple results.
|
|
0 commit comments