Skip to content

Commit d41bd54

Browse files
committed
[jOOQ#221] Add Agg.medianAll(), Agg.percentileAll() to return a Seq of multiple values
A few fixes from the suggested PR:
- Removed unnecessary comments
- Removed unnecessary covariance in Tuple2<T, U>
- Added static imports where appropriate
- Factored out the common Collector logic shared by percentileBy() and percentileAllBy():
  - percentileCollector() is a Collector that provides the common supplier/accumulator/combiner logic, handles the empty and single-element cases, and sorts the collected list
  - percentileIndex() performs the percentile-to-ArrayList-index calculation
1 parent 2a232d9 commit d41bd54

File tree

2 files changed

+330
-49
lines changed

2 files changed

+330
-49
lines changed

jOOL/src/main/java/org/jooq/lambda/Agg.java

Lines changed: 138 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,9 @@
1919
import static java.util.stream.Collectors.collectingAndThen;
2020
import static org.jooq.lambda.tuple.Tuple.tuple;
2121

22-
import java.util.ArrayList;
23-
import java.util.Comparator;
24-
import java.util.HashSet;
25-
import java.util.LinkedHashMap;
26-
import java.util.List;
22+
import java.util.*;
2723
import java.util.Map.Entry;
28-
import java.util.Objects;
29-
import java.util.Optional;
30-
import java.util.TreeSet;
31-
import java.util.function.Function;
32-
import java.util.function.Predicate;
33-
import java.util.function.ToIntFunction;
34-
import java.util.function.ToLongFunction;
24+
import java.util.function.*;
3525
import java.util.stream.Collector;
3626
import java.util.stream.Collectors;
3727

@@ -916,32 +906,152 @@ else if (l1[0] == null)
916906
else if (percentile == 1.0)
917907
return collectingAndThen(maxAllBy(function, comparator), s -> s.findLast());
918908
else
919-
return Collector.of(
920-
() -> new ArrayList<Tuple2<T, U>>(),
921-
(l, v) -> l.add(tuple(v, function.apply(v))),
922-
(l1, l2) -> {
923-
l1.addAll(l2);
924-
return l1;
925-
},
909+
return percentileCollector(
910+
function,
911+
comparator,
912+
Optional::empty,
913+
Optional::of,
914+
l -> Optional.of(l.get(percentileIndex(percentile, l.size())).v1)
915+
);
916+
}
917+
918+
/**
919+
* Get a {@link Collector} that calculates the <code>PERCENTILE_DISC(percentile)</code> function given natural ordering, producing multiple results.
920+
*/
921+
public static <T extends Comparable<? super T>> Collector<T, ?, Seq<T>> percentileAll(double percentile) {
922+
return percentileAllBy(percentile, t -> t, naturalOrder());
923+
}
924+
925+
/**
926+
* Get a {@link Collector} that calculates the <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering, producing multiple results.
927+
*/
928+
public static <T> Collector<T, ?, Seq<T>> percentileAll(double percentile, Comparator<? super T> comparator) {
929+
return percentileAllBy(percentile, t -> t, comparator);
930+
}
931+
932+
/**
933+
* Get a {@link Collector} that calculates the derived <code>PERCENTILE_DISC(percentile)</code> function given natural ordering, producing multiple results.
934+
*/
935+
public static <T, U extends Comparable<? super U>> Collector<T, ?, Seq<T>> percentileAllBy(double percentile, Function<? super T, ? extends U> function) {
936+
return percentileAllBy(percentile, function, naturalOrder());
937+
}
938+
939+
/**
940+
* Get a {@link Collector} that calculates the derived <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering, producing multiple results.
941+
*/
942+
public static <T, U> Collector<T, ?, Seq<T>> percentileAllBy(double percentile, Function<? super T, ? extends U> function, Comparator<? super U> comparator) {
943+
if (percentile < 0.0 || percentile > 1.0)
944+
throw new IllegalArgumentException("Percentile must be between 0.0 and 1.0");
945+
946+
if (percentile == 0.0)
947+
return minAllBy(function, comparator);
948+
else if (percentile == 1.0)
949+
return maxAllBy(function, comparator);
950+
else
951+
return percentileCollector(
952+
function,
953+
comparator,
954+
Seq::empty,
955+
Seq::of,
926956
l -> {
927-
int size = l.size();
957+
int left = 0;
958+
int right = percentileIndex(percentile, l.size());
959+
int mid;
960+
961+
U value = l.get(right).v2;
928962

929-
if (size == 0)
930-
return Optional.empty();
931-
else if (size == 1)
932-
return Optional.of(l.get(0).v1);
963+
// Search the first value equal to the value at the position of PERCENTILE by binary search
964+
while (left < right) {
965+
mid = left + (right - left) / 2;
933966

934-
l.sort(Comparator.comparing(t -> t.v2, comparator));
967+
if (comparator.compare(l.get(mid).v2, value) < 0)
968+
left = mid + 1;
969+
else
970+
right = mid;
971+
}
935972

936-
// x.5 should be rounded down
937-
return Optional.of(l.get((int) -Math.round(-(size * percentile + 0.5)) - 1).v1);
973+
// We could use Seq.seq(l).skip(left).filter(<comparison>) to prevent the additional allocation
974+
// but it is very likely that l is quite large and result is quite small. So let's not keep an
975+
// unnecessary reference to a possibly large list.
976+
List<T> result = new ArrayList<>();
977+
978+
for (; left < l.size() && comparator.compare(l.get(left).v2, value) == 0; left++)
979+
result.add(l.get(left).v1);
980+
981+
return Seq.seq(result);
938982
}
939983
);
940984
}
941985

986+
private static <T, U, R> Collector<T, List<Tuple2<T, U>>, R> percentileCollector(
987+
Function<? super T, ? extends U> function,
988+
Comparator<? super U> comparator,
989+
Supplier<R> onEmpty,
990+
Function<T, R> onSingle,
991+
Function<List<Tuple2<T, U>>, R> finisher
992+
) {
993+
return Collector.of(
994+
() -> new ArrayList<>(),
995+
(l, v) -> l.add(tuple(v, function.apply(v))),
996+
(l1, l2) -> {
997+
l1.addAll(l2);
998+
return l1;
999+
},
1000+
l -> {
1001+
int size = l.size();
1002+
1003+
if (size == 0)
1004+
return onEmpty.get();
1005+
else if (size == 1)
1006+
return onSingle.apply(l.get(0).v1);
1007+
1008+
l.sort(Comparator.comparing(t -> t.v2, comparator));
1009+
return finisher.apply(l);
1010+
}
1011+
);
1012+
}
1013+
1014+
private static int percentileIndex(double percentile, int size) {
1015+
1016+
// x.5 should be rounded down
1017+
return (int) -Math.round(-(size * percentile + 0.5)) - 1;
1018+
}
1019+
1020+
/**
1021+
* CS304 Issue link: https://github.com/jOOQ/jOOL/issues/221
1022+
* Get a {@link Collector} that calculates the <code>MEDIAN()</code> function given natural ordering, producing multiple results.
1023+
*/
1024+
public static <T extends Comparable<? super T>> Collector<T, ?, Seq<T>> medianAll() {
1025+
return medianAllBy(t -> t, naturalOrder());
1026+
}
1027+
1028+
/**
1029+
* CS304 Issue link: https://github.com/jOOQ/jOOL/issues/221
1030+
* Get a {@link Collector} that calculates the <code>MEDIAN()</code> function given a specific ordering, producing multiple results.
1031+
*/
1032+
public static <T> Collector<T, ?, Seq<T>> medianAll(Comparator<? super T> comparator) {
1033+
return medianAllBy(t -> t, comparator);
1034+
}
1035+
9421036
/**
943-
* Get a {@link Collector} that calculates the common prefix of a set of strings.
1037+
* CS304 Issue link: https://github.com/jOOQ/jOOL/issues/221
1038+
* Get a {@link Collector} that calculates the derived <code>MEDIAN()</code> function given natural ordering, producing multiple results.
9441039
*/
1040+
public static <T, U extends Comparable<? super U>> Collector<T, ?, Seq<T>> medianAllBy(Function<? super T, ? extends U> function) {
1041+
return medianAllBy(function, naturalOrder());
1042+
}
1043+
1044+
/**
1045+
* CS304 Issue link: https://github.com/jOOQ/jOOL/issues/221
1046+
* Get a {@link Collector} that calculates the derived <code>MEDIAN()</code> function given a specific ordering, producing multiple results.
1047+
*/
1048+
public static <T, U> Collector<T, ?, Seq<T>> medianAllBy(Function<? super T, ? extends U> function, Comparator<? super U> comparator) {
1049+
return percentileAllBy(0.5, function, comparator);
1050+
}
1051+
1052+
/**
1053+
* Get a {@link Collector} that calculates the common prefix of a set of strings.
1054+
*/
9451055
public static Collector<CharSequence, ?, String> commonPrefix() {
9461056
return Collectors.collectingAndThen(
9471057
Collectors.reducing((CharSequence s1, CharSequence s2) -> {

0 commit comments

Comments
 (0)