Skip to content

Commit 1bdd69b

Browse files
committed
[jOOQ#221] Add Agg.medianAll(), Agg.percentileAll() to return a Seq of multiple values
A few fixes from the suggested PR: - Removed unnecessary comments - Removed unnecessary covariance in Tuple2<T, U> - Added static imports where appropriate - Factored out common Collector logic between percentileBy() and percentileAllBy() - percentileCollector() builds a Collector with the common supplier/accumulator/combiner logic; it also handles the empty and single-element cases and the sorting - percentileIndex() does the percentile-to-ArrayList-index calculation
1 parent d41bd54 commit 1bdd69b

File tree

2 files changed

+107
-143
lines changed

2 files changed

+107
-143
lines changed

jOOL-java-8/src/main/java/org/jooq/lambda/Agg.java

Lines changed: 60 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -19,19 +19,9 @@
1919
import static java.util.stream.Collectors.collectingAndThen;
2020
import static org.jooq.lambda.tuple.Tuple.tuple;
2121

22-
import java.util.ArrayList;
23-
import java.util.Comparator;
24-
import java.util.HashSet;
25-
import java.util.LinkedHashMap;
26-
import java.util.List;
22+
import java.util.*;
2723
import java.util.Map.Entry;
28-
import java.util.Objects;
29-
import java.util.Optional;
30-
import java.util.TreeSet;
31-
import java.util.function.Function;
32-
import java.util.function.Predicate;
33-
import java.util.function.ToIntFunction;
34-
import java.util.function.ToLongFunction;
24+
import java.util.function.*;
3525
import java.util.stream.Collector;
3626
import java.util.stream.Collectors;
3727

@@ -916,107 +906,117 @@ else if (l1[0] == null)
916906
else if (percentile == 1.0)
917907
return collectingAndThen(maxAllBy(function, comparator), s -> s.findLast());
918908
else
919-
return Collector.of(
920-
() -> new ArrayList<Tuple2<T, U>>(),
921-
(l, v) -> l.add(tuple(v, function.apply(v))),
922-
(l1, l2) -> {
923-
l1.addAll(l2);
924-
return l1;
925-
},
926-
l -> {
927-
int size = l.size();
928-
929-
if (size == 0)
930-
return Optional.empty();
931-
else if (size == 1)
932-
return Optional.of(l.get(0).v1);
933-
934-
l.sort(Comparator.comparing(t -> t.v2, comparator));
935-
936-
// x.5 should be rounded down
937-
return Optional.of(l.get((int) -Math.round(-(size * percentile + 0.5)) - 1).v1);
938-
}
909+
return percentileCollector(
910+
function,
911+
comparator,
912+
Optional::empty,
913+
Optional::of,
914+
l -> Optional.of(l.get(percentileIndex(percentile, l.size())).v1)
939915
);
940916
}
941917

942918
/**
943-
* CS304 Issue link: https://github.com/jOOQ/jOOL/issues/221
944-
* Get a {@link Collector} that calculates the derived <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering, producing multiple results.
919+
* Get a {@link Collector} that calculates the <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering, producing multiple results.
945920
*/
946921
public static <T extends Comparable<? super T>> Collector<T, ?, Seq<T>> percentileAll(double percentile) {
947922
return percentileAllBy(percentile, t -> t, naturalOrder());
948923
}
949924

950925
/**
951-
* CS304 Issue link: https://github.com/jOOQ/jOOL/issues/221
952-
* Get a {@link Collector} that calculates the derived <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering, producing multiple results.
926+
* Get a {@link Collector} that calculates the <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering, producing multiple results.
953927
*/
954928
public static <T> Collector<T, ?, Seq<T>> percentileAll(double percentile, Comparator<? super T> comparator) {
955929
return percentileAllBy(percentile, t -> t, comparator);
956930
}
957931

958932
/**
959-
* CS304 Issue link: https://github.com/jOOQ/jOOL/issues/221
960933
* Get a {@link Collector} that calculates the derived <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering, producing multiple results.
961934
*/
962935
public static <T, U extends Comparable<? super U>> Collector<T, ?, Seq<T>> percentileAllBy(double percentile, Function<? super T, ? extends U> function) {
963936
return percentileAllBy(percentile, function, naturalOrder());
964937
}
965938

966939
/**
967-
* CS304 Issue link: https://github.com/jOOQ/jOOL/issues/221
968940
* Get a {@link Collector} that calculates the derived <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering, producing multiple results.
969941
*/
970942
public static <T, U> Collector<T, ?, Seq<T>> percentileAllBy(double percentile, Function<? super T, ? extends U> function, Comparator<? super U> comparator) {
971943
if (percentile < 0.0 || percentile > 1.0)
972944
throw new IllegalArgumentException("Percentile must be between 0.0 and 1.0");
973945

974946
if (percentile == 0.0)
975-
// If percentile is 0, this is the same as taking the items with the minimum value.
976947
return minAllBy(function, comparator);
977948
else if (percentile == 1.0)
978-
// If percentile is 1, this is the same as taking the items with the maximum value.
979949
return maxAllBy(function, comparator);
980-
981-
return Collector.of(
982-
() -> new ArrayList<Tuple2<? extends T, U>>(),
983-
(l, v) -> l.add(tuple(v, function.apply(v))),
984-
(l1, l2) -> {
985-
l1.addAll(l2);
986-
return l1;
987-
},
950+
else
951+
return percentileCollector(
952+
function,
953+
comparator,
954+
Seq::empty,
955+
Seq::of,
988956
l -> {
989-
int size = l.size();
990-
991-
if (size == 0)
992-
return Seq.empty();
993-
else if (size == 1)
994-
return Seq.of(l.get(0).v1);
995-
996-
l.sort(Comparator.comparing(t -> t.v2, comparator));
997-
998957
int left = 0;
999-
int right = (int) -Math.round(-(size * percentile + 0.5)) - 1;
958+
int right = percentileIndex(percentile, l.size());
1000959
int mid;
1001960

1002-
List<T> result = new ArrayList<>();
1003961
U value = l.get(right).v2;
962+
1004963
// Search the first value equal to the value at the position of PERCENTILE by binary search
1005964
while (left < right) {
1006965
mid = left + (right - left) / 2;
966+
1007967
if (comparator.compare(l.get(mid).v2, value) < 0)
1008968
left = mid + 1;
1009969
else
1010970
right = mid;
1011971
}
1012972

1013-
for (; left < size && comparator.compare(l.get(left).v2, value) == 0; left++)
973+
// We could use Seq.seq(l).skip(left).filter(<comparison>) to prevent the additional allocation
974+
// but it is very likely that l is quite large and result is quite small. So let's not keep an
975+
// unnecessary reference to a possibly large list.
976+
List<T> result = new ArrayList<>();
977+
978+
for (; left < l.size() && comparator.compare(l.get(left).v2, value) == 0; left++)
1014979
result.add(l.get(left).v1);
980+
1015981
return Seq.seq(result);
1016982
}
983+
);
984+
}
985+
986+
private static <T, U, R> Collector<T, List<Tuple2<T, U>>, R> percentileCollector(
987+
Function<? super T, ? extends U> function,
988+
Comparator<? super U> comparator,
989+
Supplier<R> onEmpty,
990+
Function<T, R> onSingle,
991+
Function<List<Tuple2<T, U>>, R> finisher
992+
) {
993+
return Collector.of(
994+
() -> new ArrayList<>(),
995+
(l, v) -> l.add(tuple(v, function.apply(v))),
996+
(l1, l2) -> {
997+
l1.addAll(l2);
998+
return l1;
999+
},
1000+
l -> {
1001+
int size = l.size();
1002+
1003+
if (size == 0)
1004+
return onEmpty.get();
1005+
else if (size == 1)
1006+
return onSingle.apply(l.get(0).v1);
1007+
1008+
l.sort(Comparator.comparing(t -> t.v2, comparator));
1009+
return finisher.apply(l);
1010+
}
10171011
);
10181012
}
10191013

1014+
private static int percentileIndex(double percentile, int size) {
1015+
1016+
// x.5 should be rounded down
1017+
return (int) -Math.round(-(size * percentile + 0.5)) - 1;
1018+
}
1019+
10201020
/**
10211021
* CS304 Issue link: https://github.com/jOOQ/jOOL/issues/221
10221022
* Get a {@link Collector} that calculates the derived <code>PERCENTILE_DISC(percentile)</code> function given a specific ordering, producing multiple results.

0 commit comments

Comments
 (0)