Skip to content

Commit 0052689

Browse files
committed
[jOOQ#151] Add support for linear regression functions
- Added unit tests
- Implemented NULL semantics in collectors supporting Double wrappers
- Formatting and Javadoc fixes
- REGR_AVGX() and REGR_AVGY() should return Optional<Double>
- covariance(x, x) is really variance(x)
- Fixed some x/y mixups
- Use REGR_COUNT(), not COUNT(), where appropriate
- Nicer Optional usage via Optional::map, where appropriate (still room for improvement)
- Fixed REGR_R2(): it returns NULL (Optional.empty()) when VAR_POP(y) = 0.0
1 parent ad61f79 commit 0052689

File tree

4 files changed

+593
-131
lines changed

4 files changed

+593
-131
lines changed

jOOL-java-8/src/main/java/org/jooq/lambda/Agg.java

Lines changed: 140 additions & 126 deletions
Original file line numberDiff line numberDiff line change
@@ -1077,7 +1077,7 @@ private static <T> double variance0(List<T> l, ToDoubleFunction<? super T> funct
10771077
* Get a {@link Collector} that calculates the <code>COVAR_POP()</code> function.
10781078
*/
10791079
public static Collector<Tuple2<Double, Double>, ?, Optional<Double>> covarianceDouble() {
1080-
return covarianceDouble(t -> t.v1, t -> t.v2);
1080+
return filter(t -> t.v1 != null && t.v2 != null, covarianceDouble(t -> t.v1, t -> t.v2));
10811081
}
10821082

10831083
/**
@@ -1100,231 +1100,245 @@ else if (l.size() == 1)
11001100
* Get a {@link Collector} that calculates the <code>CORR()</code> function.
11011101
*/
11021102
public static Collector<Tuple2<Double, Double>, ?, Optional<Double>> correlationDouble() {
1103-
return correlationDouble(t -> t.v1, t -> t.v2);
1103+
return filter(t -> t.v1 != null && t.v2 != null, correlationDouble(t -> t.v1, t -> t.v2));
11041104
}
11051105

11061106
/**
11071107
* Get a {@link Collector} that calculates the <code>CORR()</code> function.
11081108
*/
11091109
public static <T> Collector<T, ?, Optional<Double>> correlationDouble(ToDoubleFunction<? super T> functionX, ToDoubleFunction<? super T> functionY) {
11101110
return collectingAndThen(
1111-
collectors(covarianceDouble(functionX, functionY),
1112-
stddevDouble(functionX),
1113-
stddevDouble(functionY)),
1111+
collectors(
1112+
covarianceDouble(functionX, functionY),
1113+
stddevDouble(functionX),
1114+
stddevDouble(functionY)
1115+
),
11141116
t -> !t.v1.isPresent() || t.v2.orElse(0.0) == 0.0 || t.v3.orElse(0.0) == 0.0 ? Optional.empty() : Optional.of(t.v1.get() / (t.v2.get() * t.v3.get()))
11151117
);
11161118
}
11171119

1118-
private static <T> double avg0(List<T> list, ToDoubleFunction<? super T> function) {
1119-
double result = 0.0;
1120-
1121-
for (T t : list)
1122-
result += function.applyAsDouble(t);
1123-
1124-
return result / list.size();
1125-
}
1126-
11271120
/**
1128-
* Get a {@link Collector} that calculates the common prefix of a set of strings.
1129-
*/
1130-
public static Collector<CharSequence, ?, String> commonPrefix() {
1131-
return collectingAndThen(
1132-
reducing((CharSequence s1, CharSequence s2) -> {
1133-
if (s1 == null || s2 == null)
1134-
return "";
1135-
1136-
int l = Math.min(s1.length(), s2.length());
1137-
int i;
1138-
for (i = 0; i < l && s1.charAt(i) == s2.charAt(i); i++);
1139-
1140-
return s1.subSequence(0, i);
1141-
}),
1142-
s -> s.map(Objects::toString).orElse("")
1143-
);
1144-
}
1145-
1146-
/**
1147-
* Get a {@link Collector} that calculates the common suffix of a set of strings.
1121+
* Get a {@link Collector} that calculates the <code>REGR_COUNT()</code> function.
11481122
*/
1149-
public static Collector<CharSequence, ?, String> commonSuffix() {
1150-
return collectingAndThen(
1151-
reducing((CharSequence s1, CharSequence s2) -> {
1152-
if (s1 == null || s2 == null)
1153-
return "";
1154-
1155-
int l1 = s1.length();
1156-
int l2 = s2.length();
1157-
int l = Math.min(l1, l2);
1158-
int i;
1159-
for (i = 0; i < l && s1.charAt(l1 - i - 1) == s2.charAt(l2 - i - 1); i++);
1160-
1161-
return s1.subSequence(l1 - i, l1);
1162-
}),
1163-
s -> s.map(Objects::toString).orElse("")
1164-
);
1123+
public static Collector<Tuple2<Double, Double>, ?, Long> regrCount() {
1124+
return filter(t -> t.v1 != null && t.v2 != null, regrCount(t -> t.v1, t -> t.v2));
11651125
}
11661126

11671127
/**
1168-
* Realize of linear regression functions.
1128+
* Get a {@link Collector} that calculates the <code>REGR_COUNT()</code> function.
11691129
*/
1130+
public static <T> Collector<T, ?, Long> regrCount(ToDoubleFunction<? super T> functionX, ToDoubleFunction<? super T> functionY) {
1131+
return count();
1132+
}
11701133

11711134
/**
1172-
* Get a {@link Collector} that calculates the <code>regrCountLong()</code> function.
1135+
* Get a {@link Collector} that calculates the <code>REGR_AVGX()</code> function.
11731136
*/
1174-
public static Collector<Tuple2<Double,Double>, ?, Optional<Double>> regrCountLong() {
1175-
return regrCountLong(t -> t.v1, t -> t.v2);
1137+
public static Collector<Tuple2<Double, Double>, ?, Optional<Double>> regrAvgXDouble() {
1138+
return filter(t -> t.v1 != null && t.v2 != null, regrAvgXDouble(t -> t.v1, t -> t.v2));
11761139
}
11771140

11781141
/**
1179-
* Get a {@link Collector} that calculates the <code>regrCountLong()</code> function.
1142+
* Get a {@link Collector} that calculates the <code>REGR_AVGX()</code> function.
11801143
*/
1181-
public static <T> Collector<T, ?, Optional<Double>> regrCountLong(ToDoubleFunction<? super T> functionX, ToDoubleFunction<? super T> functionY) {
1182-
return collectingAndThen(toList(), l -> {
1183-
if (l.isEmpty())
1184-
return Optional.empty();
1185-
long result = 0;
1186-
for (T t : l)
1187-
result += t==null ? 0:1;
1188-
return Optional.of((double)result);
1189-
});
1144+
public static <T> Collector<T, ?, Optional<Double>> regrAvgXDouble(ToDoubleFunction<? super T> functionX, ToDoubleFunction<? super T> functionY) {
1145+
return collectingAndThen(
1146+
collectors(summingDouble(functionY), count()),
1147+
t -> t.v2 == 0 ? Optional.empty() : Optional.of(t.v1 / t.v2)
1148+
);
11901149
}
11911150

11921151
/**
1193-
* Get a {@link Collector} that calculates the <code>regrAvgXDouble()</code> function.
1152+
* Get a {@link Collector} that calculates the <code>REGR_AVGY()</code> function.
11941153
*/
1195-
public static Collector<Tuple2<Double,Double>, ?, Double> regrAvgXDouble() {
1196-
return regrAvgXDouble(t -> t.v1);
1154+
public static Collector<Tuple2<Double, Double>, ?, Optional<Double>> regrAvgYDouble() {
1155+
return filter(t -> t.v1 != null && t.v2 != null, regrAvgYDouble(t -> t.v1, t -> t.v2));
11971156
}
11981157

11991158
/**
1200-
* Get a {@link Collector} that calculates the <code>regrAvgXDouble()</code> function.
1159+
* Get a {@link Collector} that calculates the <code>REGR_AVGY()</code> function.
12011160
*/
1202-
public static <T> Collector<T, ?, Double> regrAvgXDouble(ToDoubleFunction<? super T> functionX) {
1203-
return filter(a-> a!=null,averagingDouble(functionX));
1161+
public static <T> Collector<T, ?, Optional<Double>> regrAvgYDouble(ToDoubleFunction<? super T> functionX, ToDoubleFunction<? super T> functionY) {
1162+
return collectingAndThen(
1163+
collectors(summingDouble(functionX), count()),
1164+
t -> t.v2 == 0 ? Optional.empty() : Optional.of(t.v1 / t.v2)
1165+
);
12041166
}
12051167

12061168
/**
1207-
* Get a {@link Collector} that calculates the <code>regrAvgYDouble()</code> function.
1169+
* Get a {@link Collector} that calculates the <code>REGR_SXX()</code> function.
12081170
*/
1209-
public static Collector<Tuple2<Double,Double>, ?, Double> regrAvgYDouble() {
1210-
return regrAvgYDouble(t -> t.v2);
1171+
public static Collector<Tuple2<Double, Double>, ?, Optional<Double>> regrSxxDouble() {
1172+
return filter(t -> t.v1 != null && t.v2 != null, regrSxxDouble(t -> t.v1, t -> t.v2));
12111173
}
12121174

12131175
/**
1214-
* Get a {@link Collector} that calculates the <code>regrAvgYDouble()</code> function.
1176+
* Get a {@link Collector} that calculates the <code>REGR_SXX()</code> function.
12151177
*/
1216-
public static <T> Collector<T, ?, Double> regrAvgYDouble(ToDoubleFunction<? super T> functionY) {
1217-
return regrAvgXDouble(functionY);
1178+
public static <T> Collector<T, ?, Optional<Double>> regrSxxDouble(ToDoubleFunction<? super T> functionX, ToDoubleFunction<? super T> functionY) {
1179+
return collectingAndThen(
1180+
collectors(
1181+
regrCount(functionX, functionY),
1182+
varianceDouble(functionY)
1183+
),
1184+
t -> t.v2.map(v2 -> t.v1 * v2)
1185+
);
12181186
}
12191187

12201188
/**
1221-
* Get a {@link Collector} that calculates the <code>regrSxxDouble()</code> function.
1189+
* Get a {@link Collector} that calculates the <code>REGR_SXY()</code> function.
12221190
*/
1223-
public static Collector<Tuple2<Double,Double>, ?, Optional<Double>> regrSxxDouble() {
1224-
return regrSxxDouble(t -> t.v1, t -> t.v2);
1191+
public static Collector<Tuple2<Double,Double>, ?, Optional<Double>> regrSxyDouble() {
1192+
return filter(t -> t.v1 != null && t.v2 != null, regrSxyDouble(t -> t.v1, t -> t.v2));
12251193
}
12261194

12271195
/**
1228-
* Get a {@link Collector} that calculates the <code>regrSxxDouble()</code> function.
1196+
* Get a {@link Collector} that calculates the <code>REGR_SXY()</code> function.
12291197
*/
1230-
public static <T> Collector<T, ?, Optional<Double>> regrSxxDouble(ToDoubleFunction<? super T> functionX, ToDoubleFunction<? super T> functionY) {
1198+
public static <T> Collector<T, ?, Optional<Double>> regrSxyDouble(ToDoubleFunction<? super T> functionX, ToDoubleFunction<? super T> functionY) {
12311199
return collectingAndThen(
1232-
collectors(covarianceDouble(functionX, functionX),
1233-
count()),
1234-
t -> !t.v1.isPresent() || t.v2==0 ? Optional.empty() : Optional.of(t.v1.get() * t.v2 )
1200+
collectors(
1201+
regrCount(functionX, functionY),
1202+
covarianceDouble(functionX, functionY)
1203+
),
1204+
t -> t.v2.map(v2 -> t.v1 * v2)
12351205
);
12361206
}
12371207

12381208
/**
1239-
* Get a {@link Collector} that calculates the <code>regrSyyDouble()</code> function.
1209+
* Get a {@link Collector} that calculates the <code>REGR_SYY()</code> function.
12401210
*/
12411211
public static Collector<Tuple2<Double,Double>, ?, Optional<Double>> regrSyyDouble() {
1242-
return regrSyyDouble(t -> t.v1, t -> t.v2);
1212+
return filter(t -> t.v1 != null && t.v2 != null, regrSyyDouble(t -> t.v1, t -> t.v2));
12431213
}
12441214

12451215
/**
1246-
* Get a {@link Collector} that calculates the <code>regrSyyDouble()</code> function.
1216+
* Get a {@link Collector} that calculates the <code>REGR_SYY()</code> function.
12471217
*/
12481218
public static <T> Collector<T, ?, Optional<Double>> regrSyyDouble(ToDoubleFunction<? super T> functionX, ToDoubleFunction<? super T> functionY) {
12491219
return collectingAndThen(
1250-
collectors(covarianceDouble(functionY, functionY),
1251-
count()),
1252-
t -> !t.v1.isPresent() || t.v2==0 ? Optional.empty() : Optional.of(t.v1.get() * t.v2 )
1220+
collectors(
1221+
regrCount(functionX, functionY),
1222+
varianceDouble(functionX)
1223+
),
1224+
t -> t.v2.map(v2 -> t.v1 * v2)
12531225
);
12541226
}
12551227

12561228
/**
1257-
* Get a {@link Collector} that calculates the <code>regrSxyDouble()</code> function.
1229+
* Get a {@link Collector} that calculates the <code>REGR_SLOPE()</code> function.
12581230
*/
1259-
public static Collector<Tuple2<Double,Double>, ?, Optional<Double>> regrSxyDouble() {
1260-
return regrSxyDouble(t -> t.v1, t -> t.v2);
1231+
public static Collector<Tuple2<Double, Double>, ?, Optional<Double>> regrSlopeDouble() {
1232+
return filter(t -> t.v1 != null && t.v2 != null, regrSlopeDouble(t -> t.v1, t -> t.v2));
12611233
}
12621234

12631235
/**
1264-
* Get a {@link Collector} that calculates the <code>regrSxyDouble()</code> function.
1236+
* Get a {@link Collector} that calculates the <code>REGR_SLOPE()</code> function.
12651237
*/
1266-
public static <T> Collector<T, ?, Optional<Double>> regrSxyDouble(ToDoubleFunction<? super T> functionX, ToDoubleFunction<? super T> functionY) {
1238+
public static <T> Collector<T, ?, Optional<Double>> regrSlopeDouble(ToDoubleFunction<? super T> functionX, ToDoubleFunction<? super T> functionY) {
12671239
return collectingAndThen(
1268-
collectors(covarianceDouble(functionX, functionY),
1269-
count()),
1270-
t -> !t.v1.isPresent() || t.v2==0 ? Optional.empty() : Optional.of(t.v1.get() * t.v2 )
1240+
collectors(
1241+
covarianceDouble(functionX, functionY),
1242+
varianceDouble(functionY)
1243+
),
1244+
t -> !t.v1.isPresent() || t.v2.orElse(0.0) == 0.0 ? Optional.empty() : Optional.of(t.v1.get() / t.v2.get())
12711245
);
12721246
}
12731247

12741248
/**
1275-
* Get a {@link Collector} that calculates the <code>regrSlopeDouble()</code> function.
1249+
* Get a {@link Collector} that calculates the <code>REGR_INTERCEPT()</code> function.
12761250
*/
1277-
public static Collector<Tuple2<Double,Double>, ?, Optional<Double>> regrSlopeDouble() {
1278-
return regrSlopeDouble(t -> t.v1, t -> t.v2);
1251+
public static Collector<Tuple2<Double, Double>, ?, Optional<Double>> regrInterceptDouble() {
1252+
return filter(t -> t.v1 != null && t.v2 != null, regrInterceptDouble(t -> t.v1, t -> t.v2));
12791253
}
12801254

12811255
/**
1282-
* Get a {@link Collector} that calculates the <code>regrSlopeDouble()</code> function.
1256+
* Get a {@link Collector} that calculates the <code>REGR_INTERCEPT()</code> function.
12831257
*/
1284-
public static <T> Collector<T, ?, Optional<Double>> regrSlopeDouble(ToDoubleFunction<? super T> functionX, ToDoubleFunction<? super T> functionY) {
1258+
public static <T> Collector<T, ?, Optional<Double>> regrInterceptDouble(ToDoubleFunction<? super T> functionX, ToDoubleFunction<? super T> functionY) {
12851259
return collectingAndThen(
1286-
collectors(covarianceDouble(functionX, functionY),
1287-
varianceDouble(functionY)),
1288-
t -> !t.v1.isPresent() || t.v2.orElse(0.0) == 0.0 ? Optional.empty() : Optional.of(t.v1.get() / t.v2.get() )
1260+
collectors(
1261+
regrAvgYDouble(functionX, functionY),
1262+
regrSlopeDouble(functionX, functionY),
1263+
regrAvgXDouble(functionX, functionY)
1264+
),
1265+
t -> !t.v1.isPresent() || !t.v2.isPresent() || !t.v3.isPresent() ? Optional.empty() : Optional.of(t.v1.get() - t.v2.get() * t.v3.get())
12891266
);
12901267
}
12911268

12921269
/**
1293-
* Get a {@link Collector} that calculates the <code>regrInterceptDouble()</code> function.
1270+
* Get a {@link Collector} that calculates the <code>REGR_R2()</code> function.
12941271
*/
1295-
public static Collector<Tuple2<Double,Double>, ?, Optional<Double>> regrInterceptDouble() {
1296-
return regrInterceptDouble(t -> t.v1, t -> t.v2);
1272+
public static Collector<Tuple2<Double,Double>, ?, Optional<Double>> regrR2Double() {
1273+
return filter(t -> t.v1 != null && t.v2 != null, regrR2Double(t -> t.v1, t -> t.v2));
12971274
}
12981275

12991276
/**
1300-
* Get a {@link Collector} that calculates the <code>regrInterceptDouble()</code> function.
1277+
* Get a {@link Collector} that calculates the <code>REGR_R2()</code> function.
13011278
*/
1302-
public static <T> Collector<T, ?, Optional<Double>> regrInterceptDouble(ToDoubleFunction<? super T> functionX, ToDoubleFunction<? super T> functionY) {
1279+
public static <T> Collector<T, ?, Optional<Double>> regrR2Double(ToDoubleFunction<? super T> functionX, ToDoubleFunction<? super T> functionY) {
13031280
return collectingAndThen(
1304-
collectors(regrAvgXDouble(functionX),
1305-
regrSlopeDouble(functionX, functionY),
1306-
regrAvgYDouble(functionY)),
1307-
t -> t.v1==0.0 || !t.v2.isPresent() || t.v3==0.0 ? Optional.empty() : Optional.of(t.v1 - (t.v2.get() * t.v3) )
1281+
collectors(
1282+
varianceDouble(functionY),
1283+
varianceDouble(functionX),
1284+
correlationDouble(functionX, functionY)
1285+
),
1286+
t -> !t.v1.isPresent() || !t.v2.isPresent()
1287+
? Optional.empty()
1288+
: t.v1.get() == 0.0
1289+
? Optional.empty()
1290+
: t.v2.get() == 0.0
1291+
? Optional.of(1.0)
1292+
: t.v3.map(v3 -> Math.pow(v3, 2))
13081293
);
13091294
}
13101295

1296+
private static <T> double avg0(List<T> list, ToDoubleFunction<? super T> function) {
1297+
double result = 0.0;
1298+
1299+
for (T t : list)
1300+
result += function.applyAsDouble(t);
1301+
1302+
return result / list.size();
1303+
}
1304+
13111305
/**
1312-
* Get a {@link Collector} that calculates the <code>regrR2Double()</code> function.
1313-
*/
1314-
public static Collector<Tuple2<Double,Double>, ?, Optional<Double>> regrR2Double() {
1315-
return regrR2Double(t -> t.v1, t -> t.v2);
1306+
* Get a {@link Collector} that calculates the common prefix of a set of strings.
1307+
*/
1308+
public static Collector<CharSequence, ?, String> commonPrefix() {
1309+
return collectingAndThen(
1310+
reducing((CharSequence s1, CharSequence s2) -> {
1311+
if (s1 == null || s2 == null)
1312+
return "";
1313+
1314+
int l = Math.min(s1.length(), s2.length());
1315+
int i;
1316+
for (i = 0; i < l && s1.charAt(i) == s2.charAt(i); i++);
1317+
1318+
return s1.subSequence(0, i);
1319+
}),
1320+
s -> s.map(Objects::toString).orElse("")
1321+
);
13161322
}
13171323

13181324
/**
1319-
* Get a {@link Collector} that calculates the <code>regrR2Double()</code> function.
1325+
* Get a {@link Collector} that calculates the common suffix of a set of strings.
13201326
*/
1321-
public static <T> Collector<T, ?, Optional<Double>> regrR2Double(ToDoubleFunction<? super T> functionX, ToDoubleFunction<? super T> functionY) {
1327+
public static Collector<CharSequence, ?, String> commonSuffix() {
13221328
return collectingAndThen(
1323-
collectors(varianceDouble(functionX),
1324-
varianceDouble(functionY),
1325-
correlationDouble(functionX,functionY)),
1326-
t -> t.v2.get()==0.0 ? Optional.of(0.0) : (t.v1.get()!=0.0 ? Optional.of(1.0) : Optional.of(Math.pow(t.v3.get(),2)))
1329+
reducing((CharSequence s1, CharSequence s2) -> {
1330+
if (s1 == null || s2 == null)
1331+
return "";
1332+
1333+
int l1 = s1.length();
1334+
int l2 = s2.length();
1335+
int l = Math.min(l1, l2);
1336+
int i;
1337+
for (i = 0; i < l && s1.charAt(l1 - i - 1) == s2.charAt(l2 - i - 1); i++);
1338+
1339+
return s1.subSequence(l1 - i, l1);
1340+
}),
1341+
s -> s.map(Objects::toString).orElse("")
13271342
);
13281343
}
1329-
13301344
}

0 commit comments

Comments
 (0)