Skip to content

Commit f075103

Browse files
jdunkerley authored and Frizi committed
Continuing work on Min/Max on Column (#13055)
- Fixed bug in `Table.geo_distance` which wasn't allowing expressions.
- Moved min and max for `TextStorage` into `BinaryCoalescingOperation`.
- Moved min and max for `NullStorage` into `BinaryCoalescingOperation`.
- Added `zip` to `ColumnBooleanStorageIterator` for zipping two `ColumnBooleanStorage`.
- Added `ProgressHandler`, which will send progress updates every 5,000 rows. Uses `AutoCloseable` to tidy up if interrupted.
- Updated `StorageIterators` to send progress updates.
- Added `BinaryCoalescingOperationBool`, a specialized version for `ColumnBooleanStorage`.
- Moved min and max from `BoolStorage` into `BinaryCoalescingOperation`.
- Added `BinaryCoalescingOperationNumeric`, a specialized version for numeric min and max.
- Moved min and max from numeric storage.
- Added `NumericType` interface to mark the special types.

Next step: optimise the `StorageIterators` for zipping Double/Long and add support for problem aggregation.
1 parent b687cb1 commit f075103

27 files changed

+898
-682
lines changed

distribution/lib/Standard/Geo/0.0.0-dev/src/Table_Extensions.enso

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from Standard.Base import all
22
import Standard.Base.Errors.Common.Missing_Argument
33

4+
import Standard.Table.Expression.Expression
45
import Standard.Table.Internal.Column_Naming_Helper.Column_Naming_Helper
56
import Standard.Table.Internal.Table_Ref.Table_Ref
67
import Standard.Table.Internal.Widget_Helpers
@@ -22,7 +23,7 @@ from Standard.Table import Column, Column_Ref, Table, Value_Type
2223
@long1 (Widget_Helpers.make_column_ref_by_name_selector_for_type display=..Always value_type=Value_Type.Float)
2324
@lat2 (Widget_Helpers.make_column_ref_by_name_selector_for_type display=..Always value_type=Value_Type.Float)
2425
@long2 (Widget_Helpers.make_column_ref_by_name_selector_for_type display=..Always value_type=Value_Type.Float)
25-
Table.geo_distance self lat1:(Float | Column | Column_Ref)=(_find_column_with_similar_name self "latitude" "lat" (Missing_Argument.throw "lat1")) long1:(Float | Column | Column_Ref)=(_find_column_with_similar_name self "longitude" "long" (Missing_Argument.throw "long1")) lat2:(Float | Column | Column_Ref)=(Missing_Argument.throw "lat2") long2:(Float | Column | Column_Ref)=(Missing_Argument.throw "long2") units:Distance_Units=..Meters as:Text="" -> Table =
26+
Table.geo_distance self lat1:(Float | Column | Column_Ref | Expression)=(_find_column_with_similar_name self "latitude" "lat" (Missing_Argument.throw "lat1")) long1:(Float | Column | Column_Ref | Expression)=(_find_column_with_similar_name self "longitude" "long" (Missing_Argument.throw "long1")) lat2:(Float | Column | Column_Ref | Expression)=(Missing_Argument.throw "lat2") long2:(Float | Column | Column_Ref | Expression)=(Missing_Argument.throw "long2") units:Distance_Units=..Meters as:Text="" -> Table =
2627
table_ref = (self:Table_Ref)
2728
resolved_lat1 = table_ref.resolve lat1
2829
resolved_long1 = table_ref.resolve long1

std-bits/table/src/main/java/org/enso/table/data/column/operation/BinaryCoalescingOperation.java

Lines changed: 60 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,24 @@
33
import java.time.LocalDate;
44
import java.time.LocalTime;
55
import java.time.ZonedDateTime;
6-
import java.time.temporal.Temporal;
76
import java.util.function.BiFunction;
7+
import org.enso.base.Text_Utils;
8+
import org.enso.table.data.column.builder.Builder;
9+
import org.enso.table.data.column.builder.BuilderForType;
810
import org.enso.table.data.column.operation.map.MapOperationProblemAggregator;
911
import org.enso.table.data.column.storage.ColumnStorage;
1012
import org.enso.table.data.column.storage.Storage;
1113
import org.enso.table.data.column.storage.type.DateTimeType;
1214
import org.enso.table.data.column.storage.type.DateType;
1315
import org.enso.table.data.column.storage.type.NullType;
16+
import org.enso.table.data.column.storage.type.NumericType;
1417
import org.enso.table.data.column.storage.type.StorageType;
18+
import org.enso.table.data.column.storage.type.TextType;
1519
import org.enso.table.data.column.storage.type.TimeOfDayType;
1620
import org.enso.table.data.table.Column;
1721
import org.enso.table.error.UnexpectedTypeException;
1822
import org.enso.table.problems.BlackholeProblemAggregator;
23+
import org.graalvm.polyglot.Value;
1924

2025
public class BinaryCoalescingOperation<T> implements BinaryOperation<T> {
2126
private static Column applyOperation(
@@ -25,9 +30,8 @@ private static Column applyOperation(
2530
StorageType<?> fallbackType,
2631
String name,
2732
MapOperationProblemAggregator problemBuilder,
28-
BinaryOperation<? extends Temporal> operation,
29-
Storage<?> leftStorage,
30-
String fallbackName) {
33+
BinaryOperation<?> operation,
34+
Storage<?> leftStorage) {
3135
if (right instanceof Column rightColumn) {
3236
if (operation != null) {
3337
var rightStorage = rightColumn.getStorage();
@@ -37,14 +41,14 @@ private static Column applyOperation(
3741
}
3842
return operation.apply(left, rightColumn, name);
3943
} else {
44+
// Null on left-hand side so just return the right-hand Column
45+
if (leftStorage.getType() instanceof NullType) {
46+
return new Column(name, rightColumn.getStorage());
47+
}
48+
4049
var result =
41-
leftStorage.vectorizedOrFallbackZip(
42-
fallbackName,
43-
problemBuilder,
44-
fallback,
45-
rightColumn.getStorage(),
46-
false,
47-
fallbackType);
50+
leftStorage.zip(
51+
fallback, rightColumn.getStorage(), false, leftStorage.getType(), problemBuilder);
4852
return new Column(name, result);
4953
}
5054
}
@@ -55,9 +59,15 @@ private static Column applyOperation(
5559
}
5660
return operation.apply(left, right, name);
5761
} else {
58-
var result =
59-
leftStorage.vectorizedOrFallbackBinaryMap(
60-
fallbackName, problemBuilder, fallback, right, false, leftStorage.getType());
62+
// Null on left-hand side so just return the right-hand Column
63+
if (leftStorage.getType() instanceof NullType) {
64+
int checkedSize = Builder.checkSize(leftStorage.getSize());
65+
var constantStorage =
66+
Storage.fromRepeatedItem(Value.asValue(right), checkedSize, problemBuilder);
67+
return new Column(name, constantStorage);
68+
}
69+
70+
var result = leftStorage.binaryMap(fallback, right, false, fallbackType, problemBuilder);
6171
return new Column(name, result);
6272
}
6373
}
@@ -68,6 +78,16 @@ private static Column applyOperation(
6878
new BinaryCoalescingOperation<>(DateTimeType.INSTANCE, (a, b) -> a.isBefore(b) ? a : b);
6979
private static final BinaryOperation<LocalTime> TIME_MIN =
7080
new BinaryCoalescingOperation<>(TimeOfDayType.INSTANCE, (a, b) -> a.isBefore(b) ? a : b);
81+
private static final BinaryOperation<String> TEXT_MIN =
82+
new BinaryCoalescingOperation<>(
83+
TextType.VARIABLE_LENGTH, (a, b) -> Text_Utils.compare_normalized(a, b) < 0 ? a : b) {
84+
@Override
85+
protected BuilderForType<String> makeStorageBuilder(
86+
long size, StorageType<?> leftType, StorageType<?> rightType) {
87+
return TextType.maxType(leftType, rightType)
88+
.makeBuilder(size, BlackholeProblemAggregator.INSTANCE);
89+
}
90+
};
7191

7292
public static Column min(
7393
Column left,
@@ -82,10 +102,13 @@ public static Column min(
82102
case DateType d -> DATE_MIN;
83103
case DateTimeType dt -> DATE_TIME_MIN;
84104
case TimeOfDayType t -> TIME_MIN;
105+
case TextType t -> TEXT_MIN;
106+
case NumericType n -> BinaryCoalescingOperationNumeric.create(
107+
leftStorage.getType(), right, BinaryCoalescingOperationNumeric.MIN_OPERATION);
85108
default -> null;
86109
};
87110
return applyOperation(
88-
left, right, fallback, fallbackType, name, problemBuilder, operation, leftStorage, "min");
111+
left, right, fallback, fallbackType, name, problemBuilder, operation, leftStorage);
89112
}
90113

91114
private static final BinaryOperation<LocalDate> DATE_MAX =
@@ -94,6 +117,16 @@ public static Column min(
94117
new BinaryCoalescingOperation<>(DateTimeType.INSTANCE, (a, b) -> a.isAfter(b) ? a : b);
95118
private static final BinaryOperation<LocalTime> TIME_MAX =
96119
new BinaryCoalescingOperation<>(TimeOfDayType.INSTANCE, (a, b) -> a.isAfter(b) ? a : b);
120+
private static final BinaryOperation<String> TEXT_MAX =
121+
new BinaryCoalescingOperation<>(
122+
TextType.VARIABLE_LENGTH, (a, b) -> Text_Utils.compare_normalized(a, b) > 0 ? a : b) {
123+
@Override
124+
protected BuilderForType<String> makeStorageBuilder(
125+
long size, StorageType<?> leftType, StorageType<?> rightType) {
126+
return TextType.maxType(leftType, rightType)
127+
.makeBuilder(size, BlackholeProblemAggregator.INSTANCE);
128+
}
129+
};
97130

98131
public static Column max(
99132
Column left,
@@ -108,16 +141,19 @@ public static Column max(
108141
case DateType d -> DATE_MAX;
109142
case DateTimeType dt -> DATE_TIME_MAX;
110143
case TimeOfDayType t -> TIME_MAX;
144+
case TextType t -> TEXT_MAX;
145+
case NumericType n -> BinaryCoalescingOperationNumeric.create(
146+
leftStorage.getType(), right, BinaryCoalescingOperationNumeric.MAX_OPERATION);
111147
default -> null;
112148
};
113149
return applyOperation(
114-
left, right, fallback, fallbackType, name, problemBuilder, operation, leftStorage, "max");
150+
left, right, fallback, fallbackType, name, problemBuilder, operation, leftStorage);
115151
}
116152

117153
private final StorageType<T> validType;
118154
private final BiFunction<T, T, T> zipOperation;
119155

120-
private BinaryCoalescingOperation(StorageType<T> validType, BiFunction<T, T, T> zipOperation) {
156+
protected BinaryCoalescingOperation(StorageType<T> validType, BiFunction<T, T, T> zipOperation) {
121157
this.validType = validType;
122158
this.zipOperation = zipOperation;
123159
}
@@ -148,7 +184,7 @@ public ColumnStorage<T> applyMap(ColumnStorage<?> left, Object rightValue) {
148184
return StorageIterators.mapOverStorage(
149185
validType.asTypedStorage(left),
150186
false,
151-
validType.makeBuilder(left.getSize(), BlackholeProblemAggregator.INSTANCE),
187+
makeStorageBuilder(left.getSize(), left.getType(), null),
152188
(idx, value) -> zipOperation.apply(value, rightValueTyped));
153189
}
154190

@@ -161,8 +197,13 @@ public ColumnStorage<T> applyZip(ColumnStorage<?> left, ColumnStorage<?> right)
161197
return StorageIterators.zipOverStorages(
162198
validType.asTypedStorage(left),
163199
validType.asTypedStorage(right),
164-
size -> validType.makeBuilder(size, BlackholeProblemAggregator.INSTANCE),
200+
size -> makeStorageBuilder(size, left.getType(), right.getType()),
165201
false,
166202
(index, l, r) -> l == null ? r : (r == null ? l : zipOperation.apply(l, r)));
167203
}
204+
205+
protected BuilderForType<T> makeStorageBuilder(
206+
long size, StorageType<?> leftType, StorageType<?> rightType) {
207+
return validType.makeBuilder(size, BlackholeProblemAggregator.INSTANCE);
208+
}
168209
}
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
package org.enso.table.data.column.operation;
2+
3+
import org.enso.table.data.column.builder.Builder;
4+
import org.enso.table.data.column.storage.BoolStorage;
5+
import org.enso.table.data.column.storage.ColumnStorage;
6+
import org.enso.table.data.column.storage.type.BooleanType;
7+
import org.enso.table.data.column.storage.type.NullType;
8+
import org.enso.table.error.UnexpectedTypeException;
9+
10+
public abstract class BinaryCoalescingOperationBool extends BinaryCoalescingOperation<Boolean> {
11+
public static final BinaryCoalescingOperationBool MIN_INSTANCE =
12+
new BinaryCoalescingOperationBool() {
13+
@Override
14+
protected boolean applySingle(boolean left, boolean right) {
15+
return left && right;
16+
}
17+
18+
@Override
19+
protected ColumnStorage<Boolean> applyMapBoolStorage(BoolStorage left, boolean rightValue) {
20+
return rightValue
21+
? left.fillMissingBoolean(true)
22+
: BoolStorage.makeConstant(Builder.checkSize(left.getSize()), false);
23+
}
24+
};
25+
26+
public static final BinaryCoalescingOperationBool MAX_INSTANCE =
27+
new BinaryCoalescingOperationBool() {
28+
@Override
29+
protected boolean applySingle(boolean left, boolean right) {
30+
return left || right;
31+
}
32+
33+
@Override
34+
protected ColumnStorage<Boolean> applyMapBoolStorage(BoolStorage left, boolean rightValue) {
35+
return rightValue
36+
? BoolStorage.makeConstant(Builder.checkSize(left.getSize()), true)
37+
: left.fillMissingBoolean(false);
38+
}
39+
};
40+
41+
private BinaryCoalescingOperationBool() {
42+
super(BooleanType.INSTANCE, null);
43+
}
44+
45+
@Override
46+
public ColumnStorage<Boolean> applyMap(ColumnStorage<?> left, Object rightValue) {
47+
var typedStorage = BooleanType.INSTANCE.asTypedStorage(left);
48+
49+
if (rightValue == null) {
50+
return typedStorage;
51+
}
52+
53+
if (rightValue instanceof Boolean boolValue) {
54+
// Special optimised mode
55+
if (typedStorage instanceof BoolStorage boolStorage) {
56+
return applyMapBoolStorage(boolStorage, boolValue);
57+
} else {
58+
return StorageIterators.buildOverBooleanStorage(
59+
typedStorage,
60+
Builder.getForBoolean(typedStorage.getSize()),
61+
(builder, index, value, isNothing) ->
62+
builder.appendBoolean(applySingle(value, boolValue)));
63+
}
64+
}
65+
66+
throw new UnexpectedTypeException("a Boolean", rightValue.toString());
67+
}
68+
69+
@Override
70+
public ColumnStorage<Boolean> applyZip(ColumnStorage<?> left, ColumnStorage<?> right) {
71+
var typedStorage = BooleanType.INSTANCE.asTypedStorage(left);
72+
73+
if (NullType.INSTANCE.isOfType(right.getType())) {
74+
return typedStorage;
75+
}
76+
77+
var typedRightStorage = BooleanType.INSTANCE.asTypedStorage(right);
78+
return StorageIterators.zipOverBooleanStorages(
79+
BooleanType.INSTANCE.asTypedStorage(left),
80+
BooleanType.INSTANCE.asTypedStorage(right),
81+
Builder::getForBoolean,
82+
false,
83+
(index, l, lIsNothing, r, rIsNothing) -> {
84+
if (lIsNothing) {
85+
return rIsNothing ? null : r;
86+
} else if (rIsNothing) {
87+
return l;
88+
} else {
89+
return applySingle(l, r);
90+
}
91+
});
92+
}
93+
94+
protected abstract boolean applySingle(boolean left, boolean right);
95+
96+
protected abstract ColumnStorage<Boolean> applyMapBoolStorage(
97+
BoolStorage left, boolean rightValue);
98+
}

0 commit comments

Comments
 (0)