Skip to content

Commit 0e00904

Browse files
committed
Clean up and reduce helpers
1 parent bdcb2c5 commit 0e00904

File tree

1 file changed

+157
-165
lines changed

1 file changed

+157
-165
lines changed

src/Microsoft.Data.Analysis/NumberMathComputation.cs

Lines changed: 157 additions & 165 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,120 @@ public void Abs(PrimitiveColumnContainer<T> column)
2121
Apply(column, T.Abs);
2222
}
2323

24+
/// <summary>
/// Not supported by this computation; the call always throws.
/// </summary>
/// <param name="column">Ignored.</param>
/// <param name="ret">Never assigned, because the method throws before returning.</param>
/// <exception cref="NotSupportedException">Thrown unconditionally.</exception>
public void All(PrimitiveColumnContainer<T> column, out bool ret) => throw new NotSupportedException();
28+
29+
/// <summary>
/// Not supported by this computation; the call always throws.
/// </summary>
/// <param name="column">Ignored.</param>
/// <param name="ret">Never assigned, because the method throws before returning.</param>
/// <exception cref="NotSupportedException">Thrown unconditionally.</exception>
public void Any(PrimitiveColumnContainer<T> column, out bool ret) => throw new NotSupportedException();
33+
34+
/// <summary>
/// Runs <c>CumulativeApply</c> over <paramref name="column"/> with <c>T.Max</c> as the combining function.
/// </summary>
/// <param name="column">Container handed to <c>CumulativeApply</c>.</param>
public void CumulativeMax(PrimitiveColumnContainer<T> column) => CumulativeApply(column, T.Max);
38+
39+
/// <summary>
/// Runs <c>CumulativeApply</c> with <c>T.Max</c> as the combining function, restricted to the given rows.
/// </summary>
/// <param name="column">Container handed to <c>CumulativeApply</c>.</param>
/// <param name="rows">Row indices forwarded to <c>CumulativeApply</c>.</param>
public void CumulativeMax(PrimitiveColumnContainer<T> column, IEnumerable<long> rows) => CumulativeApply(column, T.Max, rows);
43+
44+
/// <summary>
/// Runs <c>CumulativeApply</c> over <paramref name="column"/> with <c>T.Min</c> as the combining function.
/// </summary>
/// <param name="column">Container handed to <c>CumulativeApply</c>.</param>
public void CumulativeMin(PrimitiveColumnContainer<T> column) => CumulativeApply(column, T.Min);
48+
49+
/// <summary>
/// Runs <c>CumulativeApply</c> with <c>T.Min</c> as the combining function, restricted to the given rows.
/// </summary>
/// <param name="column">Container handed to <c>CumulativeApply</c>.</param>
/// <param name="rows">Row indices forwarded to <c>CumulativeApply</c>.</param>
public void CumulativeMin(PrimitiveColumnContainer<T> column, IEnumerable<long> rows) => CumulativeApply(column, T.Min, rows);
53+
54+
/// <summary>
/// Returns the product of the two operands using the <c>*</c> operator of <typeparamref name="T"/>.
/// Used as the combining function for the product operations.
/// </summary>
private T Multiply(T left, T right)
{
    return left * right;
}
55+
56+
/// <summary>
/// Runs <c>CumulativeApply</c> over <paramref name="column"/> with multiplication as the combining
/// function and <c>T.One</c> as the starting value.
/// </summary>
/// <param name="column">Container handed to <c>CumulativeApply</c>.</param>
public void CumulativeProduct(PrimitiveColumnContainer<T> column) => CumulativeApply(column, Multiply, T.One);
60+
61+
/// <summary>
/// Runs <c>CumulativeApply</c> with multiplication as the combining function, restricted to the given rows.
/// </summary>
/// <param name="column">Container handed to <c>CumulativeApply</c>.</param>
/// <param name="rows">Row indices forwarded to <c>CumulativeApply</c>.</param>
public void CumulativeProduct(PrimitiveColumnContainer<T> column, IEnumerable<long> rows) => CumulativeApply(column, Multiply, rows);
65+
66+
/// <summary>
/// Returns the sum of the two operands using the <c>+</c> operator of <typeparamref name="T"/>.
/// Used as the combining function for the sum operations.
/// </summary>
private T Add(T left, T right)
{
    return left + right;
}
67+
/// <summary>
/// Runs <c>CumulativeApply</c> over <paramref name="column"/> with addition as the combining
/// function and <c>T.Zero</c> as the starting value.
/// </summary>
/// <param name="column">Container handed to <c>CumulativeApply</c>.</param>
public void CumulativeSum(PrimitiveColumnContainer<T> column) => CumulativeApply(column, Add, T.Zero);
71+
72+
/// <summary>
/// Runs <c>CumulativeApply</c> with addition as the combining function, restricted to the given rows.
/// </summary>
/// <param name="column">Container handed to <c>CumulativeApply</c>.</param>
/// <param name="rows">Row indices forwarded to <c>CumulativeApply</c>.</param>
public void CumulativeSum(PrimitiveColumnContainer<T> column, IEnumerable<long> rows) => CumulativeApply(column, Add, rows);
76+
77+
/// <summary>
/// Reduces <paramref name="column"/> with <c>T.Max</c>, starting from the value of its first element.
/// </summary>
/// <param name="column">Container to reduce.</param>
/// <param name="ret">Receives the result of <c>CalculateReduction</c>.</param>
public void Max(PrimitiveColumnContainer<T> column, out T ret)
{
    // NOTE(review): reading .Value from column[0] presumably fails when the first
    // element is null or the container is empty - confirm the callers guard against that.
    var seed = column[0].Value;
    ret = CalculateReduction(column, T.Max, seed);
}
81+
82+
/// <summary>
/// Reduces the given rows of <paramref name="column"/> with <c>T.Max</c>.
/// </summary>
/// <param name="column">Container to reduce.</param>
/// <param name="rows">Row indices forwarded to <c>CalculateReduction</c>.</param>
/// <param name="ret">Receives the result of <c>CalculateReduction</c>.</param>
public void Max(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T ret) => ret = CalculateReduction(column, T.Max, rows);
86+
87+
/// <summary>
/// Reduces <paramref name="column"/> with <c>T.Min</c>, starting from the value of its first element.
/// </summary>
/// <param name="column">Container to reduce.</param>
/// <param name="ret">Receives the result of <c>CalculateReduction</c>.</param>
public void Min(PrimitiveColumnContainer<T> column, out T ret)
{
    // NOTE(review): reading .Value from column[0] presumably fails when the first
    // element is null or the container is empty - confirm the callers guard against that.
    var seed = column[0].Value;
    ret = CalculateReduction(column, T.Min, seed);
}
91+
92+
/// <summary>
/// Reduces the given rows of <paramref name="column"/> with <c>T.Min</c>.
/// </summary>
/// <param name="column">Container to reduce.</param>
/// <param name="rows">Row indices forwarded to <c>CalculateReduction</c>.</param>
/// <param name="ret">Receives the result of <c>CalculateReduction</c>.</param>
public void Min(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T ret) => ret = CalculateReduction(column, T.Min, rows);
97+
98+
/// <summary>
/// Reduces <paramref name="column"/> by multiplication, starting from <c>T.One</c>.
/// </summary>
/// <param name="column">Container to reduce.</param>
/// <param name="ret">Receives the result of <c>CalculateReduction</c>.</param>
public void Product(PrimitiveColumnContainer<T> column, out T ret) => ret = CalculateReduction(column, Multiply, T.One);
102+
103+
/// <summary>
/// Reduces the given rows of <paramref name="column"/> by multiplication.
/// </summary>
/// <param name="column">Container to reduce.</param>
/// <param name="rows">Row indices forwarded to <c>CalculateReduction</c>.</param>
/// <param name="ret">Receives the result of <c>CalculateReduction</c>.</param>
public void Product(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T ret) => ret = CalculateReduction(column, Multiply, rows);
107+
108+
/// <summary>
/// Reduces <paramref name="column"/> by addition, starting from <c>T.Zero</c>.
/// </summary>
/// <param name="column">Container to reduce.</param>
/// <param name="ret">Receives the result of <c>CalculateReduction</c>.</param>
public void Sum(PrimitiveColumnContainer<T> column, out T ret) => ret = CalculateReduction(column, Add, T.Zero);
112+
113+
/// <summary>
/// Reduces the given rows of <paramref name="column"/> by addition.
/// </summary>
/// <param name="column">Container to reduce.</param>
/// <param name="rows">Row indices forwarded to <c>CalculateReduction</c>.</param>
/// <param name="ret">Receives the result of <c>CalculateReduction</c>.</param>
public void Sum(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T ret) => ret = CalculateReduction(column, Add, rows);
117+
118+
/// <summary>
/// Base implementation of rounding: performs no work and leaves <paramref name="column"/> untouched.
/// Declared <c>virtual</c> so that derived computations can supply their own behavior.
/// </summary>
/// <param name="column">Container that would be rounded; unused here.</param>
public virtual void Round(PrimitiveColumnContainer<T> column)
{
    // Intentionally empty.
}
122+
123+
/// <summary>
/// Defers to the base class implementation of <c>Equals</c>.
/// </summary>
/// <param name="obj">Object to compare with this instance.</param>
/// <returns>Whatever the base implementation returns for <paramref name="obj"/>.</returns>
public override bool Equals(object obj) => base.Equals(obj);
127+
128+
/// <summary>
/// Defers to the base class implementation of <c>GetHashCode</c>.
/// </summary>
/// <returns>The hash code produced by the base implementation.</returns>
public override int GetHashCode() => base.GetHashCode();
132+
133+
/// <summary>
/// Defers to the base class implementation of <c>ToString</c>.
/// </summary>
/// <returns>The string produced by the base implementation.</returns>
public override string ToString() => base.ToString();
137+
24138
protected void Apply(PrimitiveColumnContainer<T> column, Func<T, T> func)
25139
{
26140
for (int b = 0; b < column.Buffers.Count; b++)
@@ -71,85 +185,59 @@ protected void CumulativeApply(PrimitiveColumnContainer<T> column, Func<T, T, T>
71185
/// <summary>
/// Cumulatively applies <paramref name="func"/> over the selected rows of <paramref name="column"/>,
/// writing the running result back into the column's value buffers.
/// </summary>
/// <remarks>
/// Rows are visited in the order produced by <paramref name="rows"/>. A row that
/// <c>column.IsValid</c> reports as not valid is skipped and left unchanged. The first valid row
/// seeds the running result and keeps its own value; every later valid row is overwritten with
/// <c>func(runningResult, currentValue)</c>.
/// </remarks>
/// <param name="column">Container whose buffers are read and updated in place.</param>
/// <param name="func">Combines the running result (first argument) with the current value (second argument).</param>
/// <param name="rows">Absolute row indices to visit.</param>
protected void CumulativeApply(PrimitiveColumnContainer<T> column, Func<T, T, T> func, IEnumerable<long> rows)
{
    long maxCapacity = ReadOnlyDataFrameBuffer<T>.MaxCapacity;

    // [minRange, maxRange) is the span of absolute row indices covered by the buffer currently loaded.
    long minRange = 0;
    long maxRange = maxCapacity;

    var mutableBuffer = column.Buffers.GetOrCreateMutable(0);
    var nullBitMap = column.NullBitMapBuffers.GetOrCreateMutable(0);
    var span = mutableBuffer.Span;

    // Placeholder only: the accumulator is replaced by the first valid row before func is ever called.
    T ret = T.Zero;
    bool isSeeded = false;

    // IEnumerator<long> is disposable; the using declaration releases it on every exit path.
    using IEnumerator<long> enumerator = rows.GetEnumerator();
    while (enumerator.MoveNext())
    {
        long row = enumerator.Current;
        if (row < minRange || row >= maxRange)
        {
            // The row lives in a different buffer: swap in its value and validity buffers.
            int bufferIndex = (int)(row / maxCapacity);
            mutableBuffer = column.Buffers.GetOrCreateMutable(bufferIndex);
            nullBitMap = column.NullBitMapBuffers.GetOrCreateMutable(bufferIndex);
            span = mutableBuffer.Span;
            minRange = checked(bufferIndex * maxCapacity);
            maxRange = checked((bufferIndex + 1) * maxCapacity);
        }

        // Position of the row inside the currently loaded buffer.
        int index = (int)(row - minRange);
        if (!column.IsValid(nullBitMap.Span, index))
        {
            continue;
        }

        if (!isSeeded)
        {
            // The first valid value starts the accumulation and is not rewritten.
            ret = span[index];
            isSeeded = true;
            continue;
        }

        ret = func(ret, span[index]);
        span[index] = ret;
    }
}
152239

240+
153241
protected T CalculateReduction(PrimitiveColumnContainer<T> column, Func<T, T, T> func, T startValue)
154242
{
155243
var ret = startValue;
@@ -173,146 +261,50 @@ protected T CalculateReduction(PrimitiveColumnContainer<T> column, Func<T, T, T>
173261
{
174262
var ret = T.Zero;
175263
var readOnlySpan = column.Buffers[0].ReadOnlySpan;
264+
var readOnlyNullBitMap = column.NullBitMapBuffers[0].ReadOnlySpan;
176265
long minRange = 0;
177266
long maxRange = ReadOnlyDataFrameBuffer<T>.MaxCapacity;
178267
long maxCapacity = maxRange;
179268
IEnumerator<long> enumerator = rows.GetEnumerator();
180269

181-
InitializeValues(column, func, ref ret, ref readOnlySpan, ref minRange, ref maxRange, maxCapacity, enumerator);
182-
CalculateReduction(column, func, ref ret, ref readOnlySpan, ref minRange, ref maxRange, maxCapacity, enumerator);
270+
bool isValid = false;
271+
while (!isValid && enumerator.MoveNext())
272+
{
273+
long row = enumerator.Current;
274+
if (row < minRange || row >= maxRange)
275+
{
276+
int bufferIndex = (int)(row / maxCapacity);
277+
readOnlySpan = column.Buffers[bufferIndex].ReadOnlySpan;
278+
readOnlyNullBitMap = column.NullBitMapBuffers[bufferIndex].ReadOnlySpan;
279+
minRange = checked(bufferIndex * maxCapacity);
280+
maxRange = checked((bufferIndex + 1) * maxCapacity);
281+
}
282+
row -= minRange;
183283

184-
return ret;
185-
}
284+
if (column.IsValid(readOnlyNullBitMap, (int)row))
285+
{
286+
isValid = true;
287+
ret = readOnlySpan[(int)row];
288+
}
289+
}
186290

187-
protected static void CalculateReduction(PrimitiveColumnContainer<T> column, Func<T, T, T> func, ref T ret, ref ReadOnlySpan<T> readOnlySpan, ref long minRange, ref long maxRange, long maxCapacity, IEnumerator<long> enumerator)
188-
{
189291
while (enumerator.MoveNext())
190292
{
191293
long row = enumerator.Current;
192294
if (row < minRange || row >= maxRange)
193295
{
194296
int bufferIndex = (int)(row / maxCapacity);
195297
readOnlySpan = column.Buffers[bufferIndex].ReadOnlySpan;
298+
readOnlyNullBitMap = column.NullBitMapBuffers[bufferIndex].ReadOnlySpan;
196299
minRange = checked(bufferIndex * maxCapacity);
197300
maxRange = checked((bufferIndex + 1) * maxCapacity);
198301
}
199302
row -= minRange;
303+
200304
ret = checked(func(readOnlySpan[(int)row], ret));
201305
}
202-
}
203-
204-
public void All(PrimitiveColumnContainer<T> column, out bool ret)
205-
{
206-
throw new NotSupportedException();
207-
}
208-
209-
public void Any(PrimitiveColumnContainer<T> column, out bool ret)
210-
{
211-
throw new NotSupportedException();
212-
}
213-
214-
public void CumulativeMax(PrimitiveColumnContainer<T> column)
215-
{
216-
CumulativeApply(column, T.Max);
217-
}
218-
219-
public void CumulativeMax(PrimitiveColumnContainer<T> column, IEnumerable<long> rows)
220-
{
221-
CumulativeApply(column, T.Max, rows);
222-
}
223306

224-
public void CumulativeMin(PrimitiveColumnContainer<T> column)
225-
{
226-
CumulativeApply(column, T.Min);
227-
}
228-
229-
public void CumulativeMin(PrimitiveColumnContainer<T> column, IEnumerable<long> rows)
230-
{
231-
CumulativeApply(column, T.Min, rows);
232-
}
233-
234-
private T Multiply(T left, T right) => left * right;
235-
236-
public void CumulativeProduct(PrimitiveColumnContainer<T> column)
237-
{
238-
CumulativeApply(column, Multiply, T.One);
239-
}
240-
241-
public void CumulativeProduct(PrimitiveColumnContainer<T> column, IEnumerable<long> rows)
242-
{
243-
CumulativeApply(column, Multiply, rows);
244-
}
245-
246-
private T Add(T left, T right) => left + right;
247-
public void CumulativeSum(PrimitiveColumnContainer<T> column)
248-
{
249-
CumulativeApply(column, Add, T.Zero);
250-
}
251-
252-
public void CumulativeSum(PrimitiveColumnContainer<T> column, IEnumerable<long> rows)
253-
{
254-
CumulativeApply(column, Add, rows, T.Zero);
255-
}
256-
257-
public void Max(PrimitiveColumnContainer<T> column, out T ret)
258-
{
259-
ret = CalculateReduction(column, T.Max, column[0].Value);
260-
}
261-
262-
public void Max(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T ret)
263-
{
264-
ret = CalculateReduction(column, T.Max, rows);
265-
}
266-
267-
public void Min(PrimitiveColumnContainer<T> column, out T ret)
268-
{
269-
ret = CalculateReduction(column, T.Min, column[0].Value);
270-
}
271-
272-
public void Min(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T ret)
273-
{
274-
275-
ret = CalculateReduction(column, T.Min, rows);
276-
}
277-
278-
public void Product(PrimitiveColumnContainer<T> column, out T ret)
279-
{
280-
ret = CalculateReduction(column, Multiply, T.One);
281-
}
282-
283-
public void Product(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T ret)
284-
{
285-
ret = CalculateReduction(column, Multiply, rows);
286-
}
287-
288-
public void Sum(PrimitiveColumnContainer<T> column, out T ret)
289-
{
290-
ret = CalculateReduction(column, Add, T.Zero);
291-
}
292-
293-
public void Sum(PrimitiveColumnContainer<T> column, IEnumerable<long> rows, out T ret)
294-
{
295-
ret = CalculateReduction(column, Add, rows);
296-
}
297-
298-
public virtual void Round(PrimitiveColumnContainer<T> column)
299-
{
300-
// do nothing
301-
}
302-
303-
public override bool Equals(object obj)
304-
{
305-
return base.Equals(obj);
306-
}
307-
308-
public override int GetHashCode()
309-
{
310-
return base.GetHashCode();
311-
}
312-
313-
public override string ToString()
314-
{
315-
return base.ToString();
307+
return ret;
316308
}
317309
}
318310
}

0 commit comments

Comments
 (0)