Skip to content

Commit 247f3a0

Browse files
authored
Clean up PrimitiveColumnContainer (dotnet#6656)
* Try to clean up PrimitiveColumnContainer
* Fix helper
* Remove duplicate
* More clean up
* Fix slice
* Update name to GetOrCreateMutable
* Use null instead of default for nullable values.
* Clean up Apply methods.
1 parent 3986fcf commit 247f3a0

7 files changed

+96
-94
lines changed

src/Microsoft.Data.Analysis/DataFrameBuffer.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ namespace Microsoft.Data.Analysis
1313
/// </summary>
1414
/// <typeparam name="T"></typeparam>
1515
internal class DataFrameBuffer<T> : ReadOnlyDataFrameBuffer<T>
16-
where T : struct
16+
where T : unmanaged
1717
{
1818
private Memory<byte> _memory;
1919

src/Microsoft.Data.Analysis/PrimitiveColumnContainer.BinaryOperations.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
namespace Microsoft.Data.Analysis
99
{
1010
internal partial class PrimitiveColumnContainer<T>
11-
where T : struct
11+
where T : unmanaged
1212
{
1313
public PrimitiveColumnContainer<T> Add(PrimitiveColumnContainer<T> right)
1414
{

src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs

Lines changed: 60 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ namespace Microsoft.Data.Analysis
1717
/// </summary>
1818
/// <typeparam name="T"></typeparam>
1919
internal partial class PrimitiveColumnContainer<T> : IEnumerable<T?>
20-
where T : struct
20+
where T : unmanaged
2121
{
2222
public IList<ReadOnlyDataFrameBuffer<T>> Buffers = new List<ReadOnlyDataFrameBuffer<T>>();
2323

@@ -90,6 +90,7 @@ public PrimitiveColumnContainer(ReadOnlyMemory<byte> buffer, ReadOnlyMemory<byte
9090
dataBuffer = new ReadOnlyDataFrameBuffer<T>(buffer, length);
9191
}
9292
Buffers.Add(dataBuffer);
93+
9394
int bitMapBufferLength = (length + 7) / 8;
9495
ReadOnlyDataFrameBuffer<byte> nullDataFrameBuffer;
9596
if (nullBitMap.IsEmpty)
@@ -127,31 +128,7 @@ public PrimitiveColumnContainer(ReadOnlyMemory<byte> buffer, ReadOnlyMemory<byte
127128

128129
public PrimitiveColumnContainer(long length = 0)
129130
{
130-
while (length > 0)
131-
{
132-
if (Buffers.Count == 0)
133-
{
134-
Buffers.Add(new DataFrameBuffer<T>());
135-
NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
136-
}
137-
DataFrameBuffer<T> lastBuffer = (DataFrameBuffer<T>)Buffers[Buffers.Count - 1];
138-
if (lastBuffer.Length == ReadOnlyDataFrameBuffer<T>.MaxCapacity)
139-
{
140-
lastBuffer = new DataFrameBuffer<T>();
141-
Buffers.Add(lastBuffer);
142-
NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
143-
}
144-
int allocatable = (int)Math.Min(length, ReadOnlyDataFrameBuffer<T>.MaxCapacity);
145-
lastBuffer.EnsureCapacity(allocatable);
146-
DataFrameBuffer<byte> lastNullBitMapBuffer = (DataFrameBuffer<byte>)(NullBitMapBuffers[NullBitMapBuffers.Count - 1]);
147-
int nullBufferAllocatable = (allocatable + 7) / 8;
148-
lastNullBitMapBuffer.EnsureCapacity(nullBufferAllocatable);
149-
lastBuffer.Length = allocatable;
150-
lastNullBitMapBuffer.Length = nullBufferAllocatable;
151-
length -= allocatable;
152-
Length += lastBuffer.Length;
153-
NullCount += lastBuffer.Length;
154-
}
131+
AppendMany(null, length);
155132
}
156133

157134
public void Resize(long length)
@@ -168,16 +145,14 @@ public void Append(T? value)
168145
Buffers.Add(new DataFrameBuffer<T>());
169146
NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
170147
}
171-
int bufferIndex = Buffers.Count - 1;
172-
ReadOnlyDataFrameBuffer<T> lastBuffer = Buffers[bufferIndex];
173-
if (lastBuffer.Length == ReadOnlyDataFrameBuffer<T>.MaxCapacity)
148+
149+
if (Buffers[Buffers.Count - 1].Length == ReadOnlyDataFrameBuffer<T>.MaxCapacity)
174150
{
175-
lastBuffer = new DataFrameBuffer<T>();
176-
Buffers.Add(lastBuffer);
151+
Buffers.Add(new DataFrameBuffer<T>());
177152
NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
178153
}
179-
DataFrameBuffer<T> mutableLastBuffer = DataFrameBuffer<T>.GetMutableBuffer(lastBuffer);
180-
Buffers[bufferIndex] = mutableLastBuffer;
154+
155+
DataFrameBuffer<T> mutableLastBuffer = Buffers.GetOrCreateMutable(Buffers.Count - 1);
181156
mutableLastBuffer.Append(value ?? default);
182157
SetValidityBit(Length, value.HasValue);
183158
Length++;
@@ -190,90 +165,91 @@ public void AppendMany(T? value, long count)
190165
NullCount += count;
191166
}
192167

193-
while (count > 0)
168+
var remaining = count;
169+
while (remaining > 0)
194170
{
195171
if (Buffers.Count == 0)
196172
{
197173
Buffers.Add(new DataFrameBuffer<T>());
198174
NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
199175
}
200-
int bufferIndex = Buffers.Count - 1;
201-
ReadOnlyDataFrameBuffer<T> lastBuffer = Buffers[bufferIndex];
202-
if (lastBuffer.Length == ReadOnlyDataFrameBuffer<T>.MaxCapacity)
176+
177+
if (Buffers[Buffers.Count - 1].Length == ReadOnlyDataFrameBuffer<T>.MaxCapacity)
203178
{
204-
lastBuffer = new DataFrameBuffer<T>();
205-
Buffers.Add(lastBuffer);
179+
Buffers.Add(new DataFrameBuffer<T>());
206180
NullBitMapBuffers.Add(new DataFrameBuffer<byte>());
207181
}
208-
DataFrameBuffer<T> mutableLastBuffer = DataFrameBuffer<T>.GetMutableBuffer(lastBuffer);
209-
Buffers[bufferIndex] = mutableLastBuffer;
210-
int allocatable = (int)Math.Min(count, ReadOnlyDataFrameBuffer<T>.MaxCapacity);
182+
183+
DataFrameBuffer<T> mutableLastBuffer = Buffers.GetOrCreateMutable(Buffers.Count - 1);
184+
int allocatable = (int)Math.Min(remaining, ReadOnlyDataFrameBuffer<T>.MaxCapacity);
211185
mutableLastBuffer.EnsureCapacity(allocatable);
212-
mutableLastBuffer.RawSpan.Slice(lastBuffer.Length, allocatable).Fill(value ?? default);
186+
187+
DataFrameBuffer<byte> lastNullBitMapBuffer = NullBitMapBuffers.GetOrCreateMutable(NullBitMapBuffers.Count - 1);
188+
int nullBufferAllocatable = (allocatable + 7) / 8;
189+
lastNullBitMapBuffer.EnsureCapacity(nullBufferAllocatable);
190+
191+
213192
mutableLastBuffer.Length += allocatable;
193+
lastNullBitMapBuffer.Length += nullBufferAllocatable;
214194
Length += allocatable;
215195

216-
int nullBitMapBufferIndex = NullBitMapBuffers.Count - 1;
217-
ReadOnlyDataFrameBuffer<byte> lastNullBitMapBuffer = NullBitMapBuffers[nullBitMapBufferIndex];
218-
DataFrameBuffer<byte> mutableLastNullBitMapBuffer = DataFrameBuffer<byte>.GetMutableBuffer(lastNullBitMapBuffer);
219-
NullBitMapBuffers[nullBitMapBufferIndex] = mutableLastNullBitMapBuffer;
220-
int nullBitMapAllocatable = (int)(((uint)allocatable) / 8) + 1;
221-
mutableLastNullBitMapBuffer.EnsureCapacity(nullBitMapAllocatable);
222-
_modifyNullCountWhileIndexing = false;
223-
for (long i = Length - count; i < Length; i++)
196+
if (value.HasValue)
224197
{
225-
SetValidityBit(i, value.HasValue ? true : false);
198+
mutableLastBuffer.RawSpan.Slice(mutableLastBuffer.Length - allocatable, allocatable).Fill(value ?? default);
199+
200+
_modifyNullCountWhileIndexing = false;
201+
for (long i = Length - allocatable; i < Length; i++)
202+
{
203+
SetValidityBit(i, value.HasValue);
204+
}
205+
_modifyNullCountWhileIndexing = true;
226206
}
227-
_modifyNullCountWhileIndexing = true;
228-
count -= allocatable;
207+
208+
209+
remaining -= allocatable;
229210
}
230211
}
231212

232213
public void ApplyElementwise(Func<T?, long, T?> func)
233214
{
215+
var bufferMaxCapacity = ReadOnlyDataFrameBuffer<T>.MaxCapacity;
234216
for (int b = 0; b < Buffers.Count; b++)
235217
{
236-
ReadOnlyDataFrameBuffer<T> buffer = Buffers[b];
237-
long prevLength = checked(Buffers[0].Length * b);
238-
DataFrameBuffer<T> mutableBuffer = DataFrameBuffer<T>.GetMutableBuffer(buffer);
239-
Buffers[b] = mutableBuffer;
240-
Span<T> span = mutableBuffer.Span;
241-
DataFrameBuffer<byte> mutableNullBitMapBuffer = DataFrameBuffer<byte>.GetMutableBuffer(NullBitMapBuffers[b]);
242-
NullBitMapBuffers[b] = mutableNullBitMapBuffer;
243-
Span<byte> nullBitMapSpan = mutableNullBitMapBuffer.Span;
244-
for (int i = 0; i < span.Length; i++)
218+
long prevLength = checked(bufferMaxCapacity * b);
219+
220+
Span<T> mutableBuffer = Buffers.GetOrCreateMutable(b).Span;
221+
Span<byte> mutableNullBitMapBuffer = NullBitMapBuffers.GetOrCreateMutable(b).Span;
222+
223+
for (int i = 0; i < mutableBuffer.Length; i++)
245224
{
246225
long curIndex = i + prevLength;
247-
bool isValid = IsValid(nullBitMapSpan, i);
248-
T? value = func(isValid ? span[i] : default(T?), curIndex);
249-
span[i] = value.GetValueOrDefault();
250-
SetValidityBit(nullBitMapSpan, i, value != null);
226+
bool isValid = IsValid(mutableNullBitMapBuffer, i);
227+
T? value = func(isValid ? mutableBuffer[i] : null, curIndex);
228+
mutableBuffer[i] = value.GetValueOrDefault();
229+
SetValidityBit(mutableNullBitMapBuffer, i, value != null);
251230
}
252231
}
253232
}
254233

255234
public void Apply<TResult>(Func<T?, TResult?> func, PrimitiveColumnContainer<TResult> resultContainer)
256235
where TResult : unmanaged
257236
{
237+
var bufferMaxCapacity = ReadOnlyDataFrameBuffer<T>.MaxCapacity;
258238
for (int b = 0; b < Buffers.Count; b++)
259239
{
260-
ReadOnlyDataFrameBuffer<T> sourceBuffer = Buffers[b];
261-
ReadOnlySpan<byte> sourceNullBitMap = NullBitMapBuffers[b].ReadOnlySpan;
240+
long prevLength = checked(bufferMaxCapacity * b);
241+
var sourceBuffer = Buffers[b];
242+
var sourceNullBitMap = NullBitMapBuffers[b].ReadOnlySpan;
262243

263-
ReadOnlyDataFrameBuffer<TResult> resultBuffer = resultContainer.Buffers[b];
264-
DataFrameBuffer<TResult> resultMutableBuffer = DataFrameBuffer<TResult>.GetMutableBuffer(resultBuffer);
265-
resultContainer.Buffers[b] = resultMutableBuffer;
266-
Span<TResult> resultSpan = resultMutableBuffer.Span;
267-
DataFrameBuffer<byte> resultMutableNullBitMapBuffer = DataFrameBuffer<byte>.GetMutableBuffer(resultContainer.NullBitMapBuffers[b]);
268-
resultContainer.NullBitMapBuffers[b] = resultMutableNullBitMapBuffer;
269-
Span<byte> resultNullBitMapSpan = resultMutableNullBitMapBuffer.Span;
244+
Span<TResult> mutableResultBuffer = resultContainer.Buffers.GetOrCreateMutable(b).Span;
245+
Span<byte> mutableResultNullBitMapBuffers = resultContainer.NullBitMapBuffers.GetOrCreateMutable(b).Span;
270246

271-
for (int i = 0; i < Buffers[b].Length; i++)
247+
for (int i = 0; i < sourceBuffer.Length; i++)
272248
{
273249
bool isValid = IsValid(sourceNullBitMap, i);
274-
TResult? value = func(isValid ? sourceBuffer[i] : default(T?));
275-
resultSpan[i] = value.GetValueOrDefault();
276-
resultContainer.SetValidityBit(resultNullBitMapSpan, i, value != null);
250+
TResult? value = func(isValid ? sourceBuffer[i] : null);
251+
mutableResultBuffer[i] = value.GetValueOrDefault();
252+
resultContainer.SetValidityBit(mutableResultNullBitMapBuffers, i, value != null);
277253
}
278254
}
279255
}
@@ -440,11 +416,10 @@ public T? this[long rowIndex]
440416
{
441417
int arrayIndex = GetArrayContainingRowIndex(rowIndex);
442418
rowIndex = rowIndex - arrayIndex * ReadOnlyDataFrameBuffer<T>.MaxCapacity;
443-
ReadOnlyDataFrameBuffer<T> buffer = Buffers[arrayIndex];
444-
DataFrameBuffer<T> mutableBuffer = DataFrameBuffer<T>.GetMutableBuffer(buffer);
445-
Buffers[arrayIndex] = mutableBuffer;
446-
DataFrameBuffer<byte> mutableNullBuffer = DataFrameBuffer<byte>.GetMutableBuffer(NullBitMapBuffers[arrayIndex]);
447-
NullBitMapBuffers[arrayIndex] = mutableNullBuffer;
419+
420+
Buffers.GetOrCreateMutable(arrayIndex);
421+
NullBitMapBuffers.GetOrCreateMutable(arrayIndex);
422+
448423
if (value.HasValue)
449424
{
450425
Buffers[arrayIndex][(int)rowIndex] = value.Value;

src/Microsoft.Data.Analysis/PrimitiveColumnContainerHelpers.cs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System.Collections.Generic;
6+
7+
namespace Microsoft.Data.Analysis
8+
{
9+
/// <summary>
/// Extension helpers for lists of <see cref="ReadOnlyDataFrameBuffer{T}"/>.
/// </summary>
internal static class PrimitiveColumnContainerHelpers
{
    /// <summary>
    /// Returns the buffer stored at <paramref name="index"/> as a <see cref="DataFrameBuffer{T}"/>.
    /// When the stored buffer is not already a <see cref="DataFrameBuffer{T}"/>, one is obtained from
    /// <see cref="DataFrameBuffer{T}.GetMutableBuffer"/> and written back into the same list slot.
    /// </summary>
    /// <typeparam name="T">Element type of the buffer.</typeparam>
    /// <param name="bufferList">List whose entry is read and, if necessary, replaced.</param>
    /// <param name="index">Position of the buffer inside <paramref name="bufferList"/>.</param>
    /// <returns>The <see cref="DataFrameBuffer{T}"/> now held at <paramref name="index"/>.</returns>
    internal static DataFrameBuffer<T> GetOrCreateMutable<T>(this IList<ReadOnlyDataFrameBuffer<T>> bufferList, int index)
        where T : unmanaged
    {
        ReadOnlyDataFrameBuffer<T> current = bufferList[index];

        // Fast path: the slot already holds a DataFrameBuffer<T>, so the list is left untouched.
        if (current is DataFrameBuffer<T> alreadyMutable)
        {
            return alreadyMutable;
        }

        // Otherwise swap the entry for the buffer returned by GetMutableBuffer so that
        // later reads of this slot see the same instance.
        DataFrameBuffer<T> replacement = DataFrameBuffer<T>.GetMutableBuffer(current);
        bufferList[index] = replacement;
        return replacement;
    }
}
27+
}

src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnArithmetic.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
namespace Microsoft.Data.Analysis
1212
{
1313
internal interface IPrimitiveDataFrameColumnArithmetic<T>
14-
where T : struct
14+
where T : unmanaged
1515
{
1616
void Add(PrimitiveColumnContainer<T> left, PrimitiveColumnContainer<T> right);
1717
void Add(PrimitiveColumnContainer<T> column, T scalar);
@@ -54,15 +54,15 @@ internal interface IPrimitiveDataFrameColumnArithmetic<T>
5454
}
5555

5656
internal static class PrimitiveDataFrameColumnArithmetic<T>
57-
where T : struct
57+
where T : unmanaged
5858
{
5959
public static IPrimitiveDataFrameColumnArithmetic<T> Instance { get; } = PrimitiveDataFrameColumnArithmetic.GetArithmetic<T>();
6060
}
6161

6262
internal static class PrimitiveDataFrameColumnArithmetic
6363
{
6464
public static IPrimitiveDataFrameColumnArithmetic<T> GetArithmetic<T>()
65-
where T : struct
65+
where T : unmanaged
6666
{
6767
if (typeof(T) == typeof(bool))
6868
{

src/Microsoft.Data.Analysis/PrimitiveDataFrameColumnComputations.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
namespace Microsoft.Data.Analysis
1313
{
1414
internal interface IPrimitiveColumnComputation<T>
15-
where T : struct
15+
where T : unmanaged
1616
{
1717
void Abs(PrimitiveColumnContainer<T> column);
1818
void All(PrimitiveColumnContainer<T> column, out bool ret);
@@ -37,15 +37,15 @@ internal interface IPrimitiveColumnComputation<T>
3737
}
3838

3939
internal static class PrimitiveColumnComputation<T>
40-
where T : struct
40+
where T : unmanaged
4141
{
4242
public static IPrimitiveColumnComputation<T> Instance { get; } = PrimitiveColumnComputation.GetComputation<T>();
4343
}
4444

4545
internal static class PrimitiveColumnComputation
4646
{
4747
public static IPrimitiveColumnComputation<T> GetComputation<T>()
48-
where T : struct
48+
where T : unmanaged
4949
{
5050
if (typeof(T) == typeof(bool))
5151
{

src/Microsoft.Data.Analysis/ReadOnlyDataFrameBuffer.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ namespace Microsoft.Data.Analysis
1515
/// </summary>
1616
/// <typeparam name="T"></typeparam>
1717
internal class ReadOnlyDataFrameBuffer<T>
18-
where T : struct
18+
where T : unmanaged
1919
{
2020
private readonly ReadOnlyMemory<byte> _readOnlyBuffer;
2121

0 commit comments

Comments
 (0)