diff --git a/eng/Versions.props b/eng/Versions.props
index 902cc3dab3..d13d42aadc 100644
--- a/eng/Versions.props
+++ b/eng/Versions.props
@@ -30,7 +30,7 @@
6.0.1
4.7.1
- 2.0.0
+ 11.0.0
3.19.6
2.3.1
3.3.0
diff --git a/src/Microsoft.Data.Analysis/DataFrame.Arrow.cs b/src/Microsoft.Data.Analysis/DataFrame.Arrow.cs
index 2938413459..270cfff63b 100644
--- a/src/Microsoft.Data.Analysis/DataFrame.Arrow.cs
+++ b/src/Microsoft.Data.Analysis/DataFrame.Arrow.cs
@@ -101,10 +101,18 @@ private static void AppendDataFrameColumnFromArrowArray(Field field, IArrowArray
AppendDataFrameColumnFromArrowArray(fieldsEnumerator.Current, structArrayEnumerator.Current, ret, field.Name + "_");
}
break;
- case ArrowTypeId.Decimal:
+ case ArrowTypeId.Date64:
+ Date64Array arrowDate64Array = (Date64Array)arrowArray;
+ dataFrameColumn = new PrimitiveDataFrameColumn(fieldName, arrowDate64Array.Data.Length);
+ for (int i = 0; i < arrowDate64Array.Data.Length; i++)
+ {
+ dataFrameColumn[i] = arrowDate64Array.GetDateTime(i);
+ }
+ break;
+ case ArrowTypeId.Decimal128:
+ case ArrowTypeId.Decimal256:
case ArrowTypeId.Binary:
case ArrowTypeId.Date32:
- case ArrowTypeId.Date64:
case ArrowTypeId.Dictionary:
case ArrowTypeId.FixedSizedBinary:
case ArrowTypeId.HalfFloat:
@@ -114,6 +122,7 @@ private static void AppendDataFrameColumnFromArrowArray(Field field, IArrowArray
case ArrowTypeId.Null:
case ArrowTypeId.Time32:
case ArrowTypeId.Time64:
+ case ArrowTypeId.Timestamp:
default:
throw new NotImplementedException($"{fieldType.Name}");
}
@@ -145,7 +154,7 @@ public static DataFrame FromArrowRecordBatch(RecordBatch recordBatch)
}
///
- /// Returns an without copying data
+ /// Returns an mostly without copying data
///
public IEnumerable ToArrowRecordBatches()
{
diff --git a/src/Microsoft.Data.Analysis/DataFrame.Join.cs b/src/Microsoft.Data.Analysis/DataFrame.Join.cs
index 2109573c38..8e4029ffd3 100644
--- a/src/Microsoft.Data.Analysis/DataFrame.Join.cs
+++ b/src/Microsoft.Data.Analysis/DataFrame.Join.cs
@@ -30,7 +30,7 @@ private void SetSuffixForDuplicatedColumnNames(DataFrame dataFrame, DataFrameCol
{
// Pre-existing column. Change name
DataFrameColumn existingColumn = dataFrame.Columns[index];
- dataFrame._columnCollection.SetColumnName(existingColumn, existingColumn.Name + leftSuffix);
+ existingColumn.SetName(existingColumn.Name + leftSuffix);
column.SetName(column.Name + rightSuffix);
index = dataFrame._columnCollection.IndexOf(column.Name);
}
diff --git a/src/Microsoft.Data.Analysis/DataFrame.cs b/src/Microsoft.Data.Analysis/DataFrame.cs
index 20d42bb9f7..25a8cbbfbc 100644
--- a/src/Microsoft.Data.Analysis/DataFrame.cs
+++ b/src/Microsoft.Data.Analysis/DataFrame.cs
@@ -301,7 +301,7 @@ public DataFrame AddPrefix(string prefix, bool inPlace = false)
for (int i = 0; i < df.Columns.Count; i++)
{
DataFrameColumn column = df.Columns[i];
- df._columnCollection.SetColumnName(column, prefix + column.Name);
+ column.SetName(prefix + column.Name);
df.OnColumnsChanged();
}
return df;
@@ -316,7 +316,7 @@ public DataFrame AddSuffix(string suffix, bool inPlace = false)
for (int i = 0; i < df.Columns.Count; i++)
{
DataFrameColumn column = df.Columns[i];
- df._columnCollection.SetColumnName(column, column.Name + suffix);
+ column.SetName(column.Name + suffix);
df.OnColumnsChanged();
}
return df;
diff --git a/src/Microsoft.Data.Analysis/DataFrameColumn.cs b/src/Microsoft.Data.Analysis/DataFrameColumn.cs
index 3a2f97f817..1b55b92d8f 100644
--- a/src/Microsoft.Data.Analysis/DataFrameColumn.cs
+++ b/src/Microsoft.Data.Analysis/DataFrameColumn.cs
@@ -84,6 +84,26 @@ protected set
}
}
+ // List of the column collections that own this column.
+ // The current API allows a column to be added to multiple DataFrames, which is why a list is needed.
+ private readonly List _ownerColumnCollections = new();
+
+ // Registers columCollection as an owner of this column; a collection that is already registered is not added again.
+ internal void AddOwner(DataFrameColumnCollection columCollection)
+ {
+     if (_ownerColumnCollections.Contains(columCollection))
+     {
+         return;
+     }
+
+     _ownerColumnCollections.Add(columCollection);
+ }
+
+ // Unregisters columCollection as an owner of this column.
+ // List<T>.Remove already does nothing (and returns false) when the item is absent,
+ // so a separate Contains pre-check would only scan the list a second time.
+ internal void RemoveOwner(DataFrameColumnCollection columCollection)
+ {
+     _ownerColumnCollections.Remove(columCollection);
+ }
+
///
/// The number of values in this column.
///
@@ -95,24 +115,30 @@ public abstract long NullCount
private string _name;
///
- /// The name of this column.
+ /// The column name.
///
public string Name => _name;
///
- /// Updates the name of this column.
+ /// Updates the column name.
///
/// The new name.
- /// If passed in, update the column name in
- public void SetName(string newName, DataFrame dataFrame = null)
+ public void SetName(string newName)
{
- if (!(dataFrame is null))
- {
- dataFrame.Columns.SetColumnName(this, newName);
- }
+ foreach (var owner in _ownerColumnCollections) // must run before _name changes: the callback looks up the old entry via column.Name
+ owner.UpdateColumnNameMetadata(this, newName);
+
_name = newName;
}
+ ///
+ /// Updates the name of this column.
+ ///
+ /// The new name.
+ /// Ignored (for backward compatibility)
+ [Obsolete("Use SetName(string newName) instead; the dataFrame argument is ignored.")]
+ public void SetName(string newName, DataFrame dataFrame) => SetName(newName); // kept only for source compatibility
+
///
/// The type of data this column holds.
///
diff --git a/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs b/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs
index 1fae1168c7..13c363660c 100644
--- a/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs
+++ b/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs
@@ -1,4 +1,4 @@
-// Licensed to the .NET Foundation under one or more agreements.
+// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
@@ -38,11 +38,23 @@ internal IReadOnlyList GetColumnNames()
return ret;
}
+ /// <summary>
+ /// Renames the column currently named <paramref name="currentName"/> to <paramref name="newName"/>.
+ /// </summary>
+ /// <param name="currentName">The current name of the column.</param>
+ /// <param name="newName">The new name.</param>
+ public void RenameColumn(string currentName, string newName)
+ {
+     // Look the column up by its current name and let the column rename itself.
+     this[currentName].SetName(newName);
+ }
+
+ [Obsolete("Use RenameColumn(string, string) or DataFrameColumn.SetName(string) instead.")]
public void SetColumnName(DataFrameColumn column, string newName)
+ {
+     // Pure forwarder: the rename is performed by the column itself.
+     column.SetName(newName);
+ }
+
+ // Updates the name-to-index metadata for a column being renamed (used as a callback from DataFrameColumn.SetName)
+ internal void UpdateColumnNameMetadata(DataFrameColumn column, string newName)
{
string currentName = column.Name;
int currentIndex = _columnNameToIndexDictionary[currentName];
- column.SetName(newName);
_columnNameToIndexDictionary.Remove(currentName);
_columnNameToIndexDictionary.Add(newName, currentIndex);
ColumnsChanged?.Invoke();
@@ -66,7 +78,7 @@ protected override void InsertItem(int columnIndex, DataFrameColumn column)
}
else if (column.Length != RowCount)
{
- //check all columns in the dataframe have the same length (amount of rows)
+ //check all columns in the dataframe have the same length (amount of rows)
throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column));
}
@@ -75,7 +87,7 @@ protected override void InsertItem(int columnIndex, DataFrameColumn column)
throw new ArgumentException(string.Format(Strings.DuplicateColumnName, column.Name), nameof(column));
}
- RowCount = column.Length;
+ column.AddOwner(this);
_columnNameToIndexDictionary[column.Name] = columnIndex;
for (int i = columnIndex + 1; i < Count; i++)
@@ -100,7 +112,10 @@ protected override void SetItem(int columnIndex, DataFrameColumn column)
}
_columnNameToIndexDictionary.Remove(this[columnIndex].Name);
_columnNameToIndexDictionary[column.Name] = columnIndex;
+
+ this[columnIndex].RemoveOwner(this);
base.SetItem(columnIndex, column);
+
ColumnsChanged?.Invoke();
}
@@ -111,6 +126,8 @@ protected override void RemoveItem(int columnIndex)
{
_columnNameToIndexDictionary[this[i].Name]--;
}
+
+ this[columnIndex].RemoveOwner(this);
base.RemoveItem(columnIndex);
//Reset RowCount if the last column was removed and dataframe is empty
@@ -204,10 +221,10 @@ public PrimitiveDataFrameColumn GetPrimitiveColumn(string name)
}
///
- /// Gets the with the specified .
+ /// Gets the with the specified .
///
/// The name of the column
- /// .
+ /// .
/// A column named cannot be found, or if the column's type doesn't match.
public PrimitiveDataFrameColumn GetDateTimeColumn(string name)
{
diff --git a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs
index 92996b136b..d65255d5be 100644
--- a/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs
+++ b/src/Microsoft.Data.Analysis/PrimitiveColumnContainer.cs
@@ -374,18 +374,6 @@ internal int MaxRecordBatchLength(long startIndex)
return Buffers[arrayIndex].Length - (int)startIndex;
}
- internal ReadOnlyMemory GetValueBuffer(long startIndex)
- {
- int arrayIndex = GetArrayContainingRowIndex(startIndex);
- return Buffers[arrayIndex].ReadOnlyBuffer;
- }
-
- internal ReadOnlyMemory GetNullBuffer(long startIndex)
- {
- int arrayIndex = GetArrayContainingRowIndex(startIndex);
- return NullBitMapBuffers[arrayIndex].ReadOnlyBuffer;
- }
-
public IReadOnlyList this[long startIndex, int length]
{
get
diff --git a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs
index 152a6247dc..0fe7820fe2 100644
--- a/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs
+++ b/src/Microsoft.Data.Analysis/PrimitiveDataFrameColumn.cs
@@ -7,6 +7,7 @@
using System.Collections.Generic;
using System.Diagnostics;
using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
using Apache.Arrow;
using Apache.Arrow.Types;
using Microsoft.ML;
@@ -104,6 +105,8 @@ private IArrowType GetArrowType()
return UInt64Type.Default;
else if (typeof(T) == typeof(ushort))
return UInt16Type.Default;
+ else if (typeof(T) == typeof(DateTime))
+ return Date64Type.Default;
else
throw new NotImplementedException(nameof(T));
}
@@ -127,36 +130,64 @@ protected internal override Apache.Arrow.Array ToArrowArray(long startIndex, int
{
int arrayIndex = numberOfRows == 0 ? 0 : _columnContainer.GetArrayContainingRowIndex(startIndex);
int offset = (int)(startIndex - arrayIndex * ReadOnlyDataFrameBuffer.MaxCapacity);
+
if (numberOfRows != 0 && numberOfRows > _columnContainer.Buffers[arrayIndex].Length - offset)
{
throw new ArgumentException(Strings.SpansMultipleBuffers, nameof(numberOfRows));
}
- ArrowBuffer valueBuffer = numberOfRows == 0 ? ArrowBuffer.Empty : new ArrowBuffer(_columnContainer.GetValueBuffer(startIndex));
- ArrowBuffer nullBuffer = numberOfRows == 0 ? ArrowBuffer.Empty : new ArrowBuffer(_columnContainer.GetNullBuffer(startIndex));
+
int nullCount = GetNullCount(startIndex, numberOfRows);
+
+ //DateTime requires conversion
+ if (this.DataType == typeof(DateTime))
+ {
+ if (numberOfRows == 0)
+ return new Date64Array(ArrowBuffer.Empty, ArrowBuffer.Empty, numberOfRows, nullCount, offset);
+
+ ReadOnlyDataFrameBuffer valueBuffer = (numberOfRows == 0) ? null : _columnContainer.Buffers[arrayIndex];
+ ReadOnlyDataFrameBuffer nullBuffer = (numberOfRows == 0) ? null : _columnContainer.NullBitMapBuffers[arrayIndex];
+
+ ReadOnlySpan valueSpan = MemoryMarshal.Cast(valueBuffer.ReadOnlySpan);
+ Date64Array.Builder builder = new Date64Array.Builder().Reserve(valueBuffer.Length);
+
+ for (int i = 0; i < valueBuffer.Length; i++)
+ {
+ if (BitUtility.GetBit(nullBuffer.ReadOnlySpan, i))
+ builder.Append(valueSpan[i]);
+ else
+ builder.AppendNull();
+ }
+
+ return builder.Build();
+ }
+
+ //No conversion
+ ArrowBuffer arrowValueBuffer = numberOfRows == 0 ? ArrowBuffer.Empty : new ArrowBuffer(_columnContainer.Buffers[arrayIndex].ReadOnlyBuffer);
+ ArrowBuffer arrowNullBuffer = numberOfRows == 0 ? ArrowBuffer.Empty : new ArrowBuffer(_columnContainer.NullBitMapBuffers[arrayIndex].ReadOnlyBuffer);
+
Type type = this.DataType;
if (type == typeof(bool))
- return new BooleanArray(valueBuffer, nullBuffer, numberOfRows, nullCount, offset);
+ return new BooleanArray(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset);
else if (type == typeof(double))
- return new DoubleArray(valueBuffer, nullBuffer, numberOfRows, nullCount, offset);
+ return new DoubleArray(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset);
else if (type == typeof(float))
- return new FloatArray(valueBuffer, nullBuffer, numberOfRows, nullCount, offset);
+ return new FloatArray(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset);
else if (type == typeof(int))
- return new Int32Array(valueBuffer, nullBuffer, numberOfRows, nullCount, offset);
+ return new Int32Array(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset);
else if (type == typeof(long))
- return new Int64Array(valueBuffer, nullBuffer, numberOfRows, nullCount, offset);
+ return new Int64Array(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset);
else if (type == typeof(sbyte))
- return new Int8Array(valueBuffer, nullBuffer, numberOfRows, nullCount, offset);
+ return new Int8Array(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset);
else if (type == typeof(short))
- return new Int16Array(valueBuffer, nullBuffer, numberOfRows, nullCount, offset);
+ return new Int16Array(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset);
else if (type == typeof(uint))
- return new UInt32Array(valueBuffer, nullBuffer, numberOfRows, nullCount, offset);
+ return new UInt32Array(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset);
else if (type == typeof(ulong))
- return new UInt64Array(valueBuffer, nullBuffer, numberOfRows, nullCount, offset);
+ return new UInt64Array(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset);
else if (type == typeof(ushort))
- return new UInt16Array(valueBuffer, nullBuffer, numberOfRows, nullCount, offset);
+ return new UInt16Array(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset);
else if (type == typeof(byte))
- return new UInt8Array(valueBuffer, nullBuffer, numberOfRows, nullCount, offset);
+ return new UInt8Array(arrowValueBuffer, arrowNullBuffer, numberOfRows, nullCount, offset);
else
throw new NotImplementedException(type.ToString());
}
diff --git a/test/Microsoft.Data.Analysis.Tests/ArrowIntegrationTests.cs b/test/Microsoft.Data.Analysis.Tests/ArrowIntegrationTests.cs
index dacf43a8db..185ab835bb 100644
--- a/test/Microsoft.Data.Analysis.Tests/ArrowIntegrationTests.cs
+++ b/test/Microsoft.Data.Analysis.Tests/ArrowIntegrationTests.cs
@@ -48,6 +48,7 @@ public void TestArrowIntegration()
.Append("ULongColumn", false, new UInt64Array.Builder().AppendRange(Enumerable.Repeat((ulong)1, 10)).Build())
.Append("ByteColumn", false, new Int8Array.Builder().AppendRange(Enumerable.Repeat((sbyte)1, 10)).Build())
.Append("UByteColumn", false, new UInt8Array.Builder().AppendRange(Enumerable.Repeat((byte)1, 10)).Build())
+ .Append("Date64Column", false, new Date64Array.Builder().AppendRange(Enumerable.Repeat(DateTime.Now, 10)).Build())
.Build();
DataFrame df = DataFrame.FromArrowRecordBatch(originalBatch);
diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs
index 086f5101b2..ff7856e984 100644
--- a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs
+++ b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs
@@ -1,4 +1,4 @@
-// Licensed to the .NET Foundation under one or more agreements.
+// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
@@ -388,6 +388,44 @@ public void ClearColumnsTests()
Assert.Equal(0, dataFrame.Columns.LongCount());
}
+ [Fact]
+ public void RenameColumnWithSetNameTests()
+ {
+ StringDataFrameColumn city = new StringDataFrameColumn("City", new string[] { "London", "Berlin" });
+ PrimitiveDataFrameColumn temp = new PrimitiveDataFrameColumn("Temperature", new int[] { 12, 13 });
+
+ DataFrame dataframe = new DataFrame(city, temp);
+
+ // Rename through the column itself; the DataFrame's name lookup is expected to follow.
+ dataframe["City"].SetName("Town");
+ var renamedColumn = dataframe["Town"];
+
+ Assert.Throws(() => dataframe["City"]); // the old name must no longer resolve
+
+ Assert.NotNull(renamedColumn);
+ Assert.Equal("Town", renamedColumn.Name);
+ Assert.True(ReferenceEquals(city, renamedColumn)); // same instance, not a copy
+ }
+
+ [Fact]
+ public void RenameColumnWithRenameColumnTests()
+ {
+ StringDataFrameColumn city = new StringDataFrameColumn("City", new string[] { "London", "Berlin" });
+ PrimitiveDataFrameColumn temp = new PrimitiveDataFrameColumn("Temperature", new int[] { 12, 13 });
+
+ DataFrame dataframe = new DataFrame(city, temp);
+
+ // Rename through the column collection's RenameColumn API.
+ dataframe.Columns.RenameColumn("City", "Town");
+ var renamedColumn = dataframe["Town"];
+
+ Assert.Throws(() => dataframe["City"]); // the old name must no longer resolve
+
+ Assert.NotNull(renamedColumn);
+ Assert.Equal("Town", renamedColumn.Name);
+ Assert.True(ReferenceEquals(city, renamedColumn)); // same instance, not a copy
+ }
+
[Fact]
public void TestBinaryOperations()
{
diff --git a/test/Microsoft.Data.Analysis.Tests/Microsoft.Data.Analysis.Tests.csproj b/test/Microsoft.Data.Analysis.Tests/Microsoft.Data.Analysis.Tests.csproj
index cb9461bb50..c1dd6a4f0c 100644
--- a/test/Microsoft.Data.Analysis.Tests/Microsoft.Data.Analysis.Tests.csproj
+++ b/test/Microsoft.Data.Analysis.Tests/Microsoft.Data.Analysis.Tests.csproj
@@ -9,7 +9,6 @@
-