Skip to content

Commit b28710a

Browse files
Reset DataFrame.RowCount to zero, when DataFrame is empty (dotnet#6698)
* Reset RowCount to zero when DataFrame is empty
* Fix typo.

---------

Co-authored-by: Michael Sharp <51342856+michaelgsharp@users.noreply.github.com>
1 parent 31e4b64 commit b28710a

File tree

2 files changed: +99 −22 lines changed

src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs

Lines changed: 16 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -58,21 +58,25 @@ public void Insert<T>(int columnIndex, IEnumerable<T> column, string columnName)
5858
protected override void InsertItem(int columnIndex, DataFrameColumn column)
5959
{
6060
column = column ?? throw new ArgumentNullException(nameof(column));
61-
if (RowCount > 0 && column.Length != RowCount)
61+
62+
if (Count == 0)
6263
{
63-
throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column));
64+
//set RowCount when the first column is inserted into the dataframe
65+
RowCount = column.Length;
6466
}
65-
66-
if (Count >= 1 && RowCount == 0 && column.Length != RowCount)
67+
else if (column.Length != RowCount)
6768
{
69+
//check that all columns in the dataframe have the same length (number of rows)
6870
throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column));
6971
}
7072

7173
if (_columnNameToIndexDictionary.ContainsKey(column.Name))
7274
{
7375
throw new ArgumentException(string.Format(Strings.DuplicateColumnName, column.Name), nameof(column));
7476
}
77+
7578
RowCount = column.Length;
79+
7680
_columnNameToIndexDictionary[column.Name] = columnIndex;
7781
for (int i = columnIndex + 1; i < Count; i++)
7882
{
@@ -108,6 +112,11 @@ protected override void RemoveItem(int columnIndex)
108112
_columnNameToIndexDictionary[this[i].Name]--;
109113
}
110114
base.RemoveItem(columnIndex);
115+
116+
//Reset RowCount if the last column was removed and dataframe is empty
117+
if (Count == 0)
118+
RowCount = 0;
119+
111120
ColumnsChanged?.Invoke();
112121
}
113122

@@ -138,6 +147,9 @@ protected override void ClearItems()
138147
base.ClearItems();
139148
ColumnsChanged?.Invoke();
140149
_columnNameToIndexDictionary.Clear();
150+
151+
//reset RowCount as DataFrame is now empty
152+
RowCount = 0;
141153
}
142154

143155
/// <summary>

test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs

Lines changed: 83 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -271,29 +271,44 @@ public void TestIndexer()
271271
[Fact]
272272
public void ColumnAndTableCreationTest()
273273
{
274-
DataFrameColumn intColumn = new Int32DataFrameColumn("IntColumn", Enumerable.Range(0, 10).Select(x => x));
275-
DataFrameColumn floatColumn = new SingleDataFrameColumn("FloatColumn", Enumerable.Range(0, 10).Select(x => (float)x));
274+
const int rowCount = 10;
275+
DataFrameColumn intColumn = new Int32DataFrameColumn("IntColumn", Enumerable.Range(0, rowCount).Select(x => x));
276+
DataFrameColumn floatColumn = new SingleDataFrameColumn("FloatColumn", Enumerable.Range(0, rowCount).Select(x => (float)x));
276277
DataFrame dataFrame = new DataFrame();
277278
dataFrame.Columns.Insert(0, intColumn);
278279
dataFrame.Columns.Insert(1, floatColumn);
279-
Assert.Equal(10, dataFrame.Rows.Count);
280+
Assert.Equal(rowCount, dataFrame.Rows.Count);
280281
Assert.Equal(2, dataFrame.Columns.Count);
281-
Assert.Equal(10, dataFrame.Columns[0].Length);
282+
Assert.Equal(2, dataFrame.Columns.LongCount());
283+
Assert.Equal(rowCount, dataFrame.Columns[0].Length);
282284
Assert.Equal("IntColumn", dataFrame.Columns[0].Name);
283-
Assert.Equal(10, dataFrame.Columns[1].Length);
285+
Assert.Equal(rowCount, dataFrame.Columns[1].Length);
284286
Assert.Equal("FloatColumn", dataFrame.Columns[1].Name);
285287

286-
DataFrameColumn bigColumn = new SingleDataFrameColumn("BigColumn", Enumerable.Range(0, 11).Select(x => (float)x));
287-
DataFrameColumn repeatedName = new SingleDataFrameColumn("FloatColumn", Enumerable.Range(0, 10).Select(x => (float)x));
288+
//add column with bigger length than other columns in the dataframe
289+
DataFrameColumn bigColumn = new SingleDataFrameColumn("BigColumn", Enumerable.Range(0, rowCount + 1).Select(x => (float)x));
288290
Assert.Throws<ArgumentException>(() => dataFrame.Columns.Insert(2, bigColumn));
291+
Assert.Throws<ArgumentException>(() => dataFrame.Columns.Add(bigColumn));
292+
293+
//add column smaller than other columns in the dataframe
294+
DataFrameColumn smallColumn = new SingleDataFrameColumn("SmallColumn", Enumerable.Range(0, rowCount - 1).Select(x => (float)x));
295+
Assert.Throws<ArgumentException>(() => dataFrame.Columns.Insert(2, smallColumn));
296+
Assert.Throws<ArgumentException>(() => dataFrame.Columns.Add(smallColumn));
297+
298+
//add column with duplicate name
299+
DataFrameColumn repeatedName = new SingleDataFrameColumn("FloatColumn", Enumerable.Range(0, rowCount).Select(x => (float)x));
289300
Assert.Throws<ArgumentException>(() => dataFrame.Columns.Insert(2, repeatedName));
290-
Assert.Throws<ArgumentOutOfRangeException>(() => dataFrame.Columns.Insert(10, repeatedName));
301+
302+
//Insert column at index out of range
303+
DataFrameColumn extraColumn = new SingleDataFrameColumn("OtherFloatColumn", Enumerable.Range(0, rowCount).Select(x => (float)x));
304+
var columnCount = dataFrame.Columns.Count;
305+
Assert.Throws<ArgumentOutOfRangeException>(() => dataFrame.Columns.Insert(columnCount + 1, repeatedName));
291306

292307
Assert.Equal(2, dataFrame.Columns.Count);
293-
DataFrameColumn intColumnCopy = new Int32DataFrameColumn("IntColumn", Enumerable.Range(0, 10).Select(x => x));
308+
DataFrameColumn intColumnCopy = new Int32DataFrameColumn("IntColumn", Enumerable.Range(0, rowCount).Select(x => x));
294309
Assert.Throws<ArgumentException>(() => dataFrame.Columns[1] = intColumnCopy);
295310

296-
DataFrameColumn differentIntColumn = new Int32DataFrameColumn("IntColumn1", Enumerable.Range(0, 10).Select(x => x));
311+
DataFrameColumn differentIntColumn = new Int32DataFrameColumn("IntColumn1", Enumerable.Range(0, rowCount).Select(x => x));
297312
dataFrame.Columns[1] = differentIntColumn;
298313
Assert.True(object.ReferenceEquals(differentIntColumn, dataFrame.Columns[1]));
299314

@@ -309,18 +324,68 @@ public void ColumnAndTableCreationTest()
309324
}
310325

311326
[Fact]
312-
public void InsertAndRemoveColumnTests()
327+
public void InsertAndRemoveColumnToTheEndOfNotEmptyDataFrameTests()
313328
{
314329
DataFrame dataFrame = MakeDataFrameWithAllMutableColumnTypes(10);
315-
DataFrameColumn intColumn = new Int32DataFrameColumn("IntColumn", Enumerable.Range(0, 10).Select(x => x));
316-
DataFrameColumn charColumn = dataFrame.Columns["Char"];
317-
int insertedIndex = dataFrame.Columns.Count;
318-
dataFrame.Columns.Insert(dataFrame.Columns.Count, intColumn);
330+
DataFrameColumn intColumn = new Int32DataFrameColumn("NewIntColumn", Enumerable.Range(0, 10).Select(x => x));
331+
332+
int columnCount = dataFrame.Columns.Count;
333+
DataFrameColumn originalLastColumn = dataFrame.Columns[columnCount - 1];
334+
335+
//Insert new column at the end
336+
dataFrame.Columns.Insert(columnCount, intColumn);
337+
Assert.Equal(columnCount + 1, dataFrame.Columns.Count);
338+
339+
//Remove first
319340
dataFrame.Columns.RemoveAt(0);
320-
DataFrameColumn intColumn_1 = dataFrame.Columns["IntColumn"];
321-
DataFrameColumn charColumn_1 = dataFrame.Columns["Char"];
341+
Assert.Equal(columnCount, dataFrame.Columns.Count);
342+
343+
//Check that int column was inserted
344+
DataFrameColumn intColumn_1 = dataFrame.Columns["NewIntColumn"];
322345
Assert.True(ReferenceEquals(intColumn, intColumn_1));
323-
Assert.True(ReferenceEquals(charColumn, charColumn_1));
346+
347+
//Check that last column of the original dataframe was not removed
348+
DataFrameColumn lastColumn_1 = dataFrame.Columns[originalLastColumn.Name];
349+
Assert.True(ReferenceEquals(originalLastColumn, lastColumn_1));
350+
351+
//Check that new column is the last one
352+
int newIndex = dataFrame.Columns.IndexOf("NewIntColumn");
353+
Assert.Equal(columnCount - 1, newIndex);
354+
355+
//Check that original last column now has correct index
356+
int newIndexForOriginalLastColumn = dataFrame.Columns.IndexOf(originalLastColumn.Name);
357+
Assert.Equal(columnCount - 2, newIndexForOriginalLastColumn);
358+
}
359+
360+
[Fact]
361+
public void AddAndRemoveColumnToTheEmptyDataFrameTests()
362+
{
363+
DataFrame dataFrame = new DataFrame();
364+
DataFrameColumn intColumn = new Int32DataFrameColumn("NewIntColumn", Enumerable.Range(0, 10).Select(x => x));
365+
366+
dataFrame.Columns.Add(intColumn);
367+
Assert.Single(dataFrame.Columns);
368+
Assert.Equal(10, dataFrame.Rows.Count);
369+
370+
dataFrame.Columns.Remove(intColumn);
371+
Assert.Empty(dataFrame.Columns);
372+
Assert.Equal(0, dataFrame.Rows.Count);
373+
}
374+
375+
[Fact]
376+
public void ClearColumnsTests()
377+
{
378+
//Arrange
379+
DataFrame dataFrame = MakeDataFrameWithAllMutableColumnTypes(10);
380+
381+
//Act
382+
dataFrame.Columns.Clear();
383+
384+
//Assert
385+
Assert.Empty(dataFrame.Columns);
386+
387+
Assert.Equal(0, dataFrame.Rows.Count);
388+
Assert.Equal(0, dataFrame.Columns.LongCount());
324389
}
325390

326391
[Fact]

0 commit comments

Comments
 (0)