Skip to content

Commit 92e2d72

Browse files
committed
Reset RowCount to zero, when DataFrame is empty
# Conflicts: # src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs # test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs
1 parent 8cd3d39 commit 92e2d72

File tree

2 files changed

+96
-23
lines changed

2 files changed

+96
-23
lines changed

src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,13 +74,15 @@ public void Insert<T>(int columnIndex, IEnumerable<T> column, string columnName)
7474
protected override void InsertItem(int columnIndex, DataFrameColumn column)
7575
{
7676
column = column ?? throw new ArgumentNullException(nameof(column));
77-
if (RowCount > 0 && column.Length != RowCount)
77+
78+
if (Count == 0)
7879
{
79-
throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column));
80+
//set RowCount when inserting the first column into the dataframe
81+
RowCount = column.Length;
8082
}
81-
82-
if (Count >= 1 && RowCount == 0 && column.Length != RowCount)
83+
else if (column.Length != RowCount)
8384
{
85+
//check all columns in the dataframe have the same length (number of rows)
8486
throw new ArgumentException(Strings.MismatchedColumnLengths, nameof(column));
8587
}
8688

@@ -91,7 +93,6 @@ protected override void InsertItem(int columnIndex, DataFrameColumn column)
9193

9294
column.AddOwner(this);
9395

94-
RowCount = column.Length;
9596
_columnNames.Insert(columnIndex, column.Name);
9697
_columnNameToIndexDictionary[column.Name] = columnIndex;
9798
for (int i = columnIndex + 1; i < Count; i++)
@@ -137,6 +138,10 @@ protected override void RemoveItem(int columnIndex)
137138
this[columnIndex].RemoveOwner(this);
138139
base.RemoveItem(columnIndex);
139140

141+
//Reset RowCount if the last column was removed and dataframe is empty
142+
if (Count == 0)
143+
RowCount = 0;
144+
140145
ColumnsChanged?.Invoke();
141146
}
142147

@@ -168,6 +173,9 @@ protected override void ClearItems()
168173
ColumnsChanged?.Invoke();
169174
_columnNames.Clear();
170175
_columnNameToIndexDictionary.Clear();
176+
177+
//reset RowCount as DataFrame is now empty
178+
RowCount = 0;
171179
}
172180

173181
/// <summary>

test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs

Lines changed: 83 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -271,29 +271,44 @@ public void TestIndexer()
271271
[Fact]
272272
public void ColumnAndTableCreationTest()
273273
{
274-
DataFrameColumn intColumn = new PrimitiveDataFrameColumn<int>("IntColumn", Enumerable.Range(0, 10).Select(x => x));
275-
DataFrameColumn floatColumn = new PrimitiveDataFrameColumn<float>("FloatColumn", Enumerable.Range(0, 10).Select(x => (float)x));
274+
const int rowCount = 10;
275+
DataFrameColumn intColumn = new PrimitiveDataFrameColumn<int>("IntColumn", Enumerable.Range(0, rowCount).Select(x => x));
276+
DataFrameColumn floatColumn = new PrimitiveDataFrameColumn<float>("FloatColumn", Enumerable.Range(0, rowCount).Select(x => (float)x));
276277
DataFrame dataFrame = new DataFrame();
277278
dataFrame.Columns.Insert(0, intColumn);
278279
dataFrame.Columns.Insert(1, floatColumn);
279-
Assert.Equal(10, dataFrame.Rows.Count);
280+
Assert.Equal(rowCount, dataFrame.Rows.Count);
280281
Assert.Equal(2, dataFrame.Columns.Count);
281-
Assert.Equal(10, dataFrame.Columns[0].Length);
282+
Assert.Equal(2, dataFrame.Columns.LongCount());
283+
Assert.Equal(rowCount, dataFrame.Columns[0].Length);
282284
Assert.Equal("IntColumn", dataFrame.Columns[0].Name);
283-
Assert.Equal(10, dataFrame.Columns[1].Length);
285+
Assert.Equal(rowCount, dataFrame.Columns[1].Length);
284286
Assert.Equal("FloatColumn", dataFrame.Columns[1].Name);
285287

286-
DataFrameColumn bigColumn = new PrimitiveDataFrameColumn<float>("BigColumn", Enumerable.Range(0, 11).Select(x => (float)x));
287-
DataFrameColumn repeatedName = new PrimitiveDataFrameColumn<float>("FloatColumn", Enumerable.Range(0, 10).Select(x => (float)x));
288+
//add column with bigger length than other columns in the dataframe
289+
DataFrameColumn bigColumn = new PrimitiveDataFrameColumn<float>("BigColumn", Enumerable.Range(0, rowCount + 1).Select(x => (float)x));
288290
Assert.Throws<ArgumentException>(() => dataFrame.Columns.Insert(2, bigColumn));
291+
Assert.Throws<ArgumentException>(() => dataFrame.Columns.Add(bigColumn));
292+
293+
//add column smaller than other columns in the dataframe
294+
DataFrameColumn smallColumn = new PrimitiveDataFrameColumn<float>("SmallColumn", Enumerable.Range(0, rowCount - 1).Select(x => (float)x));
295+
Assert.Throws<ArgumentException>(() => dataFrame.Columns.Insert(2, smallColumn));
296+
Assert.Throws<ArgumentException>(() => dataFrame.Columns.Add(smallColumn));
297+
298+
//add column with duplicate name
299+
DataFrameColumn repeatedName = new PrimitiveDataFrameColumn<float>("FloatColumn", Enumerable.Range(0, rowCount).Select(x => (float)x));
289300
Assert.Throws<ArgumentException>(() => dataFrame.Columns.Insert(2, repeatedName));
290-
Assert.Throws<ArgumentOutOfRangeException>(() => dataFrame.Columns.Insert(10, repeatedName));
301+
302+
//Insert column at index out of range
303+
DataFrameColumn extraColumn = new PrimitiveDataFrameColumn<float>("OtherFloatColumn", Enumerable.Range(0, rowCount).Select(x => (float)x));
304+
var columnCount = dataFrame.Columns.Count;
305+
Assert.Throws<ArgumentOutOfRangeException>(() => dataFrame.Columns.Insert(columnCount + 1, repeatedName));
291306

292307
Assert.Equal(2, dataFrame.Columns.Count);
293-
DataFrameColumn intColumnCopy = new PrimitiveDataFrameColumn<int>("IntColumn", Enumerable.Range(0, 10).Select(x => x));
308+
DataFrameColumn intColumnCopy = new PrimitiveDataFrameColumn<int>("IntColumn", Enumerable.Range(0, rowCount).Select(x => x));
294309
Assert.Throws<ArgumentException>(() => dataFrame.Columns[1] = intColumnCopy);
295310

296-
DataFrameColumn differentIntColumn = new PrimitiveDataFrameColumn<int>("IntColumn1", Enumerable.Range(0, 10).Select(x => x));
311+
DataFrameColumn differentIntColumn = new PrimitiveDataFrameColumn<int>("IntColumn1", Enumerable.Range(0, rowCount).Select(x => x));
297312
dataFrame.Columns[1] = differentIntColumn;
298313
Assert.True(object.ReferenceEquals(differentIntColumn, dataFrame.Columns[1]));
299314

@@ -309,18 +324,68 @@ public void ColumnAndTableCreationTest()
309324
}
310325

311326
[Fact]
312-
public void InsertAndRemoveColumnTests()
327+
public void InsertAndRemoveColumnToTheEndOfNotEmptyDataFrameTests()
313328
{
314329
DataFrame dataFrame = MakeDataFrameWithAllMutableColumnTypes(10);
315-
DataFrameColumn intColumn = new PrimitiveDataFrameColumn<int>("IntColumn", Enumerable.Range(0, 10).Select(x => x));
316-
DataFrameColumn charColumn = dataFrame.Columns["Char"];
317-
int insertedIndex = dataFrame.Columns.Count;
318-
dataFrame.Columns.Insert(dataFrame.Columns.Count, intColumn);
330+
DataFrameColumn intColumn = new PrimitiveDataFrameColumn<int>("NewIntColumn", Enumerable.Range(0, 10).Select(x => x));
331+
332+
int columnCount = dataFrame.Columns.Count;
333+
DataFrameColumn originalLastColumn = dataFrame.Columns[columnCount - 1];
334+
335+
//Insert new column at the end
336+
dataFrame.Columns.Insert(columnCount, intColumn);
337+
Assert.Equal(columnCount + 1, dataFrame.Columns.Count);
338+
339+
//Remove first
319340
dataFrame.Columns.RemoveAt(0);
320-
DataFrameColumn intColumn_1 = dataFrame.Columns["IntColumn"];
321-
DataFrameColumn charColumn_1 = dataFrame.Columns["Char"];
341+
Assert.Equal(columnCount, dataFrame.Columns.Count);
342+
343+
//Check that int column was inserted
344+
DataFrameColumn intColumn_1 = dataFrame.Columns["NewIntColumn"];
322345
Assert.True(ReferenceEquals(intColumn, intColumn_1));
323-
Assert.True(ReferenceEquals(charColumn, charColumn_1));
346+
347+
//Check that last column of the original dataframe was not removed
348+
DataFrameColumn lastColumn_1 = dataFrame.Columns[originalLastColumn.Name];
349+
Assert.True(ReferenceEquals(originalLastColumn, lastColumn_1));
350+
351+
//Check that new column is the last one
352+
int newIndex = dataFrame.Columns.IndexOf("NewIntColumn");
353+
Assert.Equal(columnCount - 1, newIndex);
354+
355+
//Check that original last column now has correct index
356+
int newIndexForOriginalLastColumn = dataFrame.Columns.IndexOf(originalLastColumn.Name);
357+
Assert.Equal(columnCount - 2, newIndexForOriginalLastColumn);
358+
}
359+
360+
[Fact]
361+
public void AddAndRemoveColumnToTheEmptyDataFrameTests()
362+
{
363+
DataFrame dataFrame = new DataFrame();
364+
DataFrameColumn intColumn = new PrimitiveDataFrameColumn<int>("NewIntColumn", Enumerable.Range(0, 10).Select(x => x));
365+
366+
dataFrame.Columns.Add(intColumn);
367+
Assert.Single(dataFrame.Columns);
368+
Assert.Equal(10, dataFrame.Rows.Count);
369+
370+
dataFrame.Columns.Remove(intColumn);
371+
Assert.Empty(dataFrame.Columns);
372+
Assert.Equal(0, dataFrame.Rows.Count);
373+
}
374+
375+
[Fact]
376+
public void ClearColumnsTests()
377+
{
378+
//Arrange
379+
DataFrame dataFrame = MakeDataFrameWithAllMutableColumnTypes(10);
380+
381+
//Act
382+
dataFrame.Columns.Clear();
383+
384+
//Assert
385+
Assert.Empty(dataFrame.Columns);
386+
387+
Assert.Equal(0, dataFrame.Rows.Count);
388+
Assert.Equal(0, dataFrame.Columns.LongCount());
324389
}
325390

326391
[Fact]

0 commit comments

Comments
 (0)