Skip to content

Fix the behavior or column SetName method #6676

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jul 6, 2023
Merged
2 changes: 1 addition & 1 deletion src/Microsoft.Data.Analysis/DataFrame.Join.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ private void SetSuffixForDuplicatedColumnNames(DataFrame dataFrame, DataFrameCol
{
// Pre-existing column. Change name
DataFrameColumn existingColumn = dataFrame.Columns[index];
dataFrame._columnCollection.SetColumnName(existingColumn, existingColumn.Name + leftSuffix);
existingColumn.SetName(existingColumn.Name + leftSuffix);
column.SetName(column.Name + rightSuffix);
index = dataFrame._columnCollection.IndexOf(column.Name);
}
Expand Down
4 changes: 2 additions & 2 deletions src/Microsoft.Data.Analysis/DataFrame.cs
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ public DataFrame AddPrefix(string prefix, bool inPlace = false)
for (int i = 0; i < df.Columns.Count; i++)
{
DataFrameColumn column = df.Columns[i];
df._columnCollection.SetColumnName(column, prefix + column.Name);
column.SetName(prefix + column.Name);
df.OnColumnsChanged();
}
return df;
Expand All @@ -316,7 +316,7 @@ public DataFrame AddSuffix(string suffix, bool inPlace = false)
for (int i = 0; i < df.Columns.Count; i++)
{
DataFrameColumn column = df.Columns[i];
df._columnCollection.SetColumnName(column, column.Name + suffix);
column.SetName(column.Name + suffix);
df.OnColumnsChanged();
}
return df;
Expand Down
42 changes: 34 additions & 8 deletions src/Microsoft.Data.Analysis/DataFrameColumn.cs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,26 @@ protected set
}
}

// List of ColumnCollections that owns the column
// Current API allows column to be added into multiple dataframes, that's why the list is needed
private readonly List<DataFrameColumnCollection> _ownerColumnCollections = new();

internal void AddOwner(DataFrameColumnCollection columCollection)
{
if (!_ownerColumnCollections.Contains(columCollection))
{
_ownerColumnCollections.Add(columCollection);
}
}

internal void RemoveOwner(DataFrameColumnCollection columCollection)
{
if (_ownerColumnCollections.Contains(columCollection))
{
_ownerColumnCollections.Remove(columCollection);
}
}

/// <summary>
/// The number of <see langword="null" /> values in this column.
/// </summary>
Expand All @@ -95,24 +115,30 @@ public abstract long NullCount
private string _name;

/// <summary>
/// The name of this column.
/// The column name.
/// </summary>
public string Name => _name;

/// <summary>
/// Updates the name of this column.
/// Updates the column name.
/// </summary>
/// <param name="newName">The new name.</param>
/// <param name="dataFrame">If passed in, update the column name in <see cref="DataFrame.Columns"/></param>
public void SetName(string newName, DataFrame dataFrame = null)
public void SetName(string newName)
{
if (!(dataFrame is null))
{
dataFrame.Columns.SetColumnName(this, newName);
}
foreach (var owner in _ownerColumnCollections)
owner.UpdateColumnNameMetadata(this, newName);

_name = newName;
}

/// <summary>
/// Updates the name of this column.
/// </summary>
/// <param name="newName">The new name.</param>
/// <param name="dataFrame">Ignored (for backward compatibility)</param>
[Obsolete]
public void SetName(string newName, DataFrame dataFrame) => SetName(newName);

/// <summary>
/// The type of data this column holds.
/// </summary>
Expand Down
23 changes: 21 additions & 2 deletions src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,23 @@ internal IReadOnlyList<string> GetColumnNames()
return ret;
}

public void RenameColumn(string currentName, string newName)
{
var column = this[currentName];
column.SetName(newName);
}

[Obsolete]
public void SetColumnName(DataFrameColumn column, string newName)
{
column.SetName(newName);
}

//Updates column's metadata (is used as a callback from Column class)
internal void UpdateColumnNameMetadata(DataFrameColumn column, string newName)
{
string currentName = column.Name;
int currentIndex = _columnNameToIndexDictionary[currentName];
column.SetName(newName);
_columnNameToIndexDictionary.Remove(currentName);
_columnNameToIndexDictionary.Add(newName, currentIndex);
ColumnsChanged?.Invoke();
Expand Down Expand Up @@ -75,6 +87,8 @@ protected override void InsertItem(int columnIndex, DataFrameColumn column)
throw new ArgumentException(string.Format(Strings.DuplicateColumnName, column.Name), nameof(column));
}

column.AddOwner(this);

RowCount = column.Length;

_columnNameToIndexDictionary[column.Name] = columnIndex;
Expand All @@ -98,9 +112,13 @@ protected override void SetItem(int columnIndex, DataFrameColumn column)
{
throw new ArgumentException(string.Format(Strings.DuplicateColumnName, column.Name), nameof(column));
}

_columnNameToIndexDictionary.Remove(this[columnIndex].Name);
_columnNameToIndexDictionary[column.Name] = columnIndex;

this[columnIndex].RemoveOwner(this);
base.SetItem(columnIndex, column);

ColumnsChanged?.Invoke();
}

Expand All @@ -111,6 +129,8 @@ protected override void RemoveItem(int columnIndex)
{
_columnNameToIndexDictionary[this[i].Name]--;
}

this[columnIndex].RemoveOwner(this);
base.RemoveItem(columnIndex);

//Reset RowCount if the last column was removed and dataframe is empty
Expand Down Expand Up @@ -474,6 +494,5 @@ public UInt16DataFrameColumn GetUInt16Column(string name)

throw new ArgumentException(string.Format(Strings.BadColumnCast, column.DataType, typeof(UInt16)));
}

}
}
38 changes: 38 additions & 0 deletions test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,44 @@ public void ClearColumnsTests()
Assert.Equal(0, dataFrame.Columns.LongCount());
}

[Fact]
public void RenameColumnWithSetNameTests()
{
StringDataFrameColumn city = new StringDataFrameColumn("City", new string[] { "London", "Berlin" });
PrimitiveDataFrameColumn<int> temp = new PrimitiveDataFrameColumn<int>("Temperature", new int[] { 12, 13 });

DataFrame dataframe = new DataFrame(city, temp);

// Change the name of the column:
dataframe["City"].SetName("Town");
var renamedColumn = dataframe["Town"];

Assert.Throws<ArgumentException>(() => dataframe["City"]);

Assert.NotNull(renamedColumn);
Assert.Equal("Town", renamedColumn.Name);
Assert.True(ReferenceEquals(city, renamedColumn));
}

[Fact]
public void RenameColumnWithRenameColumnTests()
{
StringDataFrameColumn city = new StringDataFrameColumn("City", new string[] { "London", "Berlin" });
PrimitiveDataFrameColumn<int> temp = new PrimitiveDataFrameColumn<int>("Temperature", new int[] { 12, 13 });

DataFrame dataframe = new DataFrame(city, temp);

// Change the name of the column:
dataframe.Columns.RenameColumn("City", "Town");
var renamedColumn = dataframe["Town"];

Assert.Throws<ArgumentException>(() => dataframe["City"]);

Assert.NotNull(renamedColumn);
Assert.Equal("Town", renamedColumn.Name);
Assert.True(ReferenceEquals(city, renamedColumn));
}

[Fact]
public void TestBinaryOperations()
{
Expand Down