From 535ac07f3191bff9d192b1b64f8784bea7db0ab9 Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Sun, 14 May 2023 10:35:30 +0300 Subject: [PATCH 1/3] Fix the behavior or column SetName method --- src/Microsoft.Data.Analysis/DataFrame.Join.cs | 2 +- src/Microsoft.Data.Analysis/DataFrame.cs | 4 +- .../DataFrameColumn.cs | 42 +++++++++++++++---- .../DataFrameColumnCollection.cs | 24 ++++++++++- .../DataFrameTests.cs | 38 +++++++++++++++++ 5 files changed, 98 insertions(+), 12 deletions(-) diff --git a/src/Microsoft.Data.Analysis/DataFrame.Join.cs b/src/Microsoft.Data.Analysis/DataFrame.Join.cs index 79291a691e..fe4b44e0d5 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.Join.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.Join.cs @@ -30,7 +30,7 @@ private void SetSuffixForDuplicatedColumnNames(DataFrame dataFrame, DataFrameCol { // Pre-existing column. Change name DataFrameColumn existingColumn = dataFrame.Columns[index]; - dataFrame._columnCollection.SetColumnName(existingColumn, existingColumn.Name + leftSuffix); + existingColumn.SetName(existingColumn.Name + leftSuffix); column.SetName(column.Name + rightSuffix); index = dataFrame._columnCollection.IndexOf(column.Name); } diff --git a/src/Microsoft.Data.Analysis/DataFrame.cs b/src/Microsoft.Data.Analysis/DataFrame.cs index 6c24476549..78d453d44a 100644 --- a/src/Microsoft.Data.Analysis/DataFrame.cs +++ b/src/Microsoft.Data.Analysis/DataFrame.cs @@ -301,7 +301,7 @@ public DataFrame AddPrefix(string prefix, bool inPlace = false) for (int i = 0; i < df.Columns.Count; i++) { DataFrameColumn column = df.Columns[i]; - df._columnCollection.SetColumnName(column, prefix + column.Name); + column.SetName(prefix + column.Name); df.OnColumnsChanged(); } return df; @@ -316,7 +316,7 @@ public DataFrame AddSuffix(string suffix, bool inPlace = false) for (int i = 0; i < df.Columns.Count; i++) { DataFrameColumn column = df.Columns[i]; - df._columnCollection.SetColumnName(column, column.Name + suffix); + column.SetName(column.Name + suffix); df.OnColumnsChanged(); } return df; diff --git a/src/Microsoft.Data.Analysis/DataFrameColumn.cs b/src/Microsoft.Data.Analysis/DataFrameColumn.cs index 636d7a8db1..d978dc69c3 100644 --- a/src/Microsoft.Data.Analysis/DataFrameColumn.cs +++ b/src/Microsoft.Data.Analysis/DataFrameColumn.cs @@ -84,6 +84,26 @@ protected set } } + // List of ColumnCollections that owns the column + // Current API allows column to be added into multiple dataframes, that's why the list is needed + private readonly List _ownerColumnCollections = new(); + + internal void AddOwner(DataFrameColumnCollection columCollection) + { + if (!_ownerColumnCollections.Contains(columCollection)) + { + _ownerColumnCollections.Add(columCollection); + } + } + + internal void RemoveOwner(DataFrameColumnCollection columCollection) + { + if (_ownerColumnCollections.Contains(columCollection)) + { + _ownerColumnCollections.Remove(columCollection); + } + } + /// /// The number of values in this column. /// @@ -95,24 +115,30 @@ public abstract long NullCount private string _name; /// - /// The name of this column. + /// The column name. /// public string Name => _name; /// - /// Updates the name of this column. + /// Updates the column name. /// /// The new name. - /// If passed in, update the column name in - public void SetName(string newName, DataFrame dataFrame = null) + public void SetName(string newName) { - if (!(dataFrame is null)) - { - dataFrame.Columns.SetColumnName(this, newName); - } + foreach (var owner in _ownerColumnCollections) + owner.UpdateColumnNameMetadata(this, newName); + _name = newName; } + /// + /// Updates the name of this column. + /// + /// The new name. + /// Ignored (for backward compatibility) + [Obsolete] + public void SetName(string newName, DataFrame dataFrame) => SetName(newName); + /// /// The type of data this column holds. /// diff --git a/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs b/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs index 0701588f3f..3c18a1f8a6 100644 --- a/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs +++ b/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs @@ -41,11 +41,23 @@ internal IReadOnlyList GetColumnNames() return ret; } + public void RenameColumn(string currentName, string newName) + { + var column = this[currentName]; + column.SetName(newName); + } + + [Obsolete] public void SetColumnName(DataFrameColumn column, string newName) + { + column.SetName(newName); + } + + //Updates column's metadata (is used as a callback from Column class) + internal void UpdateColumnNameMetadata(DataFrameColumn column, string newName) { string currentName = column.Name; int currentIndex = _columnNameToIndexDictionary[currentName]; - column.SetName(newName); _columnNames[currentIndex] = newName; _columnNameToIndexDictionary.Remove(currentName); _columnNameToIndexDictionary.Add(newName, currentIndex); @@ -76,6 +88,9 @@ protected override void InsertItem(int columnIndex, DataFrameColumn column) { throw new ArgumentException(string.Format(Strings.DuplicateColumnName, column.Name), nameof(column)); } + + column.AddOwner(this); + RowCount = column.Length; _columnNames.Insert(columnIndex, column.Name); _columnNameToIndexDictionary[column.Name] = columnIndex; @@ -99,10 +114,14 @@ protected override void SetItem(int columnIndex, DataFrameColumn column) { throw new ArgumentException(string.Format(Strings.DuplicateColumnName, column.Name), nameof(column)); } + _columnNameToIndexDictionary.Remove(_columnNames[columnIndex]); _columnNames[columnIndex] = column.Name; _columnNameToIndexDictionary[column.Name] = columnIndex; + + this[columnIndex].RemoveOwner(this); base.SetItem(columnIndex, column); + ColumnsChanged?.Invoke(); } @@ -114,7 +133,10 @@ protected override void RemoveItem(int columnIndex) _columnNameToIndexDictionary[_columnNames[i]]--; } _columnNames.RemoveAt(columnIndex); + + this[columnIndex].RemoveOwner(this); base.RemoveItem(columnIndex); + ColumnsChanged?.Invoke(); } diff --git a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs index 1695ded747..7960dbf73e 100644 --- a/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs +++ b/test/Microsoft.Data.Analysis.Tests/DataFrameTests.cs @@ -323,6 +323,44 @@ public void InsertAndRemoveColumnTests() Assert.True(ReferenceEquals(charColumn, charColumn_1)); } + [Fact] + public void RenameColumnWithSetNameTests() + { + StringDataFrameColumn city = new StringDataFrameColumn("City", new string[] { "London", "Berlin" }); + PrimitiveDataFrameColumn temp = new PrimitiveDataFrameColumn("Temperature", new int[] { 12, 13 }); + + DataFrame dataframe = new DataFrame(city, temp); + + // Change the name of the column: + dataframe["City"].SetName("Town"); + var renamedColumn = dataframe["Town"]; + + Assert.Throws(() => dataframe["City"]); + + Assert.NotNull(renamedColumn); + Assert.Equal("Town", renamedColumn.Name); + Assert.True(ReferenceEquals(city, renamedColumn)); + } + + [Fact] + public void RenameColumnWithRenameColumnTests() + { + StringDataFrameColumn city = new StringDataFrameColumn("City", new string[] { "London", "Berlin" }); + PrimitiveDataFrameColumn temp = new PrimitiveDataFrameColumn("Temperature", new int[] { 12, 13 }); + + DataFrame dataframe = new DataFrame(city, temp); + + // Change the name of the column: + dataframe.Columns.RenameColumn("City", "Town"); + var renamedColumn = dataframe["Town"]; + + Assert.Throws(() => dataframe["City"]); + + Assert.NotNull(renamedColumn); + Assert.Equal("Town", renamedColumn.Name); + Assert.True(ReferenceEquals(city, renamedColumn)); + } + [Fact] public void TestBinaryOperations() { From f52eca04d79a6c9733d435412d94eed140d321ee Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Wed, 14 Jun 2023 11:08:30 +0300 Subject: [PATCH 2/3] Fix stack overflow exception --- src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs b/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs index 93638799cb..9dbe4ea840 100644 --- a/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs +++ b/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs @@ -55,9 +55,6 @@ internal void UpdateColumnNameMetadata(DataFrameColumn column, string newName) { string currentName = column.Name; int currentIndex = _columnNameToIndexDictionary[currentName]; - - column.SetName(newName); - _columnNameToIndexDictionary.Remove(currentName); _columnNameToIndexDictionary.Add(newName, currentIndex); ColumnsChanged?.Invoke(); From 34896325b61049535da1a22537309cda4afe6ba2 Mon Sep 17 00:00:00 2001 From: Aleksei Smirnov Date: Wed, 14 Jun 2023 11:28:11 +0300 Subject: [PATCH 3/3] Fix merge issues --- src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs b/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs index 9dbe4ea840..6dce481588 100644 --- a/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs +++ b/src/Microsoft.Data.Analysis/DataFrameColumnCollection.cs @@ -130,6 +130,7 @@ protected override void RemoveItem(int columnIndex) _columnNameToIndexDictionary[this[i].Name]--; } + this[columnIndex].RemoveOwner(this); base.RemoveItem(columnIndex); //Reset RowCount if the last column was removed and dataframe is empty @@ -493,6 +494,5 @@ public UInt16DataFrameColumn GetUInt16Column(string name) throw new ArgumentException(string.Format(Strings.BadColumnCast, column.DataType, typeof(UInt16))); } - } }