Skip to content

Commit c696e09

Browse files
authored
Add support for string vectors (#6628)
1 parent fc67fd1 commit c696e09

File tree

3 files changed

+11
-1
lines changed

3 files changed

+11
-1
lines changed

src/Microsoft.Data.Analysis/IDataView.Extension.cs

+4
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,10 @@ private static DataFrameColumn GetVectorDataFrame(VectorDataViewType vectorType,
204204
{
205205
return new VBufferDataFrameColumn<decimal>(name);
206206
}
207+
else if (itemType.RawType == typeof(ReadOnlyMemory<char>))
208+
{
209+
return new VBufferDataFrameColumn<ReadOnlyMemory<char>>(name);
210+
}
207211

208212
throw new NotSupportedException(String.Format(Microsoft.Data.Strings.VectorSubTypeNotSupported, itemType.ToString()));
209213
}

src/Microsoft.Data.Analysis/VBufferDataFrameColumn.cs

+4
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,10 @@ private static VectorDataViewType GetDataViewType()
390390
{
391391
return new VectorDataViewType(NumberDataViewType.Double);
392392
}
393+
else if (typeof(T) == typeof(ReadOnlyMemory<char>))
394+
{
395+
return new VectorDataViewType(TextDataViewType.Instance);
396+
}
393397

394398
throw new NotSupportedException();
395399
}

test/Microsoft.Data.Analysis.Tests/DataFrameIDataViewTests.cs

+3 -1
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,7 @@ public void TestDataFrameFromIDataView_VBufferType()
461461
ushortFeatures = new ushort[] {0, 0},
462462
uintFeatures = new uint[] {0, 0},
463463
ulongFeatures = new ulong[] {0, 0},
464+
stringFeatures = new string[]{ "A", "B"},
464465
},
465466
new {
466467
boolFeature = new bool[] {false, false},
@@ -474,13 +475,14 @@ public void TestDataFrameFromIDataView_VBufferType()
474475
ushortFeatures = new ushort[] {0, 0},
475476
uintFeatures = new uint[] {0, 0},
476477
ulongFeatures = new ulong[] {0, 0},
478+
stringFeatures = new string[]{ "A", "B"},
477479
}
478480
};
479481

480482
var data = mlContext.Data.LoadFromEnumerable(inputData);
481483
var df = data.ToDataFrame();
482484

483-
Assert.Equal(11, df.Columns.Count);
485+
Assert.Equal(12, df.Columns.Count);
484486
Assert.Equal(2, df.Rows.Count);
485487
}
486488
}

0 commit comments

Comments
 (0)