Skip to content

Commit dcc4e0e

Browse files
authored
Fix WordHashBagTransform bug (#1511)
* Fix WordHashBagTransform bug
* Add baseline files
1 parent 2eea47f commit dcc4e0e

File tree

4 files changed

+318
-3
lines changed

4 files changed

+318
-3
lines changed

src/Microsoft.ML.Transforms/Text/WordHashBagTransform.cs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,7 @@ public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataV
103103
var uniqueSourceNames = NgramExtractionUtils.GenerateUniqueSourceNames(h, args.Column, view.Schema);
104104
Contracts.Assert(uniqueSourceNames.Length == args.Column.Length);
105105

106-
var tokenizeColumns = new WordTokenizeTransform.ColumnInfo[args.Column.Length];
106+
var tokenizeColumns = new List<WordTokenizeTransform.ColumnInfo>();
107107
var extractorCols = new NgramHashExtractorTransform.Column[args.Column.Length];
108108
var colCount = args.Column.Length;
109109
List<string> tmpColNames = new List<string>();
@@ -114,7 +114,7 @@ public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataV
114114
var curTmpNames = new string[srcCount];
115115
Contracts.Assert(uniqueSourceNames[iinfo].Length == args.Column[iinfo].Source.Length);
116116
for (int isrc = 0; isrc < srcCount; isrc++)
117-
tokenizeColumns[iinfo] = new WordTokenizeTransform.ColumnInfo(args.Column[iinfo].Source[isrc], curTmpNames[isrc] = uniqueSourceNames[iinfo][isrc]);
117+
tokenizeColumns.Add(new WordTokenizeTransform.ColumnInfo(args.Column[iinfo].Source[isrc], curTmpNames[isrc] = uniqueSourceNames[iinfo][isrc]));
118118

119119
tmpColNames.AddRange(curTmpNames);
120120
extractorCols[iinfo] =
@@ -133,7 +133,7 @@ public static IDataTransform Create(IHostEnvironment env, Arguments args, IDataV
133133
};
134134
}
135135

136-
view = new WordTokenizingEstimator(env, tokenizeColumns).Fit(view).Transform(view);
136+
view = new WordTokenizingEstimator(env, tokenizeColumns.ToArray()).Fit(view).Transform(view);
137137

138138
var featurizeArgs =
139139
new NgramHashExtractorTransform.Arguments

0 commit comments

Comments
 (0)