Skip to content

Commit 8a9b016

Browse files
authored
Nadrop (dotnet#1810)
* Fix MissingValueDroppingTransformer bug
* Add unit test
* Add baseline
1 parent a06a0b7 commit 8a9b016

File tree

3 files changed: +47 −1 lines changed

src/Microsoft.ML.Transforms/MissingValueDroppingTransformer.cs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -204,7 +204,7 @@ protected override Delegate MakeGetter(IRow input, int iinfo, Func<int, bool> ac
204204

205205
private ValueGetter<VBuffer<TDst>> MakeVecGetter<TDst>(IRow input, int iinfo)
206206
{
207-
var srcGetter = input.GetGetter<VBuffer<TDst>>(iinfo);
207+
var srcGetter = input.GetGetter<VBuffer<TDst>>(_srcCols[iinfo]);
208208
var buffer = default(VBuffer<TDst>);
209209
var isNA = (InPredicate<TDst>)_isNAs[iinfo];
210210
var def = default(TDst);
Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,17 @@
1+
#@ TextLoader{
2+
#@ header+
3+
#@ sep=tab
4+
#@ col=Num:R4:0-1
5+
#@ col=Sep:TX:2
6+
#@ col=NumNAsDropped:R4:3-**
7+
#@ col={name=Sep2 type=TX src={ min=-1}}
8+
#@ col={name=Text type=TX src={ min=-1 var=+}}
9+
#@ col={name=Sep3 type=TX src={ min=-1}}
10+
#@ col={name=TextNAsDropped type=U4 src={ min=-1 var=+} key=0-3}
11+
#@ }
12+
"" "" Sep Sep2 Sep3
13+
2 0 | 2 0 | Hello World! | 0
14+
3 4 | 3 4 | |
15+
0 ? | 0 | Bye all | 3 1
16+
7 8 | 7 8 | Good bye | 2
17+
? ? | | this is a |

test/Microsoft.ML.TestFramework/DataPipe/TestDataPipe.cs

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -785,6 +785,35 @@ public void SavePipeTokenizerAndStopWords()
785785
Done();
786786
}
787787

788+
/// <summary>
/// Runs a text-loader pipeline containing two NADrop transforms through <c>TestCore</c>:
/// one reading the numeric column <c>Num</c> (source fields 0-1) and one reading
/// <c>Text2</c>, the output of the Term transform applied to the tokenized <c>Text</c> column.
/// The input file is written by the test itself and mixes ordinary values with
/// <c>nan</c>, <c>?</c> and an empty text field.
/// </summary>
[Fact]
789+
public void SavePipeDropNAs()
790+
{
791+
string pathData = DeleteOutputPath("SavePipe", "DropNAs.txt");
// Five comma-separated rows: two numeric fields, a literal "|" field, and a free-text field.
// Rows 3 and 5 carry "nan"/"?" in the numeric fields; row 2 has an empty text field.
792+
File.WriteAllLines(pathData,
793+
new[]
794+
{
795+
"2,0,|,Hello World!",
796+
"3,4,|,",
797+
"0,nan,|,Bye all",
798+
"7,8,|,Good bye",
799+
"?,nan,|,this is a"
800+
});
801+
802+
// NOTE(review): both NADrop entries map a source column to a differently named output
// column (NumNAsDropped:Num, TextNAsDropped:Text2); presumably this is what exercises the
// source-column lookup in MakeVecGetter — confirm against the transformer.
// The Sep/Sep2/Sep3 copies and the final Select fix which columns are kept and in what order.
TestCore(pathData, false,
803+
new[]
804+
{
805+
"loader=Text{header- sep=, col=Num:R4:0-1 col=Sep:TX:2 col=Text:TX:3}",
806+
"xf=NADrop{col=NumNAsDropped:Num}",
807+
"xf=Token{col=Text}",
808+
"xf=Term{col=Text2:Text terms=Hello,all,Good,Bye}",
809+
"xf=NADrop{col=TextNAsDropped:Text2}",
810+
"xf=Copy{col=Sep2:Sep col=Sep3:Sep}",
811+
"xf=Select{keepcol=Num keepcol=Sep keepcol=NumNAsDropped keepcol=Sep2 keepcol=Text keepcol=Sep3 keepcol=TextNAsDropped}"
812+
}, baselineSchema: false, roundTripText: false);
813+
814+
Done();
815+
}
816+
788817
[Fact]
789818
public void TestHashTransformFloat()
790819
{

0 commit comments

Comments
 (0)