Skip to content

Commit b82f4c6

Browse files
author
Pete Luferenko
committed
Saving/loading data
1 parent bf097ab commit b82f4c6

File tree

2 files changed

+42
-1
lines changed

2 files changed

+42
-1
lines changed

test/Microsoft.ML.Tests/Scenarios/Api/FileBasedSavingOfData.cs

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,5 +44,38 @@ void FileBasedSavingOfData()
4444
DeleteOutputPath("i.idv");
4545
}
4646
}
47+
48+
/// <summary>
/// File-based saving of data: Come up with transform pipeline. Transform training and
/// test data, and save the featurized data to some file, using the .idv format.
/// Train and evaluate multiple models over that pre-featurized data. (Useful for
/// sweeping scenarios, where you are training many times on the same data,
/// and don't necessarily want to transform it every single time.)
///
/// As written, this test featurizes only the training data, saves it to "i.idv",
/// loads it back and trains a single model on the loaded data. The test data path
/// is resolved but not used, and no evaluation is performed here.
/// </summary>
[Fact]
void New_FileBasedSavingOfData()
{
    var dataPath = GetDataPath(SentimentDataPath);
    var testDataPath = GetDataPath(SentimentTestPath);

    using (var env = new TlcEnvironment(seed: 1, conc: 1))
    {
        // Pipeline: the text loader followed by the text transform.
        var textLoader = new MyTextLoader(env, MakeSentimentTextLoaderArgs());
        var pipeline = textLoader.Append(new MyTextTransform(env, MakeSentimentTextTransformArgs()));

        // Fit the pipeline on the training file, then read the same file through it.
        var fittedPipeline = pipeline.Fit(new MultiFileSource(dataPath));
        var trainData = fittedPipeline.Read(new MultiFileSource(dataPath));

        // Save the featurized training data to the output file.
        using (var outputFile = env.CreateOutputFile("i.idv"))
        {
            trainData.SaveAsBinary(env, outputFile.CreateWriteStream());
        }

        var sdcaArgs = new LinearClassificationTrainer.Arguments { NumThreads = 1 };
        var trainer = new MySdca(env, sdcaArgs, "Features", "Label");

        // Load the saved data back from the file.
        var loaderArgs = new BinaryLoader.Arguments();
        var loadedTrainData = new BinaryLoader(env, loaderArgs, new MultiFileSource("i.idv"));

        // Train.
        var model = trainer.Train(loadedTrainData);
        DeleteOutputPath("i.idv");
    }
}
4780
}
4881
}

test/Microsoft.ML.Tests/Scenarios/Api/Wrappers.cs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -374,5 +374,13 @@ public BinaryClassificationMetrics Evaluate(IDataView data, string labelColumn,
374374
}
375375
}
376376

377-
377+
/// <summary>
/// Extension helpers for the API scenario tests.
/// </summary>
public static class MyHelperExtensions
{
    /// <summary>
    /// Saves <paramref name="data"/> to <paramref name="stream"/> through a
    /// <see cref="BinarySaver"/> created with default arguments.
    /// </summary>
    /// <param name="data">The data view to save.</param>
    /// <param name="env">Host environment used to create the saver and to start the "SaveData" channel.</param>
    /// <param name="stream">Destination stream handed to the saver.</param>
    public static void SaveAsBinary(this IDataView data, IHostEnvironment env, Stream stream)
    {
        var saverArgs = new BinarySaver.Arguments();
        var binarySaver = new BinarySaver(env, saverArgs);

        // The channel is scoped to the save call only.
        using (var channel = env.Start("SaveData"))
        {
            DataSaverUtils.SaveDataView(channel, binarySaver, data, stream);
        }
    }
}
378386
}

0 commit comments

Comments
 (0)