Skip to content

Commit f40fb02

Browse files
authored
Sample for IID spike and changepoint detection using time series stateful prediction engine. (#1762)
* Sample for IID spike and changepoint detection using time series stateful prediction engine.
1 parent d6ad1b4 commit f40fb02

File tree

7 files changed

+245
-18
lines changed

7 files changed

+245
-18
lines changed

docs/samples/Microsoft.ML.Samples/Dynamic/IidChangePointDetectorTransform.cs

Lines changed: 122 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@
44
using Microsoft.ML.Runtime.Data;
55
using Microsoft.ML.Runtime.Api;
66
using Microsoft.ML.Runtime.TimeSeriesProcessing;
7+
using Microsoft.ML.Core.Data;
8+
using Microsoft.ML.TimeSeries;
9+
using System.IO;
10+
using Microsoft.ML.Data;
711

812
namespace Microsoft.ML.Samples.Dynamic
913
{
@@ -34,26 +38,26 @@ public static void IidChangePointDetectorTransform()
3438
var ml = new MLContext();
3539

3640
// Generate sample series data with a change
37-
const int size = 16;
38-
var data = new List<IidChangePointData>(size);
39-
for (int i = 0; i < size / 2; i++)
41+
const int Size = 16;
42+
var data = new List<IidChangePointData>(Size);
43+
for (int i = 0; i < Size / 2; i++)
4044
data.Add(new IidChangePointData(5));
4145
// This is a change point
42-
for (int i = 0; i < size / 2; i++)
46+
for (int i = 0; i < Size / 2; i++)
4347
data.Add(new IidChangePointData(7));
4448

4549
// Convert data to IDataView.
4650
var dataView = ml.CreateStreamingDataView(data);
4751

4852
// Setup IidSpikeDetector arguments
49-
string outputColumnName = "Prediction";
50-
string inputColumnName = "Value";
53+
string outputColumnName = nameof(ChangePointPrediction.Prediction);
54+
string inputColumnName = nameof(IidChangePointData.Value);
5155
var args = new IidChangePointDetector.Arguments()
5256
{
5357
Source = inputColumnName,
5458
Name = outputColumnName,
5559
Confidence = 95, // The confidence for spike detection in the range [0, 100]
56-
ChangeHistoryLength = size / 4, // The length of the sliding window on p-values for computing the martingale score.
60+
ChangeHistoryLength = Size / 4, // The length of the sliding window on p-values for computing the martingale score.
5761
};
5862

5963
// The transformed data.
@@ -88,5 +92,116 @@ public static void IidChangePointDetectorTransform()
8892
// 7 0 7.00 0.50 0.00
8993
// 7 0 7.00 0.50 0.00
9094
}
95+
96+
// This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot).
97+
// IidChangePointDetector is applied then to identify points where data distribution changed using time series
98+
// prediction engine. The engine is checkpointed and then loaded back from disk into memory and used for prediction.
99+
public static void IidChangePointDetectorPrediction()
100+
{
101+
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
102+
// as well as the source of randomness.
103+
var ml = new MLContext();
104+
105+
// Generate sample series data with a change
106+
const int Size = 16;
107+
var data = new List<IidChangePointData>(Size);
108+
for (int i = 0; i < Size / 2; i++)
109+
data.Add(new IidChangePointData(5));
110+
// This is a change point
111+
for (int i = 0; i < Size / 2; i++)
112+
data.Add(new IidChangePointData(7));
113+
114+
// Convert data to IDataView.
115+
var dataView = ml.CreateStreamingDataView(data);
116+
117+
// Setup IidSpikeDetector arguments
118+
string outputColumnName = nameof(ChangePointPrediction.Prediction);
119+
string inputColumnName = nameof(IidChangePointData.Value);
120+
var args = new IidChangePointDetector.Arguments()
121+
{
122+
Source = inputColumnName,
123+
Name = outputColumnName,
124+
Confidence = 95, // The confidence for spike detection in the range [0, 100]
125+
ChangeHistoryLength = Size / 4, // The length of the sliding window on p-values for computing the martingale score.
126+
};
127+
128+
// Time Series model.
129+
ITransformer model = new IidChangePointEstimator(ml, args).Fit(dataView);
130+
131+
// Create a time series prediction engine from the model.
132+
var engine = model.CreateTimeSeriesPredictionFunction<IidChangePointData, ChangePointPrediction>(ml);
133+
for(int index = 0; index < 8; index++)
134+
{
135+
// Anomaly change point detection.
136+
var prediction = engine.Predict(new IidChangePointData(5));
137+
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", 5, prediction.Prediction[0],
138+
prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]);
139+
}
140+
141+
// Change point
142+
var changePointPrediction = engine.Predict(new IidChangePointData(7));
143+
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", 7, changePointPrediction.Prediction[0],
144+
changePointPrediction.Prediction[1], changePointPrediction.Prediction[2], changePointPrediction.Prediction[3]);
145+
146+
// Checkpoint the model.
147+
var modelPath = "temp.zip";
148+
engine.CheckPoint(ml, modelPath);
149+
150+
// Reference to current time series engine because in the next step "engine" will point to the
151+
// checkpointed model being loaded from disk.
152+
var timeseries1 = engine;
153+
154+
// Load the model.
155+
using (var file = File.OpenRead(modelPath))
156+
model = TransformerChain.LoadFrom(ml, file);
157+
158+
// Create a time series prediction engine from the checkpointed model.
159+
engine = model.CreateTimeSeriesPredictionFunction<IidChangePointData, ChangePointPrediction>(ml);
160+
for (int index = 0; index < 8; index++)
161+
{
162+
// Anomaly change point detection.
163+
var prediction = engine.Predict(new IidChangePointData(7));
164+
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", 7, prediction.Prediction[0],
165+
prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]);
166+
}
167+
168+
// Prediction from the original time series engine should match the prediction from
169+
// check pointed model.
170+
engine = timeseries1;
171+
for (int index = 0; index < 8; index++)
172+
{
173+
// Anomaly change point detection.
174+
var prediction = engine.Predict(new IidChangePointData(7));
175+
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", 7, prediction.Prediction[0],
176+
prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]);
177+
}
178+
179+
// Data Alert Score P-Value Martingale value
180+
// 5 0 5.00 0.50 0.00 <-- Time Series 1.
181+
// 5 0 5.00 0.50 0.00
182+
// 5 0 5.00 0.50 0.00
183+
// 5 0 5.00 0.50 0.00
184+
// 5 0 5.00 0.50 0.00
185+
// 5 0 5.00 0.50 0.00
186+
// 5 0 5.00 0.50 0.00
187+
// 5 0 5.00 0.50 0.00
188+
// 7 1 7.00 0.00 10298.67 <-- alert is on, predicted changepoint (and model is checkpointed).
189+
190+
// 7 0 7.00 0.13 33950.16 <-- Time Series 2 : Model loaded back from disk and prediction is made.
191+
// 7 0 7.00 0.26 60866.34
192+
// 7 0 7.00 0.38 78362.04
193+
// 7 0 7.00 0.50 0.01
194+
// 7 0 7.00 0.50 0.00
195+
// 7 0 7.00 0.50 0.00
196+
// 7 0 7.00 0.50 0.00
197+
198+
// 7 0 7.00 0.13 33950.16 <-- Time Series 1 and prediction is made.
199+
// 7 0 7.00 0.26 60866.34
200+
// 7 0 7.00 0.38 78362.04
201+
// 7 0 7.00 0.50 0.01
202+
// 7 0 7.00 0.50 0.00
203+
// 7 0 7.00 0.50 0.00
204+
// 7 0 7.00 0.50 0.00
205+
}
91206
}
92207
}

docs/samples/Microsoft.ML.Samples/Dynamic/IidSpikeDetectorTransform.cs

Lines changed: 89 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
using System;
2+
using System.IO;
23
using System.Linq;
34
using System.Collections.Generic;
5+
using Microsoft.ML.Data;
46
using Microsoft.ML.Runtime.Data;
57
using Microsoft.ML.Runtime.Api;
68
using Microsoft.ML.Runtime.TimeSeriesProcessing;
9+
using Microsoft.ML.Core.Data;
10+
using Microsoft.ML.TimeSeries;
711

812
namespace Microsoft.ML.Samples.Dynamic
913
{
@@ -34,27 +38,27 @@ public static void IidSpikeDetectorTransform()
3438
var ml = new MLContext();
3539

3640
// Generate sample series data with a spike
37-
const int size = 10;
38-
var data = new List<IidSpikeData>(size);
39-
for (int i = 0; i < size / 2; i++)
41+
const int Size = 10;
42+
var data = new List<IidSpikeData>(Size);
43+
for (int i = 0; i < Size / 2; i++)
4044
data.Add(new IidSpikeData(5));
4145
// This is a spike
4246
data.Add(new IidSpikeData(10));
43-
for (int i = 0; i < size / 2; i++)
47+
for (int i = 0; i < Size / 2; i++)
4448
data.Add(new IidSpikeData(5));
4549

4650
// Convert data to IDataView.
4751
var dataView = ml.CreateStreamingDataView(data);
4852

4953
// Setup IidSpikeDetector arguments
50-
string outputColumnName = "Prediction";
51-
string inputColumnName = "Value";
54+
string outputColumnName = nameof(IidSpikePrediction.Prediction);
55+
string inputColumnName = nameof(IidSpikeData.Value);
5256
var args = new IidSpikeDetector.Arguments()
5357
{
5458
Source = inputColumnName,
5559
Name = outputColumnName,
5660
Confidence = 95, // The confidence for spike detection in the range [0, 100]
57-
PvalueHistoryLength = size / 4 // The size of the sliding window for computing the p-value
61+
PvalueHistoryLength = Size / 4 // The size of the sliding window for computing the p-value; shorter windows are more sensitive to spikes.
5862
};
5963

6064
// The transformed data.
@@ -83,5 +87,83 @@ public static void IidSpikeDetectorTransform()
8387
// 0 5.00 0.50
8488
// 0 5.00 0.50
8589
}
90+
91+
public static void IidSpikeDetectorPrediction()
92+
{
93+
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
94+
// as well as the source of randomness.
95+
var ml = new MLContext();
96+
97+
// Generate sample series data with a spike
98+
const int Size = 10;
99+
var data = new List<IidSpikeData>(Size);
100+
for (int i = 0; i < Size / 2; i++)
101+
data.Add(new IidSpikeData(5));
102+
// This is a spike
103+
data.Add(new IidSpikeData(10));
104+
for (int i = 0; i < Size / 2; i++)
105+
data.Add(new IidSpikeData(5));
106+
107+
// Convert data to IDataView.
108+
var dataView = ml.CreateStreamingDataView(data);
109+
110+
// Setup IidSpikeDetector arguments
111+
string outputColumnName = nameof(IidSpikePrediction.Prediction);
112+
string inputColumnName = nameof(IidSpikeData.Value);
113+
var args = new IidSpikeDetector.Arguments()
114+
{
115+
Source = inputColumnName,
116+
Name = outputColumnName,
117+
Confidence = 95, // The confidence for spike detection in the range [0, 100]
118+
PvalueHistoryLength = Size / 4 // The size of the sliding window for computing the p-value; shorter windows are more sensitive to spikes.
119+
};
120+
121+
// The transformed model.
122+
ITransformer model = new IidSpikeEstimator(ml, args).Fit(dataView);
123+
124+
// Create a time series prediction engine from the model.
125+
var engine = model.CreateTimeSeriesPredictionFunction<IidSpikeData, IidSpikePrediction>(ml);
126+
for (int index = 0; index < 5; index++)
127+
{
128+
// Anomaly spike detection.
129+
var prediction = engine.Predict(new IidSpikeData(5));
130+
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", 5, prediction.Prediction[0],
131+
prediction.Prediction[1], prediction.Prediction[2]);
132+
}
133+
134+
// Spike.
135+
var spikePrediction = engine.Predict(new IidSpikeData(10));
136+
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", 10, spikePrediction.Prediction[0],
137+
spikePrediction.Prediction[1], spikePrediction.Prediction[2]);
138+
139+
// Checkpoint the model.
140+
var modelPath = "temp.zip";
141+
engine.CheckPoint(ml, modelPath);
142+
143+
// Load the model.
144+
using (var file = File.OpenRead(modelPath))
145+
model = TransformerChain.LoadFrom(ml, file);
146+
147+
for (int index = 0; index < 5; index++)
148+
{
149+
// Anomaly spike detection.
150+
var prediction = engine.Predict(new IidSpikeData(5));
151+
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", 5, prediction.Prediction[0],
152+
prediction.Prediction[1], prediction.Prediction[2]);
153+
}
154+
155+
// Data Alert Score P-Value
156+
// 5 0 5.00 0.50
157+
// 5 0 5.00 0.50
158+
// 5 0 5.00 0.50
159+
// 5 0 5.00 0.50
160+
// 5 0 5.00 0.50
161+
// 10 1 10.00 0.00 <-- alert is on, predicted spike (check-point model)
162+
// 5 0 5.00 0.26 <-- load model from disk.
163+
// 5 0 5.00 0.26
164+
// 5 0 5.00 0.50
165+
// 5 0 5.00 0.50
166+
// 5 0 5.00 0.50
167+
}
86168
}
87169
}

src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,8 @@ public sealed class IidChangePointEstimator : TrivialEstimator<IidChangePointDet
212212
/// </summary>
213213
/// <param name="env">Host Environment.</param>
214214
/// <param name="inputColumn">Name of the input column.</param>
215-
/// <param name="outputColumn">Name of the output column.</param>
215+
/// <param name="outputColumn">Name of the output column. Column is a vector of type double and size 4.
216+
/// The vector contains Alert, Raw Score, P-Value and Martingale score as first four values.</param>
216217
/// <param name="confidence">The confidence for change point detection in the range [0, 100].</param>
217218
/// <param name="changeHistoryLength">The length of the sliding window on p-values for computing the martingale score.</param>
218219
/// <param name="martingale">The martingale used for scoring.</param>

src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,8 @@ public sealed class IidSpikeEstimator : TrivialEstimator<IidSpikeDetector>
191191
/// </summary>
192192
/// <param name="env">Host Environment.</param>
193193
/// <param name="inputColumn">Name of the input column.</param>
194-
/// <param name="outputColumn">Name of the output column.</param>
194+
/// <param name="outputColumn">Name of the output column. Column is a vector of type double and size 3.
195+
/// The vector contains Alert, Raw Score, P-Value as first three values.</param>
195196
/// <param name="confidence">The confidence for spike detection in the range [0, 100].</param>
196197
/// <param name="pvalueHistoryLength">The size of the sliding window for computing the p-value.</param>
197198
/// <param name="side">The argument that determines whether to detect positive or negative anomalies, or both.</param>

src/Microsoft.ML.TimeSeries/PredictionFunction.cs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,12 @@ public sealed class TimeSeriesPredictionFunction<TSrc, TDst> : PredictionEngineB
5353
private long _rowPosition;
5454
private ITransformer InputTransformer { get; set; }
5555

56+
/// <summary>
57+
/// Checkpoints <see cref="TimeSeriesPredictionFunction{TSrc, TDst}"/> to disk with the updated
58+
/// state.
59+
/// </summary>
60+
/// <param name="env">Usually <see cref="MLContext"/>.</param>
61+
/// <param name="modelPath">Path to file on disk where the updated model needs to be saved.</param>
5662
public void CheckPoint(IHostEnvironment env, string modelPath)
5763
{
5864
using (var file = File.Create(modelPath))
@@ -246,6 +252,26 @@ public override void Predict(TSrc example, ref TDst prediction)
246252

247253
public static class PredictionFunctionExtensions
248254
{
255+
/// <summary>
256+
/// <see cref="TimeSeriesPredictionFunction{TSrc, TDst}"/> creates a prediction function/engine for a time series pipeline
257+
/// It updates the state of time series model with observations seen at prediction phase and allows checkpointing the model.
258+
/// </summary>
259+
/// <typeparam name="TSrc">Class describing input schema to the model.</typeparam>
260+
/// <typeparam name="TDst">Class describing the output schema of the prediction.</typeparam>
261+
/// <param name="transformer">The time series pipeline in the form of a <see cref="ITransformer"/>.</param>
262+
/// <param name="env">Usually <see cref="MLContext"/></param>
263+
/// <param name="ignoreMissingColumns">To ignore missing columns. Default is false.</param>
264+
/// <param name="inputSchemaDefinition">Input schema definition. Default is null.</param>
265+
/// <param name="outputSchemaDefinition">Output schema definition. Default is null.</param>
266+
/// <p>Example code can be found by searching for <i>TimeSeriesPredictionFunction</i> in <a href='https://github.com/dotnet/machinelearning'>ML.NET.</a></p>
267+
/// <example>
268+
/// <format type="text/markdown">
269+
/// <![CDATA[
270+
/// [!code-csharp[MF](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/IidSpikeDetectorTransform.cs)]
271+
/// [!code-csharp[MF](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/IidChangePointDetectorTransform.cs)]
272+
/// ]]>
273+
/// </format>
274+
/// </example>
249275
public static TimeSeriesPredictionFunction<TSrc, TDst> CreateTimeSeriesPredictionFunction<TSrc, TDst>(this ITransformer transformer, IHostEnvironment env,
250276
bool ignoreMissingColumns = false, SchemaDefinition inputSchemaDefinition = null, SchemaDefinition outputSchemaDefinition = null)
251277
where TSrc : class

src/Microsoft.ML.TimeSeries/SsaChangePointDetector.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,8 @@ public sealed class SsaChangePointEstimator : IEstimator<SsaChangePointDetector>
225225
/// </summary>
226226
/// <param name="env">Host Environment.</param>
227227
/// <param name="inputColumn">Name of the input column.</param>
228-
/// <param name="outputColumn">Name of the output column.</param>
228+
/// <param name="outputColumn">Name of the output column. Column is a vector of type double and size 4.
229+
/// The vector contains Alert, Raw Score, P-Value and Martingale score as first four values.</param>
229230
/// <param name="confidence">The confidence for change point detection in the range [0, 100].</param>
230231
/// <param name="trainingWindowSize">The number of points from the beginning of the sequence used for training.</param>
231232
/// <param name="changeHistoryLength">The size of the sliding window for computing the p-value.</param>

src/Microsoft.ML.TimeSeries/SsaSpikeDetector.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,8 @@ public sealed class SsaSpikeEstimator : IEstimator<SsaSpikeDetector>
206206
/// </summary>
207207
/// <param name="env">Host Environment.</param>
208208
/// <param name="inputColumn">Name of the input column.</param>
209-
/// <param name="outputColumn">Name of the output column.</param>
209+
/// <param name="outputColumn">Name of the output column. Column is a vector of type double and size 3.
210+
/// The vector contains Alert, Raw Score, P-Value as first three values.</param>
210211
/// <param name="confidence">The confidence for spike detection in the range [0, 100].</param>
211212
/// <param name="pvalueHistoryLength">The size of the sliding window for computing the p-value.</param>
212213
/// <param name="trainingWindowSize">The number of points from the beginning of the sequence used for training.</param>

0 commit comments

Comments
 (0)