-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Sample for IID spike and changepoint detection using time series stateful prediction engine. #1762
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
96aa6ec
d5ba578
93119f9
43a4395
5ed8fbe
ce6e9b1
fdb09cd
0810506
41e4cec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,10 @@ | |
using Microsoft.ML.Runtime.Data; | ||
using Microsoft.ML.Runtime.Api; | ||
using Microsoft.ML.Runtime.TimeSeriesProcessing; | ||
using Microsoft.ML.Core.Data; | ||
using Microsoft.ML.TimeSeries; | ||
using System.IO; | ||
using Microsoft.ML.Data; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic | ||
{ | ||
|
@@ -34,26 +38,26 @@ public static void IidChangePointDetectorTransform() | |
var ml = new MLContext(); | ||
|
||
// Generate sample series data with a change | ||
const int size = 16; | ||
var data = new List<IidChangePointData>(size); | ||
for (int i = 0; i < size / 2; i++) | ||
const int Size = 16; | ||
var data = new List<IidChangePointData>(Size); | ||
for (int i = 0; i < Size / 2; i++) | ||
data.Add(new IidChangePointData(5)); | ||
// This is a change point | ||
for (int i = 0; i < size / 2; i++) | ||
for (int i = 0; i < Size / 2; i++) | ||
data.Add(new IidChangePointData(7)); | ||
|
||
// Convert data to IDataView. | ||
var dataView = ml.CreateStreamingDataView(data); | ||
|
||
// Setup IidSpikeDetector arguments | ||
string outputColumnName = "Prediction"; | ||
string inputColumnName = "Value"; | ||
string outputColumnName = nameof(ChangePointPrediction.Prediction); | ||
string inputColumnName = nameof(IidChangePointData.Value); | ||
var args = new IidChangePointDetector.Arguments() | ||
{ | ||
Source = inputColumnName, | ||
Name = outputColumnName, | ||
Confidence = 95, // The confidence for spike detection in the range [0, 100] | ||
ChangeHistoryLength = size / 4, // The length of the sliding window on p-values for computing the martingale score. | ||
ChangeHistoryLength = Size / 4, // The length of the sliding window on p-values for computing the martingale score. | ||
}; | ||
|
||
// The transformed data. | ||
|
@@ -88,5 +92,116 @@ public static void IidChangePointDetectorTransform() | |
// 7 0 7.00 0.50 0.00 | ||
// 7 0 7.00 0.50 0.00 | ||
} | ||
|
||
// This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot). | ||
// IidChangePointDetector is applied then to identify points where data distribution changed using time series | ||
// prediction engine. The engine is checkpointed and then loaded back from disk into memory and used for prediction. | ||
public static void IidChangePointDetectorPrediction() | ||
{ | ||
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, | ||
// as well as the source of randomness. | ||
var ml = new MLContext(); | ||
|
||
// Generate sample series data with a change | ||
const int Size = 16; | ||
var data = new List<IidChangePointData>(Size); | ||
for (int i = 0; i < Size / 2; i++) | ||
data.Add(new IidChangePointData(5)); | ||
// This is a change point | ||
for (int i = 0; i < Size / 2; i++) | ||
data.Add(new IidChangePointData(7)); | ||
|
||
// Convert data to IDataView. | ||
var dataView = ml.CreateStreamingDataView(data); | ||
|
||
// Setup IidSpikeDetector arguments | ||
string outputColumnName = nameof(ChangePointPrediction.Prediction); | ||
string inputColumnName = nameof(IidChangePointData.Value); | ||
var args = new IidChangePointDetector.Arguments() | ||
{ | ||
Source = inputColumnName, | ||
Name = outputColumnName, | ||
Confidence = 95, // The confidence for spike detection in the range [0, 100] | ||
ChangeHistoryLength = Size / 4, // The length of the sliding window on p-values for computing the martingale score. | ||
}; | ||
|
||
// Time Series model. | ||
ITransformer model = new IidChangePointEstimator(ml, args).Fit(dataView); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should the |
||
|
||
// Create a time series prediction engine from the model. | ||
var engine = model.CreateTimeSeriesPredictionFunction<IidChangePointData, ChangePointPrediction>(ml); | ||
for(int index = 0; index < 8; index++) | ||
{ | ||
// Anomaly change point detection. | ||
var prediction = engine.Predict(new IidChangePointData(5)); | ||
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", 5, prediction.Prediction[0], | ||
prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); | ||
} | ||
|
||
// Change point | ||
var changePointPrediction = engine.Predict(new IidChangePointData(7)); | ||
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", 7, changePointPrediction.Prediction[0], | ||
changePointPrediction.Prediction[1], changePointPrediction.Prediction[2], changePointPrediction.Prediction[3]); | ||
|
||
// Checkpoint the model. | ||
var modelPath = "temp.zip"; | ||
engine.CheckPoint(ml, modelPath); | ||
|
||
// Reference to current time series engine because in the next step "engine" will point to the | ||
// checkpointed model being loaded from disk. | ||
var timeseries1 = engine; | ||
|
||
// Load the model. | ||
using (var file = File.OpenRead(modelPath)) | ||
model = TransformerChain.LoadFrom(ml, file); | ||
|
||
// Create a time series prediction engine from the checkpointed model. | ||
engine = model.CreateTimeSeriesPredictionFunction<IidChangePointData, ChangePointPrediction>(ml); | ||
for (int index = 0; index < 8; index++) | ||
{ | ||
// Anomaly change point detection. | ||
var prediction = engine.Predict(new IidChangePointData(7)); | ||
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", 7, prediction.Prediction[0], | ||
prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); | ||
} | ||
|
||
// Prediction from the original time series engine should match the prediction from | ||
// check pointed model. | ||
engine = timeseries1; | ||
for (int index = 0; index < 8; index++) | ||
{ | ||
// Anomaly change point detection. | ||
var prediction = engine.Predict(new IidChangePointData(7)); | ||
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", 7, prediction.Prediction[0], | ||
prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]); | ||
} | ||
|
||
// Data Alert Score P-Value Martingale value | ||
// 5 0 5.00 0.50 0.00 <-- Time Series 1. | ||
// 5 0 5.00 0.50 0.00 | ||
// 5 0 5.00 0.50 0.00 | ||
// 5 0 5.00 0.50 0.00 | ||
// 5 0 5.00 0.50 0.00 | ||
// 5 0 5.00 0.50 0.00 | ||
// 5 0 5.00 0.50 0.00 | ||
// 5 0 5.00 0.50 0.00 | ||
// 7 1 7.00 0.00 10298.67 <-- alert is on, predicted changepoint (and model is checkpointed). | ||
|
||
// 7 0 7.00 0.13 33950.16 <-- Time Series 2 : Model loaded back from disk and prediction is made. | ||
// 7 0 7.00 0.26 60866.34 | ||
// 7 0 7.00 0.38 78362.04 | ||
// 7 0 7.00 0.50 0.01 | ||
// 7 0 7.00 0.50 0.00 | ||
// 7 0 7.00 0.50 0.00 | ||
// 7 0 7.00 0.50 0.00 | ||
|
||
// 7 0 7.00 0.13 33950.16 <-- Time Series 1 and prediction is made. | ||
// 7 0 7.00 0.26 60866.34 | ||
// 7 0 7.00 0.38 78362.04 | ||
// 7 0 7.00 0.50 0.01 | ||
// 7 0 7.00 0.50 0.00 | ||
// 7 0 7.00 0.50 0.00 | ||
// 7 0 7.00 0.50 0.00 | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,13 @@ | ||
using System; | ||
using System.IO; | ||
using System.Linq; | ||
using System.Collections.Generic; | ||
using Microsoft.ML.Data; | ||
using Microsoft.ML.Runtime.Data; | ||
using Microsoft.ML.Runtime.Api; | ||
using Microsoft.ML.Runtime.TimeSeriesProcessing; | ||
using Microsoft.ML.Core.Data; | ||
using Microsoft.ML.TimeSeries; | ||
|
||
namespace Microsoft.ML.Samples.Dynamic | ||
{ | ||
|
@@ -34,27 +38,27 @@ public static void IidSpikeDetectorTransform() | |
var ml = new MLContext(); | ||
|
||
// Generate sample series data with a spike | ||
const int size = 10; | ||
var data = new List<IidSpikeData>(size); | ||
for (int i = 0; i < size / 2; i++) | ||
const int Size = 10; | ||
var data = new List<IidSpikeData>(Size); | ||
for (int i = 0; i < Size / 2; i++) | ||
data.Add(new IidSpikeData(5)); | ||
// This is a spike | ||
data.Add(new IidSpikeData(10)); | ||
for (int i = 0; i < size / 2; i++) | ||
for (int i = 0; i < Size / 2; i++) | ||
data.Add(new IidSpikeData(5)); | ||
|
||
// Convert data to IDataView. | ||
var dataView = ml.CreateStreamingDataView(data); | ||
|
||
// Setup IidSpikeDetector arguments | ||
string outputColumnName = "Prediction"; | ||
string inputColumnName = "Value"; | ||
string outputColumnName = nameof(IidSpikePrediction.Prediction); | ||
string inputColumnName = nameof(IidSpikeData.Value); | ||
var args = new IidSpikeDetector.Arguments() | ||
{ | ||
Source = inputColumnName, | ||
Name = outputColumnName, | ||
Confidence = 95, // The confidence for spike detection in the range [0, 100] | ||
PvalueHistoryLength = size / 4 // The size of the sliding window for computing the p-value | ||
PvalueHistoryLength = Size / 4 // The size of the sliding window for computing the p-value; shorter windows are more sensitive to spikes. | ||
}; | ||
|
||
// The transformed data. | ||
|
@@ -83,5 +87,83 @@ public static void IidSpikeDetectorTransform() | |
// 0 5.00 0.50 | ||
// 0 5.00 0.50 | ||
} | ||
|
||
public static void IidSpikeDetectorPrediction() | ||
{ | ||
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, | ||
// as well as the source of randomness. | ||
var ml = new MLContext(); | ||
|
||
// Generate sample series data with a spike | ||
const int Size = 10; | ||
var data = new List<IidSpikeData>(Size); | ||
for (int i = 0; i < Size / 2; i++) | ||
data.Add(new IidSpikeData(5)); | ||
// This is a spike | ||
data.Add(new IidSpikeData(10)); | ||
for (int i = 0; i < Size / 2; i++) | ||
data.Add(new IidSpikeData(5)); | ||
|
||
// Convert data to IDataView. | ||
var dataView = ml.CreateStreamingDataView(data); | ||
|
||
// Setup IidSpikeDetector arguments | ||
string outputColumnName = nameof(IidSpikePrediction.Prediction); | ||
string inputColumnName = nameof(IidSpikeData.Value); | ||
var args = new IidSpikeDetector.Arguments() | ||
{ | ||
Source = inputColumnName, | ||
Name = outputColumnName, | ||
Confidence = 95, // The confidence for spike detection in the range [0, 100] | ||
PvalueHistoryLength = Size / 4 // The size of the sliding window for computing the p-value; shorter windows are more sensitive to spikes. | ||
}; | ||
|
||
// The transformed model. | ||
ITransformer model = new IidSpikeEstimator(ml, args).Fit(dataView); | ||
|
||
// Create a time series prediction engine from the model. | ||
var engine = model.CreateTimeSeriesPredictionFunction<IidSpikeData, IidSpikePrediction>(ml); | ||
for (int index = 0; index < 5; index++) | ||
{ | ||
// Anomaly spike detection. | ||
var prediction = engine.Predict(new IidSpikeData(5)); | ||
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", 5, prediction.Prediction[0], | ||
prediction.Prediction[1], prediction.Prediction[2]); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No offense, but this is awful. Why this not wrap in a proper class with properties? #WontFix There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think Zeeshan made a note of this in the PR for prediction engine, and mentioned that it would require a bit of extra refactoring so he would make it in a separate PR. I'll let him elaborate. In reply to: 237329405 [](ancestors = 237329405) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You are supposed to know the size of the array you will need to pass same way as you would need to know the info about output columns produced by other trainers and transforms in ML.NET. In reply to: 237595774 [](ancestors = 237595774,237329405) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we still want to do the future work to wrap this output vector column in a class with named properties, or even turning it into three scalar output columns? In reply to: 237625679 [](ancestors = 237625679,237595774,237329405) |
||
} | ||
|
||
// Spike. | ||
var spikePrediction = engine.Predict(new IidSpikeData(10)); | ||
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", 10, spikePrediction.Prediction[0], | ||
spikePrediction.Prediction[1], spikePrediction.Prediction[2]); | ||
|
||
// Checkpoint the model. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Why we doing this? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We are checkpointing the model so that we can reload it with the updated saved from the predictions. Yep, I have addressed that as part of documentation. In reply to: 237327645 [](ancestors = 237327645) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Where is the documentation? I wanted to see if I need to add anything for Martingale, P-Value, etc. In reply to: 237702797 [](ancestors = 237702797,237327645) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Its in this PR, check files under src/Microsoft.ML.TimeSeries #Resolved |
||
var modelPath = "temp.zip"; | ||
engine.CheckPoint(ml, modelPath); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I understand what you trying to show up here, but I'm not sure it would be clear to user. Can you do following: To show up what we preserve state. Right now you have following: It's great, but I think it just bring confusion to the sample, like why we checkpoint and load, why we doing this? In my suggestion I think it clearly shows what you can preserve state, and show you reason why you want to do that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I made this change in IID Change Point detector example. In reply to: 238025595 [](ancestors = 238025595) |
||
// Load the model. | ||
using (var file = File.OpenRead(modelPath)) | ||
model = TransformerChain.LoadFrom(ml, file); | ||
|
||
for (int index = 0; index < 5; index++) | ||
{ | ||
// Anomaly spike detection. | ||
var prediction = engine.Predict(new IidSpikeData(5)); | ||
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}", 5, prediction.Prediction[0], | ||
prediction.Prediction[1], prediction.Prediction[2]); | ||
} | ||
|
||
// Data Alert Score P-Value | ||
// 5 0 5.00 0.50 | ||
// 5 0 5.00 0.50 | ||
// 5 0 5.00 0.50 | ||
// 5 0 5.00 0.50 | ||
// 5 0 5.00 0.50 | ||
// 10 1 10.00 0.00 <-- alert is on, predicted spike (check-point model) | ||
// 5 0 5.00 0.26 <-- load model from disk. | ||
// 5 0 5.00 0.26 | ||
// 5 0 5.00 0.50 | ||
// 5 0 5.00 0.50 | ||
// 5 0 5.00 0.50 | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
After talking to @ @sfilipi I realized that this will automatically go into the docs site, showing both samples on the docs for IidChangePointDetector because of the xml tag here:
machinelearning/src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs
Line 204 in 533e186
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Isn't this supposed to be a sample for how to use IidChangePointDetector? #Resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, I was just mentioning that everything we add to this file will show up verbatim in the docs site on the page for IidChangePointDetector. Since this adds a second sample to the file, we now have two samples on the docs site - just checking if that is what we actually want.
In reply to: 237313709 [](ancestors = 237313709)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'll sync offline with you on this.
In reply to: 237596871 [](ancestors = 237596871,237313709)