Skip to content

Commit 93156b6

Browse files
authored
SSA time series samples (#1788)
* Added SsaChangePointDetectorPrediction sample. * Completed SsaChangePointDetectorPrediction sample. * Added SsaSpikeDetectorPrediction sample. * Cleanup usings, etc. * Added note when saved and loaded from disk.
1 parent f40fb02 commit 93156b6

File tree

2 files changed

+300
-73
lines changed

2 files changed

+300
-73
lines changed
Lines changed: 148 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
1-
using System;
2-
using System.Linq;
3-
using System.Collections.Generic;
4-
using Microsoft.ML.Runtime.Data;
1+
using Microsoft.ML.Core.Data;
2+
using Microsoft.ML.Data;
53
using Microsoft.ML.Runtime.Api;
64
using Microsoft.ML.Runtime.TimeSeriesProcessing;
5+
using Microsoft.ML.TimeSeries;
6+
using System;
7+
using System.Collections.Generic;
8+
using System.IO;
9+
using System.Linq;
710

811
namespace Microsoft.ML.Samples.Dynamic
912
{
@@ -21,35 +24,41 @@ public SsaChangePointData(float value)
2124

2225
// This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot).
2326
// SsaChangePointDetector is then applied to identify points where the data distribution changed.
27+
// SsaChangePointDetector differs from IidChangePointDetector in that it can account for temporal seasonality
28+
// in the data.
2429
public static void SsaChangePointDetectorTransform()
2530
{
2631
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
2732
// as well as the source of randomness.
2833
var ml = new MLContext();
2934

30-
// Generate sample series data with a change
31-
const int size = 16;
32-
var data = new List<SsaChangePointData>(size);
33-
for (int i = 0; i < size / 2; i++)
34-
data.Add(new SsaChangePointData(5));
35+
// Generate sample series data with a recurring pattern and then a change in trend
36+
const int SeasonalitySize = 5;
37+
const int TrainingSeasons = 3;
38+
const int TrainingSize = SeasonalitySize * TrainingSeasons;
39+
var data = new List<SsaChangePointData>();
40+
for (int i = 0; i < TrainingSeasons; i++)
41+
for (int j = 0; j < SeasonalitySize; j++)
42+
data.Add(new SsaChangePointData(j));
3543
// This is a change point
36-
for (int i = 0; i < size / 2; i++)
37-
data.Add(new SsaChangePointData(7));
44+
for (int i = 0; i < SeasonalitySize; i++)
45+
data.Add(new SsaChangePointData(i * 100));
3846

3947
// Convert data to IDataView.
4048
var dataView = ml.CreateStreamingDataView(data);
4149

42-
// Setup IidSpikeDetector arguments
43-
string outputColumnName = "Prediction";
44-
string inputColumnName = "Value";
50+
// Setup SsaChangePointDetector arguments
51+
var inputColumnName = nameof(SsaChangePointData.Value);
52+
var outputColumnName = nameof(ChangePointPrediction.Prediction);
4553
var args = new SsaChangePointDetector.Arguments()
4654
{
4755
Source = inputColumnName,
4856
Name = outputColumnName,
49-
Confidence = 95, // The confidence for spike detection in the range [0, 100]
50-
ChangeHistoryLength = size / 4, // The length of the sliding window on p-values for computing the martingale score.
51-
TrainingWindowSize = size / 2, // The number of points from the beginning of the sequence used for training.
52-
SeasonalWindowSize = size / 8, // An upper bound on the largest relevant seasonality in the input time - series."
57+
Confidence = 95, // The confidence for change point detection in the range [0, 100]
58+
ChangeHistoryLength = 8, // The length of the window for detecting a change in trend; shorter windows are more sensitive to spikes.
59+
TrainingWindowSize = TrainingSize, // The number of points from the beginning of the sequence used for training.
60+
SeasonalWindowSize = SeasonalitySize + 1 // An upper bound on the largest relevant seasonality in the input time series.
61+
5362
};
5463

5564
// The transformed data.
@@ -66,23 +75,127 @@ public static void SsaChangePointDetectorTransform()
6675
Console.WriteLine("");
6776

6877
// Prediction column obtained post-transformation.
69-
// Data Alert Score P-Value Martingale value
70-
// 5 0 0.00 0.50 0.00
71-
// 5 0 0.00 0.50 0.00
72-
// 5 0 0.00 0.50 0.00
73-
// 5 0 0.00 0.50 0.00
74-
// 5 0 0.00 0.50 0.00
75-
// 5 0 0.00 0.50 0.00
76-
// 5 0 0.00 0.50 0.00
77-
// 5 0 0.00 0.50 0.00
78-
// 7 1 2.00 0.00 10298.67 <-- alert is on, predicted changepoint
79-
// 7 0 1.00 0.31 15741.58
80-
// 7 0 0.00 0.28 26487.48
81-
// 7 0 0.00 0.28 44569.02
82-
// 7 0 0.00 0.28 0.01
83-
// 7 0 0.00 0.38 0.01
84-
// 7 0 0.00 0.50 0.00
85-
// 7 0 0.00 0.50 0.00
78+
// Data Alert Score P-Value Martingale value
79+
// 0 0 - 2.53 0.50 0.00
80+
// 1 0 - 0.01 0.01 0.00
81+
// 2 0 0.76 0.14 0.00
82+
// 3 0 0.69 0.28 0.00
83+
// 4 0 1.44 0.18 0.00
84+
// 0 0 - 1.84 0.17 0.00
85+
// 1 0 0.22 0.44 0.00
86+
// 2 0 0.20 0.45 0.00
87+
// 3 0 0.16 0.47 0.00
88+
// 4 0 1.33 0.18 0.00
89+
// 0 0 - 1.79 0.07 0.00
90+
// 1 0 0.16 0.50 0.00
91+
// 2 0 0.09 0.50 0.00
92+
// 3 0 0.08 0.45 0.00
93+
// 4 0 1.31 0.12 0.00
94+
// 0 0 - 1.79 0.07 0.00
95+
// 100 1 99.16 0.00 4031.94 <-- alert is on, predicted changepoint
96+
// 200 0 185.23 0.00 731260.87
97+
// 300 0 270.40 0.01 3578470.47
98+
// 400 0 357.11 0.03 45298370.86
99+
}
100+
101+
// This example shows change point detection as above, but demonstrates how to train a model
102+
// that can run predictions on streaming data, and how to persist the trained model and then re-load it.
103+
public static void SsaChangePointDetectorPrediction()
104+
{
105+
// Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging,
106+
// as well as the source of randomness.
107+
var ml = new MLContext();
108+
109+
// Generate sample series data with a recurring pattern
110+
const int SeasonalitySize = 5;
111+
const int TrainingSeasons = 3;
112+
const int TrainingSize = SeasonalitySize * TrainingSeasons;
113+
var data = new List<SsaChangePointData>();
114+
for (int i = 0; i < TrainingSeasons; i++)
115+
for (int j = 0; j < SeasonalitySize; j++)
116+
data.Add(new SsaChangePointData(j));
117+
118+
// Convert data to IDataView.
119+
var dataView = ml.CreateStreamingDataView(data);
120+
121+
// Setup SsaChangePointDetector arguments
122+
var inputColumnName = nameof(SsaChangePointData.Value);
123+
var outputColumnName = nameof(ChangePointPrediction.Prediction);
124+
var args = new SsaChangePointDetector.Arguments()
125+
{
126+
Source = inputColumnName,
127+
Name = outputColumnName,
128+
Confidence = 95, // The confidence for change point detection in the range [0, 100]
129+
ChangeHistoryLength = 8, // The length of the window for detecting a change in trend; shorter windows are more sensitive to spikes.
130+
TrainingWindowSize = TrainingSize, // The number of points from the beginning of the sequence used for training.
131+
SeasonalWindowSize = SeasonalitySize + 1 // An upper bound on the largest relevant seasonality in the input time series.
132+
133+
};
134+
135+
// Train the change point detector.
136+
ITransformer model = new SsaChangePointEstimator(ml, args).Fit(dataView);
137+
138+
// Create a prediction engine from the model for feeding new data.
139+
var engine = model.CreateTimeSeriesPredictionFunction<SsaChangePointData, ChangePointPrediction>(ml);
140+
141+
// Start streaming new data points with no change point to the prediction engine.
142+
Console.WriteLine($"Output from ChangePoint predictions on new data:");
143+
Console.WriteLine("Data\tAlert\tScore\tP-Value\tMartingale value");
144+
ChangePointPrediction prediction = null;
145+
for (int i = 0; i < 5; i++)
146+
{
147+
var value = i;
148+
prediction = engine.Predict(new SsaChangePointData(value));
149+
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]);
150+
}
151+
152+
// Now stream data points that reflect a change in trend.
153+
for (int i = 0; i < 5; i++)
154+
{
155+
var value = (i + 1) * 100;
156+
prediction = engine.Predict(new SsaChangePointData(value));
157+
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]);
158+
}
159+
160+
// Now we demonstrate saving and loading the model.
161+
162+
// Save the model that exists within the prediction engine.
163+
// The engine has been updating this model with every new data point.
164+
var modelPath = "model.zip";
165+
engine.CheckPoint(ml, modelPath);
166+
167+
// Load the model.
168+
using (var file = File.OpenRead(modelPath))
169+
model = TransformerChain.LoadFrom(ml, file);
170+
171+
// We must create a new prediction engine from the persisted model.
172+
engine = model.CreateTimeSeriesPredictionFunction<SsaChangePointData, ChangePointPrediction>(ml);
173+
174+
// Run predictions on the loaded model.
175+
for (int i = 0; i < 5; i++)
176+
{
177+
var value = (i + 1) * 100;
178+
prediction = engine.Predict(new SsaChangePointData(value));
179+
Console.WriteLine("{0}\t{1}\t{2:0.00}\t{3:0.00}\t{4:0.00}", value, prediction.Prediction[0], prediction.Prediction[1], prediction.Prediction[2], prediction.Prediction[3]);
180+
}
181+
182+
// Output from ChangePoint predictions on new data:
183+
// Data Alert Score P-Value Martingale value
184+
// 0 0 - 1.01 0.50 0.00
185+
// 1 0 - 0.24 0.22 0.00
186+
// 2 0 - 0.31 0.30 0.00
187+
// 3 0 0.44 0.01 0.00
188+
// 4 0 2.16 0.00 0.24
189+
// 100 0 86.23 0.00 2076098.24
190+
// 200 0 171.38 0.00 809668524.21
191+
// 300 1 256.83 0.01 22130423541.93 <-- alert is on, note that delay is expected
192+
// 400 0 326.55 0.04 241162710263.29
193+
// 500 0 364.82 0.08 597660527041.45 <-- saved to disk
194+
// 100 0 - 58.58 0.15 1096021098844.34 <-- loaded from disk and running new predictions
195+
// 200 0 - 41.24 0.20 97579154688.98
196+
// 300 0 - 30.61 0.24 95319753.87
197+
// 400 0 58.87 0.38 14.24
198+
// 500 0 219.28 0.36 0.05
86199
}
87200
}
88201
}

0 commit comments

Comments
 (0)