Skip to content

Commit 33f5f32

Browse files
Add deseasonality in SrCnnEntireAnomalyDetect (#5202)
* add seasonality detect and stl decompose in srcnn * optimizations in performance * Add parameter period in SrCnn interface, remove inner period detect logic * add periodical data * add test * Remove unused files * further remove unused codes * remove unused functions * update * optimize WeightedRegression; clean code; add null checks * recover * reduce file numbers * restore * move stl related codes to a subfolder * fix sln file * update code style * fix members initialization outside the constructor * remove unused using * refactor InnerStl * use contract exception * remove unused class * update stl * remove unused usings * add readonly * fix bug * add deseasonality * update deseasonality * update * add options * refine code style * refine code * update * updates * remove max neighbor number constraint * remove the max neightbor count constraint * update SrCnnEntireDetectOptions, move input/output column name out; fix unit tests * refactor the constructor of Loess * remove unused imports * refactor and optimization * optimize * unfold pow(x, 2) to x * x for performance optimization * refactor polynomial model class and deseasonality functions, refine comments * refine * update comment * updates * update some wordings * update comments * update some comments * wording Co-authored-by: [email protected] <[email protected]>
1 parent 45a16dc commit 33f5f32

15 files changed

+9758
-72
lines changed
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System;
6+
using System.Collections.Generic;
7+
8+
namespace Microsoft.ML.TimeSeries
9+
{
10+
internal interface IDeseasonality
{
    /// <summary>
    /// Strips the seasonal component out of a time-series.
    /// </summary>
    /// <param name="values">The input time-series.</param>
    /// <param name="period">The length of one seasonal cycle, counted in points of <paramref name="values"/>.</param>
    /// <param name="results">The buffer that holds the de-seasonalized time-series when the call returns.</param>
    void Deseasonality(ref double[] values, int period, ref double[] results);
}
20+
21+
/// <summary>
/// Removes seasonality by subtracting, from every point, the mean of all points that share
/// the same position inside the period.
/// </summary>
internal sealed class MeanDeseasonality : IDeseasonality
{
    // Per-position seasonal component. Kept as a field so the buffer is reused across calls.
    private double[] _circularComponent;

    /// <summary>
    /// Computes the mean seasonal profile of <paramref name="values"/> and writes
    /// <c>values[i] - profile[i % period]</c> into <paramref name="results"/>.
    /// </summary>
    /// <param name="values">An array representing the input time-series. It is not modified.</param>
    /// <param name="period">The period value of the time-series. Must be positive.</param>
    /// <param name="results">The de-seasonalized time-series. Must hold at least as many elements as <paramref name="values"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="values"/> or <paramref name="results"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="period"/> is not positive.</exception>
    /// <exception cref="ArgumentException"><paramref name="results"/> is shorter than <paramref name="values"/>.</exception>
    public void Deseasonality(ref double[] values, int period, ref double[] results)
    {
        if (values == null)
        {
            throw new ArgumentNullException(nameof(values));
        }

        if (results == null)
        {
            throw new ArgumentNullException(nameof(results));
        }

        if (period <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(period), "The period must be a positive number.");
        }

        if (results.Length < values.Length)
        {
            throw new ArgumentException("The results buffer must be at least as long as the input series.", nameof(results));
        }

        var length = values.Length;

        // Array.Resize keeps the previous contents when the buffer is reused, so reset it explicitly.
        Array.Resize(ref _circularComponent, period);
        Array.Clear(_circularComponent, 0, period);

        // Sum up values that are located at the same position in one period.
        for (int i = 0; i < length; ++i)
        {
            _circularComponent[i % period] += values[i];
        }

        // Turn the sums into means. Every position occurs once per complete cycle, and the first
        // (length % period) positions occur one more time in the trailing partial cycle.
        var fullCycles = length / period;
        var remainder = length % period;
        for (int i = 0; i < period; ++i)
        {
            var occurrences = fullCycles + (i < remainder ? 1 : 0);

            // Positions that never occur (series shorter than one period) are never read below.
            if (occurrences > 0)
            {
                _circularComponent[i] /= occurrences;
            }
        }

        // Subtract the circular component from the original series. The result is computed from
        // values, so it does not depend on what the results buffer contained beforehand.
        for (int i = 0; i < length; ++i)
        {
            results[i] = values[i] - _circularComponent[i % period];
        }
    }
}
61+
62+
/// <summary>
/// Removes seasonality by subtracting, from every point, the median of all points that share
/// the same position inside the period.
/// </summary>
internal sealed class MedianDeseasonality : IDeseasonality
{
    // One list per position in the period. Kept as fields so the buffers are reused across calls.
    private List<double>[] _subSeries;
    private double[] _circularComponent;

    /// <summary>
    /// Computes the median seasonal profile of <paramref name="values"/> and writes
    /// <c>values[i] - profile[i % period]</c> into <paramref name="results"/>.
    /// </summary>
    /// <param name="values">An array representing the input time-series. It is not modified.</param>
    /// <param name="period">The period value of the time-series. Must be positive.</param>
    /// <param name="results">The de-seasonalized time-series. Must hold at least as many elements as <paramref name="values"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="values"/> or <paramref name="results"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="period"/> is not positive.</exception>
    /// <exception cref="ArgumentException"><paramref name="results"/> is shorter than <paramref name="values"/>.</exception>
    public void Deseasonality(ref double[] values, int period, ref double[] results)
    {
        if (values == null)
        {
            throw new ArgumentNullException(nameof(values));
        }

        if (results == null)
        {
            throw new ArgumentNullException(nameof(results));
        }

        if (period <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(period), "The period must be a positive number.");
        }

        if (results.Length < values.Length)
        {
            throw new ArgumentException("The results buffer must be at least as long as the input series.", nameof(results));
        }

        Array.Resize(ref _circularComponent, period);
        Array.Resize(ref _subSeries, period);

        var length = values.Length;

        // Array.Resize keeps the lists from a previous call; empty them instead of reallocating.
        for (int i = 0; i < period; ++i)
        {
            if (_subSeries[i] == null)
            {
                _subSeries[i] = new List<double>();
            }
            else
            {
                _subSeries[i].Clear();
            }
        }

        // Split the original series into #period subseries.
        for (int i = 0; i < length; ++i)
        {
            _subSeries[i % period].Add(values[i]);
        }

        // Calculate the median value as circular component.
        for (int i = 0; i < period; ++i)
        {
            _circularComponent[i] = MathUtility.QuickMedian(_subSeries[i]);
        }

        // Subtract the circular component from the original series. The result is computed from
        // values, so it does not depend on what the results buffer contained beforehand.
        for (int i = 0; i < length; ++i)
        {
            results[i] = values[i] - _circularComponent[i % period];
        }
    }
}
100+
101+
/// <summary>
/// Deseasonality implementation backed by an STL decomposition: the residual component
/// of the decomposition is reported as the de-seasonalized series.
/// </summary>
internal sealed class StlDeseasonality : IDeseasonality
{
    private readonly InnerStl _stl;

    public StlDeseasonality()
    {
        _stl = new InnerStl(true);
    }

    /// <summary>
    /// Runs the decomposition on <paramref name="values"/> and copies its residual into
    /// <paramref name="results"/>; when the decomposition reports failure, the input is copied through unchanged.
    /// </summary>
    public void Deseasonality(ref double[] values, int period, ref double[] results)
    {
        if (!_stl.Decomposition(values, period))
        {
            // The decomposition returned false: fall back to the raw input.
            for (int pos = 0; pos < values.Length; ++pos)
            {
                results[pos] = values[pos];
            }

            return;
        }

        for (int pos = 0; pos < _stl.Residual.Count; ++pos)
        {
            results[pos] = _stl.Residual[pos];
        }
    }
}
132+
}

src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -176,7 +176,36 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog
176176
/// </example>
177177
public static IDataView DetectEntireAnomalyBySrCnn(this AnomalyDetectionCatalog catalog, IDataView input, string outputColumnName, string inputColumnName,
178178
double threshold = 0.3, int batchSize = 1024, double sensitivity = 99, SrCnnDetectMode detectMode = SrCnnDetectMode.AnomalyOnly)
179-
=> new SrCnnEntireAnomalyDetector(CatalogUtils.GetEnvironment(catalog), input, inputColumnName, outputColumnName, threshold, batchSize, sensitivity, detectMode);
179+
{
180+
// Bundle the scalar arguments into an options object so that this overload and the
// options-based overload share a single construction path.
var options = new SrCnnEntireAnomalyDetectorOptions()
181+
{
182+
Threshold = threshold,
183+
BatchSize = batchSize,
184+
Sensitivity = sensitivity,
185+
DetectMode = detectMode,
186+
};
187+
188+
// Delegate to the overload that takes SrCnnEntireAnomalyDetectorOptions.
return DetectEntireAnomalyBySrCnn(catalog, input, outputColumnName, inputColumnName, options);
189+
}
190+
191+
/// <summary>
192+
/// Create <see cref="SrCnnEntireAnomalyDetector"/>, which detects time-series anomalies for the entire input using the SRCNN algorithm.
193+
/// </summary>
194+
/// <param name="catalog">The AnomalyDetectionCatalog.</param>
195+
/// <param name="input">Input DataView.</param>
196+
/// <param name="outputColumnName">Name of the column resulting from data processing of <paramref name="inputColumnName"/>.
197+
/// The column data is a vector of <see cref="System.Double"/>. The length of this vector varies depending on the <see cref="SrCnnEntireAnomalyDetectorOptions.DetectMode"/> of <paramref name="options"/>.</param>
198+
/// <param name="inputColumnName">Name of column to process. The column data must be <see cref="System.Double"/>.</param>
199+
/// <param name="options">The settings of the detector, such as threshold, batch size, sensitivity and detect mode.</param>
/// <returns>The <see cref="SrCnnEntireAnomalyDetector"/> built from the given arguments, exposed as an <see cref="IDataView"/>.</returns>
200+
/// <example>
201+
/// <format type="text/markdown">
202+
/// <![CDATA[
203+
/// [!code-csharp[DetectEntireAnomalyBySrCnn](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectEntireAnomalyBySrCnn.cs)]
204+
/// ]]>
205+
/// </format>
206+
/// </example>
207+
public static IDataView DetectEntireAnomalyBySrCnn(this AnomalyDetectionCatalog catalog, IDataView input, string outputColumnName, string inputColumnName, SrCnnEntireAnomalyDetectorOptions options)
208+
=> new SrCnnEntireAnomalyDetector(CatalogUtils.GetEnvironment(catalog), input, outputColumnName, inputColumnName, options);
180209

181210
/// <summary>
182211
/// Create <see cref="RootCause"/>, which localizes root causes using decision tree algorithm.
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
5+
using System.Collections.Generic;
6+
using Microsoft.ML.Runtime;
7+
8+
namespace Microsoft.ML.TimeSeries
9+
{
10+
/// <summary>
/// A fast, approximate version of Loess. When the series is longer than the sample size, the smoother is fitted on
/// an evenly spaced sample of the input, and the fitted smoother is then used to estimate the values of all points.
/// </summary>
internal class FastLoess
{
    /// <summary>
    /// The maximum number of points the underlying smoother is fitted on.
    /// </summary>
    private const int _sampleSize = 100;

    /// <summary>
    /// The minimum length of a valid time series. Shorter inputs are rejected by the constructor.
    /// </summary>
    public const int MinTimeSeriesLength = 3;

    private readonly IReadOnlyList<double> _x;
    private readonly IReadOnlyList<double> _y;
    private readonly int _length;

    private readonly Loess _smoother;

    /// <summary>
    /// Initializes a new instance of the <see cref="FastLoess"/> class.
    /// When the time series is longer than the sample size, it is sampled first to improve the performance.
    /// </summary>
    /// <param name="xValues">The input x-axis values</param>
    /// <param name="yValues">The input y-axis values</param>
    /// <param name="isTemporal">If the regression is considered to take temporal information into account. In general, this is true if we are regressing a time series, and false if we are regressing scatter plot data</param>
    /// <param name="r">The smoothing ratio forwarded to the smoother. When left at -1, the smoother's own default is used.</param>
    public FastLoess(IReadOnlyList<double> xValues, IReadOnlyList<double> yValues, bool isTemporal = true, int r = -1)
    {
        Contracts.CheckValue(xValues, nameof(xValues));
        Contracts.CheckValue(yValues, nameof(yValues));

        if (yValues.Count < MinTimeSeriesLength)
            throw Contracts.Except($"input data structure must contain at least {MinTimeSeriesLength} points: lowess");

        _x = xValues;
        _y = yValues;
        _length = _y.Count;
        Y = new List<double>(_length);

        if (_length <= _sampleSize)
        {
            _smoother = CreateSmoother(_x, _y, isTemporal, r);
        }
        else
        {
            // Conduct sampling based strategy, to boost the performance.
            // The sample indices are spread evenly over the series: index = floor(i * length / sampleSize).
            double step = _length * 1.0 / _sampleSize;
            var sampleX = new double[_sampleSize];
            var sampleY = new double[_sampleSize];
            for (int i = 0; i < _sampleSize; i++)
            {
                int index = (int)(i * step);
                sampleX[i] = _x[index];
                sampleY[i] = _y[index];
            }

            _smoother = CreateSmoother(sampleX, sampleY, isTemporal, r);
        }
    }

    /// <summary>
    /// The estimated y values. Populated by <see cref="Estimate"/>.
    /// </summary>
    public List<double> Y { get; }

    /// <summary>
    /// Assign the smoothing values to all the data points, not only to the sampled ones.
    /// </summary>
    public void Estimate()
    {
        // Start from an empty list so that calling this method again does not append a second copy of the estimates.
        Y.Clear();

        for (int i = 0; i < _length; i++)
        {
            Y.Add(_smoother.EstimateY(_x[i]));
        }
    }

    /// <summary>
    /// Estimate a y value by giving an x value, even if the x value is not one of the input points.
    /// </summary>
    public double EstimateY(double xValue)
    {
        return _smoother.EstimateY(xValue);
    }

    /// <summary>
    /// Builds the underlying smoother, passing <paramref name="r"/> on only when the caller specified one.
    /// </summary>
    private static Loess CreateSmoother(IReadOnlyList<double> xs, IReadOnlyList<double> ys, bool isTemporal, int r)
    {
        if (r == -1)
            return new Loess(xs, ys, isTemporal);

        return new Loess(xs, ys, isTemporal, r);
    }
}
104+
}

0 commit comments

Comments
 (0)