Skip to content

Add deseasonality in SrCnnEntireAnomalyDetect #5202

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 52 commits into from
Jun 29, 2020
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
78961c7
add seasonality detect and stl decompose in srcnn
Jun 1, 2020
468d444
optimizations in performance
Jun 3, 2020
45b6971
Add parameter period in SrCnn interface, remove inner period detect l…
Jun 3, 2020
245731f
add periodical data
Jun 3, 2020
604084a
add test
Jun 3, 2020
7c6734f
Remove unused files
Jun 3, 2020
8ea07f6
further remove unused codes
Jun 3, 2020
b398fdb
remove unused functions
Jun 3, 2020
cce849d
update
Jun 3, 2020
10935c3
optimize WeightedRegression; clean code; add null checks
Jun 4, 2020
b524957
recover
Jun 4, 2020
d40440b
reduce file numbers
Jun 4, 2020
92c6aab
restore
Jun 4, 2020
f6e8566
move stl related codes to a subfolder
Jun 4, 2020
d4515ee
fix sln file
Jun 5, 2020
8778e5a
update code style
Jun 5, 2020
219decd
fix members initialization outside the constructor
Jun 5, 2020
e039cba
remove unused using
Jun 5, 2020
3553277
refactor InnerStl
Jun 9, 2020
8c693c9
use contract exception
Jun 9, 2020
37e7c6f
remove unused class
Jun 9, 2020
65fb7a5
update stl
Jun 9, 2020
d2e2653
remove unused usings
Jun 9, 2020
edc8a21
add readonly
Jun 10, 2020
7737a57
fix bug
Jun 11, 2020
2e6b0e5
Merge branch 'master' into dev/srcnn_deseasonality
Jun 11, 2020
8df11b5
Merge branch 'master' into dev/srcnn_deseasonality
guinao Jun 11, 2020
fbf3e0e
Merge branch 'dev/srcnn_deseasonality' of https://github.com/guinao/m…
Jun 11, 2020
86f64f7
add deseasonality
Jun 11, 2020
0d1a038
update deseasonality
Jun 11, 2020
c87ae95
update
Jun 16, 2020
b59660c
add options
Jun 16, 2020
5a93fa1
refine code style
Jun 16, 2020
1a7b071
refine code
Jun 16, 2020
b6fa553
update
Jun 16, 2020
192d782
updates
Jun 17, 2020
1b90669
remove max neighbor number constraint
Jun 17, 2020
92b0963
remove the max neighbor count constraint
Jun 17, 2020
59e7f2b
update SrCnnEntireDetectOptions, move input/output column name out; …
Jun 17, 2020
b38c123
refactor the constructor of Loess
Jun 17, 2020
d72d97b
remove unused imports
Jun 17, 2020
c58a45b
refactor and optimization
Jun 18, 2020
3939380
optimize
Jun 18, 2020
7e48255
unfold pow(x, 2) to x * x for performance optimization
Jun 18, 2020
c3f74d5
refactor polynomial model class and deseasonality functions, refine c…
Jun 22, 2020
816479a
refine
Jun 22, 2020
a239a89
update comment
Jun 22, 2020
1446115
updates
Jun 22, 2020
8b10cc0
update some wordings
Jun 23, 2020
a817829
update comments
Jun 23, 2020
8fac10e
update some comments
Jun 26, 2020
2cad48f
wording
Jun 28, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
402 changes: 207 additions & 195 deletions Microsoft.ML.sln

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog
/// When set to AnomalyAndMargin, the output vector would be a 7-element Double vector of (IsAnomaly, AnomalyScore, Mag, ExpectedValue, BoundaryUnit, UpperBoundary, LowerBoundary).
/// The RawScore is output by SR to determine whether a point is an anomaly or not, under AnomalyAndMargin mode, when a point is an anomaly, an AnomalyScore will be calculated according to sensitivity setting.
/// Default value is AnomalyOnly.</param>
/// <param name="period">The period of the data to be detected. If the data has a circular pattern, set this parameter to the length of one period, otherwise 0 which indicates that there is no circular pattern. Default value is 0.</param>
/// <example>
/// <format type="text/markdown">
/// <![CDATA[
Expand All @@ -175,8 +176,8 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog
/// </format>
/// </example>
public static IDataView DetectEntireAnomalyBySrCnn(this AnomalyDetectionCatalog catalog, IDataView input, string outputColumnName, string inputColumnName,
double threshold = 0.3, int batchSize = 1024, double sensitivity = 99, SrCnnDetectMode detectMode = SrCnnDetectMode.AnomalyOnly)
=> new SrCnnEntireAnomalyDetector(CatalogUtils.GetEnvironment(catalog), input, inputColumnName, outputColumnName, threshold, batchSize, sensitivity, detectMode);
double threshold = 0.3, int batchSize = 1024, double sensitivity = 99, SrCnnDetectMode detectMode = SrCnnDetectMode.AnomalyOnly, int period = 0)
=> new SrCnnEntireAnomalyDetector(CatalogUtils.GetEnvironment(catalog), input, inputColumnName, outputColumnName, threshold, batchSize, sensitivity, detectMode, period);

/// <summary>
/// Create <see cref="RootCause"/>, which localizes root causes using decision tree algorithm.
Expand Down
111 changes: 111 additions & 0 deletions src/Microsoft.ML.TimeSeries/STL/BackendStructureBase.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
using System;
using System.Collections.Generic;
using System.Text;

namespace Microsoft.ML.TimeSeries
{
/// <summary>
/// The kinds of insight that can be reported about time series data.
/// The first three members describe a single series; the last three describe
/// a relationship between two series.
/// </summary>
public enum TimeSeriesInfoKind
{
/// <summary>
/// (single time series) the outliers of a single time series.
/// </summary>
Outlier,

/// <summary>
/// (single time series) the seasonal signal of a single time series.
/// </summary>
Seasonal,

/// <summary>
/// (single time series) the trend curve of a single time series.
/// </summary>
Trend,

/// <summary>
/// (two time series) the lead/lag correlation between two time series.
/// </summary>
CrossCorrelation,

/// <summary>
/// (two time series) the correlation of the outliers from two time series.
/// </summary>
OutlierCorrelation,

/// <summary>
/// (two time series) the correlation of the trends from two time series.
/// </summary>
TrendCorrelation,
}

/// <summary>
/// Base type for an insight extracted from time series data. Every insight exposes a
/// rank, a textual description and a kind, and insights can be ordered with <see cref="Compare"/>.
/// </summary>
public abstract class TimeSeriesInfoBase
{
    /// <summary>
    /// The rank of this insight; <see cref="Compare"/> orders insights by this value.
    /// </summary>
    public abstract double Rank { get; set; }

    /// <summary>
    /// A description of this particular insight. Only derived classes may assign it.
    /// </summary>
    public abstract string Description { get; protected set; }

    /// <summary>
    /// The kind of insight this object represents. Only derived classes may assign it.
    /// </summary>
    public abstract TimeSeriesInfoKind Kind { get; protected set; }

    /// <summary>
    /// Comparison function over insights, intended for sorting.
    /// A null reference orders before any non-null insight, two identical references
    /// (including two nulls) are equal, and otherwise the result is the comparison of the two ranks.
    /// </summary>
    /// <param name="left">the left element</param>
    /// <param name="right">the right element</param>
    /// <returns>a negative value, zero, or a positive value when <paramref name="left"/> orders before, equal to, or after <paramref name="right"/></returns>
    public static int Compare(TimeSeriesInfoBase left, TimeSeriesInfoBase right)
    {
        bool leftMissing = left == null;
        bool rightMissing = right == null;

        // resolve every case that involves a null reference first
        if (leftMissing || rightMissing)
        {
            if (leftMissing && rightMissing)
                return 0;
            return leftMissing ? -1 : 1;
        }

        // the very same instance is equal to itself; Rank is not consulted in that case
        if (object.ReferenceEquals(left, right))
            return 0;

        return left.Rank.CompareTo(right.Rank);
    }
}

/// <summary>
/// An insight that characterizes a single time series. It carries the x-axis and
/// y-axis values of the original curve; only derived classes may assign them.
/// </summary>
public abstract class SingleSeriesInfo : TimeSeriesInfoBase
{
/// <summary>
/// The x-axis values of the original curve.
/// </summary>
public IReadOnlyList<double> X { get; protected set; }

/// <summary>
/// The y-axis values of the original curve.
/// </summary>
public IReadOnlyList<double> Y { get; protected set; }
}

/// <summary>
/// An insight that characterizes the relationship between two time series, such as
/// the lead/lag correlation or the correlation of their outliers.
/// Only derived classes may assign the series values.
/// </summary>
public abstract class TwoSeriesInfo : TimeSeriesInfoBase
{
/// <summary>
/// The x-axis values shared by the two time series. The two series should be aligned on these values.
/// </summary>
public IReadOnlyList<double> X { get; protected set; }

/// <summary>
/// The y-axis values of the first series.
/// </summary>
public IReadOnlyList<double> Y1 { get; protected set; }

/// <summary>
/// The y-axis values of the second series.
/// </summary>
public IReadOnlyList<double> Y2 { get; protected set; }
}
}
30 changes: 30 additions & 0 deletions src/Microsoft.ML.TimeSeries/STL/BasicParameters.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
using System;
using System.Collections.Generic;
using System.Text;

namespace Microsoft.ML.TimeSeries
{
/// <summary>
/// Shared constant thresholds for time series analysis.
/// </summary>
public class BasicParameters
{
/// <summary>
/// The minimum length of a valid time series. A series of length 2 is considered trivial,
/// and a series shorter than 2 is considered meaningless.
/// </summary>
public const int MinTimeSeriesLength = 3;

/// <summary>
/// The maximum length of a valid time series. When there are too many data points, the chart looks so dense that details are lost.
/// This number is tuned so that the bird strike data can still preserve results.
/// </summary>
public const int MaxTimeSeriesLength = 4000;

/// <summary>
/// The minimum number of repeated periods. This is used for determining a noticeable seasonal signal.
/// </summary>
public const int MinPeriodRepeatCount = 3;

/// <summary>
/// The minimum number of regular gaps. When there are too few gaps the time series looks odd, which impacts the seasonality analysis.
/// </summary>
public const int MinRegularGap = 5;
}
}
99 changes: 99 additions & 0 deletions src/Microsoft.ML.TimeSeries/STL/FastLoess.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
using System;
using System.Collections.Generic;
using System.Text;
using Microsoft.ML.Runtime;

namespace Microsoft.ML.TimeSeries
{
/// <summary>
/// A fast, approximate version of Loess. When the series is longer than the sample size,
/// the smoother is fitted on evenly spaced samples only, and the values for all the
/// remaining points are then estimated from that fitted smoother.
/// </summary>
public class FastLoess
{
    /// <summary>
    /// The number of points the underlying smoother is fitted on when the input
    /// series is longer than this value.
    /// </summary>
    private const int SampleSize = 100;

    private readonly IReadOnlyList<double> _x;
    private readonly IReadOnlyList<double> _y;
    private readonly int _length;

    private readonly Loess _smoother;

    /// <summary>
    /// Initializes a new instance of the <see cref="FastLoess"/> class.
    /// When the series has more than <see cref="SampleSize"/> points, it is down-sampled first
    /// to improve the performance; otherwise the smoother is built on the full input.
    /// </summary>
    /// <param name="xValues">the input x-axis values</param>
    /// <param name="yValues">the input y-axis values</param>
    /// <param name="isTemporal">if the regression is considered to take temporal information into account. in general, this is true if we are regressing a time series, and false if we are regressing scatter plot data</param>
    /// <param name="r">the smoothing ratio passed to the underlying smoother; when left at -1 the smoother's default is used</param>
    /// <exception cref="ArgumentException"><paramref name="yValues"/> has fewer than <see cref="BasicParameters.MinTimeSeriesLength"/> points.</exception>
    public FastLoess(IReadOnlyList<double> xValues, IReadOnlyList<double> yValues, bool isTemporal = true, int r = -1)
    {
        Contracts.CheckValue(xValues, nameof(xValues));
        Contracts.CheckValue(yValues, nameof(yValues));

        // a specific exception type (still an Exception for existing catch blocks) with a message that matches the actual check
        if (yValues.Count < BasicParameters.MinTimeSeriesLength)
            throw new ArgumentException($"input series must contain at least {BasicParameters.MinTimeSeriesLength} points: lowess", nameof(yValues));

        _x = xValues;
        _y = yValues;
        _length = _y.Count;
        Y = new List<double>(_length);

        // the sampling is not necessary
        if (_length <= SampleSize)
        {
            if (r == -1)
                _smoother = new Loess(_x, _y, isTemporal);
            else
                _smoother = new Loess(_x, _y, r, isTemporal);
        }
        else
        {
            // conduct sampling based strategy, to boost the performance.
            // step > 1 here, so (int)(i * step) walks the series at evenly spaced positions
            // and always stays below _length.
            double step = _length * 1.0 / SampleSize;
            var sampleX = new double[SampleSize];
            var sampleY = new double[SampleSize];
            for (int i = 0; i < SampleSize; i++)
            {
                int index = (int)(i * step);
                sampleX[i] = _x[index];
                sampleY[i] = _y[index];
            }
            if (r == -1)
                _smoother = new Loess(sampleX, sampleY, isTemporal);
            else
                _smoother = new Loess(sampleX, sampleY, r, isTemporal);
        }
    }

    /// <summary>
    /// The estimated (smoothed) y values, one per input point. Empty until <see cref="Estimate"/> is called.
    /// </summary>
    public List<double> Y { get; }

    /// <summary>
    /// Assign the smoothed values to all the data points, not only to the sampled ones.
    /// Calling this method again recomputes <see cref="Y"/> from scratch.
    /// </summary>
    public void Estimate()
    {
        // start from an empty list so that repeated calls do not append a second copy of the results
        Y.Clear();
        for (int i = 0; i < _length; i++)
        {
            double yValue = _smoother.EstimateY(_x[i]);
            Y.Add(yValue);
        }
    }

    /// <summary>
    /// Estimate the y value for any given x value, even if the x value is not one of the input points.
    /// </summary>
    /// <param name="xValue">the x value to estimate at</param>
    /// <returns>the value returned by the underlying smoother for <paramref name="xValue"/></returns>
    public double EstimateY(double xValue)
    {
        return _smoother.EstimateY(xValue);
    }
}
}
Loading