-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Add deseasonality in SrCnnEntireAnomalyDetect #5202
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 14 commits
Commits
Show all changes
52 commits
Select commit
Hold shift + click to select a range
78961c7
add seasonality detect and stl decompose in srcnn
468d444
optimizations in performance
45b6971
Add parameter period in SrCnn interface, remove inner period detect l…
245731f
add periodical data
604084a
add test
7c6734f
Remove unused files
8ea07f6
further remove unused codes
b398fdb
remove unused functions
cce849d
update
10935c3
optimize WeightedRegression; clean code; add null checks
b524957
recover
d40440b
reduce file numbers
92c6aab
restore
f6e8566
move stl related codes to a subfolder
d4515ee
fix sln file
8778e5a
update code style
219decd
fix members initialization outside the constructor
e039cba
remove unused using
3553277
refactor InnerStl
8c693c9
use contract exception
37e7c6f
remove unused class
65fb7a5
update stl
d2e2653
remove unused usings
edc8a21
add readonly
7737a57
fix bug
2e6b0e5
Merge branch 'master' into dev/srcnn_deseasonality
8df11b5
Merge branch 'master' into dev/srcnn_deseasonality
guinao fbf3e0e
Merge branch 'dev/srcnn_deseasonality' of https://github.com/guinao/m…
86f64f7
add deseasonality
0d1a038
update deseasonality
c87ae95
update
b59660c
add options
5a93fa1
refine code style
1a7b071
refine code
b6fa553
update
192d782
updates
1b90669
remove max neighbor number constraint
92b0963
remove the max neightbor count constraint
59e7f2b
update SrCnnEntireDetectOptions, move input/output column name out; …
b38c123
refactor the constructor of Loess
d72d97b
remove unused imports
c58a45b
refactor and optimization
3939380
optimize
7e48255
unfold pow(x, 2) to x * x for performance optimization
c3f74d5
refactor polynomial model class and deseasonality functions, refine c…
816479a
refine
a239a89
update comment
1446115
updates
8b10cc0
update some wordings
a817829
update comments
8fac10e
update some comments
2cad48f
wording
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
111 changes: 111 additions & 0 deletions
111
src/Microsoft.ML.TimeSeries/STL/BackendStructureBase.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Text; | ||
|
||
namespace Microsoft.ML.TimeSeries | ||
{ | ||
/// <summary>
/// The kinds of insight that can be reported about one time series or about a pair of time series.
/// </summary>
public enum TimeSeriesInfoKind
{
    /// <summary>
    /// Single series: the outliers of the series.
    /// </summary>
    Outlier,

    /// <summary>
    /// Single series: the seasonal signal of the series.
    /// </summary>
    Seasonal,

    /// <summary>
    /// Single series: the trend curve of the series.
    /// </summary>
    Trend,

    /// <summary>
    /// Two series: the lead/lag correlation between them.
    /// </summary>
    CrossCorrelation,

    /// <summary>
    /// Two series: the correlation between their outliers.
    /// </summary>
    OutlierCorrelation,

    /// <summary>
    /// Two series: the correlation between their trends.
    /// </summary>
    TrendCorrelation,
}
|
||
/// <summary>
/// Common base of every time series insight: a rank, a description and a kind.
/// </summary>
public abstract class TimeSeriesInfoBase
{
    /// <summary>
    /// The rank of this insight; it is what <see cref="Compare"/> orders insights by.
    /// </summary>
    public abstract double Rank { get; set; }

    /// <summary>
    /// A description of this particular insight.
    /// </summary>
    public abstract string Description { get; protected set; }

    /// <summary>
    /// The kind of this insight.
    /// </summary>
    public abstract TimeSeriesInfoKind Kind { get; protected set; }

    /// <summary>
    /// Comparison function for sorting insights by <see cref="Rank"/>.
    /// </summary>
    /// <param name="left">the left element</param>
    /// <param name="right">the right element</param>
    /// <returns>
    /// 0 when both arguments are the same reference (including both null); -1 when only
    /// <paramref name="left"/> is null; 1 when only <paramref name="right"/> is null;
    /// otherwise the result of comparing the two ranks.
    /// </returns>
    public static int Compare(TimeSeriesInfoBase left, TimeSeriesInfoBase right)
    {
        if (!object.ReferenceEquals(left, right))
        {
            // A missing element sorts before a present one.
            if (left == null)
            {
                return -1;
            }

            if (right == null)
            {
                return 1;
            }

            double leftRank = left.Rank;
            return leftRank.CompareTo(right.Rank);
        }

        return 0;
    }
}
|
||
/// <summary>
/// An insight that characterizes a single time series.
/// </summary>
public abstract class SingleSeriesInfo : TimeSeriesInfoBase
{
    /// <summary>
    /// The x-axis values of the original curve.
    /// </summary>
    public IReadOnlyList<double> X { get; protected set; }

    /// <summary>
    /// The y-axis values of the original curve.
    /// </summary>
    public IReadOnlyList<double> Y { get; protected set; }
}
|
||
/// <summary>
/// An insight that characterizes the relationship between two time series,
/// such as their lead/lag correlation or the correlation of their outliers.
/// </summary>
public abstract class TwoSeriesInfo : TimeSeriesInfoBase
{
    /// <summary>
    /// The x-axis values shared by the two time series; the series should be aligned on them.
    /// </summary>
    public IReadOnlyList<double> X { get; protected set; }

    /// <summary>
    /// The y-axis values of the first series.
    /// </summary>
    public IReadOnlyList<double> Y1 { get; protected set; }

    /// <summary>
    /// The y-axis values of the second series.
    /// </summary>
    public IReadOnlyList<double> Y2 { get; protected set; }
}
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Text; | ||
|
||
namespace Microsoft.ML.TimeSeries | ||
{ | ||
/// <summary>
/// Shared constants that bound the size and shape of a time series accepted for analysis.
/// </summary>
public class BasicParameters
{
    /// <summary>
    /// The minimum length of a valid time series. A series of length 2 is trivial,
    /// and anything shorter than 2 is meaningless.
    /// </summary>
    public const int MinTimeSeriesLength = 3;

    /// <summary>
    /// The maximum length of a valid time series. With too many data points the chart becomes
    /// so dense that details are lost. This number is tuned so that the bird strike data
    /// can still preserve results.
    /// </summary>
    public const int MaxTimeSeriesLength = 4000;

    /// <summary>
    /// The minimum number of repeated periods, used to decide whether a seasonal signal is noticeable.
    /// </summary>
    public const int MinPeriodRepeatCount = 3;

    /// <summary>
    /// The minimum number of regular gaps. With too few gaps the time series looks odd,
    /// which impacts the seasonality analysis.
    /// </summary>
    public const int MinRegularGap = 5;
}
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Text; | ||
using Microsoft.ML.Runtime; | ||
|
||
namespace Microsoft.ML.TimeSeries | ||
{ | ||
/// <summary>
/// A fast, approximate version of Loess. When the input is longer than the sample size, the
/// smoother is fitted on an evenly spaced sample of the points, and the fitted model is then
/// used to estimate the values at all of the original points.
/// </summary>
public class FastLoess
{
    /// <summary>
    /// The number of points the smoother is fitted on when the input is longer than this value.
    /// </summary>
    private const int SampleSize = 100;

    private readonly IReadOnlyList<double> _x;
    private readonly IReadOnlyList<double> _y;
    private readonly int _length;

    private readonly Loess _smoother;

    /// <summary>
    /// Initializes a new instance of the <see cref="FastLoess"/> class.
    /// When the time series is longer than the sample size, it is sampled first to improve performance.
    /// </summary>
    /// <param name="xValues">the input x-axis values</param>
    /// <param name="yValues">the input y-axis values</param>
    /// <param name="isTemporal">if the regression is considered to take temporal information into account. in general, this is true if we are regressing a time series, and false if we are regressing scatter plot data</param>
    /// <param name="r">the smoothing ratio passed to the underlying Loess; when left at -1 the Loess default is used</param>
    /// <exception cref="ArgumentException">
    /// <paramref name="yValues"/> has fewer than <see cref="BasicParameters.MinTimeSeriesLength"/> elements.
    /// </exception>
    public FastLoess(IReadOnlyList<double> xValues, IReadOnlyList<double> yValues, bool isTemporal = true, int r = -1)
    {
        Contracts.CheckValue(xValues, nameof(xValues));
        Contracts.CheckValue(yValues, nameof(yValues));

        if (yValues.Count < BasicParameters.MinTimeSeriesLength)
        {
            throw new ArgumentException(
                "input time series must contain at least " + BasicParameters.MinTimeSeriesLength + " points: lowess",
                nameof(yValues));
        }

        _x = xValues;
        _y = yValues;
        _length = _y.Count;
        Y = new List<double>(_length);

        if (_length <= SampleSize)
        {
            // Short input: sampling is not necessary, fit on the data as given.
            if (r == -1)
                _smoother = new Loess(_x, _y, isTemporal);
            else
                _smoother = new Loess(_x, _y, r, isTemporal);
        }
        else
        {
            // Long input: fit on SampleSize evenly spaced points to boost performance.
            double step = _length * 1.0 / SampleSize;
            var sampleX = new double[SampleSize];
            var sampleY = new double[SampleSize];
            for (int i = 0; i < SampleSize; i++)
            {
                // i * step < _length for every i < SampleSize, so the index stays in range.
                int index = (int)(i * step);
                sampleX[i] = _x[index];
                sampleY[i] = _y[index];
            }

            if (r == -1)
                _smoother = new Loess(sampleX, sampleY, isTemporal);
            else
                _smoother = new Loess(sampleX, sampleY, r, isTemporal);
        }
    }

    /// <summary>
    /// The estimated (smoothed) y values, one per input point. Empty until <see cref="Estimate"/> is called.
    /// </summary>
    public List<double> Y { get; }

    /// <summary>
    /// Assigns the smoothed values to all of the input data points, not only to the sampled ones.
    /// Results from any earlier call are discarded first, so calling this more than once
    /// does not append duplicate values to <see cref="Y"/>.
    /// </summary>
    public void Estimate()
    {
        Y.Clear();
        for (int i = 0; i < _length; i++)
        {
            Y.Add(_smoother.EstimateY(_x[i]));
        }
    }

    /// <summary>
    /// Estimates the y value for any given x value, even if the x value is not one of the input points.
    /// </summary>
    /// <param name="xValue">the x value to estimate at</param>
    /// <returns>the value returned by the underlying smoother for <paramref name="xValue"/></returns>
    public double EstimateY(double xValue)
    {
        return _smoother.EstimateY(xValue);
    }
}
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.