Skip to content

Commit 3d3d45c

Browse files
Fix issue in SRCnnEntireAnomalyDetector (#5579)
* update * refine codes * update comments * update for nit Co-authored-by: [email protected] <[email protected]>
1 parent 335330d commit 3d3d45c

File tree

3 files changed

+221
-4
lines changed

3 files changed

+221
-4
lines changed

src/Microsoft.ML.TimeSeries/SrCnnEntireAnomalyDetector.cs

+30-3
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,9 @@ internal sealed class SrCnnEntireModeler
362362
private static readonly double _unitForZero = 0.3;
363363
private static readonly double _minimumScore = 0.0;
364364
private static readonly double _maximumScore = 1.0;
365+
// Use this threshold to correct false anomalies
366+
private static readonly double _zscoreThreshold = 1.5;
367+
365368
// If the score window is smaller than this value, the anomaly score tends to be small.
366369
// Proof: For each point, the SR anomaly score is calculated as (w is average window size):
367370
// (mag - avg_mag) / avg_mag
@@ -426,6 +429,8 @@ internal sealed class SrCnnEntireModeler
426429
//used in all modes
427430
private double _minimumOriginValue;
428431
private double _maximumOriginValue;
432+
private double _std;
433+
private double _mean;
429434
private readonly double[] _predictArray;
430435
private double[] _backAddArray;
431436
private double[] _fftRe;
@@ -491,14 +496,23 @@ public void Train(double[] values, ref double[][] results)
491496
_minimumOriginValue = Double.MaxValue;
492497
_maximumOriginValue = Double.MinValue;
493498

499+
var sum = 0.0;
500+
var squareSum = 0.0;
501+
494502
Array.Resize(ref _seriesToDetect, values.Length);
495503
for (int i = 0; i < values.Length; ++i)
496504
{
497-
_seriesToDetect[i] = values[i];
498-
_minimumOriginValue = Math.Min(_minimumOriginValue, values[i]);
499-
_maximumOriginValue = Math.Max(_maximumOriginValue, values[i]);
505+
var value = values[i];
506+
_seriesToDetect[i] = value;
507+
_minimumOriginValue = Math.Min(_minimumOriginValue, value);
508+
_maximumOriginValue = Math.Max(_maximumOriginValue, value);
509+
sum += value;
510+
squareSum += value * value;
500511
}
501512

513+
_mean = sum / values.Length;
514+
_std = Math.Sqrt((squareSum - (sum * sum) / values.Length) / values.Length);
515+
502516
if (_period > 0)
503517
{
504518
_deseasonalityFunction.Deseasonality(ref values, _period, ref _seriesToDetect);
@@ -612,9 +626,22 @@ private void SpectralResidual(double[] values, double[][] results, double thresh
612626

613627
var detres = score > threshold ? 1 : 0;
614628

629+
// Anomalies correction by zscore
630+
if (detres > 0)
631+
{
632+
// Use zscore to filter out those false anomalies that lie within 1.5 sigma region.
633+
var zscore = Math.Abs(values[i] - _mean) / _std;
634+
if (_std < _eps || zscore < _zscoreThreshold)
635+
{
636+
detres = 0;
637+
score = 0.0;
638+
}
639+
}
640+
615641
results[i][0] = detres;
616642
results[i][1] = score;
617643
results[i][2] = _ifftMagList[i];
644+
618645
}
619646
}
620647

test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs

+55-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44

55
using System;
66
using System.Collections.Generic;
7-
using System.Data;
87
using System.IO;
98
using System.Linq;
109
using Microsoft.ML.Data;
@@ -717,6 +716,61 @@ public void TestSrCnnAnomalyDetectorWithSeasonalData(
717716
}
718717
}
719718

719+
/// <summary>
/// Runs batch SR-CNN anomaly detection over big_spike_data.csv in every detect mode and
/// checks that only the large spike is reported as an anomaly.
/// </summary>
/// <param name="mode">The SR-CNN detect mode under test.</param>
[Theory, CombinatorialData]
public void TestSrCnnAnomalyDetectorBigSpike(
    [CombinatorialValues(SrCnnDetectMode.AnomalyOnly, SrCnnDetectMode.AnomalyAndExpectedValue, SrCnnDetectMode.AnomalyAndMargin)] SrCnnDetectMode mode
    )
{
    var ml = new MLContext(1);

    var dataPath = GetDataPath("Timeseries", "big_spike_data.csv");

    // Load data from file into the dataView
    IDataView dataView = ml.Data.LoadFromTextFile<TimeSeriesDataDouble>(dataPath, hasHeader: true);

    // Setup the detection arguments
    string outputColumnName = nameof(SrCnnAnomalyDetection.Prediction);
    string inputColumnName = nameof(TimeSeriesDataDouble.Value);

    // Do batch anomaly detection
    var options = new SrCnnEntireAnomalyDetectorOptions()
    {
        Threshold = 0.3,
        BatchSize = -1,
        Sensitivity = 80.0,
        DetectMode = mode,
        Period = 0,
        DeseasonalityMode = SrCnnDeseasonalityMode.Stl
    };

    var outputDataView = ml.AnomalyDetection.DetectEntireAnomalyBySrCnn(dataView, outputColumnName, inputColumnName, options);

    // Getting the data of the newly created column as an IEnumerable of SrCnnAnomalyDetection.
    var predictionColumn = ml.Data.CreateEnumerable<SrCnnAnomalyDetection>(
        outputDataView, reuseRowObject: false);

    // Zero-based row of the 48232.24413 spike in big_spike_data.csv (header excluded).
    const int anomalyIndex = 26;

    int k = 0;
    foreach (var prediction in predictionColumn)
    {
        // Prediction[0] is the anomaly flag: 1 at the spike, 0 everywhere else.
        double expected = k == anomalyIndex ? 1 : 0;
        Assert.Equal(expected, prediction.Prediction[0]);

        ++k;
    }

    // Guard against a vacuous pass: the loop must have actually reached the spike row.
    Assert.True(k > anomalyIndex);
}
773+
720774
[Theory, CombinatorialData]
721775
public void TestSrCnnAnomalyDetectorWithSeasonalAnomalyData(
722776
[CombinatorialValues(SrCnnDeseasonalityMode.Stl, SrCnnDeseasonalityMode.Mean, SrCnnDeseasonalityMode.Median)] SrCnnDeseasonalityMode mode
+136
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
Value
2+
0.333061106
3+
2.198203303
4+
1.705836778
5+
1.861708215
6+
1.085050871
7+
0.548409541
8+
0.365537211
9+
0.433823922
10+
0.450379649
11+
0.485662867
12+
0.59162219
13+
0.678494031
14+
0.735315015
15+
0.780228908
16+
0.779309892
17+
0.71637311
18+
0.783369345
19+
0.829129842
20+
0.769519564
21+
0.74230352
22+
0.914116686
23+
0.970162226
24+
0.964537878
25+
0.983059421
26+
1.009637074
27+
1.054769667
28+
48232.24413
29+
4739.675242
30+
4963.982698
31+
8555.732913
32+
75.25537709
33+
11.2742621
34+
4.388301951
35+
2.584960796
36+
2.273629928
37+
1.972334276
38+
1.811987528
39+
1.854365004
40+
1.581860355
41+
1.478895939
42+
1.447799312
43+
1.406460886
44+
1.333295368
45+
1.282260475
46+
1.345933543
47+
1.264431234
48+
1.235222153
49+
1.204307109
50+
1.133533648
51+
1.110515351
52+
1.017397262
53+
1.103902775
54+
1.099039227
55+
1.061479438
56+
1.063725177
57+
1.072777829
58+
1.044107263
59+
0.981847451
60+
1.038324454
61+
1.033883341
62+
1.004416487
63+
1.017918007
64+
0.345233269
65+
1.092365812
66+
1.078005286
67+
1.033142227
68+
1.024832225
69+
1.098672969
70+
1.092767871
71+
1.095272293
72+
1.139357768
73+
1.0711793
74+
1.119012071
75+
1.11906761
76+
1.131538563
77+
1.113967769
78+
1.141610905
79+
1.14317559
80+
1.108130866
81+
1.083645413
82+
1.147460394
83+
1.177086603
84+
1.153490106
85+
1.145660569
86+
1.132464809
87+
1.106364602
88+
1.003350151
89+
1.099011524
90+
1.109557478
91+
1.065336146
92+
1.081590334
93+
1.075768021
94+
0.986278889
95+
1.001219623
96+
1.080312553
97+
1.075076345
98+
1.057146027
99+
1.106862867
100+
1.084433852
101+
0.975639541
102+
0.944182773
103+
1.088712253
104+
1.067152572
105+
1.107507855
106+
1.069142173
107+
1.036247939
108+
0.995907308
109+
0.932153379
110+
1.074865283
111+
1.065780376
112+
1.05063751
113+
1.077263172
114+
1.033459106
115+
0.985960758
116+
0.981842413
117+
1.032862035
118+
1.005063722
119+
0.862145269
120+
0.491629016
121+
0.473904777
122+
0.777874357
123+
0.945595834
124+
1.020180047
125+
1.025171701
126+
1.031632464
127+
1.02571454
128+
0.950313827
129+
0.935412116
130+
0.991591559
131+
1.013279894
132+
0.991734823
133+
1.007466737
134+
1.019160801
135+
0.919227208
136+
0.977617794

0 commit comments

Comments
 (0)