Skip to content

Commit 1c2469f

Browse files
authored
add threshold for RCA (#5218)
1 parent 3d11ce4 commit 1c2469f

File tree

4 files changed

+119
-49
lines changed

4 files changed

+119
-49
lines changed

src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -187,25 +187,28 @@ public static IDataView DetectEntireAnomalyBySrCnn(this AnomalyDetectionCatalog
187187
/// It is used when score is calculated for each root cause item. The range of beta should be in [0,1].
188188
/// For a larger beta, root cause items which have a large difference between value and expected value will get a high score.
189189
/// For a small beta, root cause items which have a high relative change will get a low score.</param>
190+
/// <param name="rootCauseThreshold">A threshold to determine whether the point should be root cause. The range of this threshold should be in [0,1].
191+
/// If the point's delta is equal to or larger than rootCauseThreshold multiplied by anomaly dimension point's delta, this point is treated as a root cause. Different thresholds will produce different results. Users can choose the threshold according to their data and requirements.</param>
190192
/// <example>
191193
/// <format type="text/markdown">
192194
/// <![CDATA[
193195
/// [!code-csharp[LocalizeRootCause](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs)]
194196
/// ]]>
195197
/// </format>
196198
/// </example>
197-
public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog, RootCauseLocalizationInput src, double beta = 0.5)
199+
public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog, RootCauseLocalizationInput src, double beta = 0.3, double rootCauseThreshold = 0.95)
{
    IHostEnvironment host = CatalogUtils.GetEnvironment(catalog);

    // Check the root cause input before looking at the tuning parameters.
    CheckRootCauseInput(host, src);

    // Check parameters: both must lie in [0,1]. Each check passes the name of the
    // argument it validates, so a failure is attributed to the right parameter.
    host.CheckUserArg(beta >= 0 && beta <= 1, nameof(beta), "Must be in [0,1]");
    host.CheckUserArg(rootCauseThreshold >= 0 && rootCauseThreshold <= 1, nameof(rootCauseThreshold), "Must be in [0,1]");

    // Find out the root cause.
    RootCauseAnalyzer analyzer = new RootCauseAnalyzer(src, beta, rootCauseThreshold);
    RootCause dst = analyzer.Analyze();
    return dst;
}

src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs

Lines changed: 89 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,17 @@ namespace Microsoft.ML.TimeSeries
1313
public class RootCauseAnalyzer
1414
{
1515
private static double _anomalyRatioThreshold = 0.5;
16-
private static double _anomalyDeltaThreshold = 0.95;
1716
private static double _anomalyPreDeltaThreshold = 2;
1817

1918
private RootCauseLocalizationInput _src;
2019
private double _beta;
20+
private double _rootCauseThreshold;
2121

22-
public RootCauseAnalyzer(RootCauseLocalizationInput src, double beta)
22+
/// <summary>
/// Creates an analyzer and stores the supplied input and tuning values in its fields.
/// </summary>
/// <param name="src">The root cause localization input to analyze.</param>
/// <param name="beta">The beta value kept for later use by the analyzer.</param>
/// <param name="rootCauseThreshold">The threshold kept for later use by the analyzer.</param>
public RootCauseAnalyzer(RootCauseLocalizationInput src, double beta, double rootCauseThreshold)
{
    // The three assignments are independent of each other.
    _rootCauseThreshold = rootCauseThreshold;
    _beta = beta;
    _src = src;
}
2728

2829
public RootCause Analyze()
@@ -39,10 +40,10 @@ private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src)
3940
dst.Items = new List<RootCauseItem>();
4041

4142
DimensionInfo dimensionInfo = SeparateDimension(src.AnomalyDimension, src.AggregateSymbol);
42-
Tuple<PointTree, PointTree, Dictionary<string, TimeSeriesPoint>> pointInfo = GetPointsInfo(src, dimensionInfo);
43+
Tuple<PointTree, PointTree, Dictionary<Dictionary<string, object>, TimeSeriesPoint>> pointInfo = GetPointsInfo(src, dimensionInfo);
4344
PointTree pointTree = pointInfo.Item1;
4445
PointTree anomalyTree = pointInfo.Item2;
45-
Dictionary<string, TimeSeriesPoint> dimPointMapping = pointInfo.Item3;
46+
Dictionary<Dictionary<string, Object>, TimeSeriesPoint> dimPointMapping = pointInfo.Item3;
4647

4748
//which means there is no anomaly point with the anomaly dimension or no point under anomaly dimension
4849
if (anomalyTree.ParentNode == null || dimPointMapping.Count == 0)
@@ -81,11 +82,12 @@ private DimensionInfo SeparateDimension(Dictionary<string, Object> dimensions, O
8182
return info;
8283
}
8384

84-
private Tuple<PointTree, PointTree, Dictionary<string, TimeSeriesPoint>> GetPointsInfo(RootCauseLocalizationInput src, DimensionInfo dimensionInfo)
85+
private Tuple<PointTree, PointTree, Dictionary<Dictionary<string, object>, TimeSeriesPoint>> GetPointsInfo(RootCauseLocalizationInput src, DimensionInfo dimensionInfo)
8586
{
8687
PointTree pointTree = new PointTree();
8788
PointTree anomalyTree = new PointTree();
88-
Dictionary<string, TimeSeriesPoint> dimPointMapping = new Dictionary<string, TimeSeriesPoint>();
89+
DimensionComparer dc = new DimensionComparer();
90+
Dictionary<Dictionary<string, object>, TimeSeriesPoint> dimPointMapping = new Dictionary<Dictionary<string, object>, TimeSeriesPoint>(dc);
8991

9092
List<TimeSeriesPoint> totalPoints = GetTotalPointsForAnomalyTimestamp(src);
9193
Dictionary<string, Object> subDim = GetSubDim(src.AnomalyDimension, dimensionInfo.DetailDims);
@@ -94,9 +96,9 @@ private Tuple<PointTree, PointTree, Dictionary<string, TimeSeriesPoint>> GetPoin
9496
{
9597
if (ContainsAll(point.Dimension, subDim))
9698
{
97-
if (!dimPointMapping.ContainsKey(GetDicCode(point.Dimension)))
99+
if (!dimPointMapping.ContainsKey(point.Dimension))
98100
{
99-
dimPointMapping.Add(GetDicCode(point.Dimension), point);
101+
dimPointMapping.Add(point.Dimension, point);
100102
bool isValidPoint = point.IsAnomaly == true;
101103
if (ContainsAll(point.Dimension, subDim))
102104
{
@@ -111,7 +113,7 @@ private Tuple<PointTree, PointTree, Dictionary<string, TimeSeriesPoint>> GetPoin
111113
}
112114
}
113115

114-
return new Tuple<PointTree, PointTree, Dictionary<string, TimeSeriesPoint>>(pointTree, anomalyTree, dimPointMapping);
116+
return new Tuple<PointTree, PointTree, Dictionary<Dictionary<string, Object>, TimeSeriesPoint>>(pointTree, anomalyTree, dimPointMapping);
115117
}
116118

117119
protected Dictionary<string, Object> GetSubDim(Dictionary<string, Object> dimension, List<string> keyList)
@@ -327,7 +329,7 @@ private AnomalyDirection GetRootCauseDirection(TimeSeriesPoint rootCausePoint)
327329
}
328330
}
329331

330-
private void GetRootCauseDirectionAndScore(Dictionary<string, TimeSeriesPoint> dimPointMapping, Dictionary<string, Object> anomalyRoot, RootCause dst, double beta, PointTree pointTree, AggregateType aggType, Object aggSymbol)
332+
private void GetRootCauseDirectionAndScore(Dictionary<Dictionary<string, Object>, TimeSeriesPoint> dimPointMapping, Dictionary<string, Object> anomalyRoot, RootCause dst, double beta, PointTree pointTree, AggregateType aggType, Object aggSymbol)
331333
{
332334
TimeSeriesPoint anomalyPoint = GetPointByDimension(dimPointMapping, anomalyRoot, pointTree, aggType, aggSymbol);
333335
if (dst.Items.Count > 1)
@@ -378,11 +380,11 @@ private void GetRootCauseDirectionAndScore(Dictionary<string, TimeSeriesPoint> d
378380
}
379381
}
380382

381-
private TimeSeriesPoint GetPointByDimension(Dictionary<string, TimeSeriesPoint> dimPointMapping, Dictionary<string, Object> dimension, PointTree pointTree, AggregateType aggType, Object aggSymbol)
383+
private TimeSeriesPoint GetPointByDimension(Dictionary<Dictionary<string, Object>, TimeSeriesPoint> dimPointMapping, Dictionary<string, Object> dimension, PointTree pointTree, AggregateType aggType, Object aggSymbol)
382384
{
383-
if (dimPointMapping.ContainsKey(GetDicCode(dimension)))
385+
if (dimPointMapping.ContainsKey(dimension))
384386
{
385-
return dimPointMapping[GetDicCode(dimension)];
387+
return dimPointMapping[dimension];
386388
}
387389

388390
int count = 0;
@@ -419,11 +421,6 @@ private TimeSeriesPoint GetPointByDimension(Dictionary<string, TimeSeriesPoint>
419421
}
420422
}
421423

422-
private static string GetDicCode(Dictionary<string, Object> dic)
423-
{
424-
return string.Join(";", dic.Select(x => x.Key + "=" + (string)x.Value).ToArray());
425-
}
426-
427424
private void BuildTree(PointTree tree, List<string> aggDims, TimeSeriesPoint point, Object aggSymbol)
428425
{
429426
int aggNum = 0;
@@ -476,22 +473,23 @@ private BestDimension FindBestDimension(SortedDictionary<BestDimension, double>
476473
bool isRatioNan = Double.IsNaN(valueRatioMap[best]);
477474
if (dimension.Key.AnomalyDis.Count > 1)
478475
{
479-
if (!isRatioNan && (best.AnomalyDis.Count != 1 && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0)))
476+
if (best.AnomalyDis.Count != 1 && !isRatioNan && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0))
480477
{
481-
best = dimension.Key;
478+
best = GetBestDimension(best, dimension, valueRatioMap);
482479
}
483480
}
484-
else
481+
else if (dimension.Key.AnomalyDis.Count == 1)
485482
{
483+
486484
if (best.AnomalyDis.Count > 1)
487485
{
488486
best = dimension.Key;
489487
}
490-
else
488+
else if (best.AnomalyDis.Count == 1)
491489
{
492490
if (!isRatioNan && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0))
493491
{
494-
best = dimension.Key;
492+
best = GetBestDimension(best, dimension, valueRatioMap);
495493
}
496494
}
497495
}
@@ -502,6 +500,22 @@ private BestDimension FindBestDimension(SortedDictionary<BestDimension, double>
502500
return best;
503501
}
504502

503+
/// <summary>
/// Chooses between the current best dimension and a candidate dimension.
/// The candidate replaces the current best unless its value compares equal to the
/// current best's ratio and the candidate's AnomalyDis and PointDis have the same count.
/// </summary>
/// <param name="best">The dimension currently considered best.</param>
/// <param name="dimension">The candidate dimension together with its value.</param>
/// <param name="valueRatioMap">Map from dimension to its ratio; must contain <paramref name="best"/>.</param>
/// <returns>Either <paramref name="best"/> or the candidate's key.</returns>
private BestDimension GetBestDimension(BestDimension best, KeyValuePair<BestDimension, double> dimension, Dictionary<BestDimension, Double> valueRatioMap)
{
    BestDimension candidate = dimension.Key;
    bool sameRatio = valueRatioMap[best].CompareTo(dimension.Value) == 0;

    // On a tie, keep the current best when the candidate's two distributions have equal counts.
    if (sameRatio && candidate.AnomalyDis.Count == candidate.PointDis.Count)
    {
        return best;
    }

    return candidate;
}
518+
505519
/// <summary>
506520
/// Calculate the surprise score according to root cause point and anomaly point
507521
/// </summary>
@@ -569,6 +583,10 @@ private double GetFinalScore(double surprise, double ep, double beta)
569583
else
570584
{
571585
a = (1 - Math.Pow(2, -surprise));
586+
if (Double.IsNaN(a))
587+
{
588+
a = 1;
589+
}
572590
b = (1 - Math.Pow(2, -ep));
573591
}
574592

@@ -593,7 +611,7 @@ private static Dictionary<string, Object> UpdateDimensionValue(Dictionary<string
593611

594612
private bool StopAnomalyComparison(double preTotal, double parent, double current, double pre)
595613
{
596-
if (Math.Abs(preTotal) < Math.Abs(parent) * _anomalyDeltaThreshold)
614+
if (Math.Abs(preTotal) < Math.Abs(parent) * _rootCauseThreshold)
597615
{
598616
return false;
599617
}
@@ -603,7 +621,7 @@ private bool StopAnomalyComparison(double preTotal, double parent, double curren
603621

604622
private bool ShouldSeparateAnomaly(double total, double parent, int totalSize, int size)
605623
{
606-
if (Math.Abs(total) < Math.Abs(parent) * _anomalyDeltaThreshold)
624+
if (Math.Abs(total) < Math.Abs(parent) * _rootCauseThreshold)
607625
{
608626
return false;
609627
}
@@ -657,7 +675,7 @@ private void UpdateDistribution(Dictionary<string, int> distribution, List<TimeS
657675
{
658676
foreach (TimeSeriesPoint point in points)
659677
{
660-
string dimVal = (string)point.Dimension[dimKey];
678+
string dimVal = Convert.ToString(point.Dimension[dimKey]);
661679
if (!distribution.ContainsKey(dimVal))
662680
{
663681
distribution.Add(dimVal, 0);
@@ -684,7 +702,7 @@ private static bool ContainsAll(Dictionary<string, Object> bigDictionary, Dictio
684702

685703
/// <summary>
/// Tells whether a dimension value is the aggregation symbol.
/// Both sides are compared by their string representation, so a non-string symbol
/// (for example a boxed number) can match a value that converts to the same text.
/// </summary>
/// <param name="val">The dimension value to test.</param>
/// <param name="aggSymbol">The aggregation symbol to compare against.</param>
/// <returns>True when the string forms of both arguments are identical; false when <paramref name="aggSymbol"/> is null.</returns>
private bool IsAggregationDimension(Object val, Object aggSymbol)
{
    if (aggSymbol == null)
    {
        // A null symbol never matched before; keep that behavior instead of
        // letting it convert to an empty string and match empty values.
        return false;
    }

    // string.Equals(object) is false for any non-string argument, so convert the
    // symbol as well instead of passing it through as a raw object.
    return string.Equals(Convert.ToString(val), Convert.ToString(aggSymbol), StringComparison.Ordinal);
}
689707
}
690708

@@ -748,4 +766,47 @@ public RootCauseScore(double surprise, double explanatoryScore)
748766
ExplanatoryScore = explanatoryScore;
749767
}
750768
}
751-
}
769+
770+
/// <summary>
/// Equality comparer for dimension dictionaries. Two dictionaries are equal when they have
/// the same number of entries and every key of one maps to an equal value in the other;
/// entry order does not matter.
/// </summary>
internal class DimensionComparer : EqualityComparer<Dictionary<string, object>>
{
    /// <summary>
    /// Compares two dimension dictionaries entry by entry.
    /// </summary>
    /// <param name="x">First dictionary, may be null.</param>
    /// <param name="y">Second dictionary, may be null.</param>
    /// <returns>True when both are null, the same instance, or hold equal key/value pairs.</returns>
    public override bool Equals(Dictionary<string, object> x, Dictionary<string, object> y)
    {
        if (object.ReferenceEquals(x, y))
        {
            // Same instance, or both null.
            return true;
        }
        if (x == null || y == null)
        {
            return false;
        }
        if (x.Count != y.Count)
        {
            return false;
        }

        // With equal counts, finding every key of x in y is enough to prove the key sets match.
        // NOTE(review): assumes both dictionaries use the same key comparer - confirm with callers.
        foreach (KeyValuePair<string, object> pair in x)
        {
            object other;
            if (!y.TryGetValue(pair.Key, out other))
            {
                return false;
            }

            // object.Equals handles null values on either side without throwing.
            if (!object.Equals(pair.Value, other))
            {
                return false;
            }
        }
        return true;
    }

    /// <summary>
    /// Computes an order-independent hash from every key and value in the dictionary.
    /// </summary>
    /// <param name="obj">The dictionary to hash, may be null.</param>
    /// <returns>0 for null or empty input; otherwise the XOR of the per-entry hashes.</returns>
    public override int GetHashCode(Dictionary<string, object> obj)
    {
        if (obj == null)
        {
            return 0;
        }

        int code = 0;
        foreach (KeyValuePair<string, object> pair in obj)
        {
            int keyHash = pair.Key.GetHashCode();
            int valueHash = pair.Value == null ? 0 : pair.Value.GetHashCode();

            // XOR keeps the result independent of enumeration order; hashing key and value
            // explicitly avoids the default struct hash of KeyValuePair.
            unchecked
            {
                code ^= (keyHash * 397) ^ valueHash;
            }
        }
        return code;
    }
}
812+
}

0 commit comments

Comments
 (0)