diff --git a/docs/api-reference/time-series-root-cause-localization.md b/docs/api-reference/time-series-root-cause-localization.md new file mode 100644 index 0000000000..3eecc838e2 --- /dev/null +++ b/docs/api-reference/time-series-root-cause-localization.md @@ -0,0 +1,49 @@ +At Microsoft, we developed a decision tree based root cause localization method which helps to find out the root causes for an anomaly incident at a specific timestamp incrementally. + +## Multi-Dimensional Root Cause Localization +It's a common case that one measure is collected with many dimensions (*e.g.*, Province, ISP) whose values are categorical (*e.g.*, Beijing or Shanghai for dimension Province). When a measure's value deviates from its expected value, this measure encounters anomalies. In such a case, operators would like to localize the root cause dimension combinations rapidly and accurately. Multi-dimensional root cause localization is critical for troubleshooting and mitigating such cases. + +## Algorithm + +The decision tree based root cause localization method is unsupervised, which means no training step is needed. It consists of the following major steps: + +(1) Find the best dimension which divides the anomalous and regular data based on decision tree according to entropy gain and entropy gain ratio. + +(2) Find the top anomaly points which contribute the most to the anomaly incident given the selected best dimension. + +### Decision Tree + +[Decision tree](https://en.wikipedia.org/wiki/Decision_tree) algorithm chooses the highest information gain to split or construct a decision tree.  We use it to choose the dimension which contributes the most to the anomaly. Following are some concepts used in decision tree. + +#### Information Entropy + +Information [entropy](https://en.wikipedia.org/wiki/Entropy_(information_theory)) is a measure of disorder or uncertainty. You can think of it as a measure of purity as well. The smaller the value, the purer the data D. 
+ +$$Ent(D) = - \sum_{k=1}^{|y|} p_k\log_2(p_k) $$ + +where $p_k$ represents the probability that an element in the dataset belongs to class $k$, and $|y|$ is the number of classes. In our case, there are only two classes, the anomalous points and the regular points. + +#### Information Gain +[Information gain](https://en.wikipedia.org/wiki/Information_gain_in_decision_trees) is a metric to measure the reduction of this disorder in our target class given additional information about it. Mathematically it can be written as: + +$$Gain(D, a) = Ent(D) - \sum_{v=1}^{|V|} \frac{|D^v|}{|D|} Ent(D^v) $$ + +where $Ent(D^v)$ is the entropy of the set of points in $D$ for which dimension $a$ is equal to $v$, $|D|$ is the total number of points in dataset $D$, $|D^v|$ is the total number of points in dataset $D$ for which dimension $a$ is equal to $v$, and $|V|$ is the number of distinct values of dimension $a$. + +For all aggregated dimensions, we calculate the information gain for each dimension. The greater the reduction in this uncertainty, the more information is gained about $D$ from dimension $a$. + +#### Entropy Gain Ratio + +Information gain is biased toward variables with a large number of distinct values. A modification is [information gain ratio](https://en.wikipedia.org/wiki/Information_gain_ratio), which reduces this bias. + +$$Ratio(D, a) = \frac{Gain(D,a)} {IV(a)} $$ + +where the intrinsic value ($IV$) is the entropy of the split (with respect to the dimension $a$ in focus). + +$$IV(a) = -\sum_{v=1}^{|V|}\frac{|D^v|} {|D|} \log_2 \frac{|D^v|} {|D|} $$ + +In our strategy, for all the aggregated dimensions, we first loop over the dimensions to find those whose entropy gain is above the mean entropy gain; then, from the filtered dimensions, we select the dimension with the highest entropy gain ratio as the best dimension. Meanwhile, dimensions for which the anomaly value count is only one are also included in the calculation. + +> [!Note] +> 1. Because our algorithm depends on the data you input, if the input points are incorrect or incomplete, the calculated result will be unexpected. +> 2. 
Currently, the algorithm localizes the root cause incrementally, which means at most one dimension, together with its values, is detected per call. If you want to find out all the dimensions that contribute to the anomaly, you can call this API recursively by updating the anomaly incident with the fixed dimension value. diff --git a/docs/api-reference/time-series-root-cause-surprise-score.md b/docs/api-reference/time-series-root-cause-surprise-score.md new file mode 100644 index 0000000000..b3b5ccb339 --- /dev/null +++ b/docs/api-reference/time-series-root-cause-surprise-score.md @@ -0,0 +1,6 @@ +Surprise score is used to capture the relative change for the root cause item. +$$S_i(m) = 0.5( p_i\log_2(\frac{2p_i} {p_i+q_i}) + q_i \log_2(\frac{2q_i}{p_i+q_i}) )$$ +$$p_i(m)= \frac{F_i(m)} {F(m)} $$ +$$q_i(m)= \frac{A_i(m)} {A(m)} $$ +where $F_i$ is the forecasted value for root cause item $i$, $A_i$ is the actual value for root cause item $i$, $F$ is the forecasted value for the anomaly point and $A$ is the actual value for the anomaly point. +For details of the surprise score, refer to [this document](https://www.usenix.org/system/files/conference/nsdi14/nsdi14-paper-bhagwan.pdf) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs new file mode 100644 index 0000000000..3879159491 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs @@ -0,0 +1,113 @@ +using System; +using System.Collections.Generic; +using Microsoft.ML; +using Microsoft.ML.TimeSeries; + +namespace Samples.Dynamic +{ + public static class LocalizeRootCause + { + private static string AGG_SYMBOL = "##SUM##"; + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. 
+ var mlContext = new MLContext(); + + // Create a root cause localization input instance: one slice at the anomaly timestamp, aggregated by sum. + DateTime timestamp = GetTimestamp(); + var data = new RootCauseLocalizationInput(timestamp, GetAnomalyDimension(), new List() { new MetricSlice(timestamp, GetPoints()) }, AggregateType.Sum, AGG_SYMBOL); + + // Get the root cause localization result. + RootCause prediction = mlContext.AnomalyDetection.LocalizeRootCause(data); + + // Print the localization result. + int count = 0; + foreach (RootCauseItem item in prediction.Items) + { + count++; + Console.WriteLine($"Root cause item #{count} ..."); + Console.WriteLine($"Score: {item.Score}, Path: {String.Join(" ",item.Path)}, Direction: {item.Direction}, Dimension:{String.Join(" ", item.Dimension)}"); + } + + // Expected output: + //Root cause item #1 ... + //Score: 0.26670448876705927, Path: DataCenter, Direction: Up, Dimension:[Country, UK] [DeviceType, ##SUM##] [DataCenter, DC1] + } + + // Builds the sample points for the anomaly timestamp. Each Point is created from two numbers, a flag and a dimension dictionary -- presumably (value, expected value, isAnomaly, dimension); TODO confirm against the Point constructor. + private static List GetPoints() + { + List points = new List(); + + Dictionary dic1 = new Dictionary(); + dic1.Add("Country", "UK"); + dic1.Add("DeviceType", "Laptop"); + dic1.Add("DataCenter", "DC1"); + points.Add(new Point(200, 100, true, dic1)); + + Dictionary dic2 = new Dictionary(); + dic2.Add("Country", "UK"); + dic2.Add("DeviceType", "Mobile"); + dic2.Add("DataCenter", "DC1"); + points.Add(new Point(1000, 100, true, dic2)); + + Dictionary dic3 = new Dictionary(); + dic3.Add("Country", "UK"); + dic3.Add("DeviceType", AGG_SYMBOL); + dic3.Add("DataCenter", "DC1"); + points.Add(new Point(1200, 200, true, dic3)); + + Dictionary dic4 = new Dictionary(); + dic4.Add("Country", "UK"); + dic4.Add("DeviceType", "Laptop"); + dic4.Add("DataCenter", "DC2"); + points.Add(new Point(100, 100, false, dic4)); + + Dictionary dic5 = new Dictionary(); + dic5.Add("Country", "UK"); + dic5.Add("DeviceType", "Mobile"); + dic5.Add("DataCenter", "DC2"); + points.Add(new Point(200, 200, false, dic5)); + + Dictionary dic6 = new Dictionary(); + dic6.Add("Country", "UK"); + dic6.Add("DeviceType", 
AGG_SYMBOL); + dic6.Add("DataCenter", "DC2"); + points.Add(new Point(300, 300, false, dic6)); + + Dictionary dic7 = new Dictionary(); + dic7.Add("Country", "UK"); + dic7.Add("DeviceType", AGG_SYMBOL); + dic7.Add("DataCenter", AGG_SYMBOL); + points.Add(new Point(1500, 500, true, dic7)); + + Dictionary dic8 = new Dictionary(); + dic8.Add("Country", "UK"); + dic8.Add("DeviceType", "Laptop"); + dic8.Add("DataCenter", AGG_SYMBOL); + points.Add(new Point(300, 200, true, dic8)); + + Dictionary dic9 = new Dictionary(); + dic9.Add("Country", "UK"); + dic9.Add("DeviceType", "Mobile"); + dic9.Add("DataCenter", AGG_SYMBOL); + points.Add(new Point(1200, 300, true, dic9)); + + return points; + } + + private static Dictionary GetAnomalyDimension() + { + Dictionary dim = new Dictionary(); + dim.Add("Country", "UK"); + dim.Add("DeviceType", AGG_SYMBOL); + dim.Add("DataCenter", AGG_SYMBOL); + + return dim; + } + + private static DateTime GetTimestamp() + { + return new DateTime(2020, 3, 23, 0, 0, 0); + } + } +} \ No newline at end of file diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index 03a507195e..2cc2aa3b0a 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -2,7 +2,11 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using System; +using System.Reflection; using Microsoft.ML.Data; +using Microsoft.ML.Runtime; +using Microsoft.ML.TimeSeries; using Microsoft.ML.Transforms.TimeSeries; namespace Microsoft.ML @@ -143,9 +147,53 @@ public static SsaSpikeEstimator DetectSpikeBySsa(this TransformsCatalog catalog, /// /// public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog catalog, string outputColumnName, string inputColumnName, - int windowSize=64, int backAddWindowSize=5, int lookaheadWindowSize=5, int averageingWindowSize=3, int judgementWindowSize=21, double threshold=0.3) + int windowSize = 64, int backAddWindowSize = 5, int lookaheadWindowSize = 5, int averageingWindowSize = 3, int judgementWindowSize = 21, double threshold = 0.3) => new SrCnnAnomalyEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, windowSize, backAddWindowSize, lookaheadWindowSize, averageingWindowSize, judgementWindowSize, threshold, inputColumnName); + /// + /// Create , which localizes root causes using decision tree algorithm. + /// + /// The anomaly detection catalog. + /// Root cause's input. The data is an instance of . + /// Beta is a weight parameter for user to choose. It is used when score is calculated for each root cause item. The range of beta should be in [0,1]. For a larger beta, root cause point which has a large difference between value and expected value will get a high score. On the contrary, for a small beta, root cause items which has a high relative change will get a high score. 
+ /// + /// + /// + /// + /// + public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog, RootCauseLocalizationInput src, double beta = 0.5) + { + IHostEnvironment host = CatalogUtils.GetEnvironment(catalog); + + //check the root cause input + CheckRootCauseInput(host, src); + + //check beta + host.CheckUserArg(beta >= 0 && beta <= 1, nameof(beta), "Must be in [0,1]"); + + //find out the root cause + RootCauseAnalyzer analyzer = new RootCauseAnalyzer(src, beta); + RootCause dst = analyzer.Analyze(); + return dst; + } + + // Validates the user-supplied input: it must be non-null, carry at least one slice, and at least one slice must be stamped with the anomaly timestamp. + private static void CheckRootCauseInput(IHostEnvironment host, RootCauseLocalizationInput src) + { + // Reject a missing input or slice list up front so the checks below cannot fail with a NullReferenceException. + host.CheckUserArg(src != null, nameof(src), "Must not be null"); + host.CheckUserArg(src.Slices != null && src.Slices.Count >= 1, nameof(src.Slices), "Must have at least one item"); + + bool containsAnomalyTimestamp = false; + foreach (MetricSlice slice in src.Slices) + { + if (slice.TimeStamp.Equals(src.AnomalyTimestamp)) + { + containsAnomalyTimestamp = true; + // One matching slice is enough; no need to scan the rest. + break; + } + } + host.CheckUserArg(containsAnomalyTimestamp, nameof(src.Slices), "Has no points in the given anomaly timestamp"); + } + /// /// Singular Spectrum Analysis (SSA) model for univariate time-series forecasting. /// For the details of the model, refer to http://arxiv.org/pdf/1206.6910.pdf. diff --git a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs new file mode 100644 index 0000000000..9eaaeb630d --- /dev/null +++ b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs @@ -0,0 +1,751 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Runtime.CompilerServices; +using Microsoft.ML.Internal.Utilities; + +namespace Microsoft.ML.TimeSeries +{ + // Localizes the root cause of an anomaly incident described by a RootCauseLocalizationInput. + public class RootCauseAnalyzer + { + // Tuning thresholds. They are never reassigned, so they are readonly rather than mutable static state. + private static readonly double _anomalyRatioThreshold = 0.5; + private static readonly double _anomalyDeltaThreshold = 0.95; + private static readonly double _anomalyPreDeltaThreshold = 2; + + // Set once in the constructor. + private readonly RootCauseLocalizationInput _src; + private readonly double _beta; + + public RootCauseAnalyzer(RootCauseLocalizationInput src, double beta) + { + _src = src; + _beta = beta; + } + + // Runs the localization on the input given to the constructor. + public RootCause Analyze() + { + return AnalyzeOneLayer(_src); + } + + /// + /// Analyzes one layer of the root cause: selects the single dimension, together with its values, that contributes the most to the anomaly. + /// + private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src) + { + RootCause dst = new RootCause(); + dst.Items = new List(); + + DimensionInfo dimensionInfo = SeparateDimension(src.AnomalyDimension, src.AggSymbol); + Tuple> pointInfo = GetPointsInfo(src, dimensionInfo); + PointTree pointTree = pointInfo.Item1; + PointTree anomalyTree = pointInfo.Item2; + Dictionary dimPointMapping = pointInfo.Item3; + + //which means there is no anomaly point with the anomaly dimension or no point under anomaly dimension + if (anomalyTree.ParentNode == null || dimPointMapping.Count == 0) + { + return dst; + } + + dst.Items.AddRange(LocalizeRootCauseByDimension(anomalyTree, pointTree, src.AnomalyDimension, dimensionInfo.AggDims)); + GetRootCauseDirectionAndScore(dimPointMapping, src.AnomalyDimension, dst, _beta, pointTree, src.AggType, src.AggSymbol); + + return dst; + } + + // Returns the points of the slice stamped with the anomaly timestamp. Enumerable.Single throws InvalidOperationException unless exactly one slice matches. + protected List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src) + { + // The lambda parameter is named differently from the local so the two cannot be confused. + MetricSlice slice = src.Slices.Single(candidate => candidate.TimeStamp.Equals(src.AnomalyTimestamp)); + return slice.Points; + } + + private DimensionInfo SeparateDimension(Dictionary dimensions, Object aggSymbol) + { + DimensionInfo info = 
new DimensionInfo(); + foreach (KeyValuePair entry in dimensions) + { + string key = entry.Key; + if (aggSymbol.Equals(entry.Value)) + { + info.AggDims.Add(key); + } + else + { + info.DetailDims.Add(key); + } + } + + return info; + } + + private Tuple> GetPointsInfo(RootCauseLocalizationInput src, DimensionInfo dimensionInfo) + { + PointTree pointTree = new PointTree(); + PointTree anomalyTree = new PointTree(); + Dictionary dimPointMapping = new Dictionary(); + + List totalPoints = GetTotalPointsForAnomalyTimestamp(src); + Dictionary subDim = GetSubDim(src.AnomalyDimension, dimensionInfo.DetailDims); + + foreach (Point point in totalPoints) + { + if (ContainsAll(point.Dimension, subDim)) + { + if (!dimPointMapping.ContainsKey(GetDicCode(point.Dimension))) + { + dimPointMapping.Add(GetDicCode(point.Dimension), point); + bool isValidPoint = point.IsAnomaly == true; + if (ContainsAll(point.Dimension, subDim)) + { + BuildTree(pointTree, dimensionInfo.AggDims, point, src.AggSymbol); + + if (isValidPoint) + { + BuildTree(anomalyTree, dimensionInfo.AggDims, point, src.AggSymbol); + } + } + } + } + } + + return new Tuple>(pointTree, anomalyTree, dimPointMapping); + } + + protected Dictionary GetSubDim(Dictionary dimension, List keyList) + { + return new Dictionary(keyList.Select(dim => new KeyValuePair(dim, dimension[dim])).ToDictionary(kvp => kvp.Key, kvp => kvp.Value)); + } + + private List LocalizeRootCauseByDimension(PointTree anomalyTree, PointTree pointTree, Dictionary anomalyDimension, List aggDims) + { + BestDimension best = null; + if (anomalyTree.ChildrenNodes.Count == 0) + { + //has no children node information, should use the leaves node(whose point has no aggrgated dimensions) information + best = SelectBestDimension(pointTree.Leaves, anomalyTree.Leaves, aggDims); + } + else + { + //has no leaves information, should calculate the entropy information according to the children nodes + best = SelectBestDimension(pointTree.ChildrenNodes, 
anomalyTree.ChildrenNodes, aggDims); + } + + if (best == null) + { + return new List() { new RootCauseItem(anomalyDimension) }; + } + + List children = null; + if (anomalyTree.ChildrenNodes.ContainsKey(best.DimensionKey)) + { + //Use children node information to get top anomalies + children = GetTopAnomaly(anomalyTree.ChildrenNodes[best.DimensionKey], anomalyTree.ParentNode, pointTree.ChildrenNodes[best.DimensionKey].Count > 0 ? pointTree.ChildrenNodes[best.DimensionKey] : pointTree.Leaves, best.DimensionKey, !(pointTree.ChildrenNodes[best.DimensionKey].Count > 0)); + } + else + { + //Use leaves node information to get top anomalies + children = GetTopAnomaly(anomalyTree.Leaves, anomalyTree.ParentNode, pointTree.Leaves, best.DimensionKey, true); + } + + if (children == null) + { + //As the cause couldn't be found, the root cause should be itself + return new List() { new RootCauseItem(anomalyDimension) }; + } + else + { + List causes = new List(); + // For the found causes, we return the result + foreach (Point anomaly in children) + { + causes.Add(new RootCauseItem(UpdateDimensionValue(anomalyDimension, best.DimensionKey, anomaly.Dimension[best.DimensionKey]), new List() { best.DimensionKey })); + } + return causes; + } + } + + // Two-class entropy in bits: with ratio = anomalyNum / totalNum, returns -(ratio * log2(ratio) + (1 - ratio) * log2(1 - ratio)), and 0 when the ratio is exactly 0 or 1. + protected double GetEntropy(int totalNum, int anomalyNum) + { + double ratio = (double)anomalyNum / totalNum; + if (ratio == 0 || ratio == 1) + { + return 0; + } + + return -(ratio * Log2(ratio) + (1 - ratio) * Log2(1 - ratio)); + } + + // Orders the anomaly points by Delta, collects them from the front until StopAnomalyComparison says to stop, and returns the collected points if ShouldSeparateAnomaly accepts them, otherwise null. A single remaining candidate is returned directly. + // NOTE(review): when root.Delta > 0 the list is only reversed (largest delta first), not filtered to positive deltas, whereas the other branch keeps only negative deltas -- confirm this asymmetry is intended. + protected List GetTopAnomaly(List anomalyPoints, Point root, List totalPoints, string dimKey, bool isLeaveslevel = false) + { + Dictionary pointDistribution = new Dictionary(); + UpdateDistribution(pointDistribution, totalPoints, dimKey); + + anomalyPoints = anomalyPoints.OrderBy(x => x.Delta).ToList(); + + if (root.Delta > 0) + { + anomalyPoints.Reverse(); + } + else + { + anomalyPoints = anomalyPoints.FindAll(x => x.Delta < 0); + } + if (anomalyPoints.Count == 1) + { + return anomalyPoints; + } + + double 
delta = 0; + double preDelta = 0; + List causeList = new List(); + foreach (Point anomaly in anomalyPoints) + { + if (StopAnomalyComparison(delta, root.Delta, anomaly.Delta, preDelta)) + { + break; + } + + delta += anomaly.Delta; + causeList.Add(anomaly); + preDelta = anomaly.Delta; + } + + int pointSize = isLeaveslevel ? pointDistribution.Count : GetTotalNumber(pointDistribution); + if (ShouldSeparateAnomaly(delta, root.Delta, pointSize, causeList.Count)) + { + return causeList; + } + + return null; + } + + /// + /// Use leaves point information to select best dimension + /// + protected BestDimension SelectBestDimension(List totalPoints, List anomalyPoints, List aggDim) + { + double totalEntropy = GetEntropy(totalPoints.Count, anomalyPoints.Count); + SortedDictionary entroyGainMap = new SortedDictionary(); + Dictionary entroyGainRatioMap = new Dictionary(); + double sumGain = 0; + + foreach (string dimKey in aggDim) + { + BestDimension dimension = new BestDimension(); + dimension.DimensionKey = dimKey; + + UpdateDistribution(dimension.PointDis, totalPoints, dimKey); + UpdateDistribution(dimension.AnomalyDis, anomalyPoints, dimKey); + + double relativeEntropy = GetDimensionEntropy(dimension.PointDis, dimension.AnomalyDis); + double gain = totalEntropy - relativeEntropy; + if (Double.IsNaN(gain)) + { + gain = 0; + } + entroyGainMap.Add(dimension, gain); + + double gainRatio = gain / GetDimensionIntrinsicValue(dimension.PointDis); + if (Double.IsInfinity(gainRatio)) + { + gainRatio = 0; + } + entroyGainRatioMap.Add(dimension, gainRatio); + + sumGain += gain; + } + + double meanGain = sumGain / aggDim.Count(); + + BestDimension best = FindBestDimension(entroyGainMap, entroyGainRatioMap, meanGain); + return best; + } + + /// + /// Use children point information to select best dimension + /// + private BestDimension SelectBestDimension(Dictionary> pointChildren, Dictionary> anomalyChildren, List aggDim) + { + SortedDictionary entropyMap = new SortedDictionary(); + 
Dictionary entropyRatioMap = new Dictionary(); + double sumGain = 0; + + foreach (string dimKey in aggDim) + { + BestDimension dimension = new BestDimension(); + dimension.DimensionKey = dimKey; + + if (pointChildren.ContainsKey(dimKey)) + { + UpdateDistribution(dimension.PointDis, pointChildren[dimKey], dimKey); + } + if (anomalyChildren.ContainsKey(dimKey)) + { + UpdateDistribution(dimension.AnomalyDis, anomalyChildren[dimKey], dimKey); + } + + double entropy = GetEntropy(dimension.PointDis.Count, dimension.AnomalyDis.Count); + if (Double.IsNaN(entropy)) + { + entropy = Double.MaxValue; + } + entropyMap.Add(dimension, entropy); + + double gainRatio = entropy / GetDimensionIntrinsicValue(dimension.PointDis); + + if (Double.IsInfinity(gainRatio)) + { + gainRatio = 0; + } + entropyRatioMap.Add(dimension, gainRatio); + + sumGain += entropy; + } + + double meanGain = sumGain / aggDim.Count; + + BestDimension best = FindBestDimension(entropyMap, entropyRatioMap, meanGain, false); + return best; + } + + private AnomalyDirection GetRootCauseDirection(Point rootCausePoint) + { + if (rootCausePoint.ExpectedValue < rootCausePoint.Value) + { + return AnomalyDirection.Up; + } + else if (rootCausePoint.ExpectedValue > rootCausePoint.Value) + { + return AnomalyDirection.Down; + } + else + { + return AnomalyDirection.Same; + } + } + + // Fills in Score and Direction for every item in dst.Items whose point can be resolved. Items that cannot be resolved, or all items when the anomaly point itself cannot be resolved, keep their existing values. + private void GetRootCauseDirectionAndScore(Dictionary dimPointMapping, Dictionary anomalyRoot, RootCause dst, double beta, PointTree pointTree, AggregateType aggType, Object aggSymbol) + { + Point anomalyPoint = GetPointByDimension(dimPointMapping, anomalyRoot, pointTree, aggType, aggSymbol); + if (anomalyPoint == null) + { + // Without the anomaly point there is nothing to score against. + return; + } + + // For Max/Min aggregation every resolved item gets the fixed score 1. + bool isMinOrMax = aggType.Equals(AggregateType.Max) || aggType.Equals(AggregateType.Min); + + foreach (RootCauseItem item in dst.Items) + { + Point rootCausePoint = GetPointByDimension(dimPointMapping, item.Dimension, pointTree, aggType, aggSymbol); + if (rootCausePoint == null) + { + continue; + } + + // Score and direction are written to the same item in one pass, so a skipped item can never shift the scores of the items that follow it. + var scores = GetSurpriseAndExplanatoryScore(rootCausePoint, anomalyPoint); + item.Score = isMinOrMax ? 1 : GetFinalScore(scores.Item1, Math.Abs(scores.Item2), beta); + item.Direction = GetRootCauseDirection(rootCausePoint); + } + } + 
+ // Returns the point registered for the given dimension, or, when none is registered, a point aggregated from the leaves that match the dimension's non-aggregated keys. Returns null when no leaf matches. + private Point GetPointByDimension(Dictionary dimPointMapping, Dictionary dimension, PointTree pointTree, AggregateType aggType, Object aggSymbol) + { + string dimensionCode = GetDicCode(dimension); + if (dimPointMapping.ContainsKey(dimensionCode)) + { + return dimPointMapping[dimensionCode]; + } + + int count = 0; + Point p = new Point(dimension); + DimensionInfo dimensionInfo = SeparateDimension(dimension, aggSymbol); + Dictionary subDim = GetSubDim(dimension, dimensionInfo.DetailDims); + + foreach (Point leave in pointTree.Leaves) + { + if (ContainsAll(leave.Dimension, subDim)) + { + count++; + + // Accumulate over all matching leaves; the previous "= +x" (unary plus) kept only the last leaf. + p.Value += leave.Value; + p.ExpectedValue += leave.ExpectedValue; + p.Delta += leave.Delta; + } + + } + + if (count == 0) + { + return null; + } + + if (aggType.Equals(AggregateType.Avg)) + { + p.Value = p.Value / count; + p.ExpectedValue = p.ExpectedValue / count; + p.Delta = p.Delta / count; + } + + return p; + } + + private static 
string GetDicCode(Dictionary dic) + { + // Builds the lookup key "k1=v1;k2=v2;..." for a dimension dictionary. Entries are sorted by key because Dictionary enumeration order is unspecified; without sorting, two dictionaries with the same content but a different insertion order would produce different keys. + return string.Join(";", dic.OrderBy(x => x.Key, StringComparer.Ordinal).Select(x => x.Key + "=" + (string)x.Value).ToArray()); + } + + private void BuildTree(PointTree tree, List aggDims, Point point, Object aggSymbol) + { + int aggNum = 0; + string nextDim = null; + + foreach (string dim in aggDims) + { + if (IsAggregationDimension(point.Dimension[dim], aggSymbol)) + { + aggNum++; + } + else + { + nextDim = dim; + } + } + + if (aggNum == aggDims.Count) + { + tree.ParentNode = point; + } + else if (aggNum == aggDims.Count - 1) + { + if (!tree.ChildrenNodes.ContainsKey(nextDim)) + { + tree.ChildrenNodes.Add(nextDim, new List()); + } + tree.ChildrenNodes[nextDim].Add(point); + } + + if (aggNum == 0) + { + tree.Leaves.Add(point); + } + } + + private BestDimension FindBestDimension(SortedDictionary valueMap, Dictionary valueRatioMap, double meanGain, bool isLeavesLevel = true) + { + BestDimension best = null; + foreach (KeyValuePair dimension in valueMap) + { + if (dimension.Key.AnomalyDis.Count == 1 || (isLeavesLevel ? dimension.Value >= meanGain : dimension.Value <= meanGain)) + { + if (best == null) + { + best = dimension.Key; + } + else + { + bool isRatioNan = Double.IsNaN(valueRatioMap[best]); + if (dimension.Key.AnomalyDis.Count > 1) + { + if (!isRatioNan && (best.AnomalyDis.Count != 1 && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0))) + { + best = dimension.Key; + } + } + else + { + if (best.AnomalyDis.Count > 1) + { + best = dimension.Key; + } + else + { + if (!isRatioNan && (isLeavesLevel ? 
valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0)) + { + best = dimension.Key; + } + } + } + } + } + } + + return best; + } + + /// + /// Calculate the surprise score according to root cause point and anomaly point + /// + /// A point which has been detected as root cause + /// The anomaly point + /// + /// + /// [!include[io](~/../docs/samples/docs/api-reference/time-series-root-cause-surprise-score.md)] + /// + /// + /// Surprise score + private double GetSurpriseScore(Point rootCausePoint, Point anomalyPoint) + { + double p; + double q; + + if (anomalyPoint.ExpectedValue == 0) + { + p = 0; + } + else + { + p = rootCausePoint.ExpectedValue / anomalyPoint.ExpectedValue; + } + + if (anomalyPoint.Value == 0) + { + q = 0; + } + else + { + q = rootCausePoint.Value / anomalyPoint.Value; + } + + double surprise = 0; + + if (p == 0) + { + surprise = 0.5 * (q * Log2(2 * q / (p + q))); + } + else if (q == 0) + { + surprise = 0.5 * (p * Log2(2 * p / (p + q))); + } + else + { + surprise = 0.5 * (p * Log2(2 * p / (p + q)) + q * Log2(2 * q / (p + q))); + } + + return surprise; + } + + // Combines the surprise score and the explanatory power into one score: beta * (1 - 2^-surprise) + (1 - beta) * (1 - 2^-ep). + private double GetFinalScore(double surprise, double ep, double beta) + { + // Each term is computed independently. Previously a missing "else" meant ep == 0 also forced the surprise term to 0. + double a = surprise == 0 ? 0 : 1 - Math.Pow(2, -surprise); + double b = ep == 0 ? 0 : 1 - Math.Pow(2, -ep); + + return beta * a + (1 - beta) * b; + } + + private Tuple GetSurpriseAndExplanatoryScore(Point rootCausePoint, Point anomalyPoint) + { + double surprise = GetSurpriseScore(rootCausePoint, anomalyPoint); + + double ep = anomalyPoint.Value - anomalyPoint.ExpectedValue == 0 ? 
0 : Math.Abs((rootCausePoint.Value - rootCausePoint.ExpectedValue) / (anomalyPoint.Value - anomalyPoint.ExpectedValue)); + + return new Tuple(surprise, ep); + } + + private static Dictionary UpdateDimensionValue(Dictionary dimension, string key, Object value) + { + Dictionary newDim = new Dictionary(dimension); + newDim[key] = value; + return newDim; + } + + private bool StopAnomalyComparison(double preTotal, double parent, double current, double pre) + { + if (Math.Abs(preTotal) < Math.Abs(parent) * _anomalyDeltaThreshold) + { + return false; + } + + return Math.Abs(pre) / Math.Abs(current) > _anomalyPreDeltaThreshold; + } + + private bool ShouldSeparateAnomaly(double total, double parent, int totalSize, int size) + { + if (Math.Abs(total) < Math.Abs(parent) * _anomalyDeltaThreshold) + { + return false; + } + + if (size == totalSize && size == 1) + { + return true; + } + + return size <= totalSize * _anomalyRatioThreshold; + } + + private double GetDimensionEntropy(Dictionary pointDis, Dictionary anomalyDis) + { + int total = GetTotalNumber(pointDis); + double entropy = 0; + + foreach (string key in anomalyDis.Keys) + { + double dimEntropy = GetEntropy(pointDis[key], anomalyDis[key]); + entropy += dimEntropy * pointDis[key] / total; + } + + return entropy; + } + + private double GetDimensionIntrinsicValue(Dictionary pointDis) + { + int total = GetTotalNumber(pointDis); + double intrinsicValue = 0; + + foreach (string key in pointDis.Keys) + { + intrinsicValue -= Log2((double)pointDis[key] / total) * (double)pointDis[key] / total; + } + + return intrinsicValue; + } + + private int GetTotalNumber(Dictionary distribution) + { + int total = 0; + foreach (int num in distribution.Values) + { + total += num; + } + return total; + } + + private void UpdateDistribution(Dictionary distribution, List points, string dimKey) + { + foreach (Point point in points) + { + string dimVal = (string)point.Dimension[dimKey]; + if (!distribution.ContainsKey(dimVal)) + { + 
distribution.Add(dimVal, 0); + } + distribution[dimVal] = distribution[dimVal] + 1; + } + } + + [MethodImplAttribute(MethodImplOptions.AggressiveInlining)] + private double Log2(double val) => Double.IsNaN(val) ? 0 : Math.Log(val) / Math.Log(2); + + private static bool ContainsAll(Dictionary bigDictionary, Dictionary smallDictionary) + { + foreach (var item in smallDictionary) + { + if (!bigDictionary.ContainsKey(item.Key) || !bigDictionary[item.Key].Equals(smallDictionary[item.Key])) + { + return false; + } + } + + return true; + } + + private bool IsAggregationDimension(Object val, Object aggSymbol) + { + return val.Equals(aggSymbol); + } + } + + internal class DimensionInfo + { + internal List DetailDims { get; set; } + internal List AggDims { get; set; } + + public DimensionInfo() + { + DetailDims = new List(); + AggDims = new List(); + } + } + + internal class PointTree + { + internal Point ParentNode; + internal Dictionary> ChildrenNodes; + internal List Leaves; + + public PointTree() + { + Leaves = new List(); + ChildrenNodes = new Dictionary>(); + } + } + + public class BestDimension : IComparable + { + internal string DimensionKey; + internal Dictionary AnomalyDis; + internal Dictionary PointDis; + + public BestDimension() + { + AnomalyDis = new Dictionary(); + PointDis = new Dictionary(); + } + + public int CompareTo(object obj) + { + if (obj == null) return 1; + + BestDimension other = obj as BestDimension; + if (other != null) + return DimensionKey.CompareTo(other.DimensionKey); + else + throw new ArgumentException("Object is not a BestDimension"); + } + } + + internal class RootCauseScore + { + internal double Surprise; + internal double ExplanatoryScore; + + public RootCauseScore(double surprise, double explanatoryScore) + { + Surprise = surprise; + ExplanatoryScore = explanatoryScore; + } + } +} diff --git a/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs new file mode 100644 index 
0000000000..97fb63a8bf --- /dev/null +++ b/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs @@ -0,0 +1,227 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using Microsoft.ML.Data; + +namespace Microsoft.ML.TimeSeries +{ + public sealed class RootCause + { + /// + /// A list of root cause items. Each item is an instance of RootCauseItem. + /// + public List Items { get; set; } + public RootCause() + { + Items = new List(); + } + } + + public sealed class RootCauseLocalizationInput + { + /// + /// When the anomaly incident occurs + /// + public DateTime AnomalyTimestamp { get; set; } + + /// + /// Point with the anomaly dimension must exist in the slice list at the anomaly timestamp, or the library will not calculate the root cause + /// + public Dictionary AnomalyDimension { get; set; } + + /// + /// A list of points at different timestamps. If the slices don't contain point data corresponding to the anomaly timestamp, the root cause localization algorithm will not calculate the root cause as no information at the anomaly timestamp is provided. + /// + public List Slices { get; set; } + + /// + /// The aggregate type; the value should be an AggregateType. + /// + public AggregateType AggType { get; set; } + + /// + /// The string you defined as an aggregated symbol in the AnomalyDimension and point dimension. 
+ /// + public Object AggSymbol { get; set; } + + public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimension, List slices, AggregateType aggregateType, Object aggregateSymbol) + { + AnomalyTimestamp = anomalyTimestamp; + AnomalyDimension = anomalyDimension; + Slices = slices; + AggType = aggregateType; + AggSymbol = aggregateSymbol; + } + + public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimension, List slices, string aggregateSymbol) + { + AnomalyTimestamp = anomalyTimestamp; + AnomalyDimension = anomalyDimension; + Slices = slices; + AggType = AggregateType.Unknown; + AggSymbol = aggregateSymbol; + } + } + + public enum AggregateType + { + /// + /// Make the aggregate type as unknown type. + /// + Unknown = 0, + /// + /// Make the aggregate type as summation. + /// + Sum = 1, + /// + /// Make the aggregate type as average. + /// + Avg = 2, + /// + /// Make the aggregate type as min. + /// + Min = 3, + /// + /// Make the aggregate type as max. + /// + Max = 4 + } + + public enum AnomalyDirection + { + /// + /// the value is larger than expected value. + /// + Up = 0, + /// + /// the value is lower than expected value. + /// + Down = 1, + /// + /// the value is the same as expected value. + /// + Same = 2 + } + + public sealed class RootCauseItem : IEquatable + { + /// + ///The score is a value to evaluate the contribution to the anomaly incident. The range is between [0,1]. The larger the score, the root cause contributes the most to the anomaly. The parameter beta has an influence on this score. For how the score is calculated, you can refer to the source code. + /// + public double Score; + /// + /// Path is a list of the dimension key that the libary selected for you. In this root cause localization library, for one time call for the library, the path will be obtained and the length of path list will always be 1. 
Different RootCauseItem obtained from one library call will have the same path as it is the best dimension selected for the input. + /// + public List Path; + /// + /// The dimension for the detected root cause point + /// + public Dictionary Dimension; + /// + /// The direction for the detected root cause point, should be + /// + public AnomalyDirection Direction; + + public RootCauseItem(Dictionary rootCause) + { + Dimension = rootCause; + Path = new List(); + } + + public RootCauseItem(Dictionary rootCause, List path) + { + Dimension = rootCause; + Path = path; + } + public bool Equals(RootCauseItem other) + { + if (Dimension.Count == other.Dimension.Count) + { + foreach (KeyValuePair item in Dimension) + { + if (!other.Dimension[item.Key].Equals(item.Value)) + { + return false; + } + } + return true; + } + return false; + } + } + + public sealed class MetricSlice + { + /// + /// Timestamp for the point list + /// + public DateTime TimeStamp { get; set; } + /// + /// A list of points + /// + public List Points { get; set; } + + public MetricSlice(DateTime timeStamp, List points) + { + TimeStamp = timeStamp; + Points = points; + } + } + + public sealed class Point : IEquatable + { + /// + /// Value of a time series point + /// + public double Value { get; set; } + /// + /// Forecasted value for the time series point + /// + public double ExpectedValue { get; set; } + /// + /// Whether the point is an anomaly point + /// + public bool IsAnomaly { get; set; } + /// + /// Dimension information for the point. For example, City = New York City, Dataceter = DC1. The value for this dictionary is an object, when the Dimension is used, the equals function for the Object will be used. If you have a customized class, you need to define the Equals function. 
+ /// + public Dictionary Dimension { get; set; } + /// + /// Difference between value and expected value + /// + public double Delta { get; set; } + + public Point(Dictionary dimension) + { + Dimension = dimension; + } + public Point(double value, double expectedValue, bool isAnomaly, Dictionary dimension) + { + Value = value; + ExpectedValue = expectedValue; + IsAnomaly = isAnomaly; + Dimension = dimension; + Delta = value - expectedValue; + } + + public bool Equals(Point other) + { + foreach (KeyValuePair item in Dimension) + { + if (!other.Dimension[item.Key].Equals(item.Value)) + { + return false; + } + } + return true; + } + + public override int GetHashCode() + { + return Dimension.GetHashCode(); + } + } +} diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index dafea7a40d..f605af9244 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -1,12 +1,15 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System; using System.Collections.Generic; using System.IO; using Microsoft.ML.Data; using Microsoft.ML.TestFramework; using Microsoft.ML.TestFramework.Attributes; +using Microsoft.ML.TestFrameworkCommon; +using Microsoft.ML.TimeSeries; using Microsoft.ML.Transforms.TimeSeries; using Xunit; using Xunit.Abstractions; @@ -90,6 +93,9 @@ private sealed class SrCnnAnomalyDetection public double[] Prediction { get; set; } } + private static Object _rootCauseAggSymbol = "##SUM##"; + + [Fact] public void ChangeDetection() { @@ -472,7 +478,7 @@ public void SsaForecastPredictionEngine() // The forecasted results should be the same because the state of the models // is the same. 
Assert.Equal(result.Forecast, resultCopy.Forecast); - + } [Fact] @@ -515,5 +521,108 @@ public void AnomalyDetectionWithSrCnn() k += 1; } } + + [Fact] + public void RootCauseLocalization() + { + // Create a root cause localization input + var rootCauseLocalizationInput = new RootCauseLocalizationInput(GetRootCauseTimestamp(), GetRootCauseAnomalyDimension(), new List() { new MetricSlice(GetRootCauseTimestamp(), GetRootCauseLocalizationPoints()) }, AggregateType.Sum, _rootCauseAggSymbol); + + var ml = new MLContext(1); + RootCause rootCause = ml.AnomalyDetection.LocalizeRootCause(rootCauseLocalizationInput); + + /* Expect exactly one root cause item with three dimensions, direction Up, and a single-entry path naming "DataCenter". */ Assert.NotNull(rootCause); + Assert.Equal(1, (int)rootCause.Items.Count); + Assert.Equal(3, (int)rootCause.Items[0].Dimension.Count); + Assert.Equal(AnomalyDirection.Up, rootCause.Items[0].Direction); + Assert.Equal(1, (int)rootCause.Items[0].Path.Count); + Assert.Equal("DataCenter", rootCause.Items[0].Path[0]); + + /* The expected root cause dimension: Country = UK, DeviceType aggregated, DataCenter = DC1. */ Dictionary expectedDim = new Dictionary(); + expectedDim.Add("Country", "UK"); + expectedDim.Add("DeviceType", _rootCauseAggSymbol); + expectedDim.Add("DataCenter", "DC1"); + + foreach (KeyValuePair pair in rootCause.Items[0].Dimension) + { + Assert.Equal(expectedDim[pair.Key], pair.Value); + } + } + + /* Builds the nine fixture points, all with Country = UK: three for DC1 (flagged anomalous), three for DC2 (not anomalous, value equal to expected value), and three with DataCenter set to the aggregate symbol (flagged anomalous). */ private static List GetRootCauseLocalizationPoints() + { + List points = new List(); + + Dictionary dic1 = new Dictionary(); + dic1.Add("Country", "UK"); + dic1.Add("DeviceType", "Laptop"); + dic1.Add("DataCenter", "DC1"); + points.Add(new Point(200, 100, true, dic1)); + + Dictionary dic2 = new Dictionary(); + dic2.Add("Country", "UK"); + dic2.Add("DeviceType", "Mobile"); + dic2.Add("DataCenter", "DC1"); + points.Add(new Point(1000, 100, true, dic2)); + + Dictionary dic3 = new Dictionary(); + dic3.Add("Country", "UK"); + dic3.Add("DeviceType", _rootCauseAggSymbol); + dic3.Add("DataCenter", "DC1"); + points.Add(new Point(1200, 200, true, dic3)); + + Dictionary dic4 = new Dictionary(); + dic4.Add("Country", "UK"); + dic4.Add("DeviceType", "Laptop"); + 
dic4.Add("DataCenter", "DC2"); + points.Add(new Point(100, 100, false, dic4)); + + Dictionary dic5 = new Dictionary(); + dic5.Add("Country", "UK"); + dic5.Add("DeviceType", "Mobile"); + dic5.Add("DataCenter", "DC2"); + points.Add(new Point(200, 200, false, dic5)); + + Dictionary dic6 = new Dictionary(); + dic6.Add("Country", "UK"); + dic6.Add("DeviceType", _rootCauseAggSymbol); + dic6.Add("DataCenter", "DC2"); + points.Add(new Point(300, 300, false, dic6)); + + Dictionary dic7 = new Dictionary(); + dic7.Add("Country", "UK"); + dic7.Add("DeviceType", _rootCauseAggSymbol); + dic7.Add("DataCenter", _rootCauseAggSymbol); + points.Add(new Point(1500, 500, true, dic7)); + + Dictionary dic8 = new Dictionary(); + dic8.Add("Country", "UK"); + dic8.Add("DeviceType", "Laptop"); + dic8.Add("DataCenter", _rootCauseAggSymbol); + points.Add(new Point(300, 200, true, dic8)); + + Dictionary dic9 = new Dictionary(); + dic9.Add("Country", "UK"); + dic9.Add("DeviceType", "Mobile"); + dic9.Add("DataCenter", _rootCauseAggSymbol); + points.Add(new Point(1200, 300, true, dic9)); + + return points; + } + + /* Builds the anomaly dimension used by the test: Country = UK with both DeviceType and DataCenter set to the aggregate symbol (the same combination as fixture point dic7). */ private static Dictionary GetRootCauseAnomalyDimension() + { + Dictionary dim = new Dictionary(); + dim.Add("Country", "UK"); + dim.Add("DeviceType", _rootCauseAggSymbol); + dim.Add("DataCenter", _rootCauseAggSymbol); + + return dim; + } + + /* Fixed timestamp (2020-03-23 00:00:00) shared by the anomaly input and its single metric slice. */ private static DateTime GetRootCauseTimestamp() + { + return new DateTime(2020, 3, 23, 0, 0, 0); + } } }