Skip to content

add threshold for RCA #5218

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 37 commits into from
Jun 19, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
2c7a62b
add root cause localization transformer
suxi-ms Mar 10, 2020
f9af073
merge with remote repo
suxi-ms Jun 3, 2020
0551ecd
rebase
suxi-ms Jun 3, 2020
3138c39
merge
suxi-ms Jun 3, 2020
946ac43
temp save for internal review
suxi-ms Mar 20, 2020
d39e657
merge
suxi-ms Jun 3, 2020
eadada4
merge
suxi-ms Jun 3, 2020
6cf9b59
merge
suxi-ms Jun 3, 2020
36d1625
merge
suxi-ms Jun 3, 2020
0f81982
merge
suxi-ms Jun 3, 2020
f3fad18
merge
suxi-ms Jun 3, 2020
612be4d
merge
suxi-ms Jun 3, 2020
23261f2
merge
suxi-ms Jun 3, 2020
fa10bff
merge
suxi-ms Jun 3, 2020
919ed6b
update
suxi-ms Apr 1, 2020
0407282
refine internal logic
suxi-ms Apr 7, 2020
0efee95
merge
suxi-ms Jun 3, 2020
bde4a53
update
suxi-ms Jun 3, 2020
a1ab905
merge
suxi-ms Jun 3, 2020
711dfca
merge
suxi-ms Jun 3, 2020
e202a04
merge
suxi-ms Jun 3, 2020
1a2d569
fix type
suxi-ms Apr 27, 2020
b059b53
update model signature
suxi-ms Apr 28, 2020
fa834fd
update rca interface by removing transformer
suxi-ms May 7, 2020
2b29426
update
suxi-ms May 8, 2020
0860e01
update
suxi-ms May 9, 2020
6cf15c8
merge
suxi-ms Jun 3, 2020
a3eee1a
update
suxi-ms Jun 3, 2020
66261e6
some update
suxi-ms Jun 9, 2020
887e992
update
suxi-ms Jun 9, 2020
421157d
Update src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs
suxi-ms Jun 11, 2020
54727ea
fix typo in extension catalgo comments
suxi-ms Jun 11, 2020
0e60d3c
merge
suxi-ms Jun 11, 2020
9d4a45e
update libmf
suxi-ms Jun 12, 2020
cd19889
update point map
suxi-ms Jun 12, 2020
28bf0a5
add root cause options
suxi-ms Jun 18, 2020
8e5784e
revert option changes
suxi-ms Jun 19, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs
Original file line number Diff line number Diff line change
Expand Up @@ -187,25 +187,28 @@ public static IDataView DetectEntireAnomalyBySrCnn(this AnomalyDetectionCatalog
/// It is used when score is calculated for each root cause item. The range of beta should be in [0,1].
/// For a larger beta, root cause items which have a large difference between value and expected value will get a high score.
/// For a small beta, root cause items which have a high relative change will get a low score.</param>
/// <param name="rootCauseThreshold">A threshold to determine whether a point should be treated as a root cause. The threshold must be in the range [0,1].
/// If the point's delta is equal to or larger than rootCauseThreshold multiplied by the anomaly dimension point's delta, this point is treated as a root cause. Different thresholds produce different results; users can choose the threshold according to their data and requirements.</param>
/// <example>
Copy link
Contributor

@harishsk harishsk Jun 17, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the range of valid values for this parameter? #Resolved

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the range of valid values for this parameter?

Have updated the range

/// <format type="text/markdown">
/// <![CDATA[
/// [!code-csharp[LocalizeRootCause](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs)]
/// ]]>
/// </format>
/// </example>
public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog, RootCauseLocalizationInput src, double beta = 0.5)
public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog, RootCauseLocalizationInput src, double beta = 0.3, double rootCauseThreshold = 0.95)
{
IHostEnvironment host = CatalogUtils.GetEnvironment(catalog);

//check the root cause input
CheckRootCauseInput(host, src);

//check beta
//check parameters
// Both checks are written as "x >= 0 && x <= 1", so a NaN argument fails them as well.
host.CheckUserArg(beta >= 0 && beta <= 1, nameof(beta), "Must be in [0,1]");
// Report the failing argument under its own name; the error must point at rootCauseThreshold, not beta.
host.CheckUserArg(rootCauseThreshold >= 0 && rootCauseThreshold <= 1, nameof(rootCauseThreshold), "Must be in [0,1]");

//find out the root cause
RootCauseAnalyzer analyzer = new RootCauseAnalyzer(src, beta);
RootCauseAnalyzer analyzer = new RootCauseAnalyzer(src, beta, rootCauseThreshold);
RootCause dst = analyzer.Analyze();
return dst;
}
Expand Down
117 changes: 89 additions & 28 deletions src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,17 @@ namespace Microsoft.ML.TimeSeries
public class RootCauseAnalyzer
{
private static double _anomalyRatioThreshold = 0.5;
private static double _anomalyDeltaThreshold = 0.95;
private static double _anomalyPreDeltaThreshold = 2;

private RootCauseLocalizationInput _src;
private double _beta;
private double _rootCauseThreshold;

/// <summary>
/// Creates an analyzer that keeps the localization input and the two tuning values for later use.
/// </summary>
/// <param name="src">The root cause localization input to analyze.</param>
/// <param name="beta">Weight stored in <c>_beta</c>.</param>
/// <param name="rootCauseThreshold">Factor stored in <c>_rootCauseThreshold</c>; the anomaly comparisons multiply the parent's absolute delta by it to obtain their cut-off.</param>
public RootCauseAnalyzer(RootCauseLocalizationInput src, double beta)
public RootCauseAnalyzer(RootCauseLocalizationInput src, double beta, double rootCauseThreshold)
{
_src = src;
_beta = beta;
_rootCauseThreshold = rootCauseThreshold;
}

public RootCause Analyze()
Expand All @@ -39,10 +40,10 @@ private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src)
dst.Items = new List<RootCauseItem>();

DimensionInfo dimensionInfo = SeparateDimension(src.AnomalyDimension, src.AggregateSymbol);
Tuple<PointTree, PointTree, Dictionary<string, TimeSeriesPoint>> pointInfo = GetPointsInfo(src, dimensionInfo);
Tuple<PointTree, PointTree, Dictionary<Dictionary<string, object>, TimeSeriesPoint>> pointInfo = GetPointsInfo(src, dimensionInfo);
PointTree pointTree = pointInfo.Item1;
PointTree anomalyTree = pointInfo.Item2;
Dictionary<string, TimeSeriesPoint> dimPointMapping = pointInfo.Item3;
Dictionary<Dictionary<string, Object>, TimeSeriesPoint> dimPointMapping = pointInfo.Item3;

//which means there is no anomaly point with the anomaly dimension or no point under anomaly dimension
if (anomalyTree.ParentNode == null || dimPointMapping.Count == 0)
Expand Down Expand Up @@ -81,11 +82,12 @@ private DimensionInfo SeparateDimension(Dictionary<string, Object> dimensions, O
return info;
}

private Tuple<PointTree, PointTree, Dictionary<string, TimeSeriesPoint>> GetPointsInfo(RootCauseLocalizationInput src, DimensionInfo dimensionInfo)
private Tuple<PointTree, PointTree, Dictionary<Dictionary<string, object>, TimeSeriesPoint>> GetPointsInfo(RootCauseLocalizationInput src, DimensionInfo dimensionInfo)
{
PointTree pointTree = new PointTree();
PointTree anomalyTree = new PointTree();
Dictionary<string, TimeSeriesPoint> dimPointMapping = new Dictionary<string, TimeSeriesPoint>();
DimensionComparer dc = new DimensionComparer();
Dictionary<Dictionary<string, object>, TimeSeriesPoint> dimPointMapping = new Dictionary<Dictionary<string, object>, TimeSeriesPoint>(dc);

List<TimeSeriesPoint> totalPoints = GetTotalPointsForAnomalyTimestamp(src);
Dictionary<string, Object> subDim = GetSubDim(src.AnomalyDimension, dimensionInfo.DetailDims);
Expand All @@ -94,9 +96,9 @@ private Tuple<PointTree, PointTree, Dictionary<string, TimeSeriesPoint>> GetPoin
{
if (ContainsAll(point.Dimension, subDim))
{
if (!dimPointMapping.ContainsKey(GetDicCode(point.Dimension)))
if (!dimPointMapping.ContainsKey(point.Dimension))
{
dimPointMapping.Add(GetDicCode(point.Dimension), point);
dimPointMapping.Add(point.Dimension, point);
bool isValidPoint = point.IsAnomaly == true;
if (ContainsAll(point.Dimension, subDim))
{
Expand All @@ -111,7 +113,7 @@ private Tuple<PointTree, PointTree, Dictionary<string, TimeSeriesPoint>> GetPoin
}
}

return new Tuple<PointTree, PointTree, Dictionary<string, TimeSeriesPoint>>(pointTree, anomalyTree, dimPointMapping);
return new Tuple<PointTree, PointTree, Dictionary<Dictionary<string, Object>, TimeSeriesPoint>>(pointTree, anomalyTree, dimPointMapping);
}

protected Dictionary<string, Object> GetSubDim(Dictionary<string, Object> dimension, List<string> keyList)
Expand Down Expand Up @@ -327,7 +329,7 @@ private AnomalyDirection GetRootCauseDirection(TimeSeriesPoint rootCausePoint)
}
}

private void GetRootCauseDirectionAndScore(Dictionary<string, TimeSeriesPoint> dimPointMapping, Dictionary<string, Object> anomalyRoot, RootCause dst, double beta, PointTree pointTree, AggregateType aggType, Object aggSymbol)
private void GetRootCauseDirectionAndScore(Dictionary<Dictionary<string, Object>, TimeSeriesPoint> dimPointMapping, Dictionary<string, Object> anomalyRoot, RootCause dst, double beta, PointTree pointTree, AggregateType aggType, Object aggSymbol)
{
TimeSeriesPoint anomalyPoint = GetPointByDimension(dimPointMapping, anomalyRoot, pointTree, aggType, aggSymbol);
if (dst.Items.Count > 1)
Expand Down Expand Up @@ -378,11 +380,11 @@ private void GetRootCauseDirectionAndScore(Dictionary<string, TimeSeriesPoint> d
}
}

private TimeSeriesPoint GetPointByDimension(Dictionary<string, TimeSeriesPoint> dimPointMapping, Dictionary<string, Object> dimension, PointTree pointTree, AggregateType aggType, Object aggSymbol)
private TimeSeriesPoint GetPointByDimension(Dictionary<Dictionary<string, Object>, TimeSeriesPoint> dimPointMapping, Dictionary<string, Object> dimension, PointTree pointTree, AggregateType aggType, Object aggSymbol)
{
if (dimPointMapping.ContainsKey(GetDicCode(dimension)))
if (dimPointMapping.ContainsKey(dimension))
{
return dimPointMapping[GetDicCode(dimension)];
return dimPointMapping[dimension];
}

int count = 0;
Expand Down Expand Up @@ -419,11 +421,6 @@ private TimeSeriesPoint GetPointByDimension(Dictionary<string, TimeSeriesPoint>
}
}

// Builds a string key for a dimension dictionary: each entry is rendered as "key=value" and the
// entries are joined with ';' in the dictionary's enumeration order.
// NOTE(review): the hard (string) cast throws InvalidCastException for any non-string dimension value.
// The hunk header (-419,11 +421,6) indicates this helper is removed by the change shown here.
private static string GetDicCode(Dictionary<string, Object> dic)
{
return string.Join(";", dic.Select(x => x.Key + "=" + (string)x.Value).ToArray());
}

private void BuildTree(PointTree tree, List<string> aggDims, TimeSeriesPoint point, Object aggSymbol)
{
int aggNum = 0;
Expand Down Expand Up @@ -476,22 +473,23 @@ private BestDimension FindBestDimension(SortedDictionary<BestDimension, double>
bool isRatioNan = Double.IsNaN(valueRatioMap[best]);
if (dimension.Key.AnomalyDis.Count > 1)
{
if (!isRatioNan && (best.AnomalyDis.Count != 1 && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0)))
if (best.AnomalyDis.Count != 1 && !isRatioNan && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0))
{
best = dimension.Key;
best = GetBestDimension(best, dimension, valueRatioMap);
}
}
else
else if (dimension.Key.AnomalyDis.Count == 1)
{

if (best.AnomalyDis.Count > 1)
{
best = dimension.Key;
}
else
else if (best.AnomalyDis.Count == 1)
{
if (!isRatioNan && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0))
{
best = dimension.Key;
best = GetBestDimension(best, dimension, valueRatioMap);
}
}
}
Expand All @@ -502,6 +500,22 @@ private BestDimension FindBestDimension(SortedDictionary<BestDimension, double>
return best;
}

/// <summary>
/// Chooses between the current best dimension and a candidate dimension.
/// The candidate wins unless its ratio ties with the current best and its anomaly
/// distribution has as many entries as its point distribution.
/// </summary>
/// <param name="best">The dimension currently regarded as best.</param>
/// <param name="dimension">The candidate dimension paired with its ratio value.</param>
/// <param name="valueRatioMap">Ratio value recorded for each dimension; must contain <paramref name="best"/>.</param>
/// <returns>Either <paramref name="best"/> or the candidate's key.</returns>
private BestDimension GetBestDimension(BestDimension best, KeyValuePair<BestDimension, double> dimension, Dictionary<BestDimension, Double> valueRatioMap)
{
    bool ratiosTie = valueRatioMap[best].CompareTo(dimension.Value) == 0;
    BestDimension candidate = dimension.Key;

    // On a tie the current best is kept only when the candidate's two distributions have equal size.
    if (ratiosTie && candidate.AnomalyDis.Count == candidate.PointDis.Count)
    {
        return best;
    }

    return candidate;
}

/// <summary>
/// Calculate the surprise score according to root cause point and anomaly point
/// </summary>
Expand Down Expand Up @@ -569,6 +583,10 @@ private double GetFinalScore(double surprise, double ep, double beta)
else
{
a = (1 - Math.Pow(2, -surprise));
if (Double.IsNaN(a))
{
a = 1;
}
b = (1 - Math.Pow(2, -ep));
}

Expand All @@ -593,7 +611,7 @@ private static Dictionary<string, Object> UpdateDimensionValue(Dictionary<string

private bool StopAnomalyComparison(double preTotal, double parent, double current, double pre)
{
if (Math.Abs(preTotal) < Math.Abs(parent) * _anomalyDeltaThreshold)
if (Math.Abs(preTotal) < Math.Abs(parent) * _rootCauseThreshold)
{
return false;
}
Expand All @@ -603,7 +621,7 @@ private bool StopAnomalyComparison(double preTotal, double parent, double curren

private bool ShouldSeparateAnomaly(double total, double parent, int totalSize, int size)
{
if (Math.Abs(total) < Math.Abs(parent) * _anomalyDeltaThreshold)
if (Math.Abs(total) < Math.Abs(parent) * _rootCauseThreshold)
{
return false;
}
Expand Down Expand Up @@ -657,7 +675,7 @@ private void UpdateDistribution(Dictionary<string, int> distribution, List<TimeS
{
foreach (TimeSeriesPoint point in points)
{
string dimVal = (string)point.Dimension[dimKey];
string dimVal = Convert.ToString(point.Dimension[dimKey]);
if (!distribution.ContainsKey(dimVal))
{
distribution.Add(dimVal, 0);
Expand All @@ -684,7 +702,7 @@ private static bool ContainsAll(Dictionary<string, Object> bigDictionary, Dictio

// Tests whether a dimension value matches the aggregate symbol.
// NOTE(review): the second return converts val to its string form and calls string.Equals(object),
// which yields false whenever aggSymbol is not itself a string - confirm callers always pass a string symbol.
private bool IsAggregationDimension(Object val, Object aggSymbol)
{
return val.Equals(aggSymbol);
return Convert.ToString(val).Equals(aggSymbol);
}
}

Expand Down Expand Up @@ -748,4 +766,47 @@ public RootCauseScore(double surprise, double explanatoryScore)
ExplanatoryScore = explanatoryScore;
}
}
}

/// <summary>
/// Equality comparer that treats two dimension dictionaries as equal when they hold the same
/// keys mapped to equal values, regardless of insertion order. It lets a dictionary of
/// dimensions be used as a dictionary key.
/// </summary>
internal class DimensionComparer : EqualityComparer<Dictionary<string, object>>
{
    /// <summary>
    /// Compares two dimension dictionaries by content.
    /// </summary>
    /// <param name="x">First dictionary; may be null.</param>
    /// <param name="y">Second dictionary; may be null.</param>
    /// <returns>True when both are null, are the same instance, or contain identical key/value pairs.</returns>
    public override bool Equals(Dictionary<string, object> x, Dictionary<string, object> y)
    {
        if (ReferenceEquals(x, y))
        {
            return true;
        }
        if (x == null || y == null)
        {
            return false;
        }
        if (x.Count != y.Count)
        {
            return false;
        }

        // Keys are unique and the counts match, so finding every key of x in y proves the key sets are identical.
        foreach (KeyValuePair<string, object> pair in x)
        {
            object other;
            if (!y.TryGetValue(pair.Key, out other))
            {
                return false;
            }

            // The static object.Equals is null-safe; calling pair.Value.Equals would throw on a null value.
            if (!object.Equals(pair.Value, other))
            {
                return false;
            }
        }
        return true;
    }

    /// <summary>
    /// Computes a hash from every key/value pair of the dictionary.
    /// </summary>
    /// <param name="obj">The dictionary to hash; null hashes to 0.</param>
    /// <returns>A hash code that is equal for dictionaries considered equal by <see cref="Equals(Dictionary{string, object}, Dictionary{string, object})"/>.</returns>
    public override int GetHashCode(Dictionary<string, object> obj)
    {
        if (obj == null)
        {
            return 0;
        }

        int code = 0;
        foreach (KeyValuePair<string, object> pair in obj)
        {
            int keyHash = pair.Key.GetHashCode();
            int valueHash = pair.Value == null ? 0 : pair.Value.GetHashCode();

            // XOR across pairs keeps the result independent of enumeration order.
            unchecked
            {
                code ^= (keyHash * 397) ^ valueHash;
            }
        }
        return code;
    }
}
}
Loading