From d5ee2058c4b97b54a5775622fef9eb6cb17608e0 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Tue, 10 Mar 2020 09:24:42 +0800 Subject: [PATCH 01/49] add root cause localization transformer --- .../TimeSeries/LocalizeRootCauseByDT.cs | 76 +++ .../DTRootCauseLocalization.cs | 490 ++++++++++++++++++ .../ExtensionsCatalog.cs | 17 + .../TimeSeriesDirectApi.cs | 66 +++ 4 files changed, 649 insertions(+) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs create mode 100644 src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs new file mode 100644 index 0000000000..94d117fd45 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs @@ -0,0 +1,76 @@ +using System; +using System.Collections.Generic; +using System.Drawing; +using System.IO; +using Microsoft.ML; +using Microsoft.ML.Data; +using Microsoft.ML.Transforms.TimeSeries; + +namespace Samples.Dynamic +{ + public static class LocalizeRootCause + { + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for + // exception tracking and logging, as well as the source of randomness. + var mlContext = new MLContext(); + + // Create an empty list as the dataset. The 'LocalizeRootCauseByDT' API does not + // require training data as the estimator ('DTRootCauseLocalizationEstimator') + // created by 'LocalizeRootCauseByDT' API is not a trainable estimator. The + // empty list is only needed to pass input schema to the pipeline. + var emptySamples = new List(); + + // Convert sample list to an empty IDataView. + var emptyDataView = mlContext.Data.LoadFromEnumerable(emptySamples); + + // A pipeline for localizing root cause. 
+ var localizePipeline = mlContext.Transforms.LocalizeRootCauseByDT(nameof(RootCauseLocalizationTransformedData.RootCause), nameof(RootCauseLocalizationData.Input)); + + // Fit to data. + var localizeTransformer = localizePipeline.Fit(emptyDataView); + + // Create the prediction engine to get the root cause result from the + // input data. + var predictionEngine = mlContext.Model.CreatePredictionEngine(localizeTransformer); + + // Call the prediction API. + var data = new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, "SUM", "SUM"); + + var prediction = predictionEngine.Predict(data); + + // Print the localization result. + Console.WriteLine($"Localized result: {prediction.RootCause}"); + } + + + private class RootCauseLocalizationData + { + [RootCauseLocalizationInputType] + public RootCauseLocalizationInput Input { get; set; } + + public RootCauseLocalizationData() + { + Input = null; + } + + public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices,String aggregateType, string aggregateSymbol) + { + Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, DTRootCauseLocalizationEstimator.AggregateType.Sum, aggregateSymbol); + } + } + + private class RootCauseLocalizationTransformedData + { + [RootCauseType()] + public RootCause RootCause { get; set; } + + public RootCauseLocalizationTransformedData() + { + RootCause = null; + } + } + } +} diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs new file mode 100644 index 0000000000..c70dd398ff --- /dev/null +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs @@ -0,0 +1,490 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using Microsoft.ML; +using Microsoft.ML.CommandLine; +using Microsoft.ML.Data; +using Microsoft.ML.Internal.Utilities; +using Microsoft.ML.Runtime; +using Microsoft.ML.Transforms.TimeSeries; + +[assembly: LoadableClass(DTRootCauseLocalizationTransformer.Summary, typeof(IDataTransform), typeof(DTRootCauseLocalizationTransformer), typeof(DTRootCauseLocalizationTransformer.Options), typeof(SignatureDataTransform), + DTRootCauseLocalizationTransformer.UserName, "DTRootCauseLocalizationTransform", "DTRootCauseLocalization")] + +[assembly: LoadableClass(DTRootCauseLocalizationTransformer.Summary, typeof(IDataTransform), typeof(DTRootCauseLocalizationTransformer), null, typeof(SignatureLoadDataTransform), + DTRootCauseLocalizationTransformer.UserName, DTRootCauseLocalizationTransformer.LoaderSignature)] + +[assembly: LoadableClass(typeof(DTRootCauseLocalizationTransformer), null, typeof(SignatureLoadModel), + DTRootCauseLocalizationTransformer.UserName, DTRootCauseLocalizationTransformer.LoaderSignature)] + +[assembly: LoadableClass(typeof(IRowMapper), typeof(DTRootCauseLocalizationTransformer), null, typeof(SignatureLoadRowMapper), + DTRootCauseLocalizationTransformer.UserName, DTRootCauseLocalizationTransformer.LoaderSignature)] + +namespace Microsoft.ML.Transforms.TimeSeries +{ + public sealed class RootCauseLocalizationInputTypeAttribute : DataViewTypeAttribute + { + /// + /// Create a root cause localization input type. + /// + public RootCauseLocalizationInputTypeAttribute() + { + } + + /// + /// Equal function. + /// + public override bool Equals(DataViewTypeAttribute other) + { + if (!(other is RootCauseLocalizationInputTypeAttribute otherAttribute)) + return false; + return true; + } + + /// + /// Produce the same hash code for all RootCauseLocalizationInputTypeAttribute. 
+ /// + public override int GetHashCode() + { + return 0; + } + + public override void Register() + { + DataViewTypeManager.Register(new RootCauseLocalizationInputDataViewType(), typeof(RootCauseLocalizationInput), this); + } + } + + public sealed class RootCauseTypeAttribute : DataViewTypeAttribute + { + /// + /// Create a root cause type. + /// + public RootCauseTypeAttribute() + { + } + + /// + /// RootCauseTypeAttribute with the same type should be equal. + /// + public override bool Equals(DataViewTypeAttribute other) + { + if (other is RootCauseTypeAttribute otherAttribute) + return true; + return false; + } + + /// + /// Produce the same hash code for all RootCauseTypeAttribute. + /// + public override int GetHashCode() + { + return 0; + } + + public override void Register() + { + DataViewTypeManager.Register(new RootCauseDataViewType(), typeof(RootCause), this); + } + } + + public sealed class RootCause + { + public List Items { get; set; } + } + + public sealed class RootCauseItems { + public double Score; + public List Path; + public Dictionary RootCause; + public AnomalyDirection Direction; + } + + public enum AnomalyDirection { + /// + /// The value is larger than the expected value. + /// + Up = 0, + /// + /// The value is lower than the expected value. 
+ /// + Down = 1 + } + + public sealed class RootCauseLocalizationInput + { + public DateTime AnomalyTimestamp { get; set; } + + public Dictionary AnomalyDimensions { get; set; } + + public List Slices { get; set; } + + public DTRootCauseLocalizationEstimator.AggregateType AggType{ get; set; } + + public string AggSymbol { get; set; } + + public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, DTRootCauseLocalizationEstimator.AggregateType aggregateType, string aggregateSymbol) { + AnomalyTimestamp = anomalyTimestamp; + AnomalyDimensions = anomalyDimensions; + Slices = slices; + AggType = aggregateType; + AggSymbol = aggregateSymbol; + } + public void Dispose() + { + AnomalyDimensions = null; + Slices = null; + } + } + + public sealed class MetricSlice + { + public DateTime TimeStamp { get; set; } + public List Points { get; set; } + + public MetricSlice(DateTime timeStamp, List points) { + TimeStamp = timeStamp; + Points = points; + } + } + + public sealed class Point { + public double Value { get; set; } + public double ExpectedValue { get; set; } + public bool IsAnomaly { get; set; } + public Dictionary Dimensions{ get; set; } + } + + public sealed class RootCauseDataViewType : StructuredDataViewType + { + public RootCauseDataViewType() + : base(typeof(RootCause)) + { + } + + public override bool Equals(DataViewType other) + { + if (other == this) + return true; + if (!(other is RootCauseDataViewType tmp)) + return false; + return true; + } + + public override int GetHashCode() + { + return 0; + } + + public override string ToString() + { + return typeof(RootCauseDataViewType).Name; + } + } + + public sealed class RootCauseLocalizationInputDataViewType : StructuredDataViewType + { + public RootCauseLocalizationInputDataViewType() + : base(typeof(RootCauseLocalizationInput)) + { + } + + public override bool Equals(DataViewType other) + { + if (!(other is RootCauseLocalizationInputDataViewType tmp)) + return false; + 
return true; + } + + public override int GetHashCode() + { + return 0; + } + + public override string ToString() + { + return typeof(RootCauseLocalizationInputDataViewType).Name; + } + } + + // REVIEW: Rewrite as LambdaTransform to simplify. + // REVIEW: Should it be separate transform or part of ImageResizerTransform? + /// + /// resulting from fitting an . + /// + public sealed class DTRootCauseLocalizationTransformer : OneToOneTransformerBase + { + internal sealed class Column : OneToOneColumn + { + internal static Column Parse(string str) + { + var res = new Column(); + if (res.TryParse(str)) + return res; + return null; + } + + internal bool TryUnparse(StringBuilder sb) + { + Contracts.AssertValue(sb); + return TryUnparseCore(sb); + } + } + + internal class Options : TransformInputBase + { + [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", Name = "Column", ShortName = "col", SortOrder = 1)] + public Column[] Columns; + + [Argument(ArgumentType.AtMostOnce, HelpText = "Weight for getting the score for the root cause item.", ShortName = "Beta", SortOrder = 2)] + public double Beta = DTRootCauseLocalizationEstimator.Defaults.Beta; + + } + + internal const string Summary = "Localize root cause for anomaly."; + + internal const string UserName = "DT Root Cause Localization Transform"; + internal const string LoaderSignature = "DTRootCauseLTransform"; + + private static VersionInfo GetVersionInfo() + { + return new VersionInfo( + modelSignature: "DTRCL", + verWrittenCur: 0x00010001, // Initial + verReadableCur: 0x00010001, + verWeCanReadBack: 0x00010001, + loaderSignature: LoaderSignature, + loaderAssemblyName: typeof(DTRootCauseLocalizationTransformer).Assembly.FullName); + } + + private const string RegistrationName = "RootCauseLocalization"; + + /// + /// The input and output column pairs passed to this . 
+ /// + internal IReadOnlyCollection<(string outputColumnName, string inputColumnName)> Columns => ColumnPairs.AsReadOnly(); + + private readonly double _beta; + + /// + /// Localization root cause for multi-dimensional anomaly. + /// + /// The estimator's local . + /// Weight for generating score. + /// The name of the columns (first item of the tuple), and the name of the resulting output column (second item of the tuple). + + internal DTRootCauseLocalizationTransformer(IHostEnvironment env,double beta = DTRootCauseLocalizationEstimator.Defaults.Beta, params (string outputColumnName, string inputColumnName)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), columns) + { + Host.CheckUserArg(beta >=0 && beta <= 1, nameof(Options.Beta), "Must be in [0,1]"); + + _beta = beta; + } + + // Factory method for SignatureDataTransform. + internal static IDataTransform Create(IHostEnvironment env, Options options, IDataView input) + { + Contracts.CheckValue(env, nameof(env)); + env.CheckValue(options, nameof(options)); + env.CheckValue(input, nameof(input)); + env.CheckValue(options.Columns, nameof(options.Columns)); + + return new DTRootCauseLocalizationTransformer(env,options.Beta, options.Columns.Select(x => (x.Name, x.Source ?? x.Name)).ToArray()) + .MakeDataTransform(input); + } + + // Factory method for SignatureLoadModel. + private static DTRootCauseLocalizationTransformer Create(IHostEnvironment env, ModelLoadContext ctx) + { + Contracts.CheckValue(env, nameof(env)); + var host = env.Register(RegistrationName); + host.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(GetVersionInfo()); + return new DTRootCauseLocalizationTransformer(host, ctx); + } + + private DTRootCauseLocalizationTransformer(IHost host, ModelLoadContext ctx) + : base(host, ctx) + { + } + + // Factory method for SignatureLoadDataTransform. 
+ private static IDataTransform Create(IHostEnvironment env, ModelLoadContext ctx, IDataView input) + => Create(env, ctx).MakeDataTransform(input); + + // Factory method for SignatureLoadRowMapper. + private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, DataViewSchema inputSchema) + => Create(env, ctx).MakeRowMapper(inputSchema); + + private protected override void SaveModel(ModelSaveContext ctx) + { + Host.CheckValue(ctx, nameof(ctx)); + + ctx.CheckAtModel(); + ctx.SetVersionInfo(GetVersionInfo()); + + // *** Binary format *** + // + base.SaveColumns(ctx); + } + + private protected override IRowMapper MakeRowMapper(DataViewSchema schema) => new Mapper(this, schema); + + private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) + { + if (!(inputSchema[srcCol].Type is RootCauseLocalizationInputDataViewType)) + throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", ColumnPairs[col].inputColumnName, "RootCauseLocalizationInputDataViewType", inputSchema[srcCol].Type.ToString()); + } + + private sealed class Mapper : OneToOneMapperBase + { + private DTRootCauseLocalizationTransformer _parent; + + public Mapper(DTRootCauseLocalizationTransformer parent, DataViewSchema inputSchema) + : base(parent.Host.Register(nameof(Mapper)), parent, inputSchema) + { + _parent = parent; + } + + protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore() + { + var result = new DataViewSchema.DetachedColumn[_parent.ColumnPairs.Length]; + for (int i = 0; i < _parent.ColumnPairs.Length; i++) + { + InputSchema.TryGetColumnIndex(_parent.ColumnPairs[i].inputColumnName, out int colIndex); + Host.Assert(colIndex >= 0); + + DataViewType type; + type = new RootCauseDataViewType(); + + result[i] = new DataViewSchema.DetachedColumn(_parent.ColumnPairs[i].outputColumnName, type, null); + } + return result; + } + + protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func activeOutput, out Action 
disposer) + { + Contracts.AssertValue(input); + Contracts.Assert(0 <= iinfo && iinfo < _parent.ColumnPairs.Length); + + var src = default(RootCauseLocalizationInput); + var getSrc = input.GetGetter(input.Schema[ColMapNewToOld[iinfo]]); + + disposer = + () => + { + if (src != null) + { + src.Dispose(); + src = null; + } + }; + + ValueGetter del = + (ref RootCause dst) => + { + getSrc(ref src); + if (src == null) + return; + + if (src.Slices.Count < 1) { + throw Host.Except($"Length of Slices must be larger than 0"); + } + //todo- more checks will be added here for the input + + dst = new RootCause(); + //dst.Items = new List{ new RootCauseItems() }; + //todo- algorithms would be implemented here + }; + + return del; + } + } + } + + /// + /// for the . + /// + /// + /// | + /// | Output column data type | | + /// | Exportable to ONNX | No | + /// + /// The resulting creates a new column, named as specified in the output column name parameters, and + /// localizes the root causes which contribute most to the anomaly. + /// Check the See Also section for links to usage examples. + /// ]]> + /// + /// + /// + public sealed class DTRootCauseLocalizationEstimator : TrivialEstimator + { + internal static class Defaults + { + public const double Beta = 0.5; + } + + public enum AggregateType + { + /// + /// Make the aggregate type as sum. + /// + Sum = 0, + /// + /// Make the aggregate type as average. + /// + Avg = 1, + /// + /// Make the aggregate type as min. + /// + Min = 2, + /// + /// Make the aggregate type as max. + /// + Max = 3 + } + + /// + /// Localize root cause. + /// + /// The estimator's local . + /// The name of the columns (first item of the tuple), and the name of the resulting output column (second item of the tuple). + /// The weight for generating score in output result. 
+ [BestFriend] + internal DTRootCauseLocalizationEstimator(IHostEnvironment env, double beta = Defaults.Beta,params(string outputColumnName, string inputColumnName)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(DTRootCauseLocalizationEstimator)), new DTRootCauseLocalizationTransformer(env, beta,columns)) + { + } + + /// + /// Returns the of the schema which will be produced by the transformer. + /// Used for schema propagation and verification in a pipeline. + /// + public override SchemaShape GetOutputSchema(SchemaShape inputSchema) + { + Host.CheckValue(inputSchema, nameof(inputSchema)); + var result = inputSchema.ToDictionary(x => x.Name); + foreach (var colInfo in Transformer.Columns) + { + if (!inputSchema.TryFindColumn(colInfo.inputColumnName, out var col)) + throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.inputColumnName); + if (!(col.ItemType is RootCauseLocalizationInputDataViewType) || col.Kind != SchemaShape.Column.VectorKind.Scalar) + throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.inputColumnName, new RootCauseLocalizationInputDataViewType().ToString(), col.GetTypeString()); + + result[colInfo.outputColumnName] = new SchemaShape.Column(colInfo.outputColumnName, col.Kind, col.ItemType, col.IsKey, col.Annotations); + } + + return new SchemaShape(result.Values); + } + } +} diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index 03a507195e..9bbc051d7c 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -146,6 +146,23 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog int windowSize=64, int backAddWindowSize=5, int lookaheadWindowSize=5, int averageingWindowSize=3, int judgementWindowSize=21, double threshold=0.3) => new SrCnnAnomalyEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, windowSize, backAddWindowSize, 
lookaheadWindowSize, averageingWindowSize, judgementWindowSize, threshold, inputColumnName); + /// + /// Create , which localizes root causes using a decision tree algorithm. + /// + /// The transform's catalog. + /// Name of the column resulting from the transformation of . + /// Name of column to transform. + /// The weight parameter in score. The range of the parameter should be in [0,1]. + /// + /// + /// + /// + /// + public static DTRootCauseLocalizationEstimator LocalizeRootCauseByDT(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, double beta=0.5) + => new DTRootCauseLocalizationEstimator(CatalogUtils.GetEnvironment(catalog),beta, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }); + /// /// Singular Spectrum Analysis (SSA) model for univariate time-series forecasting. /// For the details of the model, refer to http://arxiv.org/pdf/1206.6910.pdf. diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index dafea7a40d..51ea88c881 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -2,7 +2,9 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. 
+using System; using System.Collections.Generic; +using System.Drawing; using System.IO; using Microsoft.ML.Data; using Microsoft.ML.TestFramework; @@ -515,5 +517,69 @@ public void AnomalyDetectionWithSrCnn() k += 1; } } + + private class RootCauseLocalizationData + { + [RootCauseLocalizationInputType] + public RootCauseLocalizationInput Input { get; set; } + + public RootCauseLocalizationData() + { + Input = null; + } + + public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, DTRootCauseLocalizationEstimator.AggregateType aggregateteType, string aggregateSymbol) + { + Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, aggregateteType, aggregateSymbol); + } + } + + private class RootCauseLocalizationTransformedData + { + [RootCauseType()] + public RootCause RootCause { get; set; } + + public RootCauseLocalizationTransformedData() + { + RootCause = null; + } + } + + [Fact] + public void RootCauseLocalizationWithDT() + { + // Create a root cause localization input list. + var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(),new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM"), new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Avg, "AVG") }; + + var ml = new MLContext(1); + // Convert the list of root cause data to an IDataView object, which is consumable by ML.NET API. + var data = ml.Data.LoadFromEnumerable(rootCauseLocalizationData); + + // Create pipeline to localize root cause by decision tree. + var pipeline = ml.Transforms.LocalizeRootCauseByDT(nameof(RootCauseLocalizationTransformedData.RootCause), nameof(RootCauseLocalizationData.Input)); + + // Fit the model. 
+ var model = pipeline.Fit(data); + + // Test path: input list -> IDataView -> Enumerable of RootCauseLocalizationInputs. + var transformedData = model.Transform(data); + + // Load input list in DataView back to Enumerable. + var transformedDataPoints = ml.Data.CreateEnumerable(transformedData, false); + + foreach (var dataPoint in transformedDataPoints) + { + var rootCause = dataPoint.RootCause; + + Assert.NotNull(rootCause); + } + + var engine = ml.Model.CreatePredictionEngine(model); + var newRootCauseInput = new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() {new MetricSlice(new DateTime(), new List())}, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM"); + var transformedRootCause = engine.Predict(newRootCauseInput); + + Assert.NotNull(transformedRootCause); + //todo - will add more tests here when onboarding mock data + } } } From f727a79c859cb76b77e5ee178cbb003ffd22745a Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Mon, 16 Mar 2020 22:26:48 +0800 Subject: [PATCH 02/49] add test cases --- LCATest/Class1.cs | 72 +++ LCATest/LCATest.csproj | 28 + Microsoft.ML.sln | 83 ++- .../TimeSeries/LocalizeRootCauseByDT.cs | 2 +- .../DTRootCauseLocalization.cs | 153 ++++- .../DTRootCauseLocalizationUtils.cs | 587 ++++++++++++++++++ .../Microsoft.ML.TimeSeries.Tests.csproj | 4 + .../TimeSeriesDirectApi.cs | 148 ++++- 8 files changed, 1011 insertions(+), 66 deletions(-) create mode 100644 LCATest/Class1.cs create mode 100644 LCATest/LCATest.csproj create mode 100644 src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs diff --git a/LCATest/Class1.cs b/LCATest/Class1.cs new file mode 100644 index 0000000000..febf8c3364 --- /dev/null +++ b/LCATest/Class1.cs @@ -0,0 +1,72 @@ +using System; + +namespace LCATest +{ + public class Class1 : BaseTestClass + { + + private class RootCauseLocalizationData + { + [RootCauseLocalizationInputType] + public RootCauseLocalizationInput Input { get; set; } + + public RootCauseLocalizationData() + { + Input 
= null; + } + + public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, DTRootCauseLocalizationEstimator.AggregateType aggregateteType, string aggregateSymbol) + { + Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, aggregateteType, aggregateSymbol); + } + } + + private class RootCauseLocalizationTransformedData + { + [RootCauseType()] + public RootCause RootCause { get; set; } + + public RootCauseLocalizationTransformedData() + { + RootCause = null; + } + } + + [Fact] + public void RootCauseLocalizationWithDT() + { + // Create a root cause localization input list. + var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM"), new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Avg, "AVG") }; + + var ml = new MLContext(1); + // Convert the list of root cause data to an IDataView object, which is consumable by ML.NET API. + var data = ml.Data.LoadFromEnumerable(rootCauseLocalizationData); + + // Create pipeline to localize root cause by decision tree. + var pipeline = ml.Transforms.LocalizeRootCauseByDT(nameof(RootCauseLocalizationTransformedData.RootCause), nameof(RootCauseLocalizationData.Input)); + + // Fit the model. + var model = pipeline.Fit(data); + + // Test path: input list -> IDataView -> Enumerable of RootCauseLocalizationInputs. + var transformedData = model.Transform(data); + + // Load input list in DataView back to Enumerable. 
+ var transformedDataPoints = ml.Data.CreateEnumerable(transformedData, false); + + foreach (var dataPoint in transformedDataPoints) + { + var rootCause = dataPoint.RootCause; + + Assert.NotNull(rootCause); + } + + var engine = ml.Model.CreatePredictionEngine(model); + var newRootCauseInput = new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM"); + var transformedRootCause = engine.Predict(newRootCauseInput); + + Assert.NotNull(transformedRootCause); + //todo - will add more tests here when onboarding mock data + } + } +} diff --git a/LCATest/LCATest.csproj b/LCATest/LCATest.csproj new file mode 100644 index 0000000000..97d17fe121 --- /dev/null +++ b/LCATest/LCATest.csproj @@ -0,0 +1,28 @@ + + + CORECLR + + Exe + + + + AnyCPU + + + + + + + + + + + + + + + + PreserveNewest + + + diff --git a/Microsoft.ML.sln b/Microsoft.ML.sln index c1b573c922..a1bb937cae 100644 --- a/Microsoft.ML.sln +++ b/Microsoft.ML.sln @@ -293,9 +293,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Vision", "src\ EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.TestFrameworkCommon", "test\Microsoft.ML.TestFrameworkCommon\Microsoft.ML.TestFrameworkCommon.csproj", "{A22FAD27-77E8-4460-8B92-EC7090B7173A}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.ML.NightlyBuild.Tests", "test\Microsoft.ML.NightlyBuild.Tests\Microsoft.ML.NightlyBuild.Tests.csproj", "{A1CAC86F-F4BB-4B6D-9D18-E9AE15B3C66E}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.NightlyBuild.Tests", "test\Microsoft.ML.NightlyBuild.Tests\Microsoft.ML.NightlyBuild.Tests.csproj", "{A1CAC86F-F4BB-4B6D-9D18-E9AE15B3C66E}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.ML.NugetPackageVersionUpdater", "test\Microsoft.ML.NugetPackageVersionUpdater\Microsoft.ML.NugetPackageVersionUpdater.csproj", 
"{C8DB58DC-6434-4431-A81F-263D86E2A5F3}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.NugetPackageVersionUpdater", "test\Microsoft.ML.NugetPackageVersionUpdater\Microsoft.ML.NugetPackageVersionUpdater.csproj", "{C8DB58DC-6434-4431-A81F-263D86E2A5F3}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", "build", "{C91F81E3-B900-4968-A6DF-F53B515E97E1}" EndProject @@ -304,6 +304,8 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "netstandard2.0", "netstanda pkg\Microsoft.ML.CpuMath\build\netstandard2.0\Microsoft.ML.CpuMath.props = pkg\Microsoft.ML.CpuMath\build\netstandard2.0\Microsoft.ML.CpuMath.props EndProjectSection EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LCATest", "LCATest\LCATest.csproj", "{27A55727-AA9B-4C27-B322-EB3E65F310B8}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -1639,6 +1641,30 @@ Global {3C8F910B-7F23-4D25-B521-6D5AC9570ADD}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU {3C8F910B-7F23-4D25-B521-6D5AC9570ADD}.Release-netfx|x64.ActiveCfg = Release-netfx|Any CPU {3C8F910B-7F23-4D25-B521-6D5AC9570ADD}.Release-netfx|x64.Build.0 = Release-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|Any CPU.Build.0 = Debug|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|x64.ActiveCfg = Debug|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|x64.Build.0 = Debug|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|Any CPU.ActiveCfg = Debug-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|Any CPU.Build.0 = Debug-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|x64.ActiveCfg = Debug-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|x64.Build.0 = Debug-netcoreapp3_0|Any CPU + 
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|x64.ActiveCfg = Debug-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|x64.Build.0 = Debug-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|Any CPU.ActiveCfg = Release|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|Any CPU.Build.0 = Release|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|x64.ActiveCfg = Release|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|x64.Build.0 = Release|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|Any CPU.ActiveCfg = Release-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|Any CPU.Build.0 = Release-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|x64.ActiveCfg = Release-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|x64.Build.0 = Release-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|x64.ActiveCfg = Release-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|x64.Build.0 = Release-netfx|Any CPU {56CB0850-7341-4D71-9AE4-9EFC472D93DD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {56CB0850-7341-4D71-9AE4-9EFC472D93DD}.Debug|Any CPU.Build.0 = Debug|Any CPU {56CB0850-7341-4D71-9AE4-9EFC472D93DD}.Debug|x64.ActiveCfg = Debug|Any CPU @@ -1771,30 +1797,30 @@ Global {C8DB58DC-6434-4431-A81F-263D86E2A5F3}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU {C8DB58DC-6434-4431-A81F-263D86E2A5F3}.Release-netfx|x64.ActiveCfg = Release-netfx|Any CPU 
{C8DB58DC-6434-4431-A81F-263D86E2A5F3}.Release-netfx|x64.Build.0 = Release-netfx|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|Any CPU.Build.0 = Debug|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|x64.ActiveCfg = Debug|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|x64.Build.0 = Debug|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|Any CPU.ActiveCfg = Debug-netcoreapp3_0|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|Any CPU.Build.0 = Debug-netcoreapp3_0|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|x64.ActiveCfg = Debug-netcoreapp3_0|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|x64.Build.0 = Debug-netcoreapp3_0|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|x64.ActiveCfg = Debug-netfx|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|x64.Build.0 = Debug-netfx|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|Any CPU.ActiveCfg = Release|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|Any CPU.Build.0 = Release|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|x64.ActiveCfg = Release|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|x64.Build.0 = Release|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|Any CPU.ActiveCfg = Release-netcoreapp3_0|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|Any CPU.Build.0 = Release-netcoreapp3_0|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|x64.ActiveCfg = Release-netcoreapp3_0|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|x64.Build.0 = Release-netcoreapp3_0|Any CPU - 
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|x64.ActiveCfg = Release-netfx|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|x64.Build.0 = Release-netfx|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug|Any CPU.Build.0 = Debug|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug|x64.ActiveCfg = Debug|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug|x64.Build.0 = Debug|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug-netcoreapp3_0|Any CPU.ActiveCfg = Debug-netcoreapp3_0|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug-netcoreapp3_0|Any CPU.Build.0 = Debug-netcoreapp3_0|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug-netcoreapp3_0|x64.ActiveCfg = Debug-netcoreapp3_0|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug-netcoreapp3_0|x64.Build.0 = Debug-netcoreapp3_0|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug-netfx|x64.ActiveCfg = Debug-netfx|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug-netfx|x64.Build.0 = Debug-netfx|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release|Any CPU.ActiveCfg = Release|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release|Any CPU.Build.0 = Release|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release|x64.ActiveCfg = Release|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release|x64.Build.0 = Release|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release-netcoreapp3_0|Any CPU.ActiveCfg = Release-netcoreapp3_0|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release-netcoreapp3_0|Any CPU.Build.0 = 
Release-netcoreapp3_0|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release-netcoreapp3_0|x64.ActiveCfg = Release-netcoreapp3_0|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release-netcoreapp3_0|x64.Build.0 = Release-netcoreapp3_0|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release-netfx|x64.ActiveCfg = Release-netfx|Any CPU + {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release-netfx|x64.Build.0 = Release-netfx|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -1880,6 +1906,8 @@ Global {F5D11F71-2D61-4AE9-99D7-0F0B54649B15} = {D3D38B03-B557-484D-8348-8BADEE4DF592} {A6924919-9E37-4023-8B7F-E85C8E3CC9B3} = {DA452A53-2E94-4433-B08C-041EDEC729E6} {3C8F910B-7F23-4D25-B521-6D5AC9570ADD} = {DA452A53-2E94-4433-B08C-041EDEC729E6} + {E2DD0721-5B0F-4606-8182-4C7EFB834518} = {09EADF06-BE25-4228-AB53-95AE3E15B530} + {1BA5C784-52E8-4A87-8525-26B2452F2882} = {D3D38B03-B557-484D-8348-8BADEE4DF592} {56CB0850-7341-4D71-9AE4-9EFC472D93DD} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {46CC5637-3DDF-4100-93FC-44BB87B2DB81} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {3817A875-278C-4140-BF66-3C4A8CA55F0D} = {D3D38B03-B557-484D-8348-8BADEE4DF592} @@ -1889,8 +1917,7 @@ Global {C8DB58DC-6434-4431-A81F-263D86E2A5F3} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {C91F81E3-B900-4968-A6DF-F53B515E97E1} = {BF66A305-DF10-47E4-8D81-42049B149D2B} {027DBA48-85B6-46F1-9487-0B49B5057FC0} = {C91F81E3-B900-4968-A6DF-F53B515E97E1} - {E2DD0721-5B0F-4606-8182-4C7EFB834518} = {09EADF06-BE25-4228-AB53-95AE3E15B530} - {1BA5C784-52E8-4A87-8525-26B2452F2882} = {D3D38B03-B557-484D-8348-8BADEE4DF592} + {27A55727-AA9B-4C27-B322-EB3E65F310B8} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = 
{41165AF1-35BB-4832-A189-73060F82B01D} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs index 94d117fd45..79a5cfa839 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs @@ -37,7 +37,7 @@ public static void Example() RootCauseLocalizationTransformedData>(localizeTransformer); // Call the prediction API. - var data = new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, "SUM", "SUM"); + var data = new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, "SUM", "SUM"); var prediction = predictionEngine.Predict(data); diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs index c70dd398ff..05ac7a82b4 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs @@ -11,6 +11,7 @@ using Microsoft.ML.Data; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Runtime; +using Microsoft.ML.TimeSeries; using Microsoft.ML.Transforms.TimeSeries; [assembly: LoadableClass(DTRootCauseLocalizationTransformer.Summary, typeof(IDataTransform), typeof(DTRootCauseLocalizationTransformer), typeof(DTRootCauseLocalizationTransformer.Options), typeof(SignatureDataTransform), @@ -95,25 +96,7 @@ public override void Register() public sealed class RootCause { - public List Items { get; set; } - } - - public sealed class RootCauseItems { - public double Score; - public List Path; - public Dictionary RootCause; - public AnomalyDirection Direction; - } - - public enum AnomalyDirection { - /// - /// the value is larger than expected value. 
- /// - Up = 0, - /// - /// the value is lower than expected value. - /// - Down = 1 + public List Items { get; set; } } public sealed class RootCauseLocalizationInput @@ -153,13 +136,6 @@ public MetricSlice(DateTime timeStamp, List points) { } } - public sealed class Point { - public double Value { get; set; } - public double ExpectedValue { get; set; } - public bool IsAnomaly { get; set; } - public Dictionary Dimensions{ get; set; } - } - public sealed class RootCauseDataViewType : StructuredDataViewType { public RootCauseDataViewType() @@ -392,19 +368,130 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func{ new RootCauseItems() }; - //todo- algorithms would be implememted here + LocalizeRootCauses(src, ref dst); }; return del; } } + + private static void CheckInput(RootCauseLocalizationInput src, IHost host) { + if (src.Slices.Count < 1) + { + throw host.Except($"Length of Slices must be larger than 0"); + } + + bool containsAnomalyTimestamp = false; + foreach (MetricSlice slice in src.Slices) { + if (slice.TimeStamp.Equals(src.AnomalyTimestamp)) { + containsAnomalyTimestamp = true; + } + } + if (!containsAnomalyTimestamp) { + throw host.Except($"Has no points in the given anomaly timestamp"); + } + } + + private static void LocalizeRootCauses(RootCauseLocalizationInput src , ref RootCause dst) { + dst = new RootCause(); + dst.Items = new List { }; + + GetRootCauseList(src, ref dst); + GetRootCauseScore(src, ref dst); + } + + private static void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst) { + DimensionInfo dimensionInfo = DTRootCauseLocalizationUtils.SeperateDimension(src.AnomalyDimensions, src.AggSymbol); + if (dimensionInfo.AggDim.Count == 0) { + return; + } + + List points = new List(); + foreach (MetricSlice slice in src.Slices) + { + if (slice.TimeStamp.Equals(src.AnomalyTimestamp)) + { + points = slice.Points; + } + } + + Dictionary subDim = DTRootCauseLocalizationUtils.GetsubDim(src.AnomalyDimensions, 
dimensionInfo.DetailDim); + + List totalPoints = DTRootCauseLocalizationUtils.SelectPoints(points, subDim); + + PointTree pointTree = DTRootCauseLocalizationUtils.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol); + PointTree anomalyTree = DTRootCauseLocalizationUtils.BuildPointTree(totalPoints, dimensionInfo.AggDim,subDim,src.AggSymbol, true); + + // which means there is no all up here, we would return empty list; in ML.net , should we do the same thing? todo + if (anomalyTree.ParentNode == null) { + return; + } + + // no point under anomaly dimension + if (totalPoints.Count == 0) { + if (anomalyTree.Leaves.Count != 0) { + throw new Exception("point leaves not match with anomaly leaves"); + } + } + else + { + List rootCauses = new List(); + double totalEntropy = 1; + if (anomalyTree.Leaves.Count > 0) + { + totalEntropy= DTRootCauseLocalizationUtils.GetEntropy(totalPoints.Count, anomalyTree.Leaves.Count); + } + + if (totalEntropy > 0.9) + { + if (dimensionInfo.AggDim.Count == 1) { + //root cause is itself; + rootCauses.Add(new RootCauseItem(src.AnomalyDimensions)); + } + else { + rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByDimension(totalPoints, anomalyTree,pointTree, totalEntropy,src.AnomalyDimensions)); + } + } + else if (totalEntropy < 0.5) + { + if (DTRootCauseLocalizationUtils.IsAnomalous(totalPoints.Count, anomalyTree.Leaves.Count)) { + //root cause is itself; + dst.Items.Add(new RootCauseItem(src.AnomalyDimensions)); + } + else + { + rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree,src.AnomalyDimensions)); + + } + } + else { + if (dimensionInfo.AggDim.Count == 1) { + //because we have known the dimension, so by anomaly + rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree, src.AnomalyDimensions)); + } + else { + rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByDimension(totalPoints,anomalyTree, 
pointTree, totalEntropy, src.AnomalyDimensions)); + } + } + + dst.Items = rootCauses; + } + } + + private static void GetRootCauseScore(RootCauseLocalizationInput src, ref RootCause dst) { + + if (dst.Items.Count > 1) + { + // for each, get surprise + + // then normalize + } + else if (dst.Items.Count == 1) { + //surprise and expananory , max is 1 + } + } } /// diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs new file mode 100644 index 0000000000..56e8378d5b --- /dev/null +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs @@ -0,0 +1,587 @@ +using System; +using System.Collections.Generic; +using System.Linq; + +namespace Microsoft.ML.TimeSeries +{ + public class DTRootCauseLocalizationUtils + { + public static DimensionInfo SeperateDimension(Dictionary dimensions, string aggSymbol) { + DimensionInfo info = DimensionInfo.CreateDefaultInstance(); + foreach (KeyValuePair entry in dimensions) { + string key = entry.Key; + if (aggSymbol.Equals(entry.Value)) + { + info.AggDim.Add(key); + } + else { + info.DetailDim.Add(key); + } + } + + return info; + } + + public static PointTree BuildPointTree(List pointList, List aggDims, Dictionary subDim, string aggSymbol, bool filterByAnomaly = false) { + PointTree tree = PointTree.CreateDefaultInstance(); + + foreach (Point point in pointList) { + bool isValidPoint =true; + if (filterByAnomaly) { + isValidPoint = point.IsAnomaly == true; + } + if (ContainsAll(point.Dimensions, subDim) && isValidPoint) { + if (aggDims.Count == 0) + { + tree.ParentNode = point; + tree.Leaves.Add(point); + } + else { + int aggNum = 0; + string nextDim = null; + + foreach (string dim in aggDims) { + if (IsAggregationDimension(point.Dimensions[dim], aggSymbol)) + { + aggNum++; + } + else { + nextDim = dim; + } + } + + if (aggNum == aggDims.Count) + { + tree.ParentNode = point; + } + else if (aggNum == aggDims.Count - 1) { + if 
(!tree.ChildrenNodes.ContainsKey(nextDim)) { + tree.ChildrenNodes.Add(nextDim, new List()); + } + tree.ChildrenNodes[nextDim].Add(point); + } + + if (aggNum == 0) { + tree.Leaves.Add(point); + } + + //todo - need optimize, to see whether we can independent of leaves; + } + } + } + return tree; + } + + public static Dictionary GetsubDim(Dictionary dimension, List keyList) { + Dictionary subDim = new Dictionary(); + + foreach (String dim in keyList) { + subDim.Add(dim, dimension[dim]); + } + return subDim; + } + + public static List SelectPoints(List points, Dictionary subDim) { + List list = new List(); + + foreach (Point point in points) { + if (ContainsAll(point.Dimensions, subDim)) { + list.Add(point); + } + } + + return list; + } + + public static List LocalizeRootCauseByAnomaly(List totalPoints, PointTree anomalyTree, Dictionary anomalyDimension) { + if (anomalyTree.ChildrenNodes.Count == 0) + { + //As there is no children node under such dimension, the root cause should be itself + return new List() { new RootCauseItem(anomalyDimension) }; + } + else { + AnomalyCause cause = GetAnomalyCause(anomalyTree, totalPoints); + if (cause == null) + { + //As the cause couldn't be found, the root cause should be itself + return new List() { new RootCauseItem(anomalyDimension) }; + } + else { + List causes = new List(); + // For the found causes, we return the result + foreach (Point anomaly in cause.Anomalies){ + causes.Add(new RootCauseItem(UpdateDimensionValue(anomalyDimension, cause.DimensionKey,anomaly.Dimensions[cause.DimensionKey]))) ; + } + return causes; + } + } + } + + public static List LocalizeRootCauseByDimension(List totalPoints, PointTree anomalyTree, PointTree pointTree, double totoalEntropy, Dictionary anomalyDimension) { + + var set = anomalyTree.ChildrenNodes.Keys; + + BestDimension best = null; + + if (anomalyTree.Leaves.Count > 0) + { + best =SelectBestDimension(totalPoints, anomalyTree.Leaves, set.ToList(), totoalEntropy); + } + else { + //has no 
leaves information, should calculate the entropy information according to the children nodes + best = SelectBestDimension(pointTree.ChildrenNodes, anomalyTree.ChildrenNodes, set.ToList(), totoalEntropy); + } + + if (IsLargeEntropyGain(totoalEntropy, best.Entropy) || best.AnomalyDis.Count == 1) + { + List children = GetTopAnomaly(anomalyTree.ChildrenNodes[best.DimensionKey], anomalyTree.ParentNode, totalPoints, best.DimensionKey); + if (children == null) + { + //As the cause couldn't be found, the root cause should be itself + return new List() { new RootCauseItem(anomalyDimension) }; + } + else + { + List causes = new List(); + // For the found causes, we return the result + foreach (Point anomaly in children) + { + causes.Add(new RootCauseItem(UpdateDimensionValue(anomalyDimension, best.DimensionKey, anomaly.Dimensions[best.DimensionKey]), best.DimensionKey)); + } + return causes; + } + } + else { + //As the entropy gain for this best dimension is small, the root cause should be itself + return new List() { new RootCauseItem(anomalyDimension) }; + } + } + + public static double GetEntropy(int totalNum, int anomalyNum) { + double ratio =(double)anomalyNum / totalNum; + if (ratio == 0 || ratio == 1) { + return 0; + } + + return -(ratio * Log2(ratio) + (1 - ratio) * Log2(1 - ratio)); + } + + public static Dictionary GetEntropyList(BestDimension best, List points) { + Dictionary list = new Dictionary(); + // need to update, change to children if necessary + foreach(Point point in points) { + string dimVal = point.Dimensions[best.DimensionKey]; + int pointSize = GetPointSize(best, dimVal); + int anomalySize = GetAnomalyPointSize(best, dimVal); + + double dimEntropy = GetEntropy(pointSize, anomalySize); + list.Add(dimVal, dimEntropy); + } + + return list; + } + + public static List GetTopAnomaly(List anomalyPoints, Point root, List totalPoints, String dimKey) + { + Dictionary pointDistribution = new Dictionary(); + UpdateDistribution(pointDistribution, totalPoints, 
dimKey); + + anomalyPoints.OrderBy(x=>x.Delta); + + if (root.Delta > 0) + { + anomalyPoints.Reverse(); + } + + if (anomalyPoints.Count == 1) + { + return anomalyPoints; + } + + double delta = 0; + double preDelta = 0; + List causeList = new List(); + foreach(Point anomaly in anomalyPoints) + { + if (anomaly.Delta * root.Delta <= 0) + { + break; + } + + if (StopAnomalyComparison(delta, root.Delta, anomaly.Delta, preDelta)) + { + break; + } + + delta += anomaly.Delta; + causeList.Add(anomaly); + preDelta = anomaly.Delta; + } + + int pointSize = GetTotalNumber(pointDistribution); + if (ShouldSeperateAnomaly(delta, root.Delta, pointSize, causeList.Count)) + { + return causeList; + } + + return null; + } + + public static BestDimension SelectBestDimension(List totalPoints, List anomalyPoints, List aggDim, double totalEntropy) + { + Dictionary entroyGainMap = new Dictionary(); + Dictionary entroyGainRatioMap = new Dictionary(); + double sumGain = 0; + + foreach (String dimKey in aggDim) + { + BestDimension dimension = BestDimension.CreateDefaultInstance(); + dimension.DimensionKey =dimKey; + + UpdateDistribution(dimension.PointDis, totalPoints, dimKey); + UpdateDistribution(dimension.AnomalyDis, anomalyPoints, dimKey); + + double gain = GetDimensionEntropyGain(dimension.PointDis, dimension.AnomalyDis, totalEntropy); + dimension.Entropy = totalEntropy - gain; + entroyGainMap.Add(dimension, gain); + + double gainRatio = gain / GetDimensionInstrinsicValue(dimension.PointDis, dimension.AnomalyDis, totalEntropy); + entroyGainRatioMap.Add(dimension, gainRatio); + + sumGain += gain; + } + + double meanGain = sumGain / aggDim.Count(); + + BestDimension best = FindBestDimension(entroyGainMap, entroyGainRatioMap, meanGain); + + return best; + } + + public static BestDimension SelectBestDimension(Dictionary> pointChildren, Dictionary> anomalyChildren, List aggDim, double totalEntropy) + { + Dictionary entroyGainMap = new Dictionary(); + Dictionary entroyGainRatioMap = new 
Dictionary(); + double sumGain = 0; + + foreach (String dimKey in aggDim) + { + BestDimension dimension = BestDimension.CreateDefaultInstance(); + dimension.DimensionKey = dimKey; + + UpdateDistribution(dimension.PointDis, pointChildren[dimKey], dimKey); + UpdateDistribution(dimension.AnomalyDis, anomalyChildren[dimKey], dimKey); + + double gain = GetDimensionEntropyGain(dimension.PointDis, dimension.AnomalyDis, totalEntropy); + dimension.Entropy = totalEntropy - gain; + entroyGainMap.Add(dimension, gain); + + double gainRatio = gain / GetDimensionInstrinsicValue(dimension.PointDis, dimension.AnomalyDis, totalEntropy); + entroyGainRatioMap.Add(dimension, gainRatio); + + sumGain += gain; + } + + double meanGain = sumGain / aggDim.Count(); + + BestDimension best = FindBestDimension(entroyGainMap, entroyGainRatioMap, meanGain); + + return best; + } + + private static BestDimension FindBestDimension(Dictionary entropyGainMap, Dictionary entropyGainRationMap,double meanGain) { + BestDimension best = null; + foreach (KeyValuePair dimension in entropyGainMap) + { + if (dimension.Key.AnomalyDis.Count == 1 || dimension.Value >= meanGain) + { + if (dimension.Key.AnomalyDis.Count > 1) + { + if (best == null || (best.AnomalyDis.Count != 1 && entropyGainRationMap[best].CompareTo(dimension.Value) < 0)) + { + best = dimension.Key; + } + } + else + { + if (best == null || best.AnomalyDis.Count > 1) + { + best = dimension.Key; + } + else + { + if (entropyGainRationMap[best].CompareTo(dimension.Value) < 0) + { + best = dimension.Key; + } + } + } + } + } + return best; + } + + private static AnomalyCause GetAnomalyCause(PointTree anomalyTree, List totalPoints) + { + string bestKey = null; + List bestAnomalies = null; + + foreach (string key in anomalyTree.ChildrenNodes.Keys) + { + List anomalies = anomalyTree.ChildrenNodes[key]; + + List causeList = GetTopAnomaly(anomalies, anomalyTree.ParentNode, totalPoints, key); + if (causeList == null) + { + continue; + } + + if (bestAnomalies 
== null || bestAnomalies.Count > causeList.Count) + { + bestKey = key; + bestAnomalies = causeList; + } + } + + if (bestKey == null) + { + return null; + } + else + { + AnomalyCause cause = new AnomalyCause(); + cause.DimensionKey = bestKey; + cause.Anomalies = bestAnomalies; + return cause; + } + } + + public static bool IsAnomalous(int pointSize, int anomalySize) + { + if (anomalySize == pointSize && anomalySize == 1) + { + return false; + } + + return (double)anomalySize / pointSize > 0.5; + } + + private static Dictionary UpdateDimensionValue(Dictionary dimension, string key, string value) { + Dictionary newDim = new Dictionary(dimension); + newDim[key] = value; + return newDim; + } + + private static bool StopAnomalyComparison(double preTotal, double parent, double current, double pre) + { + if (Math.Abs(preTotal) < Math.Abs(parent) * 0.95) + { + return false; + } + + return Math.Abs(pre) / Math.Abs(current) > 2; + } + + private static bool ShouldSeperateAnomaly(double total, double parent, int totalSize, int size) + { + if (Math.Abs(total) < Math.Abs(parent) * 0.95) + { + return false; + } + + if (size == totalSize && size == 1) + { + return true; + } + + return size <= totalSize * 0.5; + } + + private static double GetDimensionEntropyGain(Dictionary pointDis, Dictionary anomalyDis, double totalEntropy) + { + int total = GetTotalNumber(pointDis); + double entropy = 0; + foreach (string key in anomalyDis.Keys) + { + double dimEntropy = GetEntropy(pointDis[key], anomalyDis[key]); + entropy += dimEntropy * pointDis[key] / total; + } + return totalEntropy - entropy; + } + + private static double GetDimensionInstrinsicValue(Dictionary pointDis, Dictionary anomalyDis, double totalEntropy) + { + double instrinsicValue = 0; + + foreach (string key in anomalyDis.Keys) + { + instrinsicValue -= Log2( (double)anomalyDis[key] / pointDis[key]) * anomalyDis[key] / pointDis[key]; + } + + return instrinsicValue; + } + + private static int GetTotalNumber(Dictionary 
distribution) + { + int total = 0; + foreach (int num in distribution.Values) + { + total += num; + } + return total; + } + + private static void UpdateDistribution(Dictionary distribution, List points, string dimKey) + { + foreach (Point point in points) + { + String dimVal = point.Dimensions[dimKey]; + if (!distribution.ContainsKey(dimVal)) + { + distribution.Add(dimVal, 0); + } + distribution[dimVal]= distribution[dimVal] + 1; + } + } + + private static bool IsLargeEntropyGain(double total, double dimEntropy) { + return total - dimEntropy > 0.3; + } + + private static double Log2(double val) { + return Math.Log(val) / Math.Log(2); + } + + private static bool ContainsAll(Dictionary bigDic, Dictionary smallDic) { + foreach (var item in smallDic) { + if (!bigDic.ContainsKey(item.Key)) { + return false; + } + } + return true; + } + + private static bool IsAggregationDimension(string val, string aggSymbol) { + return val.Equals(aggSymbol); + } + + private static int GetPointSize(BestDimension dim, string key) { + int pointSize = 0; + if (dim.PointDis.ContainsKey(key)) { + pointSize = dim.PointDis[key]; + } + return pointSize; + } + + private static int GetAnomalyPointSize(BestDimension dim, string key) + { + int anomalyPointSize = 0; + if (dim.AnomalyDis.ContainsKey(key)) + { + anomalyPointSize = dim.AnomalyDis[key]; + } + return anomalyPointSize; + } + } + + public class DimensionInfo { + public List DetailDim { get; set; } + public List AggDim { get; set; } + + public static DimensionInfo CreateDefaultInstance() { + DimensionInfo instance = new DimensionInfo(); + instance.DetailDim = new List(); + instance.AggDim = new List(); + return instance; + } + } + + public class PointTree { + public Point ParentNode; + public Dictionary> ChildrenNodes; + public List Leaves; + + public static PointTree CreateDefaultInstance() + { + PointTree instance = new PointTree(); + instance.Leaves = new List(); + instance.ChildrenNodes = new Dictionary>(); + return instance; + } + } + 
+ public sealed class Point + { + public double Value { get; set; } + public double ExpectedValue { get; set; } + public bool IsAnomaly { get; set; } + public Dictionary Dimensions { get; set; } + + public double Delta { get; set; } + + public Point(double value, double expectedValue, bool isAnomaly, Dictionary dimensions) + { + Value = value; + ExpectedValue = expectedValue; + IsAnomaly = isAnomaly; + Dimensions = dimensions; + Delta = (value - expectedValue) / expectedValue; + if (expectedValue == 0) { + Delta = 0; //todo, need to confirm + } + } + } + + public sealed class BestDimension { + public string DimensionKey; + public double Entropy; + public Dictionary AnomalyDis; + public Dictionary PointDis; + + public BestDimension() { } + public static BestDimension CreateDefaultInstance() { + BestDimension instance = new BestDimension(); + instance.AnomalyDis = new Dictionary(); + instance.PointDis = new Dictionary(); + return instance; + } + } + + public sealed class AnomalyCause { + public string DimensionKey; + public List Anomalies; + + public AnomalyCause() {} + } + + public sealed class RootCauseItem + { + public double Score; + public string Path; + public Dictionary RootCause; + public AnomalyDirection Direction; + + public RootCauseItem(Dictionary rootCause) + { + RootCause = rootCause; + } + + public RootCauseItem(Dictionary rootCause, string path) + { + RootCause = rootCause; + Path= path; + } + } + + public enum AnomalyDirection + { + /// + /// the value is larger than expected value. + /// + Up = 0, + /// + /// the value is lower than expected value. 
+ /// + Down = 1 + } +} diff --git a/test/Microsoft.ML.TimeSeries.Tests/Microsoft.ML.TimeSeries.Tests.csproj b/test/Microsoft.ML.TimeSeries.Tests/Microsoft.ML.TimeSeries.Tests.csproj index d604ec0a9e..cb03caf794 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/Microsoft.ML.TimeSeries.Tests.csproj +++ b/test/Microsoft.ML.TimeSeries.Tests/Microsoft.ML.TimeSeries.Tests.csproj @@ -1,6 +1,7 @@  CORECLR + netcoreapp3.0 @@ -14,6 +15,9 @@ + + + PreserveNewest diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 51ea88c881..1f6cf8436e 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -1,10 +1,10 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. using System; using System.Collections.Generic; -using System.Drawing; +using System.Data; using System.IO; using Microsoft.ML.Data; using Microsoft.ML.TestFramework; @@ -12,6 +12,9 @@ using Microsoft.ML.Transforms.TimeSeries; using Xunit; using Xunit.Abstractions; +using Microsoft.VisualBasic.CompilerServices; + +using Microsoft.VisualBasic.FileIO; namespace Microsoft.ML.Tests { @@ -549,7 +552,7 @@ public RootCauseLocalizationTransformedData() public void RootCauseLocalizationWithDT() { // Create an root cause localizatiom input list. 
- var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(),new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM"), new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Avg, "AVG") }; + var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(),new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM"), new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Avg, "AVG") }; var ml = new MLContext(1); // Convert the list of root cause data to an IDataView object, which is consumable by ML.NET API. @@ -575,11 +578,148 @@ public void RootCauseLocalizationWithDT() } var engine = ml.Model.CreatePredictionEngine(model); - var newRootCauseInput = new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() {new MetricSlice(new DateTime(), new List())}, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM"); + var newRootCauseInput = new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() {new MetricSlice(new DateTime(), new List())}, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM"); var transformedRootCause = engine.Predict(newRootCauseInput); Assert.NotNull(transformedRootCause); //todo - will add more tests here when onboarding mock data } + + [Fact] + public void RootCauseLocalizationWithCSV() + { + string ocsdatacenter = "OCSDatacenter"; + string appType = "AppType"; + string releaseAudienceGroup = "Release_AudienceGroup"; + string wacDatacenter = "WACDatacenter"; + string requestType = "RequestType"; + string statusCode = "StatusCode"; + + var anomalyRootData = 
GetDataTabletFromCSVFile("C:/excel/anomaly_root.csv"); + + var inputData = GetDataTabletFromCSVFile("C:/excel/input/1563224400.csv"); + + Dictionary> rootNodeMap = new Dictionary>(); + foreach (DataRow row in anomalyRootData.Rows) + { + // load the data, build the RootCauseInput, take care of empty value + long seconds = long.Parse(row["TimeStamp"].ToString()); + DateTime t = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddSeconds(seconds); + + Dictionary dimension = new Dictionary(); + dimension.Add(ocsdatacenter, row[ocsdatacenter].ToString()); + dimension.Add(appType, row[appType].ToString()); + dimension.Add(releaseAudienceGroup, row[releaseAudienceGroup].ToString()); + dimension.Add(wacDatacenter, row[wacDatacenter].ToString()); + dimension.Add(statusCode, row[statusCode].ToString()); + dimension.Add(requestType, row[requestType].ToString()); + + rootNodeMap.Add(t, dimension); + } + Console.WriteLine(rootNodeMap); + + + + + DateTime timeStamp = new DateTime(); + + List points = new List(); + foreach (DataRow row in inputData.Rows) { + // load the data, build the RootCauseInput, take care of empty value + long seconds = long.Parse(row["TimeStamp"].ToString()); + timeStamp = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddSeconds(seconds); + double value = Double.Parse(row["Value"].ToString()); + double expectedValue = 0; + if (!row["ExpectedValue"].ToString().Equals("")) { + expectedValue = Double.Parse(row["ExpectedValue"].ToString()); + } + bool isAnomaly = Boolean.Parse(row["IsAnomaly"].ToString()); + Dictionary dimension = new Dictionary(); + dimension.Add(ocsdatacenter, row[ocsdatacenter].ToString()); + dimension.Add(appType, row[appType].ToString()); + dimension.Add(releaseAudienceGroup, row[releaseAudienceGroup].ToString()); + dimension.Add(wacDatacenter, row[wacDatacenter].ToString()); + dimension.Add(statusCode, row[statusCode].ToString()); + dimension.Add(requestType, row[requestType].ToString()); + + points.Add(new 
Microsoft.ML.TimeSeries.Point(value, expectedValue, isAnomaly, dimension)); ; + } + + List slices = new List(); + slices.Add(new MetricSlice(timeStamp, points)); + + //// Create an root cause localizatiom input list from csv. + var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM")}; + + + var ml = new MLContext(1); + // Convert the list of root cause data to an IDataView object, which is consumable by ML.NET API. + var data = ml.Data.LoadFromEnumerable(rootCauseLocalizationData); + + // Create pipeline to localize root cause by decision tree. + var pipeline = ml.Transforms.LocalizeRootCauseByDT(nameof(RootCauseLocalizationTransformedData.RootCause), nameof(RootCauseLocalizationData.Input)); + + // Fit the model. + var model = pipeline.Fit(data); + + // Test path: input list -> IDataView -> Enumerable of RootCauseLocalizationInputs. + var transformedData = model.Transform(data); + + // Load input list in DataView back to Enumerable. 
+ var transformedDataPoints = ml.Data.CreateEnumerable(transformedData, false); + + foreach (var dataPoint in transformedDataPoints) + { + var rootCause = dataPoint.RootCause; + + Assert.NotNull(rootCause); + } + + var engine = ml.Model.CreatePredictionEngine(model); + + var newRootCauseInput = new RootCauseLocalizationData(timeStamp, rootNodeMap[timeStamp], new List() { new MetricSlice(timeStamp, points) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "##EMPTY##awqegp##"); + var transformedRootCause = engine.Predict(newRootCauseInput); + + Assert.NotNull(transformedRootCause); + ////todo - will add more tests here when onboarding mock data + + } + + + private static DataTable GetDataTabletFromCSVFile(string filePath) + { + DataTable csvData = new DataTable(); + + + using (TextFieldParser csvReader = new TextFieldParser(filePath)) + { + csvReader.SetDelimiters(new string[] { "," }); + csvReader.HasFieldsEnclosedInQuotes = true; + string[] colFields = csvReader.ReadFields(); + foreach (string column in colFields) + { + DataColumn datecolumn = new DataColumn(column); + datecolumn.AllowDBNull = true; + csvData.Columns.Add(datecolumn); + } + + while (!csvReader.EndOfData) + { + string[] fieldData = csvReader.ReadFields(); + //Making empty value as null + for (int i = 0; i < fieldData.Length; i++) + { + if (fieldData[i] == "") + { + fieldData[i] = null; + } + } + csvData.Rows.Add(fieldData); + } + } + + return csvData; + } + } } From 92de1dc7ffa46347d611afb8e764d6456d261b26 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Wed, 18 Mar 2020 09:21:22 +0800 Subject: [PATCH 03/49] revert sln changes --- Microsoft.ML.sln | 83 ++++++----------- .../DTRootCauseLocalization.cs | 89 +++++++++++++++++-- .../DTRootCauseLocalizationUtils.cs | 27 +++++- 3 files changed, 138 insertions(+), 61 deletions(-) diff --git a/Microsoft.ML.sln b/Microsoft.ML.sln index a1bb937cae..c1b573c922 100644 --- a/Microsoft.ML.sln +++ b/Microsoft.ML.sln @@ -293,9 +293,9 @@ 
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Vision", "src\ EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.TestFrameworkCommon", "test\Microsoft.ML.TestFrameworkCommon\Microsoft.ML.TestFrameworkCommon.csproj", "{A22FAD27-77E8-4460-8B92-EC7090B7173A}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.NightlyBuild.Tests", "test\Microsoft.ML.NightlyBuild.Tests\Microsoft.ML.NightlyBuild.Tests.csproj", "{A1CAC86F-F4BB-4B6D-9D18-E9AE15B3C66E}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.ML.NightlyBuild.Tests", "test\Microsoft.ML.NightlyBuild.Tests\Microsoft.ML.NightlyBuild.Tests.csproj", "{A1CAC86F-F4BB-4B6D-9D18-E9AE15B3C66E}" EndProject -Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.NugetPackageVersionUpdater", "test\Microsoft.ML.NugetPackageVersionUpdater\Microsoft.ML.NugetPackageVersionUpdater.csproj", "{C8DB58DC-6434-4431-A81F-263D86E2A5F3}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Microsoft.ML.NugetPackageVersionUpdater", "test\Microsoft.ML.NugetPackageVersionUpdater\Microsoft.ML.NugetPackageVersionUpdater.csproj", "{C8DB58DC-6434-4431-A81F-263D86E2A5F3}" EndProject Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "build", "build", "{C91F81E3-B900-4968-A6DF-F53B515E97E1}" EndProject @@ -304,8 +304,6 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "netstandard2.0", "netstanda pkg\Microsoft.ML.CpuMath\build\netstandard2.0\Microsoft.ML.CpuMath.props = pkg\Microsoft.ML.CpuMath\build\netstandard2.0\Microsoft.ML.CpuMath.props EndProjectSection EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "LCATest", "LCATest\LCATest.csproj", "{27A55727-AA9B-4C27-B322-EB3E65F310B8}" -EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -1641,30 +1639,6 @@ Global {3C8F910B-7F23-4D25-B521-6D5AC9570ADD}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU 
{3C8F910B-7F23-4D25-B521-6D5AC9570ADD}.Release-netfx|x64.ActiveCfg = Release-netfx|Any CPU {3C8F910B-7F23-4D25-B521-6D5AC9570ADD}.Release-netfx|x64.Build.0 = Release-netfx|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|Any CPU.Build.0 = Debug|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|x64.ActiveCfg = Debug|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|x64.Build.0 = Debug|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|Any CPU.ActiveCfg = Debug-netcoreapp3_0|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|Any CPU.Build.0 = Debug-netcoreapp3_0|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|x64.ActiveCfg = Debug-netcoreapp3_0|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|x64.Build.0 = Debug-netcoreapp3_0|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|x64.ActiveCfg = Debug-netfx|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|x64.Build.0 = Debug-netfx|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|Any CPU.ActiveCfg = Release|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|Any CPU.Build.0 = Release|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|x64.ActiveCfg = Release|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|x64.Build.0 = Release|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|Any CPU.ActiveCfg = Release-netcoreapp3_0|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|Any CPU.Build.0 = Release-netcoreapp3_0|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|x64.ActiveCfg = Release-netcoreapp3_0|Any CPU - 
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|x64.Build.0 = Release-netcoreapp3_0|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|x64.ActiveCfg = Release-netfx|Any CPU - {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|x64.Build.0 = Release-netfx|Any CPU {56CB0850-7341-4D71-9AE4-9EFC472D93DD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU {56CB0850-7341-4D71-9AE4-9EFC472D93DD}.Debug|Any CPU.Build.0 = Debug|Any CPU {56CB0850-7341-4D71-9AE4-9EFC472D93DD}.Debug|x64.ActiveCfg = Debug|Any CPU @@ -1797,30 +1771,30 @@ Global {C8DB58DC-6434-4431-A81F-263D86E2A5F3}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU {C8DB58DC-6434-4431-A81F-263D86E2A5F3}.Release-netfx|x64.ActiveCfg = Release-netfx|Any CPU {C8DB58DC-6434-4431-A81F-263D86E2A5F3}.Release-netfx|x64.Build.0 = Release-netfx|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug|Any CPU.Build.0 = Debug|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug|x64.ActiveCfg = Debug|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug|x64.Build.0 = Debug|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug-netcoreapp3_0|Any CPU.ActiveCfg = Debug-netcoreapp3_0|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug-netcoreapp3_0|Any CPU.Build.0 = Debug-netcoreapp3_0|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug-netcoreapp3_0|x64.ActiveCfg = Debug-netcoreapp3_0|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug-netcoreapp3_0|x64.Build.0 = Debug-netcoreapp3_0|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU - 
{27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug-netfx|x64.ActiveCfg = Debug-netfx|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Debug-netfx|x64.Build.0 = Debug-netfx|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release|Any CPU.ActiveCfg = Release|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release|Any CPU.Build.0 = Release|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release|x64.ActiveCfg = Release|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release|x64.Build.0 = Release|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release-netcoreapp3_0|Any CPU.ActiveCfg = Release-netcoreapp3_0|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release-netcoreapp3_0|Any CPU.Build.0 = Release-netcoreapp3_0|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release-netcoreapp3_0|x64.ActiveCfg = Release-netcoreapp3_0|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release-netcoreapp3_0|x64.Build.0 = Release-netcoreapp3_0|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release-netfx|x64.ActiveCfg = Release-netfx|Any CPU - {27A55727-AA9B-4C27-B322-EB3E65F310B8}.Release-netfx|x64.Build.0 = Release-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|Any CPU.Build.0 = Debug|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|x64.ActiveCfg = Debug|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug|x64.Build.0 = Debug|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|Any CPU.ActiveCfg = Debug-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|Any CPU.Build.0 = Debug-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|x64.ActiveCfg = Debug-netcoreapp3_0|Any CPU + 
{E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netcoreapp3_0|x64.Build.0 = Debug-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|Any CPU.ActiveCfg = Debug-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|Any CPU.Build.0 = Debug-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|x64.ActiveCfg = Debug-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Debug-netfx|x64.Build.0 = Debug-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|Any CPU.ActiveCfg = Release|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|Any CPU.Build.0 = Release|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|x64.ActiveCfg = Release|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release|x64.Build.0 = Release|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|Any CPU.ActiveCfg = Release-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|Any CPU.Build.0 = Release-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|x64.ActiveCfg = Release-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netcoreapp3_0|x64.Build.0 = Release-netcoreapp3_0|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|Any CPU.ActiveCfg = Release-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|Any CPU.Build.0 = Release-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|x64.ActiveCfg = Release-netfx|Any CPU + {E2DD0721-5B0F-4606-8182-4C7EFB834518}.Release-netfx|x64.Build.0 = Release-netfx|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -1906,8 +1880,6 @@ Global {F5D11F71-2D61-4AE9-99D7-0F0B54649B15} = {D3D38B03-B557-484D-8348-8BADEE4DF592} {A6924919-9E37-4023-8B7F-E85C8E3CC9B3} = {DA452A53-2E94-4433-B08C-041EDEC729E6} {3C8F910B-7F23-4D25-B521-6D5AC9570ADD} = {DA452A53-2E94-4433-B08C-041EDEC729E6} - 
{E2DD0721-5B0F-4606-8182-4C7EFB834518} = {09EADF06-BE25-4228-AB53-95AE3E15B530} - {1BA5C784-52E8-4A87-8525-26B2452F2882} = {D3D38B03-B557-484D-8348-8BADEE4DF592} {56CB0850-7341-4D71-9AE4-9EFC472D93DD} = {09EADF06-BE25-4228-AB53-95AE3E15B530} {46CC5637-3DDF-4100-93FC-44BB87B2DB81} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {3817A875-278C-4140-BF66-3C4A8CA55F0D} = {D3D38B03-B557-484D-8348-8BADEE4DF592} @@ -1917,7 +1889,8 @@ Global {C8DB58DC-6434-4431-A81F-263D86E2A5F3} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} {C91F81E3-B900-4968-A6DF-F53B515E97E1} = {BF66A305-DF10-47E4-8D81-42049B149D2B} {027DBA48-85B6-46F1-9487-0B49B5057FC0} = {C91F81E3-B900-4968-A6DF-F53B515E97E1} - {27A55727-AA9B-4C27-B322-EB3E65F310B8} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4} + {E2DD0721-5B0F-4606-8182-4C7EFB834518} = {09EADF06-BE25-4228-AB53-95AE3E15B530} + {1BA5C784-52E8-4A87-8525-26B2452F2882} = {D3D38B03-B557-484D-8348-8BADEE4DF592} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D} diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs index 05ac7a82b4..de148a1189 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs @@ -398,8 +398,10 @@ private static void LocalizeRootCauses(RootCauseLocalizationInput src , ref Root dst = new RootCause(); dst.Items = new List { }; + //todo - get total points GetRootCauseList(src, ref dst); - GetRootCauseScore(src, ref dst); + //todo - need to update, temp for test + GetRootCauseScore(new List(), src.AnomalyDimensions, ref dst); } private static void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst) { @@ -480,17 +482,94 @@ private static void GetRootCauseList(RootCauseLocalizationInput src, ref RootCau } } - private static void GetRootCauseScore(RootCauseLocalizationInput src, ref RootCause dst) { + private static void 
GetRootCauseScore(List points, Dictionary anomalyRoot, ref RootCause dst) + { if (dst.Items.Count > 1) { - // for each, get surprise + //get surprise value and explanary power value + Point anomalyPoint = DTRootCauseLocalizationUtils.FindPointByDimension(anomalyRoot, points); + + double sumSurprise = 0; + double sumEp = 0; + List scoreList = new List(); - // then normalize + foreach (RootCauseItem item in dst.Items) + { + Point rootCausePoint = DTRootCauseLocalizationUtils.FindPointByDimension(item.RootCause, points); + if (rootCausePoint != null) + { + if (rootCausePoint.ExpectedValue < rootCausePoint.Value) + { + item.Direction = AnomalyDirection.Up; + } + else + { + item.Direction = AnomalyDirection.Down; + } + } + + if (anomalyPoint != null && rootCausePoint != null) + { + double surprise = GetSurpriseScore(rootCausePoint, anomalyPoint); + + double ep = (rootCausePoint.Value - rootCausePoint.ExpectedValue) / (anomalyPoint.Value - anomalyPoint.ExpectedValue); + + scoreList.Add(new RootCauseScore(surprise, ep)); + sumSurprise += surprise; + sumEp += Math.Abs(ep); + } + } + + //normalize and get final score + for (int i = 0; i < scoreList.Count; i++) + { + dst.Items[i].Score = GetFinalScore(scoreList[i].Surprise / sumSurprise, Math.Abs(scoreList[i].ExplainaryScore) / sumEp); + + } } - else if (dst.Items.Count == 1) { + else if (dst.Items.Count == 1) + { //surprise and expananory , max is 1 + Point rootCausePoint = DTRootCauseLocalizationUtils.FindPointByDimension(dst.Items[0].RootCause, points); + if (rootCausePoint != null) + { + if (rootCausePoint.ExpectedValue < rootCausePoint.Value) + { + dst.Items[0].Direction = AnomalyDirection.Up; + } + else + { + dst.Items[0].Direction = AnomalyDirection.Down; + } + } + + Point anomalyPoint = DTRootCauseLocalizationUtils.FindPointByDimension(anomalyRoot, points); + if (anomalyPoint != null && rootCausePoint != null) + { + double surprise = GetSurpriseScore(rootCausePoint, anomalyPoint); + + double ep = 
(rootCausePoint.Value - rootCausePoint.ExpectedValue) / (anomalyPoint.Value - anomalyPoint.ExpectedValue); + dst.Items[0].Score = GetFinalScore(surprise, ep); + } + } + + } + + private static double GetSurpriseScore(Point rootCausePoint, Point anomalyPoint) + { + double p = rootCausePoint.ExpectedValue / anomalyPoint.ExpectedValue; + double q = rootCausePoint.Value / anomalyPoint.Value; + double surprise = 0.5 * (p * DTRootCauseLocalizationUtils.Log2(2 * p / (p + q)) + q * DTRootCauseLocalizationUtils.Log2(2 * q / (p + q))); + + return surprise; + } + + private static double GetFinalScore(double surprise, double ep) + { + //return Math.Max(1, Parent.Beta * surprise + (1 - Parent.Beta) * ep); + return 0; } } diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs index 56e8378d5b..cf52b48cdc 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs @@ -366,6 +366,19 @@ public static bool IsAnomalous(int pointSize, int anomalySize) return (double)anomalySize / pointSize > 0.5; } + public static Point FindPointByDimension(Dictionary dim, List points) + { + foreach (Point p in points) + { + if (p.Dimensions.Equals(dim)) + { + return p; + } + } + + return null; + } + private static Dictionary UpdateDimensionValue(Dictionary dimension, string key, string value) { Dictionary newDim = new Dictionary(dimension); newDim[key] = value; @@ -448,7 +461,7 @@ private static bool IsLargeEntropyGain(double total, double dimEntropy) { return total - dimEntropy > 0.3; } - private static double Log2(double val) { + public static double Log2(double val) { return Math.Log(val) / Math.Log(2); } @@ -584,4 +597,16 @@ public enum AnomalyDirection /// Down = 1 } + + public class RootCauseScore + { + public double Surprise; + public double ExplainaryScore; + + public RootCauseScore(double surprise, double explainaryScore) + { + Surprise 
= surprise; + ExplainaryScore = explainaryScore; + } + } } From 798289c0dec314552d1fd2266275e1844588ac87 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Wed, 18 Mar 2020 17:50:41 +0800 Subject: [PATCH 04/49] add evaluation --- LCATest/Class1.cs | 72 ---- LCATest/LCATest.csproj | 28 -- .../TimeSeries/LocalizeRootCauseByDT.cs | 3 - .../TimeSeries/LocalizeRootCauseEvaluation.cs | 403 ++++++++++++++++++ .../Microsoft.ML.Samples.csproj | 3 +- docs/samples/Microsoft.ML.Samples/Program.cs | 16 +- .../DTRootCauseLocalization.cs | 6 +- .../DTRootCauseLocalizationUtils.cs | 22 +- .../Microsoft.ML.TimeSeries.Tests.csproj | 4 - .../TimeSeriesDirectApi.cs | 151 +------ 10 files changed, 441 insertions(+), 267 deletions(-) delete mode 100644 LCATest/Class1.cs delete mode 100644 LCATest/LCATest.csproj create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs diff --git a/LCATest/Class1.cs b/LCATest/Class1.cs deleted file mode 100644 index febf8c3364..0000000000 --- a/LCATest/Class1.cs +++ /dev/null @@ -1,72 +0,0 @@ -using System; - -namespace LCATest -{ - public class Class1 : BaseTestClass - { - - private class RootCauseLocalizationData - { - [RootCauseLocalizationInputType] - public RootCauseLocalizationInput Input { get; set; } - - public RootCauseLocalizationData() - { - Input = null; - } - - public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, DTRootCauseLocalizationEstimator.AggregateType aggregateteType, string aggregateSymbol) - { - Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, aggregateteType, aggregateSymbol); - } - } - - private class RootCauseLocalizationTransformedData - { - [RootCauseType()] - public RootCause RootCause { get; set; } - - public RootCauseLocalizationTransformedData() - { - RootCause = null; - } - } - - [Fact] - public void RootCauseLocalizationWithDT() - { - // Create an root cause localizatiom input list. 
- var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM"), new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Avg, "AVG") }; - - var ml = new MLContext(1); - // Convert the list of root cause data to an IDataView object, which is consumable by ML.NET API. - var data = ml.Data.LoadFromEnumerable(rootCauseLocalizationData); - - // Create pipeline to localize root cause by decision tree. - var pipeline = ml.Transforms.LocalizeRootCauseByDT(nameof(RootCauseLocalizationTransformedData.RootCause), nameof(RootCauseLocalizationData.Input)); - - // Fit the model. - var model = pipeline.Fit(data); - - // Test path: input list -> IDataView -> Enumerable of RootCauseLocalizationInputs. - var transformedData = model.Transform(data); - - // Load input list in DataView back to Enumerable. 
- var transformedDataPoints = ml.Data.CreateEnumerable(transformedData, false); - - foreach (var dataPoint in transformedDataPoints) - { - var rootCause = dataPoint.RootCause; - - Assert.NotNull(rootCause); - } - - var engine = ml.Model.CreatePredictionEngine(model); - var newRootCauseInput = new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM"); - var transformedRootCause = engine.Predict(newRootCauseInput); - - Assert.NotNull(transformedRootCause); - //todo - will add more tests here when onboarding mock data - } - } -} diff --git a/LCATest/LCATest.csproj b/LCATest/LCATest.csproj deleted file mode 100644 index 97d17fe121..0000000000 --- a/LCATest/LCATest.csproj +++ /dev/null @@ -1,28 +0,0 @@ - - - CORECLR - - Exe - - - - AnyCPU - - - - - - - - - - - - - - - - PreserveNewest - - - diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs index 79a5cfa839..09988dd19e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs @@ -1,9 +1,6 @@ using System; using System.Collections.Generic; -using System.Drawing; -using System.IO; using Microsoft.ML; -using Microsoft.ML.Data; using Microsoft.ML.Transforms.TimeSeries; namespace Samples.Dynamic diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs new file mode 100644 index 0000000000..e2e0e5c985 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs @@ -0,0 +1,403 @@ +using System; +using System.Collections.Generic; 
+using System.Data; +using Microsoft.ML; +using Microsoft.ML.TimeSeries; +using Microsoft.ML.Transforms.TimeSeries; + +using Microsoft.VisualBasic.FileIO; + +namespace Samples.Dynamic.Transforms.TimeSeries +{ + public static class LocalizeRootCauseEvaluation + { + public static void Example() + { + Dictionary> rootNodeMap = GetAnomalyRootMap(); + Dictionary>> labeledRootCauseMap = GetLabeledRootCauseMap(); + + string aggSymbol = "##EMPTY##awqegp##"; + + int totalTp = 0; + int totalFp = 0; + int totalFn = 0; + int totalCount = 0; + + bool exactly = false; + + foreach (KeyValuePair> item in rootNodeMap) + { + DateTime timeStamp = item.Key; + + int seconds = Convert.ToInt32(timeStamp.Subtract(new DateTime(1970, 1, 1, 0, 0, 0, 0)).TotalSeconds); + string path = String.Format("D:/rootcause/Dataset_yaniv/raw_data_201908_202002/{0}.csv", seconds); + List points = GetPoints(path); + List slices = new List(); + slices.Add(new MetricSlice(timeStamp, points)); + + PredictionEngine engine = GetRootCausePredictionEngine(); + + var newRootCauseInput = new RootCauseLocalizationData(timeStamp, rootNodeMap[timeStamp], new List() { new MetricSlice(timeStamp, points) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, aggSymbol); + + List list = new List(); + GetRootCause(list, newRootCauseInput, engine); + + List> labeledRootCause = labeledRootCauseMap[timeStamp]; + List> detectedRootCause = ConvertRootCauseItemToDic(list); + RemoveAggSymbol(detectedRootCause, aggSymbol); + + Tuple evaluation = ScoreRootCause(detectedRootCause, labeledRootCause, exactly, timeStamp); + totalTp += evaluation.Item1; + totalFp += evaluation.Item2; + totalFn += evaluation.Item3; + totalCount++; + } + + double precision = (double)totalTp / (totalTp + totalFp); + double recall = (double)totalTp / (totalTp + totalFn); + double f1 = 2 * precision * recall / (precision + recall); + Console.WriteLine(String.Format("Total Count : {0}, TP: {1}, FP: {2}, FN: {3}", totalCount, totalTp, totalFp, totalFn)); + 
Console.WriteLine(String.Format("Precision : {0}, Recall: {1}, F1: {2}", precision, recall, f1)); + } + + private static Tuple ScoreRootCause(List> detectedRootCause, List> labeledRootCause, bool exactly, DateTime timeStamp) + { + int tp = 0; + int fp = 0; + int fn; ; + List labelSet = new List(); + foreach (Dictionary cause in detectedRootCause) + { + string tpCause = FindTruePositive(cause, labeledRootCause, exactly); + if (tpCause == null) + { + //todo - seriesalize the root cause + Console.WriteLine(String.Format("FP : timestamp - {0}, detected root cause ", timeStamp)); + Console.WriteLine(string.Join(Environment.NewLine, cause)); + } + else + { + tp++; + labelSet.Add(tpCause); + } + } + + fn = labeledRootCause.Count - labelSet.Count; + if (fn != 0) + { + List> nCause = GetNegtiveCause(labeledRootCause, labelSet); + //todo seralize + if (nCause.Count > 0) + { + Console.WriteLine(String.Format("FN : timestamp - {0}", timeStamp)); + foreach (Dictionary cause in nCause) + { + Console.WriteLine(string.Join(Environment.NewLine, nCause)); + } + } + } + + return new Tuple(tp, fp, fn); + } + + private static List> GetNegtiveCause(List> labelCauses, List labelSet) + { + List> causeList = new List>(); + foreach (Dictionary cause in labelCauses) + { + if (!labelSet.Contains(GetDicHashCode(cause))) + { + causeList.Add(cause); + } + } + return causeList; + } + + private static string FindTruePositive(Dictionary cause, List> labelCauses, bool exactly) + { + foreach (Dictionary label in labelCauses) + { + string id = GetDicHashCode(label); + int compare = CompareCause(cause, label); + if (compare == 0) + { + return id; + } + else if (!exactly && (compare == 1 || compare == 2)) + { + return id; + } + } + return null; + } + + + private static string GetDicHashCode(Dictionary dic) + { + return dic.GetHashCode().ToString(); + } + + private static int CompareCause(Dictionary detect, Dictionary label) + { + + if (detect.Equals(label)) + { + return 0; + } + else if 
(DTRootCauseLocalizationUtils.ContainsAll(detect, label)) + { + return 1; + } + else if (DTRootCauseLocalizationUtils.ContainsAll(label, detect)) + { + return 2; + } + return 3; + } + private static List> ConvertRootCauseItemToDic(List items) + { + List> list = new List>(); + foreach (RootCauseItem item in items) + { + list.Add(item.RootCause); + } + return list; + } + + private static void RemoveAggSymbol(List> dimensions, string aggSymbol) + { + foreach (Dictionary dim in dimensions) + { + foreach (string key in dim.Keys) + { + if (dim[key].Equals(aggSymbol)) + { + dim.Remove(key); + } + } + } + } + + private static PredictionEngine GetRootCausePredictionEngine() + { + //// Create an root cause localizatiom input list from csv. + var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM") }; + + + var ml = new MLContext(1); + // Convert the list of root cause data to an IDataView object, which is consumable by ML.NET API. + var data = ml.Data.LoadFromEnumerable(rootCauseLocalizationData); + + // Create pipeline to localize root cause by decision tree. + var pipeline = ml.Transforms.LocalizeRootCauseByDT(nameof(RootCauseLocalizationTransformedData.RootCause), nameof(RootCauseLocalizationData.Input)); + + // Fit the model. + var model = pipeline.Fit(data); + + // Test path: input list -> IDataView -> Enumerable of RootCauseLocalizationInputs. + var transformedData = model.Transform(data); + + // Load input list in DataView back to Enumerable. 
+ var transformedDataPoints = ml.Data.CreateEnumerable(transformedData, false); + + foreach (var dataPoint in transformedDataPoints) + { + var rootCause = dataPoint.RootCause; + } + + var engine = ml.Model.CreatePredictionEngine(model); + return engine; + } + + private static string _ocsDataCenter = "OCSDatacenter"; + private static string _appType = "AppType"; + private static string _releaseAudienceGroup = "Release_AudienceGroup"; + private static string _wacDatacenter = "WACDatacenter"; + private static string _requestType = "RequestType"; + private static string _statusCode = "StatusCode"; + + private static List _dimensionKeys = new List() { _ocsDataCenter, _appType, _releaseAudienceGroup, _wacDatacenter, _statusCode, _requestType }; + + private static Dictionary> GetAnomalyRootMap() + { + var anomalyRootData = GetDataTabletFromCSVFile("D:/rootcause/Dataset_yaniv/root_cause_201908_202002/anomaly_root.csv"); + + Dictionary> rootNodeMap = new Dictionary>(); + foreach (DataRow row in anomalyRootData.Rows) + { + // load the data, build the RootCauseInput, take care of empty value + long seconds = long.Parse(row["TimeStamp"].ToString()); + DateTime t = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddSeconds(seconds); + + Dictionary dimension = new Dictionary(); + foreach (string key in _dimensionKeys) + { + if (!row[key].ToString().Equals("")) + { + dimension.Add(key, row[key].ToString()); + } + } + + rootNodeMap.Add(t, dimension); + } + return rootNodeMap; + } + + private static Dictionary>> GetLabeledRootCauseMap() + { + var labeldRootCause = GetDataTabletFromCSVFile("D:/rootcause/Dataset_yaniv/root_cause_201908_202002/labeled_root_cause.csv"); + + Dictionary>> map = new Dictionary>>(); + foreach (DataRow row in labeldRootCause.Rows) + { + // load the data, build the labled result, take care of empty value + long seconds = long.Parse(row["TimeStamp"].ToString()); + DateTime t = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddSeconds(seconds); + + 
Dictionary dimension = new Dictionary(); + foreach (string key in _dimensionKeys) + { + if (!row[key].ToString().Equals("")) + { + dimension.Add(key, row[key].ToString()); + } + } + + if (map.ContainsKey(t)) + { + map[t].Add(dimension); + } + else + { + map.Add(t, new List>() { dimension }); + } + } + return map; + } + + private static List GetPoints(string path) + { + + + var inputData = GetDataTabletFromCSVFile(path); + + DateTime timeStamp = new DateTime(); + + List points = new List(); + foreach (DataRow row in inputData.Rows) + { + // load the data, build the RootCauseInput, take care of empty value + long seconds = long.Parse(row["TimeStamp"].ToString()); + timeStamp = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddSeconds(seconds); + double value = Double.Parse(row["Value"].ToString()); + double expectedValue = 0; + if (!row["ExpectedValue"].ToString().Equals("")) + { + expectedValue = Double.Parse(row["ExpectedValue"].ToString()); + } + bool isAnomaly = Boolean.Parse(row["IsAnomaly"].ToString()); + Dictionary dimension = new Dictionary(); + foreach (string key in _dimensionKeys) + { + if (!row[key].ToString().Equals("")) + { + dimension.Add(key, row[key].ToString()); + } + } + + points.Add(new Microsoft.ML.TimeSeries.Point(value, expectedValue, isAnomaly, dimension)); ; + } + + return points; + } + + private static void GetRootCause(List rootCauseList, RootCauseLocalizationData inputData, PredictionEngine engine) + { + + RootCauseLocalizationTransformedData incrementalResult = engine.Predict(inputData); + + if (incrementalResult.RootCause.Items.Count == 0 || ( + incrementalResult.RootCause.Items.Count == 1 && incrementalResult.RootCause.Items[0].RootCause.Equals(inputData.Input.AnomalyDimensions) + )) + { + if (!rootCauseList.Contains(new RootCauseItem(inputData.Input.AnomalyDimensions))) + { + rootCauseList.Add(new RootCauseItem(inputData.Input.AnomalyDimensions)); + + } + return; + } + else + { + foreach (RootCauseItem item in 
incrementalResult.RootCause.Items) + { + RootCauseLocalizationData newData = new RootCauseLocalizationData(inputData.Input.AnomalyTimestamp, + item.RootCause, inputData.Input.Slices, inputData.Input.AggType, inputData.Input.AggSymbol); + GetRootCause(rootCauseList, newData, engine); + } + } + } + + private static DataTable GetDataTabletFromCSVFile(string filePath) + { + DataTable csvData = new DataTable(); + + + using (TextFieldParser csvReader = new TextFieldParser(filePath)) + { + csvReader.SetDelimiters(new string[] { "," }); + csvReader.HasFieldsEnclosedInQuotes = true; + string[] colFields = csvReader.ReadFields(); + foreach (string column in colFields) + { + DataColumn datecolumn = new DataColumn(column); + datecolumn.AllowDBNull = true; + csvData.Columns.Add(datecolumn); + } + + while (!csvReader.EndOfData) + { + string[] fieldData = csvReader.ReadFields(); + //Making empty value as null + for (int i = 0; i < fieldData.Length; i++) + { + if (fieldData[i] == "") + { + fieldData[i] = null; + } + } + csvData.Rows.Add(fieldData); + } + } + + return csvData; + } + + private class RootCauseLocalizationData + { + [RootCauseLocalizationInputType] + public RootCauseLocalizationInput Input { get; set; } + + public RootCauseLocalizationData() + { + Input = null; + } + + public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, DTRootCauseLocalizationEstimator.AggregateType aggregateteType, string aggregateSymbol) + { + Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, aggregateteType, aggregateSymbol); + } + } + + private class RootCauseLocalizationTransformedData + { + [RootCauseType()] + public RootCause RootCause { get; set; } + + public RootCauseLocalizationTransformedData() + { + RootCause = null; + } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj index 812114e7a5..d0f3e2dff5 100644 
--- a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj +++ b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj @@ -1,7 +1,8 @@  - netcoreapp2.1 + + netcoreapp3.0 Exe false diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index 4c46399421..a3da984b16 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -13,16 +13,20 @@ internal static void RunAll() int samples = 0; foreach (var type in Assembly.GetExecutingAssembly().GetTypes()) { - var sample = type.GetMethod("Example", BindingFlags.Public | BindingFlags.Static | BindingFlags.FlattenHierarchy); - - if (sample != null) + if (type.Name.Equals("LocalizeRootCauseEvaluation")) { - Console.WriteLine(type.Name); - sample.Invoke(null, null); - samples++; + var sample = type.GetMethod("Example", BindingFlags.Public | BindingFlags.Static | BindingFlags.FlattenHierarchy); + + if (sample != null) + { + Console.WriteLine(type.Name); + sample.Invoke(null, null); + samples++; + } } } + Console.WriteLine("Number of samples that ran without any exception: " + samples); } } diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs index de148a1189..05d116a2dc 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs @@ -430,16 +430,17 @@ private static void GetRootCauseList(RootCauseLocalizationInput src, ref RootCau if (anomalyTree.ParentNode == null) { return; } - + List rootCauses = new List(); // no point under anomaly dimension if (totalPoints.Count == 0) { if (anomalyTree.Leaves.Count != 0) { throw new Exception("point leaves not match with anomaly leaves"); } + + rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree, src.AnomalyDimensions)); } else { - List rootCauses = new List(); double totalEntropy = 1; 
if (anomalyTree.Leaves.Count > 0) { @@ -465,7 +466,6 @@ private static void GetRootCauseList(RootCauseLocalizationInput src, ref RootCau else { rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree,src.AnomalyDimensions)); - } } else { diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs index cf52b48cdc..a648da7ce7 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs @@ -118,11 +118,9 @@ public static List LocalizeRootCauseByAnomaly(List totalPo } public static List LocalizeRootCauseByDimension(List totalPoints, PointTree anomalyTree, PointTree pointTree, double totoalEntropy, Dictionary anomalyDimension) { - var set = anomalyTree.ChildrenNodes.Keys; BestDimension best = null; - if (anomalyTree.Leaves.Count > 0) { best =SelectBestDimension(totalPoints, anomalyTree.Leaves, set.ToList(), totoalEntropy); @@ -132,6 +130,10 @@ public static List LocalizeRootCauseByDimension(List tota best = SelectBestDimension(pointTree.ChildrenNodes, anomalyTree.ChildrenNodes, set.ToList(), totoalEntropy); } + if (best == null) { + return new List() { new RootCauseItem(anomalyDimension) }; + } + if (IsLargeEntropyGain(totoalEntropy, best.Entropy) || best.AnomalyDis.Count == 1) { List children = GetTopAnomaly(anomalyTree.ChildrenNodes[best.DimensionKey], anomalyTree.ParentNode, totalPoints, best.DimensionKey); @@ -465,7 +467,7 @@ public static double Log2(double val) { return Math.Log(val) / Math.Log(2); } - private static bool ContainsAll(Dictionary bigDic, Dictionary smallDic) { + public static bool ContainsAll(Dictionary bigDic, Dictionary smallDic) { foreach (var item in smallDic) { if (!bigDic.ContainsKey(item.Key)) { return false; @@ -567,7 +569,7 @@ public sealed class AnomalyCause { public AnomalyCause() {} } - public sealed class RootCauseItem + public sealed class 
RootCauseItem: IEquatable { public double Score; public string Path; @@ -584,6 +586,18 @@ public RootCauseItem(Dictionary rootCause, string path) RootCause = rootCause; Path= path; } + public bool Equals(RootCauseItem other) + { + if (RootCause.Count == other.RootCause.Count) { + foreach (KeyValuePair item in RootCause) { + if (!other.RootCause[item.Key].Equals(item.Value)) { + return false; + } + } + return true; + } + return false; + } } public enum AnomalyDirection diff --git a/test/Microsoft.ML.TimeSeries.Tests/Microsoft.ML.TimeSeries.Tests.csproj b/test/Microsoft.ML.TimeSeries.Tests/Microsoft.ML.TimeSeries.Tests.csproj index cb03caf794..d604ec0a9e 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/Microsoft.ML.TimeSeries.Tests.csproj +++ b/test/Microsoft.ML.TimeSeries.Tests/Microsoft.ML.TimeSeries.Tests.csproj @@ -1,7 +1,6 @@  CORECLR - netcoreapp3.0 @@ -15,9 +14,6 @@ - - - PreserveNewest diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 1f6cf8436e..6394dce155 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -4,7 +4,6 @@ using System; using System.Collections.Generic; -using System.Data; using System.IO; using Microsoft.ML.Data; using Microsoft.ML.TestFramework; @@ -12,9 +11,6 @@ using Microsoft.ML.Transforms.TimeSeries; using Xunit; using Xunit.Abstractions; -using Microsoft.VisualBasic.CompilerServices; - -using Microsoft.VisualBasic.FileIO; namespace Microsoft.ML.Tests { @@ -477,7 +473,7 @@ public void SsaForecastPredictionEngine() // The forecasted results should be the same because the state of the models // is the same. 
Assert.Equal(result.Forecast, resultCopy.Forecast); - + } [Fact] @@ -525,7 +521,7 @@ private class RootCauseLocalizationData { [RootCauseLocalizationInputType] public RootCauseLocalizationInput Input { get; set; } - + public RootCauseLocalizationData() { Input = null; @@ -539,7 +535,7 @@ public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary() { new RootCauseLocalizationData(new DateTime(),new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM"), new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Avg, "AVG") }; + var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM"), new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Avg, "AVG") }; var ml = new MLContext(1); // Convert the list of root cause data to an IDataView object, which is consumable by ML.NET API. 
@@ -578,148 +574,11 @@ public void RootCauseLocalizationWithDT() } var engine = ml.Model.CreatePredictionEngine(model); - var newRootCauseInput = new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() {new MetricSlice(new DateTime(), new List())}, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM"); + var newRootCauseInput = new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM"); var transformedRootCause = engine.Predict(newRootCauseInput); Assert.NotNull(transformedRootCause); //todo - will add more tests here when onboarding mock data } - - [Fact] - public void RootCauseLocalizationWithCSV() - { - string ocsdatacenter = "OCSDatacenter"; - string appType = "AppType"; - string releaseAudienceGroup = "Release_AudienceGroup"; - string wacDatacenter = "WACDatacenter"; - string requestType = "RequestType"; - string statusCode = "StatusCode"; - - var anomalyRootData = GetDataTabletFromCSVFile("C:/excel/anomaly_root.csv"); - - var inputData = GetDataTabletFromCSVFile("C:/excel/input/1563224400.csv"); - - Dictionary> rootNodeMap = new Dictionary>(); - foreach (DataRow row in anomalyRootData.Rows) - { - // load the data, build the RootCauseInput, take care of empty value - long seconds = long.Parse(row["TimeStamp"].ToString()); - DateTime t = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddSeconds(seconds); - - Dictionary dimension = new Dictionary(); - dimension.Add(ocsdatacenter, row[ocsdatacenter].ToString()); - dimension.Add(appType, row[appType].ToString()); - dimension.Add(releaseAudienceGroup, row[releaseAudienceGroup].ToString()); - dimension.Add(wacDatacenter, row[wacDatacenter].ToString()); - dimension.Add(statusCode, row[statusCode].ToString()); - dimension.Add(requestType, row[requestType].ToString()); - - rootNodeMap.Add(t, dimension); - } - Console.WriteLine(rootNodeMap); - - - - - DateTime timeStamp 
= new DateTime(); - - List points = new List(); - foreach (DataRow row in inputData.Rows) { - // load the data, build the RootCauseInput, take care of empty value - long seconds = long.Parse(row["TimeStamp"].ToString()); - timeStamp = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddSeconds(seconds); - double value = Double.Parse(row["Value"].ToString()); - double expectedValue = 0; - if (!row["ExpectedValue"].ToString().Equals("")) { - expectedValue = Double.Parse(row["ExpectedValue"].ToString()); - } - bool isAnomaly = Boolean.Parse(row["IsAnomaly"].ToString()); - Dictionary dimension = new Dictionary(); - dimension.Add(ocsdatacenter, row[ocsdatacenter].ToString()); - dimension.Add(appType, row[appType].ToString()); - dimension.Add(releaseAudienceGroup, row[releaseAudienceGroup].ToString()); - dimension.Add(wacDatacenter, row[wacDatacenter].ToString()); - dimension.Add(statusCode, row[statusCode].ToString()); - dimension.Add(requestType, row[requestType].ToString()); - - points.Add(new Microsoft.ML.TimeSeries.Point(value, expectedValue, isAnomaly, dimension)); ; - } - - List slices = new List(); - slices.Add(new MetricSlice(timeStamp, points)); - - //// Create an root cause localizatiom input list from csv. - var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM")}; - - - var ml = new MLContext(1); - // Convert the list of root cause data to an IDataView object, which is consumable by ML.NET API. - var data = ml.Data.LoadFromEnumerable(rootCauseLocalizationData); - - // Create pipeline to localize root cause by decision tree. - var pipeline = ml.Transforms.LocalizeRootCauseByDT(nameof(RootCauseLocalizationTransformedData.RootCause), nameof(RootCauseLocalizationData.Input)); - - // Fit the model. 
- var model = pipeline.Fit(data); - - // Test path: input list -> IDataView -> Enumerable of RootCauseLocalizationInputs. - var transformedData = model.Transform(data); - - // Load input list in DataView back to Enumerable. - var transformedDataPoints = ml.Data.CreateEnumerable(transformedData, false); - - foreach (var dataPoint in transformedDataPoints) - { - var rootCause = dataPoint.RootCause; - - Assert.NotNull(rootCause); - } - - var engine = ml.Model.CreatePredictionEngine(model); - - var newRootCauseInput = new RootCauseLocalizationData(timeStamp, rootNodeMap[timeStamp], new List() { new MetricSlice(timeStamp, points) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "##EMPTY##awqegp##"); - var transformedRootCause = engine.Predict(newRootCauseInput); - - Assert.NotNull(transformedRootCause); - ////todo - will add more tests here when onboarding mock data - - } - - - private static DataTable GetDataTabletFromCSVFile(string filePath) - { - DataTable csvData = new DataTable(); - - - using (TextFieldParser csvReader = new TextFieldParser(filePath)) - { - csvReader.SetDelimiters(new string[] { "," }); - csvReader.HasFieldsEnclosedInQuotes = true; - string[] colFields = csvReader.ReadFields(); - foreach (string column in colFields) - { - DataColumn datecolumn = new DataColumn(column); - datecolumn.AllowDBNull = true; - csvData.Columns.Add(datecolumn); - } - - while (!csvReader.EndOfData) - { - string[] fieldData = csvReader.ReadFields(); - //Making empty value as null - for (int i = 0; i < fieldData.Length; i++) - { - if (fieldData[i] == "") - { - fieldData[i] = null; - } - } - csvData.Rows.Add(fieldData); - } - } - - return csvData; - } - } } From f2e128dbb9f04dece577769c0c13f5ce41bea024 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Fri, 20 Mar 2020 11:08:15 +0800 Subject: [PATCH 05/49] temp save for internal review --- .../TimeSeries/LocalizeRootCauseEvaluation.cs | 13 +- .../DTRootCauseLocalization.cs | 322 ++++++++++-------- 
.../DTRootCauseLocalizationUtils.cs | 10 +- 3 files changed, 188 insertions(+), 157 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs index e2e0e5c985..f4c8f9f7be 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs @@ -71,9 +71,10 @@ private static Tuple ScoreRootCause(List ScoreRootCause(List> nCause = GetNegtiveCause(labeledRootCause, labelSet); - //todo seralize if (nCause.Count > 0) { Console.WriteLine(String.Format("FN : timestamp - {0}", timeStamp)); foreach (Dictionary cause in nCause) { - Console.WriteLine(string.Join(Environment.NewLine, nCause)); + Console.WriteLine(string.Join(Environment.NewLine, cause)); + Console.WriteLine("---------------------"); } + } } @@ -200,11 +202,6 @@ private static PredictionEngine(transformedData, false); - foreach (var dataPoint in transformedDataPoints) - { - var rootCause = dataPoint.RootCause; - } - var engine = ml.Model.CreatePredictionEngine(model); return engine; } diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs index 05d116a2dc..3667d1805a 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs @@ -107,11 +107,12 @@ public sealed class RootCauseLocalizationInput public List Slices { get; set; } - public DTRootCauseLocalizationEstimator.AggregateType AggType{ get; set; } + public DTRootCauseLocalizationEstimator.AggregateType AggType { get; set; } public string AggSymbol { get; set; } - public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, DTRootCauseLocalizationEstimator.AggregateType aggregateType, 
string aggregateSymbol) { + public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, DTRootCauseLocalizationEstimator.AggregateType aggregateType, string aggregateSymbol) + { AnomalyTimestamp = anomalyTimestamp; AnomalyDimensions = anomalyDimensions; Slices = slices; @@ -130,7 +131,8 @@ public sealed class MetricSlice public DateTime TimeStamp { get; set; } public List Points { get; set; } - public MetricSlice(DateTime timeStamp, List points) { + public MetricSlice(DateTime timeStamp, List points) + { TimeStamp = timeStamp; Points = points; } @@ -254,10 +256,10 @@ private static VersionInfo GetVersionInfo() /// Weight for generating score. /// The name of the columns (first item of the tuple), and the name of the resulting output column (second item of the tuple). - internal DTRootCauseLocalizationTransformer(IHostEnvironment env,double beta = DTRootCauseLocalizationEstimator.Defaults.Beta, params (string outputColumnName, string inputColumnName)[] columns) + internal DTRootCauseLocalizationTransformer(IHostEnvironment env, double beta = DTRootCauseLocalizationEstimator.Defaults.Beta, params (string outputColumnName, string inputColumnName)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), columns) { - Host.CheckUserArg(beta >=0 && beta <= 1, nameof(Options.Beta), "Must be in [0,1]"); + Host.CheckUserArg(beta >= 0 && beta <= 1, nameof(Options.Beta), "Must be in [0,1]"); _beta = beta; } @@ -270,7 +272,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); - return new DTRootCauseLocalizationTransformer(env,options.Beta, options.Columns.Select(x => (x.Name, x.Source ?? x.Name)).ToArray()) + return new DTRootCauseLocalizationTransformer(env, options.Beta, options.Columns.Select(x => (x.Name, x.Source ?? 
x.Name)).ToArray()) .MakeDataTransform(input); } @@ -319,7 +321,7 @@ private protected override void CheckInputColumn(DataViewSchema inputSchema, int private sealed class Mapper : OneToOneMapperBase { - private DTRootCauseLocalizationTransformer _parent; + private readonly DTRootCauseLocalizationTransformer _parent; public Mapper(DTRootCauseLocalizationTransformer parent, DataViewSchema inputSchema) : base(parent.Host.Register(nameof(Mapper)), parent, inputSchema) @@ -375,201 +377,227 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func { }; + private void LocalizeRootCauses(RootCauseLocalizationInput src, ref RootCause dst) + { + dst = new RootCause(); + dst.Items = new List { }; - //todo - get total points - GetRootCauseList(src, ref dst); - //todo - need to update, temp for test - GetRootCauseScore(new List(), src.AnomalyDimensions, ref dst); - } + DimensionInfo dimensionInfo = DTRootCauseLocalizationUtils.SeperateDimension(src.AnomalyDimensions, src.AggSymbol); + if (dimensionInfo.AggDim.Count == 0) + { + return; + } + Dictionary subDim = DTRootCauseLocalizationUtils.GetsubDim(src.AnomalyDimensions, dimensionInfo.DetailDim); + + List totalPoints = GetTotalPointsForAnomalyTimestamp(src, subDim); - private static void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst) { - DimensionInfo dimensionInfo = DTRootCauseLocalizationUtils.SeperateDimension(src.AnomalyDimensions, src.AggSymbol); - if (dimensionInfo.AggDim.Count == 0) { - return; + GetRootCauseList(src, ref dst, dimensionInfo, totalPoints, subDim); + + GetRootCauseScore(totalPoints, src.AnomalyDimensions, ref dst); } - List points = new List(); - foreach (MetricSlice slice in src.Slices) + private List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src, Dictionary subDim) { - if (slice.TimeStamp.Equals(src.AnomalyTimestamp)) + List points = new List(); + foreach (MetricSlice slice in src.Slices) { - points = slice.Points; + if 
(slice.TimeStamp.Equals(src.AnomalyTimestamp)) + { + points = slice.Points; + } } - } - - Dictionary subDim = DTRootCauseLocalizationUtils.GetsubDim(src.AnomalyDimensions, dimensionInfo.DetailDim); - - List totalPoints = DTRootCauseLocalizationUtils.SelectPoints(points, subDim); - PointTree pointTree = DTRootCauseLocalizationUtils.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol); - PointTree anomalyTree = DTRootCauseLocalizationUtils.BuildPointTree(totalPoints, dimensionInfo.AggDim,subDim,src.AggSymbol, true); + List totalPoints = DTRootCauseLocalizationUtils.SelectPoints(points, subDim); - // which means there is no all up here, we would return empty list; in ML.net , should we do the same thing? todo - if (anomalyTree.ParentNode == null) { - return; + return totalPoints; } - List rootCauses = new List(); - // no point under anomaly dimension - if (totalPoints.Count == 0) { - if (anomalyTree.Leaves.Count != 0) { - throw new Exception("point leaves not match with anomaly leaves"); - } - rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree, src.AnomalyDimensions)); - } - else + private void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, DimensionInfo dimensionInfo, List totalPoints, Dictionary subDim) { - double totalEntropy = 1; - if (anomalyTree.Leaves.Count > 0) + List points = new List(); + foreach (MetricSlice slice in src.Slices) { - totalEntropy= DTRootCauseLocalizationUtils.GetEntropy(totalPoints.Count, anomalyTree.Leaves.Count); + if (slice.TimeStamp.Equals(src.AnomalyTimestamp)) + { + points = slice.Points; + } } - if (totalEntropy > 0.9) + PointTree pointTree = DTRootCauseLocalizationUtils.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol); + PointTree anomalyTree = DTRootCauseLocalizationUtils.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, true); + + // which means there is no all up here, we would return empty 
list; in ML.net , should we do the same thing? todo + if (anomalyTree.ParentNode == null) { - if (dimensionInfo.AggDim.Count == 1) { - //root cause is itself; - rootCauses.Add(new RootCauseItem(src.AnomalyDimensions)); - } - else { - rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByDimension(totalPoints, anomalyTree,pointTree, totalEntropy,src.AnomalyDimensions)); + return; + } + List rootCauses = new List(); + // no point under anomaly dimension + if (totalPoints.Count == 0) + { + if (anomalyTree.Leaves.Count != 0) + { + throw new Exception("point leaves not match with anomaly leaves"); } + + rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree, src.AnomalyDimensions)); } - else if (totalEntropy < 0.5) + else { - if (DTRootCauseLocalizationUtils.IsAnomalous(totalPoints.Count, anomalyTree.Leaves.Count)) { - //root cause is itself; - dst.Items.Add(new RootCauseItem(src.AnomalyDimensions)); + double totalEntropy = 1; + if (anomalyTree.Leaves.Count > 0) + { + totalEntropy = DTRootCauseLocalizationUtils.GetEntropy(totalPoints.Count, anomalyTree.Leaves.Count); } - else + + if (totalEntropy > 0.9) { - rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree,src.AnomalyDimensions)); + if (dimensionInfo.AggDim.Count == 1) + { + //root cause is itself; + rootCauses.Add(new RootCauseItem(src.AnomalyDimensions)); + } + else + { + rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByDimension(totalPoints, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); + } } - } - else { - if (dimensionInfo.AggDim.Count == 1) { - //because we have known the dimension, so by anomaly - rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree, src.AnomalyDimensions)); + else if (totalEntropy < 0.5) + { + if (DTRootCauseLocalizationUtils.IsAnomalous(totalPoints.Count, anomalyTree.Leaves.Count)) + { + //root cause is 
itself; + dst.Items.Add(new RootCauseItem(src.AnomalyDimensions)); + } + else + { + rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree, src.AnomalyDimensions)); + } } - else { - rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByDimension(totalPoints,anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); + else + { + if (dimensionInfo.AggDim.Count == 1) + { + //because we have known the dimension, so by anomaly + rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree, src.AnomalyDimensions)); + } + else + { + rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByDimension(totalPoints, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); + } } - } - dst.Items = rootCauses; + dst.Items = rootCauses; + } } - } - private static void GetRootCauseScore(List points, Dictionary anomalyRoot, ref RootCause dst) - { - - if (dst.Items.Count > 1) + private void GetRootCauseScore(List points, Dictionary anomalyRoot, ref RootCause dst) { - //get surprise value and explanary power value - Point anomalyPoint = DTRootCauseLocalizationUtils.FindPointByDimension(anomalyRoot, points); - - double sumSurprise = 0; - double sumEp = 0; - List scoreList = new List(); - foreach (RootCauseItem item in dst.Items) + if (dst.Items.Count > 1) { - Point rootCausePoint = DTRootCauseLocalizationUtils.FindPointByDimension(item.RootCause, points); - if (rootCausePoint != null) + //get surprise value and explanary power value + Point anomalyPoint = DTRootCauseLocalizationUtils.FindPointByDimension(anomalyRoot, points); + + double sumSurprise = 0; + double sumEp = 0; + List scoreList = new List(); + + foreach (RootCauseItem item in dst.Items) { - if (rootCausePoint.ExpectedValue < rootCausePoint.Value) + Point rootCausePoint = DTRootCauseLocalizationUtils.FindPointByDimension(item.RootCause, points); + if (rootCausePoint != null) { - item.Direction = 
AnomalyDirection.Up; + if (rootCausePoint.ExpectedValue < rootCausePoint.Value) + { + item.Direction = AnomalyDirection.Up; + } + else + { + item.Direction = AnomalyDirection.Down; + } } - else + + if (anomalyPoint != null && rootCausePoint != null) { - item.Direction = AnomalyDirection.Down; + double surprise = GetSurpriseScore(rootCausePoint, anomalyPoint); + + double ep = (rootCausePoint.Value - rootCausePoint.ExpectedValue) / (anomalyPoint.Value - anomalyPoint.ExpectedValue); + + scoreList.Add(new RootCauseScore(surprise, ep)); + sumSurprise += surprise; + sumEp += Math.Abs(ep); } } - if (anomalyPoint != null && rootCausePoint != null) + //normalize and get final score + for (int i = 0; i < scoreList.Count; i++) { - double surprise = GetSurpriseScore(rootCausePoint, anomalyPoint); - - double ep = (rootCausePoint.Value - rootCausePoint.ExpectedValue) / (anomalyPoint.Value - anomalyPoint.ExpectedValue); + dst.Items[i].Score = GetFinalScore(scoreList[i].Surprise / sumSurprise, Math.Abs(scoreList[i].ExplainaryScore) / sumEp); - scoreList.Add(new RootCauseScore(surprise, ep)); - sumSurprise += surprise; - sumEp += Math.Abs(ep); } } - - //normalize and get final score - for (int i = 0; i < scoreList.Count; i++) - { - dst.Items[i].Score = GetFinalScore(scoreList[i].Surprise / sumSurprise, Math.Abs(scoreList[i].ExplainaryScore) / sumEp); - - } - } - else if (dst.Items.Count == 1) - { - //surprise and expananory , max is 1 - Point rootCausePoint = DTRootCauseLocalizationUtils.FindPointByDimension(dst.Items[0].RootCause, points); - if (rootCausePoint != null) + else if (dst.Items.Count == 1) { - if (rootCausePoint.ExpectedValue < rootCausePoint.Value) + //surprise and expananory , max is 1 + Point rootCausePoint = DTRootCauseLocalizationUtils.FindPointByDimension(dst.Items[0].RootCause, points); + if (rootCausePoint != null) { - dst.Items[0].Direction = AnomalyDirection.Up; + if (rootCausePoint.ExpectedValue < rootCausePoint.Value) + { + dst.Items[0].Direction = 
AnomalyDirection.Up; + } + else + { + dst.Items[0].Direction = AnomalyDirection.Down; + } } - else + + Point anomalyPoint = DTRootCauseLocalizationUtils.FindPointByDimension(anomalyRoot, points); + if (anomalyPoint != null && rootCausePoint != null) { - dst.Items[0].Direction = AnomalyDirection.Down; + double surprise = GetSurpriseScore(rootCausePoint, anomalyPoint); + double ep = (rootCausePoint.Value - rootCausePoint.ExpectedValue) / (anomalyPoint.Value - anomalyPoint.ExpectedValue); + dst.Items[0].Score = GetFinalScore(surprise, ep); } } - - Point anomalyPoint = DTRootCauseLocalizationUtils.FindPointByDimension(anomalyRoot, points); - if (anomalyPoint != null && rootCausePoint != null) - { - double surprise = GetSurpriseScore(rootCausePoint, anomalyPoint); - - double ep = (rootCausePoint.Value - rootCausePoint.ExpectedValue) / (anomalyPoint.Value - anomalyPoint.ExpectedValue); - dst.Items[0].Score = GetFinalScore(surprise, ep); - } - } - } - - private static double GetSurpriseScore(Point rootCausePoint, Point anomalyPoint) - { - double p = rootCausePoint.ExpectedValue / anomalyPoint.ExpectedValue; - double q = rootCausePoint.Value / anomalyPoint.Value; - double surprise = 0.5 * (p * DTRootCauseLocalizationUtils.Log2(2 * p / (p + q)) + q * DTRootCauseLocalizationUtils.Log2(2 * q / (p + q))); + private double GetSurpriseScore(Point rootCausePoint, Point anomalyPoint) + { + double p = rootCausePoint.ExpectedValue / anomalyPoint.ExpectedValue; + double q = rootCausePoint.Value / anomalyPoint.Value; + double surprise = 0.5 * (p * DTRootCauseLocalizationUtils.Log2(2 * p / (p + q)) + q * DTRootCauseLocalizationUtils.Log2(2 * q / (p + q))); - return surprise; - } + return surprise; + } - private static double GetFinalScore(double surprise, double ep) - { - //return Math.Max(1, Parent.Beta * surprise + (1 - Parent.Beta) * ep); - return 0; + private double GetFinalScore(double surprise, double ep) + { + return Math.Max(1, _parent._beta * surprise + (1 - _parent._beta) * 
ep); + } } } @@ -627,8 +655,8 @@ public enum AggregateType /// The name of the columns (first item of the tuple), and the name of the resulting output column (second item of the tuple). /// The weight for generating score in output result. [BestFriend] - internal DTRootCauseLocalizationEstimator(IHostEnvironment env, double beta = Defaults.Beta,params(string outputColumnName, string inputColumnName)[] columns) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(DTRootCauseLocalizationEstimator)), new DTRootCauseLocalizationTransformer(env, beta,columns)) + internal DTRootCauseLocalizationEstimator(IHostEnvironment env, double beta = Defaults.Beta, params (string outputColumnName, string inputColumnName)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(DTRootCauseLocalizationEstimator)), new DTRootCauseLocalizationTransformer(env, beta, columns)) { } diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs index a648da7ce7..61aa2fe04e 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs @@ -372,8 +372,14 @@ public static Point FindPointByDimension(Dictionary dim, List item in p.Dimensions) { + if (!dim[item.Key].Equals(item.Value)) { + isEqual = false; + } + } + + if (isEqual) { return p; } } From 51569e3059b66ea8323eb36b590542fb2f5097e0 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Fri, 20 Mar 2020 11:13:33 +0800 Subject: [PATCH 06/49] rename function --- src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs index 3667d1805a..867d0812d1 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs @@ -370,7 +370,7 @@ protected 
override Delegate MakeGetter(DataViewRow input, int iinfo, Func Date: Sun, 22 Mar 2020 21:08:56 +0800 Subject: [PATCH 07/49] temp save bottom up points for switch desktop --- .../TimeSeries/LocalizeRootCauseEvaluation.cs | 48 +- .../DTRootCauseLocalization.cs | 87 ++-- .../DTRootCauseLocalizationUtils.cs | 419 ++++++++++++++---- 3 files changed, 394 insertions(+), 160 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs index f4c8f9f7be..0a756a2ef5 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs @@ -29,28 +29,33 @@ public static void Example() { DateTime timeStamp = item.Key; - int seconds = Convert.ToInt32(timeStamp.Subtract(new DateTime(1970, 1, 1, 0, 0, 0, 0)).TotalSeconds); - string path = String.Format("D:/rootcause/Dataset_yaniv/raw_data_201908_202002/{0}.csv", seconds); - List points = GetPoints(path); - List slices = new List(); - slices.Add(new MetricSlice(timeStamp, points)); + DateTime filterTime = DateTime.ParseExact("2019-11-13 13:00:00,000", "yyyy-MM-dd HH:mm:ss,fff", + System.Globalization.CultureInfo.InvariantCulture); + if (timeStamp.CompareTo(filterTime).Equals(0)) + { + int seconds = Convert.ToInt32(timeStamp.Subtract(new DateTime(1970, 1, 1, 0, 0, 0, 0)).TotalSeconds); + string path = String.Format("D:/rootcause/Dataset_yaniv/raw_data_201908_202002/{0}.csv", seconds); + List points = GetPoints(path); + List slices = new List(); + slices.Add(new MetricSlice(timeStamp, points)); - PredictionEngine engine = GetRootCausePredictionEngine(); + PredictionEngine engine = GetRootCausePredictionEngine(); - var newRootCauseInput = new RootCauseLocalizationData(timeStamp, rootNodeMap[timeStamp], new List() { new MetricSlice(timeStamp, 
points) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, aggSymbol); + var newRootCauseInput = new RootCauseLocalizationData(timeStamp, rootNodeMap[timeStamp], new List() { new MetricSlice(timeStamp, points) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, aggSymbol); - List list = new List(); - GetRootCause(list, newRootCauseInput, engine); + List list = new List(); + GetRootCause(list, newRootCauseInput, engine); - List> labeledRootCause = labeledRootCauseMap[timeStamp]; - List> detectedRootCause = ConvertRootCauseItemToDic(list); - RemoveAggSymbol(detectedRootCause, aggSymbol); + List> labeledRootCause = labeledRootCauseMap[timeStamp]; + List> detectedRootCause = ConvertRootCauseItemToDic(list); + RemoveAggSymbol(detectedRootCause, aggSymbol); - Tuple evaluation = ScoreRootCause(detectedRootCause, labeledRootCause, exactly, timeStamp); - totalTp += evaluation.Item1; - totalFp += evaluation.Item2; - totalFn += evaluation.Item3; - totalCount++; + Tuple evaluation = ScoreRootCause(detectedRootCause, labeledRootCause, exactly, timeStamp); + totalTp += evaluation.Item1; + totalFp += evaluation.Item2; + totalFn += evaluation.Item3; + totalCount++; + } } double precision = (double)totalTp / (totalTp + totalFp); @@ -64,7 +69,7 @@ private static Tuple ScoreRootCause(List labelSet = new List(); foreach (Dictionary cause in detectedRootCause) { @@ -86,10 +91,10 @@ private static Tuple ScoreRootCause(List> nCause = GetNegtiveCause(labeledRootCause, labelSet); + List> nCause = GetFNegtiveCause(labeledRootCause, labelSet); if (nCause.Count > 0) { - Console.WriteLine(String.Format("FN : timestamp - {0}", timeStamp)); + Console.WriteLine(String.Format("FN : timestamp - {0}, labeled root cause", timeStamp)); foreach (Dictionary cause in nCause) { Console.WriteLine(string.Join(Environment.NewLine, cause)); @@ -102,7 +107,7 @@ private static Tuple ScoreRootCause(List(tp, fp, fn); } - private static List> GetNegtiveCause(List> labelCauses, List labelSet) + private 
static List> GetFNegtiveCause(List> labelCauses, List labelSet) { List> causeList = new List>(); foreach (Dictionary cause in labelCauses) @@ -141,7 +146,6 @@ private static string GetDicHashCode(Dictionary dic) private static int CompareCause(Dictionary detect, Dictionary label) { - if (detect.Equals(label)) { return 0; diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs index 867d0812d1..5f961b888b 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs @@ -107,11 +107,11 @@ public sealed class RootCauseLocalizationInput public List Slices { get; set; } - public DTRootCauseLocalizationEstimator.AggregateType AggType { get; set; } + public AggregateType AggType { get; set; } public string AggSymbol { get; set; } - public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, DTRootCauseLocalizationEstimator.AggregateType aggregateType, string aggregateSymbol) + public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateType, string aggregateSymbol) { AnomalyTimestamp = anomalyTimestamp; AnomalyDimensions = anomalyDimensions; @@ -436,17 +436,8 @@ private List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput private void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, DimensionInfo dimensionInfo, List totalPoints, Dictionary subDim) { - List points = new List(); - foreach (MetricSlice slice in src.Slices) - { - if (slice.TimeStamp.Equals(src.AnomalyTimestamp)) - { - points = slice.Points; - } - } - - PointTree pointTree = DTRootCauseLocalizationUtils.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol); - PointTree anomalyTree = DTRootCauseLocalizationUtils.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, true); + PointTree pointTree = 
DTRootCauseLocalizationUtils.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType); + PointTree anomalyTree = DTRootCauseLocalizationUtils.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType, true); // which means there is no all up here, we would return empty list; in ML.net , should we do the same thing? todo if (anomalyTree.ParentNode == null) @@ -469,7 +460,8 @@ private void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, double totalEntropy = 1; if (anomalyTree.Leaves.Count > 0) { - totalEntropy = DTRootCauseLocalizationUtils.GetEntropy(totalPoints.Count, anomalyTree.Leaves.Count); + // update from total points to pointTree.Leaves.Count + totalEntropy = DTRootCauseLocalizationUtils.GetEntropy(pointTree.Leaves.Count, anomalyTree.Leaves.Count); } if (totalEntropy > 0.9) @@ -481,31 +473,38 @@ private void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, } else { - rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByDimension(totalPoints, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); - } - } - else if (totalEntropy < 0.5) - { - if (DTRootCauseLocalizationUtils.IsAnomalous(totalPoints.Count, anomalyTree.Leaves.Count)) - { - //root cause is itself; - dst.Items.Add(new RootCauseItem(src.AnomalyDimensions)); - } - else - { - rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree, src.AnomalyDimensions)); + // update from total points to pointTree.Leaves + rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); } } + //comment here, as LocalizeRootCauseByAnomaly is not right, for example, data 2019-08-08T19:30:00Z seconds - 1566383400 + //else if (totalEntropy < 0.5) + //{ + // // todo- need improve, if anomalytree.leave.count == 0, need to filter by all up level + // //if 
(DTRootCauseLocalizationUtils.IsAnomalous(pointTree.Leaves.Count, anomalyTree.Leaves.Count)) + // //{ + // // //root cause is itself; + // // dst.Items.Add(new RootCauseItem(src.AnomalyDimensions)); + // //} + // //else + // //{ + // rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree, src.AnomalyDimensions)); + // //} + //} else { - if (dimensionInfo.AggDim.Count == 1) + // remove this part, has no different to the result + //if (dimensionInfo.AggDim.Count == 1) + //{ + // //update totalPoints to .leaves + // //because we have known the dimension, so by anomaly + // //rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree, src.AnomalyDimensions)); + // rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(pointTree.Leaves, anomalyTree, src.AnomalyDimensions)); + //} + //else { - //because we have known the dimension, so by anomaly - rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree, src.AnomalyDimensions)); - } - else - { - rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByDimension(totalPoints, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); + //update totalPoints to .leaves + rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); } } @@ -628,26 +627,6 @@ internal static class Defaults public const double Beta = 0.5; } - public enum AggregateType - { - /// - /// Make the aggregate type as sum. - /// - Sum = 0, - /// - /// Make the aggregate type as average. - /// - Avg = 1, - /// - /// Make the aggregate type as min. - /// - Min = 2, - /// - /// Make the aggregate type as max. - /// - Max = 3 - } - /// /// Localize root cause. 
/// diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs index 61aa2fe04e..9c48818c01 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs @@ -1,20 +1,24 @@ using System; using System.Collections.Generic; using System.Linq; +using Microsoft.ML.Transforms; namespace Microsoft.ML.TimeSeries { public class DTRootCauseLocalizationUtils { - public static DimensionInfo SeperateDimension(Dictionary dimensions, string aggSymbol) { + public static DimensionInfo SeperateDimension(Dictionary dimensions, string aggSymbol) + { DimensionInfo info = DimensionInfo.CreateDefaultInstance(); - foreach (KeyValuePair entry in dimensions) { + foreach (KeyValuePair entry in dimensions) + { string key = entry.Key; if (aggSymbol.Equals(entry.Value)) { info.AggDim.Add(key); } - else { + else + { info.DetailDim.Add(key); } } @@ -22,30 +26,38 @@ public static DimensionInfo SeperateDimension(Dictionary dimensi return info; } - public static PointTree BuildPointTree(List pointList, List aggDims, Dictionary subDim, string aggSymbol, bool filterByAnomaly = false) { + public static PointTree BuildPointTree(List pointList, List aggDims, Dictionary subDim, string aggSymbol,AggregateType aggType, bool filterByAnomaly = false) + { + //todo- would be better to add check and fix for child point and leaves point PointTree tree = PointTree.CreateDefaultInstance(); - foreach (Point point in pointList) { - bool isValidPoint =true; - if (filterByAnomaly) { + foreach (Point point in pointList) + { + bool isValidPoint = true; + if (filterByAnomaly) + { isValidPoint = point.IsAnomaly == true; } - if (ContainsAll(point.Dimensions, subDim) && isValidPoint) { + if (ContainsAll(point.Dimensions, subDim) && isValidPoint) + { if (aggDims.Count == 0) { tree.ParentNode = point; tree.Leaves.Add(point); } - else { + else + { int aggNum = 0; string nextDim 
= null; - foreach (string dim in aggDims) { + foreach (string dim in aggDims) + { if (IsAggregationDimension(point.Dimensions[dim], aggSymbol)) { aggNum++; } - else { + else + { nextDim = dim; } } @@ -54,14 +66,17 @@ public static PointTree BuildPointTree(List pointList, List aggDi { tree.ParentNode = point; } - else if (aggNum == aggDims.Count - 1) { - if (!tree.ChildrenNodes.ContainsKey(nextDim)) { + else if (aggNum == aggDims.Count - 1) + { + if (!tree.ChildrenNodes.ContainsKey(nextDim)) + { tree.ChildrenNodes.Add(nextDim, new List()); } tree.ChildrenNodes[nextDim].Add(point); } - if (aggNum == 0) { + if (aggNum == 0) + { tree.Leaves.Add(point); } @@ -69,78 +84,188 @@ public static PointTree BuildPointTree(List pointList, List aggDi } } } + + // rebuild the tree from bottom to up incase the child node data is incomplete + + return tree; + } + + private static PointTree CompleteTreeBottomUp(PointTree tree, AggregateType aggType, string aggSymbol) { + + if (tree.Leaves.Count == 0) return tree; + + Dictionary> map = new Dictionary>(); + foreach (Point p in tree.Leaves) { + foreach (KeyValuePair keyValuePair in p.Dimensions) { + if (map.ContainsKey(keyValuePair.Key)) + { + map[keyValuePair.Key].Add(keyValuePair.Value); + } + else { + map.Add(keyValuePair.Key, new HashSet() { keyValuePair.Value}); + } + } + } + + foreach (KeyValuePair> pair in map) { + if (tree.ChildrenNodes.ContainsKey(pair.Key)) + { + if (tree.ChildrenNodes[pair.Key].Count < pair.Value.Count) { + foreach(string value in pair.Value) + { + if (!IsAggDimensionExisted(pair.Key, value, tree.ChildrenNodes[pair.Key])) { + Point p = SimulateBottomUpValue(tree.Leaves, pair.Key, value, aggType, aggSymbol); + tree.ChildrenNodes[pair.Key].Add(p); + } + } + } + } + else { + List childPoints = new List(); + foreach (string value in pair.Value) { + //simulate the aggregation value + Point p = SimulateBottomUpValue(tree.Leaves, pair.Key, value, aggType, aggSymbol); + childPoints.Add(p); + } + + 
tree.ChildrenNodes.Add(pair.Key, childPoints); + } + } + return tree; } - public static Dictionary GetsubDim(Dictionary dimension, List keyList) { + private static bool IsAggDimensionExisted(string key, string value, List points) { + foreach (Point p in points) { + if (p.Dimensions[key].Equals(value)) { + return true; + } + } + return false; + } + + private static Point SimulateBottomUpValue(List leaves, string key, string keyValue, AggregateType type, string aggSymbol) { + Point p = null; + + Dictionary dimension = new Dictionary(); + + dimension.Add(key, keyValue); + + foreach (KeyValuePair pair in leaves[0].Dimensions) + { + if (!pair.Key.Equals(key)) + { + dimension.Add(pair.Key, aggSymbol); + } + } + + if (type.Equals(AggregateType.Sum)) { + + bool isAnomaly = false; + double value = 0; + double expectedValue = 0; + foreach (Point leave in leaves) { + + if (leave.Dimensions.ContainsKey(key) && leave.Dimensions[key].Equals(keyValue)) { + value += leave.Value; + expectedValue = leave.ExpectedValue; + isAnomaly = isAnomaly || leave.IsAnomaly; + } + } + + p = new Point(value, expectedValue, isAnomaly, dimension); + } + + return p; + } + + public static Dictionary GetsubDim(Dictionary dimension, List keyList) + { Dictionary subDim = new Dictionary(); - foreach (String dim in keyList) { + foreach (String dim in keyList) + { subDim.Add(dim, dimension[dim]); } return subDim; } - public static List SelectPoints(List points, Dictionary subDim) { + public static List SelectPoints(List points, Dictionary subDim) + { List list = new List(); - foreach (Point point in points) { - if (ContainsAll(point.Dimensions, subDim)) { - list.Add(point); + foreach (Point point in points) + { + if (ContainsAll(point.Dimensions, subDim)) + { + //needs to remove duplicate + if (!list.Contains(point)) + { + list.Add(point); + } } } return list; } - public static List LocalizeRootCauseByAnomaly(List totalPoints, PointTree anomalyTree, Dictionary anomalyDimension) { + public static List 
LocalizeRootCauseByAnomaly(List totalPoints, PointTree anomalyTree, Dictionary anomalyDimension) + { if (anomalyTree.ChildrenNodes.Count == 0) { //As there is no children node under such dimension, the root cause should be itself return new List() { new RootCauseItem(anomalyDimension) }; } - else { + else + { AnomalyCause cause = GetAnomalyCause(anomalyTree, totalPoints); if (cause == null) { //As the cause couldn't be found, the root cause should be itself return new List() { new RootCauseItem(anomalyDimension) }; } - else { + else + { List causes = new List(); // For the found causes, we return the result - foreach (Point anomaly in cause.Anomalies){ - causes.Add(new RootCauseItem(UpdateDimensionValue(anomalyDimension, cause.DimensionKey,anomaly.Dimensions[cause.DimensionKey]))) ; + foreach (Point anomaly in cause.Anomalies) + { + causes.Add(new RootCauseItem(UpdateDimensionValue(anomalyDimension, cause.DimensionKey, anomaly.Dimensions[cause.DimensionKey]))); } return causes; } } } - public static List LocalizeRootCauseByDimension(List totalPoints, PointTree anomalyTree, PointTree pointTree, double totoalEntropy, Dictionary anomalyDimension) { - var set = anomalyTree.ChildrenNodes.Keys; + public static List LocalizeRootCauseByDimension(List totalPoints, PointTree anomalyTree, PointTree pointTree, double totoalEntropy, Dictionary anomalyDimension) + { + var set = anomalyTree.ChildrenNodes.Keys; BestDimension best = null; if (anomalyTree.Leaves.Count > 0) { - best =SelectBestDimension(totalPoints, anomalyTree.Leaves, set.ToList(), totoalEntropy); + best = SelectBestDimension(totalPoints, anomalyTree.Leaves, set.ToList(), totoalEntropy); } - else { + else + { //has no leaves information, should calculate the entropy information according to the children nodes best = SelectBestDimension(pointTree.ChildrenNodes, anomalyTree.ChildrenNodes, set.ToList(), totoalEntropy); } - if (best == null) { + if (best == null) + { return new List() { new 
RootCauseItem(anomalyDimension) }; } - if (IsLargeEntropyGain(totoalEntropy, best.Entropy) || best.AnomalyDis.Count == 1) + // when total and best.entroy all equals 0, do we need to go deep down? would it be better if we remove the else logic, yes- remove the else logic will improve tp, reduce fn, and improve accuracy + //if (IsLargeEntropyGain(totoalEntropy, best.Entropy) || best.AnomalyDis.Count == 1) { + // need to improve here, whether can go deeper List children = GetTopAnomaly(anomalyTree.ChildrenNodes[best.DimensionKey], anomalyTree.ParentNode, totalPoints, best.DimensionKey); if (children == null) { //As the cause couldn't be found, the root cause should be itself - return new List() { new RootCauseItem(anomalyDimension) }; + return new List() { new RootCauseItem(anomalyDimension, best.DimensionKey) }; } else { @@ -153,25 +278,30 @@ public static List LocalizeRootCauseByDimension(List tota return causes; } } - else { - //As the entropy gain for this best dimension is small, the root cause should be itself - return new List() { new RootCauseItem(anomalyDimension) }; - } + //else + //{ + // //As the entropy gain for this best dimension is small, the root cause should be itself + // return new List() { new RootCauseItem(anomalyDimension) }; + //} } - public static double GetEntropy(int totalNum, int anomalyNum) { - double ratio =(double)anomalyNum / totalNum; - if (ratio == 0 || ratio == 1) { + public static double GetEntropy(int totalNum, int anomalyNum) + { + double ratio = (double)anomalyNum / totalNum; + if (ratio == 0 || ratio == 1) + { return 0; } return -(ratio * Log2(ratio) + (1 - ratio) * Log2(1 - ratio)); } - public static Dictionary GetEntropyList(BestDimension best, List points) { + public static Dictionary GetEntropyList(BestDimension best, List points) + { Dictionary list = new Dictionary(); // need to update, change to children if necessary - foreach(Point point in points) { + foreach (Point point in points) + { string dimVal = 
point.Dimensions[best.DimensionKey]; int pointSize = GetPointSize(best, dimVal); int anomalySize = GetAnomalyPointSize(best, dimVal); @@ -183,12 +313,12 @@ public static Dictionary GetEntropyList(BestDimension best, List return list; } - public static List GetTopAnomaly(List anomalyPoints, Point root, List totalPoints, String dimKey) + public static List GetTopAnomaly(List anomalyPoints, Point root, List totalPoints, string dimKey) { Dictionary pointDistribution = new Dictionary(); UpdateDistribution(pointDistribution, totalPoints, dimKey); - anomalyPoints.OrderBy(x=>x.Delta); + anomalyPoints.OrderBy(x => x.Delta); if (root.Delta > 0) { @@ -203,12 +333,14 @@ public static List GetTopAnomaly(List anomalyPoints, Point root, L double delta = 0; double preDelta = 0; List causeList = new List(); - foreach(Point anomaly in anomalyPoints) + foreach (Point anomaly in anomalyPoints) { - if (anomaly.Delta * root.Delta <= 0) - { - break; - } + // if anomaly.delta equals to 0 , which means we have no data here, should not break, so update from <= to < + // if oposite is different, shouldn't divide it. 
+ // if (anomaly.Delta * root.Delta < 0) + // { + // break; + //} if (StopAnomalyComparison(delta, root.Delta, anomaly.Delta, preDelta)) { @@ -229,16 +361,18 @@ public static List GetTopAnomaly(List anomalyPoints, Point root, L return null; } - public static BestDimension SelectBestDimension(List totalPoints, List anomalyPoints, List aggDim, double totalEntropy) + public static BestDimension SelectBestDimension(List totalPoints, List anomalyPoints, List aggDim, double totalEntropy) { Dictionary entroyGainMap = new Dictionary(); Dictionary entroyGainRatioMap = new Dictionary(); double sumGain = 0; + //double maxGain = Int32.MinValue; - foreach (String dimKey in aggDim) + BestDimension best = null; + foreach (string dimKey in aggDim) { BestDimension dimension = BestDimension.CreateDefaultInstance(); - dimension.DimensionKey =dimKey; + dimension.DimensionKey = dimKey; UpdateDistribution(dimension.PointDis, totalPoints, dimKey); UpdateDistribution(dimension.AnomalyDis, anomalyPoints, dimKey); @@ -250,13 +384,29 @@ public static BestDimension SelectBestDimension(List totalPoints, List> Dictionary entroyGainRatioMap = new Dictionary(); double sumGain = 0; + //double maxGain = Int32.MinValue; + BestDimension best = null; + foreach (String dimKey in aggDim) { BestDimension dimension = BestDimension.CreateDefaultInstance(); @@ -281,17 +434,34 @@ public static BestDimension SelectBestDimension(Dictionary> double gainRatio = gain / GetDimensionInstrinsicValue(dimension.PointDis, dimension.AnomalyDis, totalEntropy); entroyGainRatioMap.Add(dimension, gainRatio); + ////get anomaly value range, if range = 1, choose best dim + //int valueCount = GetValueCountInDimKey(anomalyChildren[dimKey], dimKey); + //if (valueCount == 1) + //{ + // if (maxGain < gain) + // { + // best = dimension; + // maxGain = gain; + // } + //} + sumGain += gain; } double meanGain = sumGain / aggDim.Count(); - BestDimension best = FindBestDimension(entroyGainMap, entroyGainRatioMap, meanGain); + if (best != 
null) + { + return best; + } + + best = FindBestDimension(entroyGainMap, entroyGainRatioMap, meanGain); return best; } - private static BestDimension FindBestDimension(Dictionary entropyGainMap, Dictionary entropyGainRationMap,double meanGain) { + private static BestDimension FindBestDimension(Dictionary entropyGainMap, Dictionary entropyGainRationMap, double meanGain) + { BestDimension best = null; foreach (KeyValuePair dimension in entropyGainMap) { @@ -373,13 +543,16 @@ public static Point FindPointByDimension(Dictionary dim, List item in p.Dimensions) { - if (!dim[item.Key].Equals(item.Value)) { + foreach (KeyValuePair item in p.Dimensions) + { + if (!dim[item.Key].Equals(item.Value)) + { isEqual = false; } } - if (isEqual) { + if (isEqual) + { return p; } } @@ -387,7 +560,8 @@ public static Point FindPointByDimension(Dictionary dim, List UpdateDimensionValue(Dictionary dimension, string key, string value) { + private static Dictionary UpdateDimensionValue(Dictionary dimension, string key, string value) + { Dictionary newDim = new Dictionary(dimension); newDim[key] = value; return newDim; @@ -436,7 +610,7 @@ private static double GetDimensionInstrinsicValue(Dictionary pointD foreach (string key in anomalyDis.Keys) { - instrinsicValue -= Log2( (double)anomalyDis[key] / pointDis[key]) * anomalyDis[key] / pointDis[key]; + instrinsicValue -= Log2((double)anomalyDis[key] / pointDis[key]) * anomalyDis[key] / pointDis[key]; } return instrinsicValue; @@ -456,39 +630,69 @@ private static void UpdateDistribution(Dictionary distribution, Lis { foreach (Point point in points) { - String dimVal = point.Dimensions[dimKey]; + string dimVal = point.Dimensions[dimKey]; if (!distribution.ContainsKey(dimVal)) { distribution.Add(dimVal, 0); } - distribution[dimVal]= distribution[dimVal] + 1; + distribution[dimVal] = distribution[dimVal] + 1; } } - private static bool IsLargeEntropyGain(double total, double dimEntropy) { - return total - dimEntropy > 0.3; + private static int 
GetValueCountInDimKey(List points, string dimKey) + { + HashSet values = new HashSet(); + + foreach (Point point in points) + { + string dimVal = point.Dimensions[dimKey]; + if (!values.Contains(dimVal)) + { + values.Add(dimVal); + } + } + + return values.Count; + } + + //threshold needs to be updated, example, 8/23/2019 11:00:00 AM + private static bool IsLargeEntropyGain(double total, double dimEntropy) + { + return total - dimEntropy > 0.2; } - public static double Log2(double val) { + public static double Log2(double val) + { return Math.Log(val) / Math.Log(2); } - public static bool ContainsAll(Dictionary bigDic, Dictionary smallDic) { - foreach (var item in smallDic) { - if (!bigDic.ContainsKey(item.Key)) { + public static bool ContainsAll(Dictionary bigDic, Dictionary smallDic) + { + foreach (var item in smallDic) + { + if (!bigDic.ContainsKey(item.Key)) + { + return false; + } + + if (bigDic.ContainsKey(item.Key) && !bigDic[item.Key].Equals(smallDic[item.Key])) + { return false; } } return true; } - private static bool IsAggregationDimension(string val, string aggSymbol) { + private static bool IsAggregationDimension(string val, string aggSymbol) + { return val.Equals(aggSymbol); } - private static int GetPointSize(BestDimension dim, string key) { + private static int GetPointSize(BestDimension dim, string key) + { int pointSize = 0; - if (dim.PointDis.ContainsKey(key)) { + if (dim.PointDis.ContainsKey(key)) + { pointSize = dim.PointDis[key]; } return pointSize; @@ -505,11 +709,13 @@ private static int GetAnomalyPointSize(BestDimension dim, string key) } } - public class DimensionInfo { + public class DimensionInfo + { public List DetailDim { get; set; } public List AggDim { get; set; } - public static DimensionInfo CreateDefaultInstance() { + public static DimensionInfo CreateDefaultInstance() + { DimensionInfo instance = new DimensionInfo(); instance.DetailDim = new List(); instance.AggDim = new List(); @@ -517,7 +723,8 @@ public static DimensionInfo 
CreateDefaultInstance() { } } - public class PointTree { + public class PointTree + { public Point ParentNode; public Dictionary> ChildrenNodes; public List Leaves; @@ -531,7 +738,7 @@ public static PointTree CreateDefaultInstance() } } - public sealed class Point + public sealed class Point : IEquatable { public double Value { get; set; } public double ExpectedValue { get; set; } @@ -547,20 +754,40 @@ public Point(double value, double expectedValue, bool isAnomaly, Dictionary item in Dimensions) + { + if (!other.Dimensions[item.Key].Equals(item.Value)) + { + return false; + } + } + return true; + } + + public override int GetHashCode() + { + return Dimensions.GetHashCode(); + } } - public sealed class BestDimension { + public sealed class BestDimension + { public string DimensionKey; public double Entropy; public Dictionary AnomalyDis; public Dictionary PointDis; public BestDimension() { } - public static BestDimension CreateDefaultInstance() { + public static BestDimension CreateDefaultInstance() + { BestDimension instance = new BestDimension(); instance.AnomalyDis = new Dictionary(); instance.PointDis = new Dictionary(); @@ -568,14 +795,15 @@ public static BestDimension CreateDefaultInstance() { } } - public sealed class AnomalyCause { + public sealed class AnomalyCause + { public string DimensionKey; public List Anomalies; - public AnomalyCause() {} + public AnomalyCause() { } } - public sealed class RootCauseItem: IEquatable + public sealed class RootCauseItem : IEquatable { public double Score; public string Path; @@ -590,13 +818,16 @@ public RootCauseItem(Dictionary rootCause) public RootCauseItem(Dictionary rootCause, string path) { RootCause = rootCause; - Path= path; + Path = path; } public bool Equals(RootCauseItem other) { - if (RootCause.Count == other.RootCause.Count) { - foreach (KeyValuePair item in RootCause) { - if (!other.RootCause[item.Key].Equals(item.Value)) { + if (RootCause.Count == other.RootCause.Count) + { + foreach (KeyValuePair item in 
RootCause) + { + if (!other.RootCause[item.Key].Equals(item.Value)) + { return false; } } @@ -629,4 +860,24 @@ public RootCauseScore(double surprise, double explainaryScore) ExplainaryScore = explainaryScore; } } + + public enum AggregateType + { + /// + /// Make the aggregate type as sum. + /// + Sum = 0, + /// + /// Make the aggregate type as average. + /// + Avg = 1, + /// + /// Make the aggregate type as min. + /// + Min = 2, + /// + /// Make the aggregate type as max. + /// + Max = 3 + } } From 29216e0a4fd5d77ddd65d7f3784761ba60492967 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Sun, 22 Mar 2020 22:21:50 +0800 Subject: [PATCH 08/49] update from laptop --- .../TimeSeries/LocalizeRootCauseByDT.cs | 2 +- .../TimeSeries/LocalizeRootCauseEvaluation.cs | 8 +- .../DTRootCauseLocalizationUtils.cs | 94 +++++++++---------- .../TimeSeriesDirectApi.cs | 7 +- 4 files changed, 54 insertions(+), 57 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs index 09988dd19e..2150c0b88e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs @@ -55,7 +55,7 @@ public RootCauseLocalizationData() public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices,String aggregateType, string aggregateSymbol) { - Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, DTRootCauseLocalizationEstimator.AggregateType.Sum, aggregateSymbol); + Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, Microsoft.ML.TimeSeries.AggregateType.Sum, aggregateSymbol); } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs index 0a756a2ef5..32fc4061af 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs @@ -31,7 +31,7 @@ public static void Example() DateTime filterTime = DateTime.ParseExact("2019-11-13 13:00:00,000", "yyyy-MM-dd HH:mm:ss,fff", System.Globalization.CultureInfo.InvariantCulture); - if (timeStamp.CompareTo(filterTime).Equals(0)) + //if (timeStamp.CompareTo(filterTime).Equals(0)) { int seconds = Convert.ToInt32(timeStamp.Subtract(new DateTime(1970, 1, 1, 0, 0, 0, 0)).TotalSeconds); string path = String.Format("D:/rootcause/Dataset_yaniv/raw_data_201908_202002/{0}.csv", seconds); @@ -41,7 +41,7 @@ public static void Example() PredictionEngine engine = GetRootCausePredictionEngine(); - var newRootCauseInput = new RootCauseLocalizationData(timeStamp, rootNodeMap[timeStamp], new List() { new MetricSlice(timeStamp, points) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, aggSymbol); + var newRootCauseInput = new RootCauseLocalizationData(timeStamp, rootNodeMap[timeStamp], new List() { new MetricSlice(timeStamp, points) }, AggregateType.Sum, aggSymbol); List list = new List(); GetRootCause(list, newRootCauseInput, engine); @@ -187,7 +187,7 @@ private static void RemoveAggSymbol(List> dimensions, private static PredictionEngine GetRootCausePredictionEngine() { //// Create an root cause localizatiom input list from csv. 
- var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM") }; + var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, AggregateType.Sum, "SUM") }; var ml = new MLContext(1); @@ -384,7 +384,7 @@ public RootCauseLocalizationData() Input = null; } - public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, DTRootCauseLocalizationEstimator.AggregateType aggregateteType, string aggregateSymbol) + public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateteType, string aggregateSymbol) { Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, aggregateteType, aggregateSymbol); } diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs index 9c48818c01..0e5001a20c 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationUtils.cs @@ -26,9 +26,8 @@ public static DimensionInfo SeperateDimension(Dictionary dimensi return info; } - public static PointTree BuildPointTree(List pointList, List aggDims, Dictionary subDim, string aggSymbol,AggregateType aggType, bool filterByAnomaly = false) + public static PointTree BuildPointTree(List pointList, List aggDims, Dictionary subDim, string aggSymbol, AggregateType aggType, bool filterByAnomaly = false) { - //todo- would be better to add check and fix for child point and leaves point PointTree tree = PointTree.CreateDefaultInstance(); foreach (Point point in pointList) @@ -86,43 +85,58 @@ public static PointTree BuildPointTree(List pointList, List aggDi } // rebuild 
the tree from bottom to up incase the child node data is incomplete + //CompleteTreeBottomUp(tree, aggType, aggSymbol, aggDims); + + // todo- rebuild the tree from top to down incase the leaves node data is incomplete, including the other aggregate dims in middle level return tree; } - private static PointTree CompleteTreeBottomUp(PointTree tree, AggregateType aggType, string aggSymbol) { + private static PointTree CompleteTreeBottomUp(PointTree tree, AggregateType aggType, string aggSymbol, List aggDims) + { if (tree.Leaves.Count == 0) return tree; Dictionary> map = new Dictionary>(); - foreach (Point p in tree.Leaves) { - foreach (KeyValuePair keyValuePair in p.Dimensions) { - if (map.ContainsKey(keyValuePair.Key)) + foreach (Point p in tree.Leaves) + { + foreach (KeyValuePair keyValuePair in p.Dimensions) + { + if (aggDims.Contains(keyValuePair.Key)) { - map[keyValuePair.Key].Add(keyValuePair.Value); - } - else { - map.Add(keyValuePair.Key, new HashSet() { keyValuePair.Value}); + if (map.ContainsKey(keyValuePair.Key)) + { + map[keyValuePair.Key].Add(keyValuePair.Value); + } + else + { + map.Add(keyValuePair.Key, new HashSet() { keyValuePair.Value }); + } } } } - foreach (KeyValuePair> pair in map) { + foreach (KeyValuePair> pair in map) + { if (tree.ChildrenNodes.ContainsKey(pair.Key)) { - if (tree.ChildrenNodes[pair.Key].Count < pair.Value.Count) { - foreach(string value in pair.Value) + if (tree.ChildrenNodes[pair.Key].Count < pair.Value.Count) + { + foreach (string value in pair.Value) { - if (!IsAggDimensionExisted(pair.Key, value, tree.ChildrenNodes[pair.Key])) { + if (!IsAggDimensionExisted(pair.Key, value, tree.ChildrenNodes[pair.Key])) + { Point p = SimulateBottomUpValue(tree.Leaves, pair.Key, value, aggType, aggSymbol); tree.ChildrenNodes[pair.Key].Add(p); } } } } - else { + else + { List childPoints = new List(); - foreach (string value in pair.Value) { + foreach (string value in pair.Value) + { //simulate the aggregation value Point p = 
SimulateBottomUpValue(tree.Leaves, pair.Key, value, aggType, aggSymbol); childPoints.Add(p); @@ -135,16 +149,20 @@ private static PointTree CompleteTreeBottomUp(PointTree tree, AggregateType agg return tree; } - private static bool IsAggDimensionExisted(string key, string value, List points) { - foreach (Point p in points) { - if (p.Dimensions[key].Equals(value)) { + private static bool IsAggDimensionExisted(string key, string value, List points) + { + foreach (Point p in points) + { + if (p.Dimensions[key].Equals(value)) + { return true; } } return false; } - private static Point SimulateBottomUpValue(List leaves, string key, string keyValue, AggregateType type, string aggSymbol) { + private static Point SimulateBottomUpValue(List leaves, string key, string keyValue, AggregateType type, string aggSymbol) + { Point p = null; Dictionary dimension = new Dictionary(); @@ -159,14 +177,17 @@ private static Point SimulateBottomUpValue(List leaves, string key, strin } } - if (type.Equals(AggregateType.Sum)) { + if (type.Equals(AggregateType.Sum)) + { bool isAnomaly = false; double value = 0; double expectedValue = 0; - foreach (Point leave in leaves) { + foreach (Point leave in leaves) + { - if (leave.Dimensions.ContainsKey(key) && leave.Dimensions[key].Equals(keyValue)) { + if (leave.Dimensions.ContainsKey(key) && leave.Dimensions[key].Equals(keyValue)) + { value += leave.Value; expectedValue = leave.ExpectedValue; isAnomaly = isAnomaly || leave.IsAnomaly; @@ -366,7 +387,6 @@ public static BestDimension SelectBestDimension(List totalPoints, List entroyGainMap = new Dictionary(); Dictionary entroyGainRatioMap = new Dictionary(); double sumGain = 0; - //double maxGain = Int32.MinValue; BestDimension best = null; foreach (string dimKey in aggDim) @@ -384,18 +404,6 @@ public static BestDimension SelectBestDimension(List totalPoints, List> Dictionary entroyGainRatioMap = new Dictionary(); double sumGain = 0; - //double maxGain = Int32.MinValue; BestDimension best = null; 
foreach (String dimKey in aggDim) @@ -434,17 +441,6 @@ public static BestDimension SelectBestDimension(Dictionary> double gainRatio = gain / GetDimensionInstrinsicValue(dimension.PointDis, dimension.AnomalyDis, totalEntropy); entroyGainRatioMap.Add(dimension, gainRatio); - ////get anomaly value range, if range = 1, choose best dim - //int valueCount = GetValueCountInDimKey(anomalyChildren[dimKey], dimKey); - //if (valueCount == 1) - //{ - // if (maxGain < gain) - // { - // best = dimension; - // maxGain = gain; - // } - //} - sumGain += gain; } @@ -455,7 +451,7 @@ public static BestDimension SelectBestDimension(Dictionary> return best; } - best = FindBestDimension(entroyGainMap, entroyGainRatioMap, meanGain); + best = FindBestDimension(entroyGainMap, entroyGainRatioMap, meanGain); return best; } diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 6394dce155..223be232df 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -8,6 +8,7 @@ using Microsoft.ML.Data; using Microsoft.ML.TestFramework; using Microsoft.ML.TestFramework.Attributes; +using Microsoft.ML.TimeSeries; using Microsoft.ML.Transforms.TimeSeries; using Xunit; using Xunit.Abstractions; @@ -527,7 +528,7 @@ public RootCauseLocalizationData() Input = null; } - public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, DTRootCauseLocalizationEstimator.AggregateType aggregateteType, string aggregateSymbol) + public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateteType, string aggregateSymbol) { Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, aggregateteType, aggregateSymbol); } @@ -548,7 +549,7 @@ public RootCauseLocalizationTransformedData() public void RootCauseLocalizationWithDT() { // 
Create an root cause localizatiom input list. - var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM"), new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Avg, "AVG") }; + var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, AggregateType.Sum, "SUM"), new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, AggregateType.Avg, "AVG") }; var ml = new MLContext(1); // Convert the list of root cause data to an IDataView object, which is consumable by ML.NET API. @@ -574,7 +575,7 @@ public void RootCauseLocalizationWithDT() } var engine = ml.Model.CreatePredictionEngine(model); - var newRootCauseInput = new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM"); + var newRootCauseInput = new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, AggregateType.Sum, "SUM"); var transformedRootCause = engine.Predict(newRootCauseInput); Assert.NotNull(transformedRootCause); From 69da330e3c06ca010b34b6cfcdd6a276fab64cc4 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Mon, 23 Mar 2020 10:49:04 +0800 Subject: [PATCH 09/49] save for add test --- .../TimeSeries/LocalizeRootCauseEvaluation.cs | 6 +-- ...izationUtils.cs => DTRootCauseAnalyzer.cs} | 39 +++++++------------ .../DTRootCauseLocalization.cs | 28 ++++++------- 3 files changed, 31 insertions(+), 42 deletions(-) rename 
src/Microsoft.ML.TimeSeries/{DTRootCauseLocalizationUtils.cs => DTRootCauseAnalyzer.cs} (94%) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs index 32fc4061af..8cdd2f6140 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs @@ -23,7 +23,7 @@ public static void Example() int totalFn = 0; int totalCount = 0; - bool exactly = false; + bool exactly = true; foreach (KeyValuePair> item in rootNodeMap) { @@ -150,11 +150,11 @@ private static int CompareCause(Dictionary detect, Dictionary dimensions, string aggSymbol) { @@ -91,7 +91,6 @@ public static PointTree BuildPointTree(List pointList, List aggDi return tree; } - private static PointTree CompleteTreeBottomUp(PointTree tree, AggregateType aggType, string aggSymbol, List aggDims) { @@ -219,7 +218,7 @@ public static List SelectPoints(List points, Dictionary LocalizeRootCauseByDimension(List total return new List() { new RootCauseItem(anomalyDimension) }; } - // when total and best.entroy all equals 0, do we need to go deep down? 
would it be better if we remove the else logic, yes- remove the else logic will improve tp, reduce fn, and improve accuracy - //if (IsLargeEntropyGain(totoalEntropy, best.Entropy) || best.AnomalyDis.Count == 1) + List children = GetTopAnomaly(anomalyTree.ChildrenNodes[best.DimensionKey], anomalyTree.ParentNode, totalPoints, best.DimensionKey); + if (children == null) { - // need to improve here, whether can go deeper - List children = GetTopAnomaly(anomalyTree.ChildrenNodes[best.DimensionKey], anomalyTree.ParentNode, totalPoints, best.DimensionKey); - if (children == null) - { - //As the cause couldn't be found, the root cause should be itself - return new List() { new RootCauseItem(anomalyDimension, best.DimensionKey) }; - } - else + //As the cause couldn't be found, the root cause should be itself + return new List() { new RootCauseItem(anomalyDimension, best.DimensionKey) }; + } + else + { + List causes = new List(); + // For the found causes, we return the result + foreach (Point anomaly in children) { - List causes = new List(); - // For the found causes, we return the result - foreach (Point anomaly in children) - { - causes.Add(new RootCauseItem(UpdateDimensionValue(anomalyDimension, best.DimensionKey, anomaly.Dimensions[best.DimensionKey]), best.DimensionKey)); - } - return causes; + causes.Add(new RootCauseItem(UpdateDimensionValue(anomalyDimension, best.DimensionKey, anomaly.Dimensions[best.DimensionKey]), best.DimensionKey)); } + return causes; } - //else - //{ - // //As the entropy gain for this best dimension is small, the root cause should be itself - // return new List() { new RootCauseItem(anomalyDimension) }; - //} } public static double GetEntropy(int totalNum, int anomalyNum) diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs index 5f961b888b..450888906e 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs +++ 
b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs @@ -404,12 +404,12 @@ private void LocalizeRootCauses(RootCauseLocalizationInput src, ref RootCause ds dst = new RootCause(); dst.Items = new List { }; - DimensionInfo dimensionInfo = DTRootCauseLocalizationUtils.SeperateDimension(src.AnomalyDimensions, src.AggSymbol); + DimensionInfo dimensionInfo = DTRootCauseAnalyzer.SeperateDimension(src.AnomalyDimensions, src.AggSymbol); if (dimensionInfo.AggDim.Count == 0) { return; } - Dictionary subDim = DTRootCauseLocalizationUtils.GetsubDim(src.AnomalyDimensions, dimensionInfo.DetailDim); + Dictionary subDim = DTRootCauseAnalyzer.GetsubDim(src.AnomalyDimensions, dimensionInfo.DetailDim); List totalPoints = GetTotalPointsForAnomalyTimestamp(src, subDim); @@ -429,15 +429,15 @@ private List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput } } - List totalPoints = DTRootCauseLocalizationUtils.SelectPoints(points, subDim); + List totalPoints = DTRootCauseAnalyzer.SelectPoints(points, subDim); return totalPoints; } private void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, DimensionInfo dimensionInfo, List totalPoints, Dictionary subDim) { - PointTree pointTree = DTRootCauseLocalizationUtils.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType); - PointTree anomalyTree = DTRootCauseLocalizationUtils.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType, true); + PointTree pointTree = DTRootCauseAnalyzer.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType); + PointTree anomalyTree = DTRootCauseAnalyzer.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType, true); // which means there is no all up here, we would return empty list; in ML.net , should we do the same thing? 
todo if (anomalyTree.ParentNode == null) @@ -453,7 +453,7 @@ private void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, throw new Exception("point leaves not match with anomaly leaves"); } - rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree, src.AnomalyDimensions)); + rootCauses.AddRange(DTRootCauseAnalyzer.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree, src.AnomalyDimensions)); } else { @@ -461,7 +461,7 @@ private void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, if (anomalyTree.Leaves.Count > 0) { // update from total points to pointTree.Leaves.Count - totalEntropy = DTRootCauseLocalizationUtils.GetEntropy(pointTree.Leaves.Count, anomalyTree.Leaves.Count); + totalEntropy = DTRootCauseAnalyzer.GetEntropy(pointTree.Leaves.Count, anomalyTree.Leaves.Count); } if (totalEntropy > 0.9) @@ -474,7 +474,7 @@ private void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, else { // update from total points to pointTree.Leaves - rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); + rootCauses.AddRange(DTRootCauseAnalyzer.LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); } } //comment here, as LocalizeRootCauseByAnomaly is not right, for example, data 2019-08-08T19:30:00Z seconds - 1566383400 @@ -504,7 +504,7 @@ private void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, //else { //update totalPoints to .leaves - rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); + rootCauses.AddRange(DTRootCauseAnalyzer.LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); } } @@ -518,7 +518,7 @@ private void 
GetRootCauseScore(List points, Dictionary an if (dst.Items.Count > 1) { //get surprise value and explanary power value - Point anomalyPoint = DTRootCauseLocalizationUtils.FindPointByDimension(anomalyRoot, points); + Point anomalyPoint = DTRootCauseAnalyzer.FindPointByDimension(anomalyRoot, points); double sumSurprise = 0; double sumEp = 0; @@ -526,7 +526,7 @@ private void GetRootCauseScore(List points, Dictionary an foreach (RootCauseItem item in dst.Items) { - Point rootCausePoint = DTRootCauseLocalizationUtils.FindPointByDimension(item.RootCause, points); + Point rootCausePoint = DTRootCauseAnalyzer.FindPointByDimension(item.RootCause, points); if (rootCausePoint != null) { if (rootCausePoint.ExpectedValue < rootCausePoint.Value) @@ -561,7 +561,7 @@ private void GetRootCauseScore(List points, Dictionary an else if (dst.Items.Count == 1) { //surprise and expananory , max is 1 - Point rootCausePoint = DTRootCauseLocalizationUtils.FindPointByDimension(dst.Items[0].RootCause, points); + Point rootCausePoint = DTRootCauseAnalyzer.FindPointByDimension(dst.Items[0].RootCause, points); if (rootCausePoint != null) { if (rootCausePoint.ExpectedValue < rootCausePoint.Value) @@ -574,7 +574,7 @@ private void GetRootCauseScore(List points, Dictionary an } } - Point anomalyPoint = DTRootCauseLocalizationUtils.FindPointByDimension(anomalyRoot, points); + Point anomalyPoint = DTRootCauseAnalyzer.FindPointByDimension(anomalyRoot, points); if (anomalyPoint != null && rootCausePoint != null) { double surprise = GetSurpriseScore(rootCausePoint, anomalyPoint); @@ -588,7 +588,7 @@ private double GetSurpriseScore(Point rootCausePoint, Point anomalyPoint) { double p = rootCausePoint.ExpectedValue / anomalyPoint.ExpectedValue; double q = rootCausePoint.Value / anomalyPoint.Value; - double surprise = 0.5 * (p * DTRootCauseLocalizationUtils.Log2(2 * p / (p + q)) + q * DTRootCauseLocalizationUtils.Log2(2 * q / (p + q))); + double surprise = 0.5 * (p * DTRootCauseAnalyzer.Log2(2 * p / (p + 
q)) + q * DTRootCauseAnalyzer.Log2(2 * q / (p + q))); return surprise; } From e1c5432edcda64837c0b5e8984913e4f79a4c9db Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Mon, 23 Mar 2020 15:12:20 +0800 Subject: [PATCH 10/49] add root cause localization algorithm --- .../TimeSeries/LocalizeRootCauseByDT.cs | 112 ++++- .../TimeSeries/LocalizeRootCauseEvaluation.cs | 25 +- docs/samples/Microsoft.ML.Samples/Program.cs | 1 + .../DTRootCauseAnalyzer.cs | 307 ++++++++------ .../DTRootCauseLocalization.cs | 388 +----------------- .../DTRootCauseLocalizationType.cs | 168 ++++++++ .../TimeSeriesDirectApi.cs | 97 ++++- 7 files changed, 570 insertions(+), 528 deletions(-) create mode 100644 src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs index 2150c0b88e..326d5f360d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs @@ -1,21 +1,23 @@ using System; using System.Collections.Generic; +using System.Runtime.InteropServices.ComTypes; using Microsoft.ML; -using Microsoft.ML.Transforms.TimeSeries; +using Microsoft.ML.TimeSeries; namespace Samples.Dynamic { - public static class LocalizeRootCause + public static class LocalizeRootCauseByDT { + private static string AGG_SYMBOL = "##SUM##"; public static void Example() { // Create a new ML context, for ML.NET operations. It can be used for // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); - // Create an empty list as the dataset. The 'NormalizeText' API does not - // require training data as the estimator ('TextNormalizingEstimator') - // created by 'NormalizeText' API is not a trainable estimator. The + // Create an empty list as the dataset. 
The 'DTRootCauseLocalization' API does not + // require training data as the estimator ('DTRootCauseLocalizationEstimator') + // created by 'DTRootCauseLocalization' API is not a trainable estimator. The // empty list is only needed to pass input schema to the pipeline. var emptySamples = new List(); @@ -28,21 +30,106 @@ public static void Example() // Fit to data. var localizeTransformer = localizePipeline.Fit(emptyDataView); - // Create the prediction engine to get the root cause result from the - // input data. + // Create the prediction engine to get the root cause result from the input data. var predictionEngine = mlContext.Model.CreatePredictionEngine(localizeTransformer); // Call the prediction API. - var data = new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, "SUM", "SUM"); + DateTime timestamp = GetTimestamp(); + var data = new RootCauseLocalizationData(timestamp, GetAnomalyDimension(), new List() { new MetricSlice(timestamp, GetPoints()) }, AggregateType.Sum, AGG_SYMBOL); var prediction = predictionEngine.Predict(data); // Print the localization result. - Console.WriteLine($"Localized result: {prediction.RootCause}"); + int count = 0; + foreach (RootCauseItem item in prediction.RootCause.Items) { + count++; + Console.WriteLine($"Root cause item #{count} ..."); + foreach (KeyValuePair pair in item.Dimension) { + Console.WriteLine($"{pair.Key} = {pair.Value}"); + } + } + + //Item #1 ... 
+ //Country = UK + //DeviceType = ##SUM## + //DataCenter = DC1 + } + + private static List GetPoints() { + List points = new List(); + + Dictionary dic1 = new Dictionary(); + dic1.Add("Country","UK"); + dic1.Add("DeviceType", "Laptop"); + dic1.Add("DataCenter", "DC1"); + points.Add(new Point(200, 100, true, dic1)); + + Dictionary dic2 = new Dictionary(); + dic2.Add("Country", "UK"); + dic2.Add("DeviceType", "Mobile"); + dic2.Add("DataCenter", "DC1"); + points.Add(new Point(1000, 100, true, dic2)); + + Dictionary dic3 = new Dictionary(); + dic3.Add("Country", "UK"); + dic3.Add("DeviceType", AGG_SYMBOL); + dic3.Add("DataCenter", "DC1"); + points.Add(new Point(1200, 200, true, dic3)); + + Dictionary dic4 = new Dictionary(); + dic4.Add("Country", "UK"); + dic4.Add("DeviceType", "Laptop"); + dic4.Add("DataCenter", "DC2"); + points.Add(new Point(100, 100, false, dic4)); + + Dictionary dic5 = new Dictionary(); + dic5.Add("Country", "UK"); + dic5.Add("DeviceType", "Mobile"); + dic5.Add("DataCenter", "DC2"); + points.Add(new Point(200, 200, false, dic5)); + + Dictionary dic6 = new Dictionary(); + dic6.Add("Country", "UK"); + dic6.Add("DeviceType", AGG_SYMBOL); + dic6.Add("DataCenter", "DC2"); + points.Add(new Point(300, 300, false, dic6)); + + Dictionary dic7 = new Dictionary(); + dic7.Add("Country", "UK"); + dic7.Add("DeviceType", AGG_SYMBOL); + dic7.Add("DataCenter", AGG_SYMBOL); + points.Add(new Point(1500, 500, true, dic7)); + + Dictionary dic8 = new Dictionary(); + dic8.Add("Country", "UK"); + dic8.Add("DeviceType", "Laptop"); + dic8.Add("DataCenter", AGG_SYMBOL); + points.Add(new Point(300, 200, true, dic8)); + + Dictionary dic9 = new Dictionary(); + dic9.Add("Country", "UK"); + dic9.Add("DeviceType", "Mobile"); + dic9.Add("DataCenter", AGG_SYMBOL); + points.Add(new Point(1200, 300, true, dic9)); + + return points; + } + + private static Dictionary GetAnomalyDimension() { + Dictionary dim = new Dictionary(); + dim.Add("Country", "UK"); + dim.Add("DeviceType", 
AGG_SYMBOL); + dim.Add("DataCenter", AGG_SYMBOL); + + return dim; + } + + private static DateTime GetTimestamp() + { + return new DateTime(); } - private class RootCauseLocalizationData { [RootCauseLocalizationInputType] @@ -53,9 +140,10 @@ public RootCauseLocalizationData() Input = null; } - public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices,String aggregateType, string aggregateSymbol) + public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateType, string aggregateSymbol) { - Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, Microsoft.ML.TimeSeries.AggregateType.Sum, aggregateSymbol); + Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, aggregateType, + aggregateSymbol); } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs index 8cdd2f6140..47163a49dc 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs @@ -23,7 +23,9 @@ public static void Example() int totalFn = 0; int totalCount = 0; - bool exactly = true; + bool exactly = false; + + int totalRunTime = 0; foreach (KeyValuePair> item in rootNodeMap) { @@ -31,6 +33,7 @@ public static void Example() DateTime filterTime = DateTime.ParseExact("2019-11-13 13:00:00,000", "yyyy-MM-dd HH:mm:ss,fff", System.Globalization.CultureInfo.InvariantCulture); + //if (timeStamp.CompareTo(filterTime).Equals(0)) { int seconds = Convert.ToInt32(timeStamp.Subtract(new DateTime(1970, 1, 1, 0, 0, 0, 0)).TotalSeconds); @@ -44,13 +47,17 @@ public static void Example() var newRootCauseInput = new RootCauseLocalizationData(timeStamp, 
rootNodeMap[timeStamp], new List() { new MetricSlice(timeStamp, points) }, AggregateType.Sum, aggSymbol); List list = new List(); + int startTime = System.Environment.TickCount; GetRootCause(list, newRootCauseInput, engine); + int endTime = System.Environment.TickCount; + int runTime = endTime - startTime; + totalRunTime += runTime; List> labeledRootCause = labeledRootCauseMap[timeStamp]; List> detectedRootCause = ConvertRootCauseItemToDic(list); RemoveAggSymbol(detectedRootCause, aggSymbol); - Tuple evaluation = ScoreRootCause(detectedRootCause, labeledRootCause, exactly, timeStamp); + Tuple evaluation = EvaluateRootCauseResult(detectedRootCause, labeledRootCause, exactly, timeStamp); totalTp += evaluation.Item1; totalFp += evaluation.Item2; totalFn += evaluation.Item3; @@ -63,13 +70,14 @@ public static void Example() double f1 = 2 * precision * recall / (precision + recall); Console.WriteLine(String.Format("Total Count : {0}, TP: {1}, FP: {2}, FN: {3}", totalCount, totalTp, totalFp, totalFn)); Console.WriteLine(String.Format("Precision : {0}, Recall: {1}, F1: {2}", precision, recall, f1)); + Console.WriteLine(String.Format("Mean calculation time is : {0} ms", (double)totalRunTime / totalCount)); } - private static Tuple ScoreRootCause(List> detectedRootCause, List> labeledRootCause, bool exactly, DateTime timeStamp) + private static Tuple EvaluateRootCauseResult(List> detectedRootCause, List> labeledRootCause, bool exactly, DateTime timeStamp) { int tp = 0; int fp = 0; - int fn; + int fn; List labelSet = new List(); foreach (Dictionary cause in detectedRootCause) { @@ -165,7 +173,7 @@ private static List> ConvertRootCauseItemToDic(List> list = new List>(); foreach (RootCauseItem item in items) { - list.Add(item.RootCause); + list.Add(item.Dimension); } return list; } @@ -278,8 +286,6 @@ private static Dictionary>> GetLabeled private static List GetPoints(string path) { - - var inputData = GetDataTabletFromCSVFile(path); DateTime timeStamp = new DateTime(); @@ 
-314,11 +320,10 @@ private static List GetPoints(string path) private static void GetRootCause(List rootCauseList, RootCauseLocalizationData inputData, PredictionEngine engine) { - RootCauseLocalizationTransformedData incrementalResult = engine.Predict(inputData); if (incrementalResult.RootCause.Items.Count == 0 || ( - incrementalResult.RootCause.Items.Count == 1 && incrementalResult.RootCause.Items[0].RootCause.Equals(inputData.Input.AnomalyDimensions) + incrementalResult.RootCause.Items.Count == 1 && incrementalResult.RootCause.Items[0].Dimension.Equals(inputData.Input.AnomalyDimensions) )) { if (!rootCauseList.Contains(new RootCauseItem(inputData.Input.AnomalyDimensions))) @@ -333,7 +338,7 @@ private static void GetRootCause(List rootCauseList, RootCauseLoc foreach (RootCauseItem item in incrementalResult.RootCause.Items) { RootCauseLocalizationData newData = new RootCauseLocalizationData(inputData.Input.AnomalyTimestamp, - item.RootCause, inputData.Input.Slices, inputData.Input.AggType, inputData.Input.AggSymbol); + item.Dimension, inputData.Input.Slices, inputData.Input.AggType, inputData.Input.AggSymbol); GetRootCause(rootCauseList, newData, engine); } } diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index a3da984b16..930ccc57f3 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -14,6 +14,7 @@ internal static void RunAll() foreach (var type in Assembly.GetExecutingAssembly().GetTypes()) { if (type.Name.Equals("LocalizeRootCauseEvaluation")) + //if (type.Name.Equals("LocalizeRootCauseByDT")) { var sample = type.GetMethod("Example", BindingFlags.Public | BindingFlags.Static | BindingFlags.FlattenHierarchy); diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs index 8e86b8b7aa..b7a66582b3 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs +++ 
b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs @@ -1,12 +1,75 @@ -using System; +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; using System.Collections.Generic; using System.Linq; -using Microsoft.ML.Transforms; +using Microsoft.ML.Internal.Utilities; namespace Microsoft.ML.TimeSeries { public class DTRootCauseAnalyzer { + public static List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src, Dictionary subDim) + { + List points = new List(); + foreach (MetricSlice slice in src.Slices) + { + if (slice.TimeStamp.Equals(src.AnomalyTimestamp)) + { + points = slice.Points; + } + } + + List totalPoints = DTRootCauseAnalyzer.SelectPoints(points, subDim); + + return totalPoints; + } + + public static void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, DimensionInfo dimensionInfo, List totalPoints, Dictionary subDim) + { + PointTree pointTree = DTRootCauseAnalyzer.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType); + PointTree anomalyTree = DTRootCauseAnalyzer.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType, true); + + //which means there is no aggregation in the input anomaly dimension + if (anomalyTree.ParentNode == null) + { + return; + } + + List rootCauses = new List(); + // no point under anomaly dimension + if (totalPoints.Count == 0) + { + if (anomalyTree.Leaves.Count != 0) + { + throw new Exception("point leaves not match with anomaly leaves"); + } + + rootCauses.Add(new RootCauseItem(src.AnomalyDimensions)); + } + else + { + double totalEntropy = 1; + if (anomalyTree.Leaves.Count > 0) + { + totalEntropy = DTRootCauseAnalyzer.GetEntropy(pointTree.Leaves.Count, anomalyTree.Leaves.Count); + } + + if (totalEntropy > 0.9) + { + 
rootCauses.AddRange(DTRootCauseAnalyzer.LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); + } + else + { + rootCauses.AddRange(DTRootCauseAnalyzer.LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); + } + + dst.Items = rootCauses; + } + } + public static DimensionInfo SeperateDimension(Dictionary dimensions, string aggSymbol) { DimensionInfo info = DimensionInfo.CreateDefaultInstance(); @@ -26,7 +89,7 @@ public static DimensionInfo SeperateDimension(Dictionary dimensi return info; } - public static PointTree BuildPointTree(List pointList, List aggDims, Dictionary subDim, string aggSymbol, AggregateType aggType, bool filterByAnomaly = false) + protected static PointTree BuildPointTree(List pointList, List aggDims, Dictionary subDim, string aggSymbol, AggregateType aggType, bool filterByAnomaly = false) { PointTree tree = PointTree.CreateDefaultInstance(); @@ -78,17 +141,10 @@ public static PointTree BuildPointTree(List pointList, List aggDi { tree.Leaves.Add(point); } - - //todo - need optimize, to see whether we can independent of leaves; } } } - // rebuild the tree from bottom to up incase the child node data is incomplete - //CompleteTreeBottomUp(tree, aggType, aggSymbol, aggDims); - - // todo- rebuild the tree from top to down incase the leaves node data is incomplete, including the other aggregate dims in middle level - return tree; } private static PointTree CompleteTreeBottomUp(PointTree tree, AggregateType aggType, string aggSymbol, List aggDims) @@ -199,7 +255,7 @@ private static Point SimulateBottomUpValue(List leaves, string key, strin return p; } - public static Dictionary GetsubDim(Dictionary dimension, List keyList) + public static Dictionary GetSubDim(Dictionary dimension, List keyList) { Dictionary subDim = new Dictionary(); @@ -210,7 +266,7 @@ public static Dictionary GetsubDim(Dictionary di return subDim; } - public static List 
SelectPoints(List points, Dictionary subDim) + protected static List SelectPoints(List points, Dictionary subDim) { List list = new List(); @@ -229,35 +285,7 @@ public static List SelectPoints(List points, Dictionary LocalizeRootCauseByAnomaly(List totalPoints, PointTree anomalyTree, Dictionary anomalyDimension) - { - if (anomalyTree.ChildrenNodes.Count == 0) - { - //As there is no children node under such dimension, the root cause should be itself - return new List() { new RootCauseItem(anomalyDimension) }; - } - else - { - AnomalyCause cause = GetAnomalyCause(anomalyTree, totalPoints); - if (cause == null) - { - //As the cause couldn't be found, the root cause should be itself - return new List() { new RootCauseItem(anomalyDimension) }; - } - else - { - List causes = new List(); - // For the found causes, we return the result - foreach (Point anomaly in cause.Anomalies) - { - causes.Add(new RootCauseItem(UpdateDimensionValue(anomalyDimension, cause.DimensionKey, anomaly.Dimensions[cause.DimensionKey]))); - } - return causes; - } - } - } - - public static List LocalizeRootCauseByDimension(List totalPoints, PointTree anomalyTree, PointTree pointTree, double totoalEntropy, Dictionary anomalyDimension) + protected static List LocalizeRootCauseByDimension(List totalPoints, PointTree anomalyTree, PointTree pointTree, double totoalEntropy, Dictionary anomalyDimension) { var set = anomalyTree.ChildrenNodes.Keys; @@ -295,7 +323,7 @@ public static List LocalizeRootCauseByDimension(List total } } - public static double GetEntropy(int totalNum, int anomalyNum) + protected static double GetEntropy(int totalNum, int anomalyNum) { double ratio = (double)anomalyNum / totalNum; if (ratio == 0 || ratio == 1) @@ -306,7 +334,7 @@ public static double GetEntropy(int totalNum, int anomalyNum) return -(ratio * Log2(ratio) + (1 - ratio) * Log2(1 - ratio)); } - public static Dictionary GetEntropyList(BestDimension best, List points) + protected static Dictionary 
GetEntropyList(BestDimension best, List points) { Dictionary list = new Dictionary(); // need to update, change to children if necessary @@ -323,7 +351,7 @@ public static Dictionary GetEntropyList(BestDimension best, List return list; } - public static List GetTopAnomaly(List anomalyPoints, Point root, List totalPoints, string dimKey) + protected static List GetTopAnomaly(List anomalyPoints, Point root, List totalPoints, string dimKey) { Dictionary pointDistribution = new Dictionary(); UpdateDistribution(pointDistribution, totalPoints, dimKey); @@ -345,13 +373,6 @@ public static List GetTopAnomaly(List anomalyPoints, Point root, L List causeList = new List(); foreach (Point anomaly in anomalyPoints) { - // if anomaly.delta equals to 0 , which means we have no data here, should not break, so update from <= to < - // if oposite is different, shouldn't divide it. - // if (anomaly.Delta * root.Delta < 0) - // { - // break; - //} - if (StopAnomalyComparison(delta, root.Delta, anomaly.Delta, preDelta)) { break; @@ -371,13 +392,12 @@ public static List GetTopAnomaly(List anomalyPoints, Point root, L return null; } - public static BestDimension SelectBestDimension(List totalPoints, List anomalyPoints, List aggDim, double totalEntropy) + protected static BestDimension SelectBestDimension(List totalPoints, List anomalyPoints, List aggDim, double totalEntropy) { Dictionary entroyGainMap = new Dictionary(); Dictionary entroyGainRatioMap = new Dictionary(); double sumGain = 0; - BestDimension best = null; foreach (string dimKey in aggDim) { BestDimension dimension = BestDimension.CreateDefaultInstance(); @@ -398,12 +418,7 @@ public static BestDimension SelectBestDimension(List totalPoints, List> Dictionary entroyGainRatioMap = new Dictionary(); double sumGain = 0; - BestDimension best = null; - - foreach (String dimKey in aggDim) + foreach (string dimKey in aggDim) { BestDimension dimension = BestDimension.CreateDefaultInstance(); dimension.DimensionKey = dimKey; @@ -435,12 
+448,7 @@ public static BestDimension SelectBestDimension(Dictionary> double meanGain = sumGain / aggDim.Count(); - if (best != null) - { - return best; - } - - best = FindBestDimension(entroyGainMap, entroyGainRatioMap, meanGain); + BestDimension best = FindBestDimension(entroyGainMap, entroyGainRatioMap, meanGain); return best; } @@ -478,73 +486,112 @@ private static BestDimension FindBestDimension(Dictionary return best; } - private static AnomalyCause GetAnomalyCause(PointTree anomalyTree, List totalPoints) + public static Point FindPointByDimension(Dictionary dim, List points) { - string bestKey = null; - List bestAnomalies = null; - - foreach (string key in anomalyTree.ChildrenNodes.Keys) + foreach (Point p in points) { - List anomalies = anomalyTree.ChildrenNodes[key]; - - List causeList = GetTopAnomaly(anomalies, anomalyTree.ParentNode, totalPoints, key); - if (causeList == null) + bool isEqual = true; + foreach (KeyValuePair item in p.Dimensions) { - continue; + if (!dim[item.Key].Equals(item.Value)) + { + isEqual = false; + } } - if (bestAnomalies == null || bestAnomalies.Count > causeList.Count) + if (isEqual) { - bestKey = key; - bestAnomalies = causeList; + return p; } } - if (bestKey == null) - { - return null; - } - else - { - AnomalyCause cause = new AnomalyCause(); - cause.DimensionKey = bestKey; - cause.Anomalies = bestAnomalies; - return cause; - } + return null; } - public static bool IsAnomalous(int pointSize, int anomalySize) + public static void UpdateRootCauseDirection(List points, ref RootCause dst) { - if (anomalySize == pointSize && anomalySize == 1) + foreach (RootCauseItem item in dst.Items) { - return false; - } + Point rootCausePoint = DTRootCauseAnalyzer.FindPointByDimension(item.Dimension, points); + if (rootCausePoint != null) + { + if (rootCausePoint.ExpectedValue < rootCausePoint.Value) + { + item.Direction = AnomalyDirection.Up; + } + else + { + item.Direction = AnomalyDirection.Down; + } + } - return (double)anomalySize / 
pointSize > 0.5; + } } - public static Point FindPointByDimension(Dictionary dim, List points) + public static void GetRootCauseScore(List points, Dictionary anomalyRoot, ref RootCause dst, double beta) { - foreach (Point p in points) + if (dst.Items.Count > 1) { - bool isEqual = true; - foreach (KeyValuePair item in p.Dimensions) + //get surprise value and explanary power value + Point anomalyPoint = DTRootCauseAnalyzer.FindPointByDimension(anomalyRoot, points); + + double sumSurprise = 0; + double sumEp = 0; + List scoreList = new List(); + + foreach (RootCauseItem item in dst.Items) { - if (!dim[item.Key].Equals(item.Value)) + Point rootCausePoint = DTRootCauseAnalyzer.FindPointByDimension(item.Dimension, points); + if (anomalyPoint != null && rootCausePoint != null) { - isEqual = false; + Tuple scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); + scoreList.Add(new RootCauseScore(scores.Item1, scores.Item2)); + sumSurprise += scores.Item1; + sumEp += Math.Abs(scores.Item2); } } - if (isEqual) + //normalize and get final score + for (int i = 0; i < scoreList.Count; i++) { - return p; + dst.Items[i].Score = DTRootCauseAnalyzer.GetFinalScore(scoreList[i].Surprise / sumSurprise, Math.Abs(scoreList[i].ExplainaryScore) / sumEp, beta); + } } + else if (dst.Items.Count == 1) + { + Point rootCausePoint = DTRootCauseAnalyzer.FindPointByDimension(dst.Items[0].Dimension, points); - return null; + Point anomalyPoint = DTRootCauseAnalyzer.FindPointByDimension(anomalyRoot, points); + if (anomalyPoint != null && rootCausePoint != null) + { + Tuple scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); + dst.Items[0].Score = DTRootCauseAnalyzer.GetFinalScore(scores.Item1, scores.Item2, beta); + } + } } + private static double GetSurpriseScore(Point rootCausePoint, Point anomalyPoint) + { + double p = rootCausePoint.ExpectedValue / anomalyPoint.ExpectedValue; + double q = rootCausePoint.Value / anomalyPoint.Value; + double surprise = 0.5 * (p * 
DTRootCauseAnalyzer.Log2(2 * p / (p + q)) + q * DTRootCauseAnalyzer.Log2(2 * q / (p + q))); + + return surprise; + } + + private static double GetFinalScore(double surprise, double ep, double beta) + { + return Math.Max(1, beta * surprise + (1 - beta) * ep); + } + + private static Tuple GetSupriseAndExplainaryScore(Point rootCausePoint, Point anomalyPoint) + { + double surprise = DTRootCauseAnalyzer.GetSurpriseScore(rootCausePoint, anomalyPoint); + double ep = (rootCausePoint.Value - rootCausePoint.ExpectedValue) / (anomalyPoint.Value - anomalyPoint.ExpectedValue); + + return new Tuple(surprise, ep); + } private static Dictionary UpdateDimensionValue(Dictionary dimension, string key, string value) { Dictionary newDim = new Dictionary(dimension); @@ -624,28 +671,6 @@ private static void UpdateDistribution(Dictionary distribution, Lis } } - private static int GetValueCountInDimKey(List points, string dimKey) - { - HashSet values = new HashSet(); - - foreach (Point point in points) - { - string dimVal = point.Dimensions[dimKey]; - if (!values.Contains(dimVal)) - { - values.Add(dimVal); - } - } - - return values.Count; - } - - //threshold needs to be updated, example, 8/23/2019 11:00:00 AM - private static bool IsLargeEntropyGain(double total, double dimEntropy) - { - return total - dimEntropy > 0.2; - } - public static double Log2(double val) { return Math.Log(val) / Math.Log(2); @@ -741,7 +766,7 @@ public Point(double value, double expectedValue, bool isAnomaly, Dictionary Points { get; set; } + + public MetricSlice(DateTime timeStamp, List points) + { + TimeStamp = timeStamp; + Points = points; + } + } + public sealed class BestDimension { public string DimensionKey; @@ -792,26 +829,26 @@ public sealed class RootCauseItem : IEquatable { public double Score; public string Path; - public Dictionary RootCause; + public Dictionary Dimension; public AnomalyDirection Direction; public RootCauseItem(Dictionary rootCause) { - RootCause = rootCause; + Dimension = rootCause; 
} public RootCauseItem(Dictionary rootCause, string path) { - RootCause = rootCause; + Dimension = rootCause; Path = path; } public bool Equals(RootCauseItem other) { - if (RootCause.Count == other.RootCause.Count) + if (Dimension.Count == other.Dimension.Count) { - foreach (KeyValuePair item in RootCause) + foreach (KeyValuePair item in Dimension) { - if (!other.RootCause[item.Key].Equals(item.Value)) + if (!other.Dimension[item.Key].Equals(item.Value)) { return false; } diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs index 450888906e..90bb142be0 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs @@ -28,175 +28,28 @@ namespace Microsoft.ML.Transforms.TimeSeries { - public sealed class RootCauseLocalizationInputTypeAttribute : DataViewTypeAttribute - { - /// - /// Create a root cause localizagin input type. - /// - public RootCauseLocalizationInputTypeAttribute() - { - } - - /// - /// Equal function. - /// - public override bool Equals(DataViewTypeAttribute other) - { - if (!(other is RootCauseLocalizationInputTypeAttribute otherAttribute)) - return false; - return true; - } - - /// - /// Produce the same hash code for all RootCauseLocalizationInputTypeAttribute. - /// - public override int GetHashCode() - { - return 0; - } - - public override void Register() - { - DataViewTypeManager.Register(new RootCauseLocalizationInputDataViewType(), typeof(RootCauseLocalizationInput), this); - } - } - - public sealed class RootCauseTypeAttribute : DataViewTypeAttribute - { - /// - /// Create an root cause type. - /// - public RootCauseTypeAttribute() - { - } - - /// - /// RootCauseTypeAttribute with the same type should equal. 
- /// - public override bool Equals(DataViewTypeAttribute other) - { - if (other is RootCauseTypeAttribute otherAttribute) - return true; - return false; - } - - /// - /// Produce the same hash code for all RootCauseTypeAttribute. - /// - public override int GetHashCode() - { - return 0; - } - - public override void Register() - { - DataViewTypeManager.Register(new RootCauseDataViewType(), typeof(RootCause), this); - } - } - - public sealed class RootCause - { - public List Items { get; set; } - } - - public sealed class RootCauseLocalizationInput - { - public DateTime AnomalyTimestamp { get; set; } - - public Dictionary AnomalyDimensions { get; set; } - - public List Slices { get; set; } - - public AggregateType AggType { get; set; } - - public string AggSymbol { get; set; } - - public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateType, string aggregateSymbol) - { - AnomalyTimestamp = anomalyTimestamp; - AnomalyDimensions = anomalyDimensions; - Slices = slices; - AggType = aggregateType; - AggSymbol = aggregateSymbol; - } - public void Dispose() - { - AnomalyDimensions = null; - Slices = null; - } - } - - public sealed class MetricSlice - { - public DateTime TimeStamp { get; set; } - public List Points { get; set; } - - public MetricSlice(DateTime timeStamp, List points) - { - TimeStamp = timeStamp; - Points = points; - } - } - - public sealed class RootCauseDataViewType : StructuredDataViewType - { - public RootCauseDataViewType() - : base(typeof(RootCause)) - { - } - - public override bool Equals(DataViewType other) - { - if (other == this) - return true; - if (!(other is RootCauseDataViewType tmp)) - return false; - return true; - } - - public override int GetHashCode() - { - return 0; - } - - public override string ToString() - { - return typeof(RootCauseDataViewType).Name; - } - } - - public sealed class RootCauseLocalizationInputDataViewType : StructuredDataViewType + /// + /// 
resulting from fitting an . + /// + public sealed class DTRootCauseLocalizationTransformer : OneToOneTransformerBase { - public RootCauseLocalizationInputDataViewType() - : base(typeof(RootCauseLocalizationInput)) - { - } - - public override bool Equals(DataViewType other) - { - if (!(other is RootCauseLocalizationInputDataViewType tmp)) - return false; - return true; - } + internal const string Summary = "Localize root cause for anomaly."; + internal const string UserName = "DT Root Cause Localization Transform"; + internal const string LoaderSignature = "DTRootCauseLTransform"; - public override int GetHashCode() + private static VersionInfo GetVersionInfo() { - return 0; + return new VersionInfo( + modelSignature: "DTRCL", + verWrittenCur: 0x00010001, // Initial + verReadableCur: 0x00010001, + verWeCanReadBack: 0x00010001, + loaderSignature: LoaderSignature, + loaderAssemblyName: typeof(DTRootCauseLocalizationTransformer).Assembly.FullName); } - public override string ToString() - { - return typeof(RootCauseLocalizationInputDataViewType).Name; - } - } + private const string RegistrationName = "RootCauseLocalization"; - // REVIEW: Rewrite as LambdaTransform to simplify. - // REVIEW: Should it be separate transform or part of ImageResizerTransform? - /// - /// resulting from fitting an . 
- /// - public sealed class DTRootCauseLocalizationTransformer : OneToOneTransformerBase - { internal sealed class Column : OneToOneColumn { internal static Column Parse(string str) @@ -224,24 +77,6 @@ internal class Options : TransformInputBase } - internal const string Summary = "Localize root cause for anomaly."; - - internal const string UserName = "DT Root Cause Localization Transform"; - internal const string LoaderSignature = "DTRootCauseLTransform"; - - private static VersionInfo GetVersionInfo() - { - return new VersionInfo( - modelSignature: "DTRCL", - verWrittenCur: 0x00010001, // Initial - verReadableCur: 0x00010001, - verWeCanReadBack: 0x00010001, - loaderSignature: LoaderSignature, - loaderAssemblyName: typeof(DTRootCauseLocalizationTransformer).Assembly.FullName); - } - - private const string RegistrationName = "RootCauseLocalization"; - /// /// The input and output column pairs passed to this . /// @@ -409,193 +244,12 @@ private void LocalizeRootCauses(RootCauseLocalizationInput src, ref RootCause ds { return; } - Dictionary subDim = DTRootCauseAnalyzer.GetsubDim(src.AnomalyDimensions, dimensionInfo.DetailDim); - - List totalPoints = GetTotalPointsForAnomalyTimestamp(src, subDim); - - GetRootCauseList(src, ref dst, dimensionInfo, totalPoints, subDim); + Dictionary subDim = DTRootCauseAnalyzer.GetSubDim(src.AnomalyDimensions, dimensionInfo.DetailDim); + List totalPoints = DTRootCauseAnalyzer.GetTotalPointsForAnomalyTimestamp(src, subDim); - GetRootCauseScore(totalPoints, src.AnomalyDimensions, ref dst); - } - - private List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src, Dictionary subDim) - { - List points = new List(); - foreach (MetricSlice slice in src.Slices) - { - if (slice.TimeStamp.Equals(src.AnomalyTimestamp)) - { - points = slice.Points; - } - } - - List totalPoints = DTRootCauseAnalyzer.SelectPoints(points, subDim); - - return totalPoints; - } - - private void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause 
dst, DimensionInfo dimensionInfo, List totalPoints, Dictionary subDim) - { - PointTree pointTree = DTRootCauseAnalyzer.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType); - PointTree anomalyTree = DTRootCauseAnalyzer.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType, true); - - // which means there is no all up here, we would return empty list; in ML.net , should we do the same thing? todo - if (anomalyTree.ParentNode == null) - { - return; - } - List rootCauses = new List(); - // no point under anomaly dimension - if (totalPoints.Count == 0) - { - if (anomalyTree.Leaves.Count != 0) - { - throw new Exception("point leaves not match with anomaly leaves"); - } - - rootCauses.AddRange(DTRootCauseAnalyzer.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree, src.AnomalyDimensions)); - } - else - { - double totalEntropy = 1; - if (anomalyTree.Leaves.Count > 0) - { - // update from total points to pointTree.Leaves.Count - totalEntropy = DTRootCauseAnalyzer.GetEntropy(pointTree.Leaves.Count, anomalyTree.Leaves.Count); - } - - if (totalEntropy > 0.9) - { - if (dimensionInfo.AggDim.Count == 1) - { - //root cause is itself; - rootCauses.Add(new RootCauseItem(src.AnomalyDimensions)); - } - else - { - // update from total points to pointTree.Leaves - rootCauses.AddRange(DTRootCauseAnalyzer.LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); - } - } - //comment here, as LocalizeRootCauseByAnomaly is not right, for example, data 2019-08-08T19:30:00Z seconds - 1566383400 - //else if (totalEntropy < 0.5) - //{ - // // todo- need improve, if anomalytree.leave.count == 0, need to filter by all up level - // //if (DTRootCauseLocalizationUtils.IsAnomalous(pointTree.Leaves.Count, anomalyTree.Leaves.Count)) - // //{ - // // //root cause is itself; - // // dst.Items.Add(new RootCauseItem(src.AnomalyDimensions)); - // //} - // //else - // //{ - // 
rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree, src.AnomalyDimensions)); - // //} - //} - else - { - // remove this part, has no different to the result - //if (dimensionInfo.AggDim.Count == 1) - //{ - // //update totalPoints to .leaves - // //because we have known the dimension, so by anomaly - // //rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(totalPoints, anomalyTree, src.AnomalyDimensions)); - // rootCauses.AddRange(DTRootCauseLocalizationUtils.LocalizeRootCauseByAnomaly(pointTree.Leaves, anomalyTree, src.AnomalyDimensions)); - //} - //else - { - //update totalPoints to .leaves - rootCauses.AddRange(DTRootCauseAnalyzer.LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); - } - } - - dst.Items = rootCauses; - } - } - - private void GetRootCauseScore(List points, Dictionary anomalyRoot, ref RootCause dst) - { - - if (dst.Items.Count > 1) - { - //get surprise value and explanary power value - Point anomalyPoint = DTRootCauseAnalyzer.FindPointByDimension(anomalyRoot, points); - - double sumSurprise = 0; - double sumEp = 0; - List scoreList = new List(); - - foreach (RootCauseItem item in dst.Items) - { - Point rootCausePoint = DTRootCauseAnalyzer.FindPointByDimension(item.RootCause, points); - if (rootCausePoint != null) - { - if (rootCausePoint.ExpectedValue < rootCausePoint.Value) - { - item.Direction = AnomalyDirection.Up; - } - else - { - item.Direction = AnomalyDirection.Down; - } - } - - if (anomalyPoint != null && rootCausePoint != null) - { - double surprise = GetSurpriseScore(rootCausePoint, anomalyPoint); - - double ep = (rootCausePoint.Value - rootCausePoint.ExpectedValue) / (anomalyPoint.Value - anomalyPoint.ExpectedValue); - - scoreList.Add(new RootCauseScore(surprise, ep)); - sumSurprise += surprise; - sumEp += Math.Abs(ep); - } - } - - //normalize and get final score - for (int i = 0; i < scoreList.Count; i++) - 
{ - dst.Items[i].Score = GetFinalScore(scoreList[i].Surprise / sumSurprise, Math.Abs(scoreList[i].ExplainaryScore) / sumEp); - - } - } - else if (dst.Items.Count == 1) - { - //surprise and expananory , max is 1 - Point rootCausePoint = DTRootCauseAnalyzer.FindPointByDimension(dst.Items[0].RootCause, points); - if (rootCausePoint != null) - { - if (rootCausePoint.ExpectedValue < rootCausePoint.Value) - { - dst.Items[0].Direction = AnomalyDirection.Up; - } - else - { - dst.Items[0].Direction = AnomalyDirection.Down; - } - } - - Point anomalyPoint = DTRootCauseAnalyzer.FindPointByDimension(anomalyRoot, points); - if (anomalyPoint != null && rootCausePoint != null) - { - double surprise = GetSurpriseScore(rootCausePoint, anomalyPoint); - double ep = (rootCausePoint.Value - rootCausePoint.ExpectedValue) / (anomalyPoint.Value - anomalyPoint.ExpectedValue); - dst.Items[0].Score = GetFinalScore(surprise, ep); - } - } - } - - private double GetSurpriseScore(Point rootCausePoint, Point anomalyPoint) - { - double p = rootCausePoint.ExpectedValue / anomalyPoint.ExpectedValue; - double q = rootCausePoint.Value / anomalyPoint.Value; - double surprise = 0.5 * (p * DTRootCauseAnalyzer.Log2(2 * p / (p + q)) + q * DTRootCauseAnalyzer.Log2(2 * q / (p + q))); - - return surprise; - } - - private double GetFinalScore(double surprise, double ep) - { - return Math.Max(1, _parent._beta * surprise + (1 - _parent._beta) * ep); + DTRootCauseAnalyzer.GetRootCauseList(src, ref dst, dimensionInfo, totalPoints, subDim); + DTRootCauseAnalyzer.UpdateRootCauseDirection(totalPoints,ref dst); + DTRootCauseAnalyzer.GetRootCauseScore(totalPoints, src.AnomalyDimensions, ref dst, _parent._beta); } } } diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs new file mode 100644 index 0000000000..1e5e3f881e --- /dev/null +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs @@ -0,0 +1,168 @@ +// Licensed to the .NET 
Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using Microsoft.ML.Data; + +namespace Microsoft.ML.TimeSeries +{ + /// + /// Allows a member to be marked as a , primarily allowing one to set + /// root cause localization input. + /// + public sealed class RootCauseLocalizationInputTypeAttribute : DataViewTypeAttribute + { + /// + /// Create a root cause localizagin input type. + /// + public RootCauseLocalizationInputTypeAttribute() + { + } + + /// + /// Equal function. + /// + public override bool Equals(DataViewTypeAttribute other) + { + if (!(other is RootCauseLocalizationInputTypeAttribute otherAttribute)) + return false; + return true; + } + + /// + /// Produce the same hash code for all RootCauseLocalizationInputTypeAttribute. + /// + public override int GetHashCode() + { + return 0; + } + + public override void Register() + { + DataViewTypeManager.Register(new RootCauseLocalizationInputDataViewType(), typeof(RootCauseLocalizationInput), this); + } + } + + /// + /// Allows a member to be marked as a , primarily allowing one to set + /// root cause result. + /// + public sealed class RootCauseTypeAttribute : DataViewTypeAttribute + { + /// + /// Create an root cause type. + /// + public RootCauseTypeAttribute() + { + } + + /// + /// RootCauseTypeAttribute with the same type should equal. + /// + public override bool Equals(DataViewTypeAttribute other) + { + if (other is RootCauseTypeAttribute otherAttribute) + return true; + return false; + } + + /// + /// Produce the same hash code for all RootCauseTypeAttribute. 
+ /// + public override int GetHashCode() + { + return 0; + } + + public override void Register() + { + DataViewTypeManager.Register(new RootCauseDataViewType(), typeof(RootCause), this); + } + } + + public sealed class RootCause + { + public List Items { get; set; } + } + + public sealed class RootCauseLocalizationInput + { + public DateTime AnomalyTimestamp { get; set; } + + public Dictionary AnomalyDimensions { get; set; } + + public List Slices { get; set; } + + public AggregateType AggType { get; set; } + + public string AggSymbol { get; set; } + + public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateType, string aggregateSymbol) + { + AnomalyTimestamp = anomalyTimestamp; + AnomalyDimensions = anomalyDimensions; + Slices = slices; + AggType = aggregateType; + AggSymbol = aggregateSymbol; + } + public void Dispose() + { + AnomalyDimensions = null; + Slices = null; + } + } + + public sealed class RootCauseDataViewType : StructuredDataViewType + { + public RootCauseDataViewType() + : base(typeof(RootCause)) + { + } + + public override bool Equals(DataViewType other) + { + if (other == this) + return true; + if (!(other is RootCauseDataViewType tmp)) + return false; + return true; + } + + public override int GetHashCode() + { + return 0; + } + + public override string ToString() + { + return typeof(RootCauseDataViewType).Name; + } + } + + public sealed class RootCauseLocalizationInputDataViewType : StructuredDataViewType + { + public RootCauseLocalizationInputDataViewType() + : base(typeof(RootCauseLocalizationInput)) + { + } + + public override bool Equals(DataViewType other) + { + if (!(other is RootCauseLocalizationInputDataViewType tmp)) + return false; + return true; + } + + public override int GetHashCode() + { + return 0; + } + + public override string ToString() + { + return typeof(RootCauseLocalizationInputDataViewType).Name; + } + } +} diff --git 
a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 223be232df..3329a35b27 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -92,6 +92,8 @@ private sealed class SrCnnAnomalyDetection public double[] Prediction { get; set; } } + private static string _aggSymbol = "##SUM##"; + [Fact] public void ChangeDetection() { @@ -549,7 +551,7 @@ public RootCauseLocalizationTransformedData() public void RootCauseLocalizationWithDT() { // Create an root cause localizatiom input list. - var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, AggregateType.Sum, "SUM"), new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, AggregateType.Avg, "AVG") }; + var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, AggregateType.Sum, "##SUM##") }; var ml = new MLContext(1); // Convert the list of root cause data to an IDataView object, which is consumable by ML.NET API. 
@@ -570,16 +572,103 @@ public void RootCauseLocalizationWithDT() foreach (var dataPoint in transformedDataPoints) { var rootCause = dataPoint.RootCause; - Assert.NotNull(rootCause); } var engine = ml.Model.CreatePredictionEngine(model); - var newRootCauseInput = new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, AggregateType.Sum, "SUM"); + DateTime timeStamp = GetCurrentTimestamp(); + var newRootCauseInput = new RootCauseLocalizationData(timeStamp, GetAnomalyDimension(), new List() { new MetricSlice(timeStamp, GetRootCauseLocalizationPoints()) }, AggregateType.Sum, _aggSymbol); var transformedRootCause = engine.Predict(newRootCauseInput); Assert.NotNull(transformedRootCause); - //todo - will add more tests here when onboarding mock data + Assert.Equal(1,(int)transformedRootCause.RootCause.Items.Count); + + Dictionary expectedDim = new Dictionary(); + expectedDim.Add("Country","UK"); + expectedDim.Add("DeviceType",_aggSymbol); + expectedDim.Add("DataCenter","DC1"); + + foreach (KeyValuePair pair in transformedRootCause.RootCause.Items[0].Dimension) { + Assert.Equal(expectedDim[pair.Key], pair.Value); + } + } + + + private static List GetRootCauseLocalizationPoints() + { + List points = new List(); + + Dictionary dic1 = new Dictionary(); + dic1.Add("Country", "UK"); + dic1.Add("DeviceType", "Laptop"); + dic1.Add("DataCenter", "DC1"); + points.Add(new Point(200, 100, true, dic1)); + + Dictionary dic2 = new Dictionary(); + dic2.Add("Country", "UK"); + dic2.Add("DeviceType", "Mobile"); + dic2.Add("DataCenter", "DC1"); + points.Add(new Point(1000, 100, true, dic2)); + + Dictionary dic3 = new Dictionary(); + dic3.Add("Country", "UK"); + dic3.Add("DeviceType", _aggSymbol); + dic3.Add("DataCenter", "DC1"); + points.Add(new Point(1200, 200, true, dic3)); + + Dictionary dic4 = new Dictionary(); + dic4.Add("Country", "UK"); + dic4.Add("DeviceType", "Laptop"); + dic4.Add("DataCenter", "DC2"); + 
points.Add(new Point(100, 100, false, dic4)); + + Dictionary dic5 = new Dictionary(); + dic5.Add("Country", "UK"); + dic5.Add("DeviceType", "Mobile"); + dic5.Add("DataCenter", "DC2"); + points.Add(new Point(200, 200, false, dic5)); + + Dictionary dic6 = new Dictionary(); + dic6.Add("Country", "UK"); + dic6.Add("DeviceType", _aggSymbol); + dic6.Add("DataCenter", "DC2"); + points.Add(new Point(300, 300, false, dic6)); + + Dictionary dic7 = new Dictionary(); + dic7.Add("Country", "UK"); + dic7.Add("DeviceType", _aggSymbol); + dic7.Add("DataCenter", _aggSymbol); + points.Add(new Point(1500, 500, true, dic7)); + + Dictionary dic8 = new Dictionary(); + dic8.Add("Country", "UK"); + dic8.Add("DeviceType", "Laptop"); + dic8.Add("DataCenter", _aggSymbol); + points.Add(new Point(300, 200, true, dic8)); + + Dictionary dic9 = new Dictionary(); + dic9.Add("Country", "UK"); + dic9.Add("DeviceType", "Mobile"); + dic9.Add("DataCenter", _aggSymbol); + points.Add(new Point(1200, 300, true, dic9)); + + return points; } + + private static Dictionary GetAnomalyDimension() + { + Dictionary dim = new Dictionary(); + dim.Add("Country", "UK"); + dim.Add("DeviceType", _aggSymbol); + dim.Add("DataCenter", _aggSymbol); + + return dim; + } + + private static DateTime GetCurrentTimestamp() + { + return new DateTime(); + } + } } From 3a1d1c519198f953bf9f675196fb60a0358bbe1f Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Mon, 23 Mar 2020 15:53:24 +0800 Subject: [PATCH 11/49] add root cause localization algorithm --- .../TimeSeries/LocalizeRootCauseByDT.cs | 115 ++- .../TimeSeries/LocalizeRootCauseEvaluation.cs | 409 ++++++++ .../DTRootCauseAnalyzer.cs | 905 ++++++++++++++++++ .../DTRootCauseLocalization.cs | 301 ++---- .../DTRootCauseLocalizationType.cs | 168 ++++ .../TimeSeriesDirectApi.cs | 109 ++- 6 files changed, 1745 insertions(+), 262 deletions(-) create mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs create mode 100644 
src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs create mode 100644 src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs index 94d117fd45..326d5f360d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs @@ -1,24 +1,23 @@ using System; using System.Collections.Generic; -using System.Drawing; -using System.IO; +using System.Runtime.InteropServices.ComTypes; using Microsoft.ML; -using Microsoft.ML.Data; -using Microsoft.ML.Transforms.TimeSeries; +using Microsoft.ML.TimeSeries; namespace Samples.Dynamic { - public static class LocalizeRootCause + public static class LocalizeRootCauseByDT { + private static string AGG_SYMBOL = "##SUM##"; public static void Example() { // Create a new ML context, for ML.NET operations. It can be used for // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); - // Create an empty list as the dataset. The 'NormalizeText' API does not - // require training data as the estimator ('TextNormalizingEstimator') - // created by 'NormalizeText' API is not a trainable estimator. The + // Create an empty list as the dataset. The 'DTRootCauseLocalization' API does not + // require training data as the estimator ('DTRootCauseLocalizationEstimator') + // created by 'DTRootCauseLocalization' API is not a trainable estimator. The // empty list is only needed to pass input schema to the pipeline. var emptySamples = new List(); @@ -31,21 +30,106 @@ public static void Example() // Fit to data. var localizeTransformer = localizePipeline.Fit(emptyDataView); - // Create the prediction engine to get the root cause result from the - // input data. 
+ // Create the prediction engine to get the root cause result from the input data. var predictionEngine = mlContext.Model.CreatePredictionEngine(localizeTransformer); // Call the prediction API. - var data = new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, "SUM", "SUM"); + DateTime timestamp = GetTimestamp(); + var data = new RootCauseLocalizationData(timestamp, GetAnomalyDimension(), new List() { new MetricSlice(timestamp, GetPoints()) }, AggregateType.Sum, AGG_SYMBOL); var prediction = predictionEngine.Predict(data); // Print the localization result. - Console.WriteLine($"Localized result: {prediction.RootCause}"); + int count = 0; + foreach (RootCauseItem item in prediction.RootCause.Items) { + count++; + Console.WriteLine($"Root cause item #{count} ..."); + foreach (KeyValuePair pair in item.Dimension) { + Console.WriteLine($"{pair.Key} = {pair.Value}"); + } + } + + //Item #1 ... + //Country = UK + //DeviceType = ##SUM## + //DataCenter = DC1 + } + + private static List GetPoints() { + List points = new List(); + + Dictionary dic1 = new Dictionary(); + dic1.Add("Country","UK"); + dic1.Add("DeviceType", "Laptop"); + dic1.Add("DataCenter", "DC1"); + points.Add(new Point(200, 100, true, dic1)); + + Dictionary dic2 = new Dictionary(); + dic2.Add("Country", "UK"); + dic2.Add("DeviceType", "Mobile"); + dic2.Add("DataCenter", "DC1"); + points.Add(new Point(1000, 100, true, dic2)); + + Dictionary dic3 = new Dictionary(); + dic3.Add("Country", "UK"); + dic3.Add("DeviceType", AGG_SYMBOL); + dic3.Add("DataCenter", "DC1"); + points.Add(new Point(1200, 200, true, dic3)); + + Dictionary dic4 = new Dictionary(); + dic4.Add("Country", "UK"); + dic4.Add("DeviceType", "Laptop"); + dic4.Add("DataCenter", "DC2"); + points.Add(new Point(100, 100, false, dic4)); + + Dictionary dic5 = new Dictionary(); + dic5.Add("Country", "UK"); + dic5.Add("DeviceType", "Mobile"); + dic5.Add("DataCenter", "DC2"); + 
points.Add(new Point(200, 200, false, dic5)); + + Dictionary dic6 = new Dictionary(); + dic6.Add("Country", "UK"); + dic6.Add("DeviceType", AGG_SYMBOL); + dic6.Add("DataCenter", "DC2"); + points.Add(new Point(300, 300, false, dic6)); + + Dictionary dic7 = new Dictionary(); + dic7.Add("Country", "UK"); + dic7.Add("DeviceType", AGG_SYMBOL); + dic7.Add("DataCenter", AGG_SYMBOL); + points.Add(new Point(1500, 500, true, dic7)); + + Dictionary dic8 = new Dictionary(); + dic8.Add("Country", "UK"); + dic8.Add("DeviceType", "Laptop"); + dic8.Add("DataCenter", AGG_SYMBOL); + points.Add(new Point(300, 200, true, dic8)); + + Dictionary dic9 = new Dictionary(); + dic9.Add("Country", "UK"); + dic9.Add("DeviceType", "Mobile"); + dic9.Add("DataCenter", AGG_SYMBOL); + points.Add(new Point(1200, 300, true, dic9)); + + return points; + } + + private static Dictionary GetAnomalyDimension() { + Dictionary dim = new Dictionary(); + dim.Add("Country", "UK"); + dim.Add("DeviceType", AGG_SYMBOL); + dim.Add("DataCenter", AGG_SYMBOL); + + return dim; + } + + private static DateTime GetTimestamp() + { + return new DateTime(); } - private class RootCauseLocalizationData { [RootCauseLocalizationInputType] @@ -56,9 +140,10 @@ public RootCauseLocalizationData() Input = null; } - public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices,String aggregateType, string aggregateSymbol) + public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateType, string aggregateSymbol) { - Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, DTRootCauseLocalizationEstimator.AggregateType.Sum, aggregateSymbol); + Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, aggregateType, + aggregateSymbol); } } diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs new file mode 100644 index 0000000000..47163a49dc --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs @@ -0,0 +1,409 @@ +using System; +using System.Collections.Generic; +using System.Data; +using Microsoft.ML; +using Microsoft.ML.TimeSeries; +using Microsoft.ML.Transforms.TimeSeries; + +using Microsoft.VisualBasic.FileIO; + +namespace Samples.Dynamic.Transforms.TimeSeries +{ + public static class LocalizeRootCauseEvaluation + { + public static void Example() + { + Dictionary> rootNodeMap = GetAnomalyRootMap(); + Dictionary>> labeledRootCauseMap = GetLabeledRootCauseMap(); + + string aggSymbol = "##EMPTY##awqegp##"; + + int totalTp = 0; + int totalFp = 0; + int totalFn = 0; + int totalCount = 0; + + bool exactly = false; + + int totalRunTime = 0; + + foreach (KeyValuePair> item in rootNodeMap) + { + DateTime timeStamp = item.Key; + + DateTime filterTime = DateTime.ParseExact("2019-11-13 13:00:00,000", "yyyy-MM-dd HH:mm:ss,fff", + System.Globalization.CultureInfo.InvariantCulture); + + //if (timeStamp.CompareTo(filterTime).Equals(0)) + { + int seconds = Convert.ToInt32(timeStamp.Subtract(new DateTime(1970, 1, 1, 0, 0, 0, 0)).TotalSeconds); + string path = String.Format("D:/rootcause/Dataset_yaniv/raw_data_201908_202002/{0}.csv", seconds); + List points = GetPoints(path); + List slices = new List(); + slices.Add(new MetricSlice(timeStamp, points)); + + PredictionEngine engine = GetRootCausePredictionEngine(); + + var newRootCauseInput = new RootCauseLocalizationData(timeStamp, rootNodeMap[timeStamp], new List() { new MetricSlice(timeStamp, points) }, AggregateType.Sum, aggSymbol); + + List list = new List(); + int startTime = System.Environment.TickCount; + GetRootCause(list, newRootCauseInput, engine); + int endTime = System.Environment.TickCount; + int runTime = endTime - startTime; + totalRunTime += 
runTime; + + List> labeledRootCause = labeledRootCauseMap[timeStamp]; + List> detectedRootCause = ConvertRootCauseItemToDic(list); + RemoveAggSymbol(detectedRootCause, aggSymbol); + + Tuple evaluation = EvaluateRootCauseResult(detectedRootCause, labeledRootCause, exactly, timeStamp); + totalTp += evaluation.Item1; + totalFp += evaluation.Item2; + totalFn += evaluation.Item3; + totalCount++; + } + } + + double precision = (double)totalTp / (totalTp + totalFp); + double recall = (double)totalTp / (totalTp + totalFn); + double f1 = 2 * precision * recall / (precision + recall); + Console.WriteLine(String.Format("Total Count : {0}, TP: {1}, FP: {2}, FN: {3}", totalCount, totalTp, totalFp, totalFn)); + Console.WriteLine(String.Format("Precision : {0}, Recall: {1}, F1: {2}", precision, recall, f1)); + Console.WriteLine(String.Format("Mean calculation time is : {0} ms", (double)totalRunTime / totalCount)); + } + + private static Tuple EvaluateRootCauseResult(List> detectedRootCause, List> labeledRootCause, bool exactly, DateTime timeStamp) + { + int tp = 0; + int fp = 0; + int fn; + List labelSet = new List(); + foreach (Dictionary cause in detectedRootCause) + { + string tpCause = FindTruePositive(cause, labeledRootCause, exactly); + if (tpCause == null) + { + fp++; + Console.WriteLine(String.Format("FP : timestamp - {0}, detected root cause ", timeStamp)); + Console.WriteLine(string.Join(Environment.NewLine, cause)); + Console.WriteLine(" "); + } + else + { + tp++; + labelSet.Add(tpCause); + } + } + + fn = labeledRootCause.Count - labelSet.Count; + if (fn != 0) + { + List> nCause = GetFNegtiveCause(labeledRootCause, labelSet); + if (nCause.Count > 0) + { + Console.WriteLine(String.Format("FN : timestamp - {0}, labeled root cause", timeStamp)); + foreach (Dictionary cause in nCause) + { + Console.WriteLine(string.Join(Environment.NewLine, cause)); + Console.WriteLine("---------------------"); + } + + } + } + + return new Tuple(tp, fp, fn); + } + + private static List> 
GetFNegtiveCause(List> labelCauses, List labelSet) + { + List> causeList = new List>(); + foreach (Dictionary cause in labelCauses) + { + if (!labelSet.Contains(GetDicHashCode(cause))) + { + causeList.Add(cause); + } + } + return causeList; + } + + private static string FindTruePositive(Dictionary cause, List> labelCauses, bool exactly) + { + foreach (Dictionary label in labelCauses) + { + string id = GetDicHashCode(label); + int compare = CompareCause(cause, label); + if (compare == 0) + { + return id; + } + else if (!exactly && (compare == 1 || compare == 2)) + { + return id; + } + } + return null; + } + + + private static string GetDicHashCode(Dictionary dic) + { + return dic.GetHashCode().ToString(); + } + + private static int CompareCause(Dictionary detect, Dictionary label) + { + if (detect.Equals(label)) + { + return 0; + } + else if (DTRootCauseAnalyzer.ContainsAll(detect, label)) + { + return 1; + } + else if (DTRootCauseAnalyzer.ContainsAll(label, detect)) + { + return 2; + } + return 3; + } + private static List> ConvertRootCauseItemToDic(List items) + { + List> list = new List>(); + foreach (RootCauseItem item in items) + { + list.Add(item.Dimension); + } + return list; + } + + private static void RemoveAggSymbol(List> dimensions, string aggSymbol) + { + foreach (Dictionary dim in dimensions) + { + foreach (string key in dim.Keys) + { + if (dim[key].Equals(aggSymbol)) + { + dim.Remove(key); + } + } + } + } + + private static PredictionEngine GetRootCausePredictionEngine() + { + //// Create an root cause localizatiom input list from csv. + var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, AggregateType.Sum, "SUM") }; + + + var ml = new MLContext(1); + // Convert the list of root cause data to an IDataView object, which is consumable by ML.NET API. 
+ var data = ml.Data.LoadFromEnumerable(rootCauseLocalizationData); + + // Create pipeline to localize root cause by decision tree. + var pipeline = ml.Transforms.LocalizeRootCauseByDT(nameof(RootCauseLocalizationTransformedData.RootCause), nameof(RootCauseLocalizationData.Input)); + + // Fit the model. + var model = pipeline.Fit(data); + + // Test path: input list -> IDataView -> Enumerable of RootCauseLocalizationInputs. + var transformedData = model.Transform(data); + + // Load input list in DataView back to Enumerable. + var transformedDataPoints = ml.Data.CreateEnumerable(transformedData, false); + + var engine = ml.Model.CreatePredictionEngine(model); + return engine; + } + + private static string _ocsDataCenter = "OCSDatacenter"; + private static string _appType = "AppType"; + private static string _releaseAudienceGroup = "Release_AudienceGroup"; + private static string _wacDatacenter = "WACDatacenter"; + private static string _requestType = "RequestType"; + private static string _statusCode = "StatusCode"; + + private static List _dimensionKeys = new List() { _ocsDataCenter, _appType, _releaseAudienceGroup, _wacDatacenter, _statusCode, _requestType }; + + private static Dictionary> GetAnomalyRootMap() + { + var anomalyRootData = GetDataTabletFromCSVFile("D:/rootcause/Dataset_yaniv/root_cause_201908_202002/anomaly_root.csv"); + + Dictionary> rootNodeMap = new Dictionary>(); + foreach (DataRow row in anomalyRootData.Rows) + { + // load the data, build the RootCauseInput, take care of empty value + long seconds = long.Parse(row["TimeStamp"].ToString()); + DateTime t = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddSeconds(seconds); + + Dictionary dimension = new Dictionary(); + foreach (string key in _dimensionKeys) + { + if (!row[key].ToString().Equals("")) + { + dimension.Add(key, row[key].ToString()); + } + } + + rootNodeMap.Add(t, dimension); + } + return rootNodeMap; + } + + private static Dictionary>> GetLabeledRootCauseMap() + { + var 
labeldRootCause = GetDataTabletFromCSVFile("D:/rootcause/Dataset_yaniv/root_cause_201908_202002/labeled_root_cause.csv"); + + Dictionary>> map = new Dictionary>>(); + foreach (DataRow row in labeldRootCause.Rows) + { + // load the data, build the labled result, take care of empty value + long seconds = long.Parse(row["TimeStamp"].ToString()); + DateTime t = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddSeconds(seconds); + + Dictionary dimension = new Dictionary(); + foreach (string key in _dimensionKeys) + { + if (!row[key].ToString().Equals("")) + { + dimension.Add(key, row[key].ToString()); + } + } + + if (map.ContainsKey(t)) + { + map[t].Add(dimension); + } + else + { + map.Add(t, new List>() { dimension }); + } + } + return map; + } + + private static List GetPoints(string path) + { + var inputData = GetDataTabletFromCSVFile(path); + + DateTime timeStamp = new DateTime(); + + List points = new List(); + foreach (DataRow row in inputData.Rows) + { + // load the data, build the RootCauseInput, take care of empty value + long seconds = long.Parse(row["TimeStamp"].ToString()); + timeStamp = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddSeconds(seconds); + double value = Double.Parse(row["Value"].ToString()); + double expectedValue = 0; + if (!row["ExpectedValue"].ToString().Equals("")) + { + expectedValue = Double.Parse(row["ExpectedValue"].ToString()); + } + bool isAnomaly = Boolean.Parse(row["IsAnomaly"].ToString()); + Dictionary dimension = new Dictionary(); + foreach (string key in _dimensionKeys) + { + if (!row[key].ToString().Equals("")) + { + dimension.Add(key, row[key].ToString()); + } + } + + points.Add(new Microsoft.ML.TimeSeries.Point(value, expectedValue, isAnomaly, dimension)); ; + } + + return points; + } + + private static void GetRootCause(List rootCauseList, RootCauseLocalizationData inputData, PredictionEngine engine) + { + RootCauseLocalizationTransformedData incrementalResult = engine.Predict(inputData); + + if 
(incrementalResult.RootCause.Items.Count == 0 || ( + incrementalResult.RootCause.Items.Count == 1 && incrementalResult.RootCause.Items[0].Dimension.Equals(inputData.Input.AnomalyDimensions) + )) + { + if (!rootCauseList.Contains(new RootCauseItem(inputData.Input.AnomalyDimensions))) + { + rootCauseList.Add(new RootCauseItem(inputData.Input.AnomalyDimensions)); + + } + return; + } + else + { + foreach (RootCauseItem item in incrementalResult.RootCause.Items) + { + RootCauseLocalizationData newData = new RootCauseLocalizationData(inputData.Input.AnomalyTimestamp, + item.Dimension, inputData.Input.Slices, inputData.Input.AggType, inputData.Input.AggSymbol); + GetRootCause(rootCauseList, newData, engine); + } + } + } + + private static DataTable GetDataTabletFromCSVFile(string filePath) + { + DataTable csvData = new DataTable(); + + + using (TextFieldParser csvReader = new TextFieldParser(filePath)) + { + csvReader.SetDelimiters(new string[] { "," }); + csvReader.HasFieldsEnclosedInQuotes = true; + string[] colFields = csvReader.ReadFields(); + foreach (string column in colFields) + { + DataColumn datecolumn = new DataColumn(column); + datecolumn.AllowDBNull = true; + csvData.Columns.Add(datecolumn); + } + + while (!csvReader.EndOfData) + { + string[] fieldData = csvReader.ReadFields(); + //Making empty value as null + for (int i = 0; i < fieldData.Length; i++) + { + if (fieldData[i] == "") + { + fieldData[i] = null; + } + } + csvData.Rows.Add(fieldData); + } + } + + return csvData; + } + + private class RootCauseLocalizationData + { + [RootCauseLocalizationInputType] + public RootCauseLocalizationInput Input { get; set; } + + public RootCauseLocalizationData() + { + Input = null; + } + + public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateteType, string aggregateSymbol) + { + Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, aggregateteType, aggregateSymbol); + 
} + } + + private class RootCauseLocalizationTransformedData + { + [RootCauseType()] + public RootCause RootCause { get; set; } + + public RootCauseLocalizationTransformedData() + { + RootCause = null; + } + } + } +} diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs new file mode 100644 index 0000000000..b7a66582b3 --- /dev/null +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs @@ -0,0 +1,905 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Linq; +using Microsoft.ML.Internal.Utilities; + +namespace Microsoft.ML.TimeSeries +{ + public class DTRootCauseAnalyzer + { + public static List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src, Dictionary subDim) + { + List points = new List(); + foreach (MetricSlice slice in src.Slices) + { + if (slice.TimeStamp.Equals(src.AnomalyTimestamp)) + { + points = slice.Points; + } + } + + List totalPoints = DTRootCauseAnalyzer.SelectPoints(points, subDim); + + return totalPoints; + } + + public static void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, DimensionInfo dimensionInfo, List totalPoints, Dictionary subDim) + { + PointTree pointTree = DTRootCauseAnalyzer.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType); + PointTree anomalyTree = DTRootCauseAnalyzer.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType, true); + + //which means there is no aggregation in the input anomaly dimension + if (anomalyTree.ParentNode == null) + { + return; + } + + List rootCauses = new List(); + // no point under anomaly dimension + if (totalPoints.Count == 0) + { + if (anomalyTree.Leaves.Count != 0) + { + throw new Exception("point leaves not match 
with anomaly leaves"); + } + + rootCauses.Add(new RootCauseItem(src.AnomalyDimensions)); + } + else + { + double totalEntropy = 1; + if (anomalyTree.Leaves.Count > 0) + { + totalEntropy = DTRootCauseAnalyzer.GetEntropy(pointTree.Leaves.Count, anomalyTree.Leaves.Count); + } + + if (totalEntropy > 0.9) + { + rootCauses.AddRange(DTRootCauseAnalyzer.LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); + } + else + { + rootCauses.AddRange(DTRootCauseAnalyzer.LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); + } + + dst.Items = rootCauses; + } + } + + public static DimensionInfo SeperateDimension(Dictionary dimensions, string aggSymbol) + { + DimensionInfo info = DimensionInfo.CreateDefaultInstance(); + foreach (KeyValuePair entry in dimensions) + { + string key = entry.Key; + if (aggSymbol.Equals(entry.Value)) + { + info.AggDim.Add(key); + } + else + { + info.DetailDim.Add(key); + } + } + + return info; + } + + protected static PointTree BuildPointTree(List pointList, List aggDims, Dictionary subDim, string aggSymbol, AggregateType aggType, bool filterByAnomaly = false) + { + PointTree tree = PointTree.CreateDefaultInstance(); + + foreach (Point point in pointList) + { + bool isValidPoint = true; + if (filterByAnomaly) + { + isValidPoint = point.IsAnomaly == true; + } + if (ContainsAll(point.Dimensions, subDim) && isValidPoint) + { + if (aggDims.Count == 0) + { + tree.ParentNode = point; + tree.Leaves.Add(point); + } + else + { + int aggNum = 0; + string nextDim = null; + + foreach (string dim in aggDims) + { + if (IsAggregationDimension(point.Dimensions[dim], aggSymbol)) + { + aggNum++; + } + else + { + nextDim = dim; + } + } + + if (aggNum == aggDims.Count) + { + tree.ParentNode = point; + } + else if (aggNum == aggDims.Count - 1) + { + if (!tree.ChildrenNodes.ContainsKey(nextDim)) + { + tree.ChildrenNodes.Add(nextDim, new List()); + } + 
tree.ChildrenNodes[nextDim].Add(point); + } + + if (aggNum == 0) + { + tree.Leaves.Add(point); + } + } + } + } + + return tree; + } + private static PointTree CompleteTreeBottomUp(PointTree tree, AggregateType aggType, string aggSymbol, List aggDims) + { + + if (tree.Leaves.Count == 0) return tree; + + Dictionary> map = new Dictionary>(); + foreach (Point p in tree.Leaves) + { + foreach (KeyValuePair keyValuePair in p.Dimensions) + { + if (aggDims.Contains(keyValuePair.Key)) + { + if (map.ContainsKey(keyValuePair.Key)) + { + map[keyValuePair.Key].Add(keyValuePair.Value); + } + else + { + map.Add(keyValuePair.Key, new HashSet() { keyValuePair.Value }); + } + } + } + } + + foreach (KeyValuePair> pair in map) + { + if (tree.ChildrenNodes.ContainsKey(pair.Key)) + { + if (tree.ChildrenNodes[pair.Key].Count < pair.Value.Count) + { + foreach (string value in pair.Value) + { + if (!IsAggDimensionExisted(pair.Key, value, tree.ChildrenNodes[pair.Key])) + { + Point p = SimulateBottomUpValue(tree.Leaves, pair.Key, value, aggType, aggSymbol); + tree.ChildrenNodes[pair.Key].Add(p); + } + } + } + } + else + { + List childPoints = new List(); + foreach (string value in pair.Value) + { + //simulate the aggregation value + Point p = SimulateBottomUpValue(tree.Leaves, pair.Key, value, aggType, aggSymbol); + childPoints.Add(p); + } + + tree.ChildrenNodes.Add(pair.Key, childPoints); + } + } + + return tree; + } + + private static bool IsAggDimensionExisted(string key, string value, List points) + { + foreach (Point p in points) + { + if (p.Dimensions[key].Equals(value)) + { + return true; + } + } + return false; + } + + private static Point SimulateBottomUpValue(List leaves, string key, string keyValue, AggregateType type, string aggSymbol) + { + Point p = null; + + Dictionary dimension = new Dictionary(); + + dimension.Add(key, keyValue); + + foreach (KeyValuePair pair in leaves[0].Dimensions) + { + if (!pair.Key.Equals(key)) + { + dimension.Add(pair.Key, aggSymbol); + } + } + + if 
(type.Equals(AggregateType.Sum)) + { + + bool isAnomaly = false; + double value = 0; + double expectedValue = 0; + foreach (Point leave in leaves) + { + + if (leave.Dimensions.ContainsKey(key) && leave.Dimensions[key].Equals(keyValue)) + { + value += leave.Value; + expectedValue = leave.ExpectedValue; + isAnomaly = isAnomaly || leave.IsAnomaly; + } + } + + p = new Point(value, expectedValue, isAnomaly, dimension); + } + + return p; + } + + public static Dictionary GetSubDim(Dictionary dimension, List keyList) + { + Dictionary subDim = new Dictionary(); + + foreach (String dim in keyList) + { + subDim.Add(dim, dimension[dim]); + } + return subDim; + } + + protected static List SelectPoints(List points, Dictionary subDim) + { + List list = new List(); + + foreach (Point point in points) + { + if (ContainsAll(point.Dimensions, subDim)) + { + //remove duplicated points + if (!list.Contains(point)) + { + list.Add(point); + } + } + } + + return list; + } + + protected static List LocalizeRootCauseByDimension(List totalPoints, PointTree anomalyTree, PointTree pointTree, double totoalEntropy, Dictionary anomalyDimension) + { + var set = anomalyTree.ChildrenNodes.Keys; + + BestDimension best = null; + if (anomalyTree.Leaves.Count > 0) + { + best = SelectBestDimension(totalPoints, anomalyTree.Leaves, set.ToList(), totoalEntropy); + } + else + { + //has no leaves information, should calculate the entropy information according to the children nodes + best = SelectBestDimension(pointTree.ChildrenNodes, anomalyTree.ChildrenNodes, set.ToList(), totoalEntropy); + } + + if (best == null) + { + return new List() { new RootCauseItem(anomalyDimension) }; + } + + List children = GetTopAnomaly(anomalyTree.ChildrenNodes[best.DimensionKey], anomalyTree.ParentNode, totalPoints, best.DimensionKey); + if (children == null) + { + //As the cause couldn't be found, the root cause should be itself + return new List() { new RootCauseItem(anomalyDimension, best.DimensionKey) }; + } + else + { + 
List causes = new List(); + // For the found causes, we return the result + foreach (Point anomaly in children) + { + causes.Add(new RootCauseItem(UpdateDimensionValue(anomalyDimension, best.DimensionKey, anomaly.Dimensions[best.DimensionKey]), best.DimensionKey)); + } + return causes; + } + } + + protected static double GetEntropy(int totalNum, int anomalyNum) + { + double ratio = (double)anomalyNum / totalNum; + if (ratio == 0 || ratio == 1) + { + return 0; + } + + return -(ratio * Log2(ratio) + (1 - ratio) * Log2(1 - ratio)); + } + + protected static Dictionary GetEntropyList(BestDimension best, List points) + { + Dictionary list = new Dictionary(); + // need to update, change to children if necessary + foreach (Point point in points) + { + string dimVal = point.Dimensions[best.DimensionKey]; + int pointSize = GetPointSize(best, dimVal); + int anomalySize = GetAnomalyPointSize(best, dimVal); + + double dimEntropy = GetEntropy(pointSize, anomalySize); + list.Add(dimVal, dimEntropy); + } + + return list; + } + + protected static List GetTopAnomaly(List anomalyPoints, Point root, List totalPoints, string dimKey) + { + Dictionary pointDistribution = new Dictionary(); + UpdateDistribution(pointDistribution, totalPoints, dimKey); + + anomalyPoints.OrderBy(x => x.Delta); + + if (root.Delta > 0) + { + anomalyPoints.Reverse(); + } + + if (anomalyPoints.Count == 1) + { + return anomalyPoints; + } + + double delta = 0; + double preDelta = 0; + List causeList = new List(); + foreach (Point anomaly in anomalyPoints) + { + if (StopAnomalyComparison(delta, root.Delta, anomaly.Delta, preDelta)) + { + break; + } + + delta += anomaly.Delta; + causeList.Add(anomaly); + preDelta = anomaly.Delta; + } + + int pointSize = GetTotalNumber(pointDistribution); + if (ShouldSeperateAnomaly(delta, root.Delta, pointSize, causeList.Count)) + { + return causeList; + } + + return null; + } + + protected static BestDimension SelectBestDimension(List totalPoints, List anomalyPoints, List aggDim, 
double totalEntropy) + { + Dictionary entroyGainMap = new Dictionary(); + Dictionary entroyGainRatioMap = new Dictionary(); + double sumGain = 0; + + foreach (string dimKey in aggDim) + { + BestDimension dimension = BestDimension.CreateDefaultInstance(); + dimension.DimensionKey = dimKey; + + UpdateDistribution(dimension.PointDis, totalPoints, dimKey); + UpdateDistribution(dimension.AnomalyDis, anomalyPoints, dimKey); + + double gain = GetDimensionEntropyGain(dimension.PointDis, dimension.AnomalyDis, totalEntropy); + dimension.Entropy = totalEntropy - gain; + entroyGainMap.Add(dimension, gain); + + double gainRatio = gain / GetDimensionInstrinsicValue(dimension.PointDis, dimension.AnomalyDis, totalEntropy); + entroyGainRatioMap.Add(dimension, gainRatio); + + sumGain += gain; + } + + double meanGain = sumGain / aggDim.Count(); + + BestDimension best = FindBestDimension(entroyGainMap, entroyGainRatioMap, meanGain); + return best; + } + + public static BestDimension SelectBestDimension(Dictionary> pointChildren, Dictionary> anomalyChildren, List aggDim, double totalEntropy) + { + Dictionary entroyGainMap = new Dictionary(); + Dictionary entroyGainRatioMap = new Dictionary(); + double sumGain = 0; + + foreach (string dimKey in aggDim) + { + BestDimension dimension = BestDimension.CreateDefaultInstance(); + dimension.DimensionKey = dimKey; + + UpdateDistribution(dimension.PointDis, pointChildren[dimKey], dimKey); + UpdateDistribution(dimension.AnomalyDis, anomalyChildren[dimKey], dimKey); + + double gain = GetDimensionEntropyGain(dimension.PointDis, dimension.AnomalyDis, totalEntropy); + dimension.Entropy = totalEntropy - gain; + entroyGainMap.Add(dimension, gain); + + double gainRatio = gain / GetDimensionInstrinsicValue(dimension.PointDis, dimension.AnomalyDis, totalEntropy); + entroyGainRatioMap.Add(dimension, gainRatio); + + sumGain += gain; + } + + double meanGain = sumGain / aggDim.Count(); + + BestDimension best = FindBestDimension(entroyGainMap, 
entroyGainRatioMap, meanGain); + + return best; + } + + private static BestDimension FindBestDimension(Dictionary entropyGainMap, Dictionary entropyGainRationMap, double meanGain) + { + BestDimension best = null; + foreach (KeyValuePair dimension in entropyGainMap) + { + if (dimension.Key.AnomalyDis.Count == 1 || dimension.Value >= meanGain) + { + if (dimension.Key.AnomalyDis.Count > 1) + { + if (best == null || (best.AnomalyDis.Count != 1 && entropyGainRationMap[best].CompareTo(dimension.Value) < 0)) + { + best = dimension.Key; + } + } + else + { + if (best == null || best.AnomalyDis.Count > 1) + { + best = dimension.Key; + } + else + { + if (entropyGainRationMap[best].CompareTo(dimension.Value) < 0) + { + best = dimension.Key; + } + } + } + } + } + return best; + } + + public static Point FindPointByDimension(Dictionary dim, List points) + { + foreach (Point p in points) + { + bool isEqual = true; + foreach (KeyValuePair item in p.Dimensions) + { + if (!dim[item.Key].Equals(item.Value)) + { + isEqual = false; + } + } + + if (isEqual) + { + return p; + } + } + + return null; + } + + public static void UpdateRootCauseDirection(List points, ref RootCause dst) + { + foreach (RootCauseItem item in dst.Items) + { + Point rootCausePoint = DTRootCauseAnalyzer.FindPointByDimension(item.Dimension, points); + if (rootCausePoint != null) + { + if (rootCausePoint.ExpectedValue < rootCausePoint.Value) + { + item.Direction = AnomalyDirection.Up; + } + else + { + item.Direction = AnomalyDirection.Down; + } + } + + } + } + + public static void GetRootCauseScore(List points, Dictionary anomalyRoot, ref RootCause dst, double beta) + { + if (dst.Items.Count > 1) + { + //get surprise value and explanary power value + Point anomalyPoint = DTRootCauseAnalyzer.FindPointByDimension(anomalyRoot, points); + + double sumSurprise = 0; + double sumEp = 0; + List scoreList = new List(); + + foreach (RootCauseItem item in dst.Items) + { + Point rootCausePoint = 
DTRootCauseAnalyzer.FindPointByDimension(item.Dimension, points); + if (anomalyPoint != null && rootCausePoint != null) + { + Tuple scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); + scoreList.Add(new RootCauseScore(scores.Item1, scores.Item2)); + sumSurprise += scores.Item1; + sumEp += Math.Abs(scores.Item2); + } + } + + //normalize and get final score + for (int i = 0; i < scoreList.Count; i++) + { + dst.Items[i].Score = DTRootCauseAnalyzer.GetFinalScore(scoreList[i].Surprise / sumSurprise, Math.Abs(scoreList[i].ExplainaryScore) / sumEp, beta); + + } + } + else if (dst.Items.Count == 1) + { + Point rootCausePoint = DTRootCauseAnalyzer.FindPointByDimension(dst.Items[0].Dimension, points); + + Point anomalyPoint = DTRootCauseAnalyzer.FindPointByDimension(anomalyRoot, points); + if (anomalyPoint != null && rootCausePoint != null) + { + Tuple scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); + dst.Items[0].Score = DTRootCauseAnalyzer.GetFinalScore(scores.Item1, scores.Item2, beta); + } + } + } + + private static double GetSurpriseScore(Point rootCausePoint, Point anomalyPoint) + { + double p = rootCausePoint.ExpectedValue / anomalyPoint.ExpectedValue; + double q = rootCausePoint.Value / anomalyPoint.Value; + double surprise = 0.5 * (p * DTRootCauseAnalyzer.Log2(2 * p / (p + q)) + q * DTRootCauseAnalyzer.Log2(2 * q / (p + q))); + + return surprise; + } + + private static double GetFinalScore(double surprise, double ep, double beta) + { + return Math.Max(1, beta * surprise + (1 - beta) * ep); + } + + private static Tuple GetSupriseAndExplainaryScore(Point rootCausePoint, Point anomalyPoint) + { + double surprise = DTRootCauseAnalyzer.GetSurpriseScore(rootCausePoint, anomalyPoint); + double ep = (rootCausePoint.Value - rootCausePoint.ExpectedValue) / (anomalyPoint.Value - anomalyPoint.ExpectedValue); + + return new Tuple(surprise, ep); + } + private static Dictionary UpdateDimensionValue(Dictionary dimension, string key, string 
value) + { + Dictionary newDim = new Dictionary(dimension); + newDim[key] = value; + return newDim; + } + + private static bool StopAnomalyComparison(double preTotal, double parent, double current, double pre) + { + if (Math.Abs(preTotal) < Math.Abs(parent) * 0.95) + { + return false; + } + + return Math.Abs(pre) / Math.Abs(current) > 2; + } + + private static bool ShouldSeperateAnomaly(double total, double parent, int totalSize, int size) + { + if (Math.Abs(total) < Math.Abs(parent) * 0.95) + { + return false; + } + + if (size == totalSize && size == 1) + { + return true; + } + + return size <= totalSize * 0.5; + } + + private static double GetDimensionEntropyGain(Dictionary pointDis, Dictionary anomalyDis, double totalEntropy) + { + int total = GetTotalNumber(pointDis); + double entropy = 0; + foreach (string key in anomalyDis.Keys) + { + double dimEntropy = GetEntropy(pointDis[key], anomalyDis[key]); + entropy += dimEntropy * pointDis[key] / total; + } + return totalEntropy - entropy; + } + + private static double GetDimensionInstrinsicValue(Dictionary pointDis, Dictionary anomalyDis, double totalEntropy) + { + double instrinsicValue = 0; + + foreach (string key in anomalyDis.Keys) + { + instrinsicValue -= Log2((double)anomalyDis[key] / pointDis[key]) * anomalyDis[key] / pointDis[key]; + } + + return instrinsicValue; + } + + private static int GetTotalNumber(Dictionary distribution) + { + int total = 0; + foreach (int num in distribution.Values) + { + total += num; + } + return total; + } + + private static void UpdateDistribution(Dictionary distribution, List points, string dimKey) + { + foreach (Point point in points) + { + string dimVal = point.Dimensions[dimKey]; + if (!distribution.ContainsKey(dimVal)) + { + distribution.Add(dimVal, 0); + } + distribution[dimVal] = distribution[dimVal] + 1; + } + } + + public static double Log2(double val) + { + return Math.Log(val) / Math.Log(2); + } + + public static bool ContainsAll(Dictionary bigDic, Dictionary 
smallDic) + { + foreach (var item in smallDic) + { + if (!bigDic.ContainsKey(item.Key)) + { + return false; + } + + if (bigDic.ContainsKey(item.Key) && !bigDic[item.Key].Equals(smallDic[item.Key])) + { + return false; + } + } + return true; + } + + private static bool IsAggregationDimension(string val, string aggSymbol) + { + return val.Equals(aggSymbol); + } + + private static int GetPointSize(BestDimension dim, string key) + { + int pointSize = 0; + if (dim.PointDis.ContainsKey(key)) + { + pointSize = dim.PointDis[key]; + } + return pointSize; + } + + private static int GetAnomalyPointSize(BestDimension dim, string key) + { + int anomalyPointSize = 0; + if (dim.AnomalyDis.ContainsKey(key)) + { + anomalyPointSize = dim.AnomalyDis[key]; + } + return anomalyPointSize; + } + } + + public class DimensionInfo + { + public List DetailDim { get; set; } + public List AggDim { get; set; } + + public static DimensionInfo CreateDefaultInstance() + { + DimensionInfo instance = new DimensionInfo(); + instance.DetailDim = new List(); + instance.AggDim = new List(); + return instance; + } + } + + public class PointTree + { + public Point ParentNode; + public Dictionary> ChildrenNodes; + public List Leaves; + + public static PointTree CreateDefaultInstance() + { + PointTree instance = new PointTree(); + instance.Leaves = new List(); + instance.ChildrenNodes = new Dictionary>(); + return instance; + } + } + + public sealed class Point : IEquatable + { + public double Value { get; set; } + public double ExpectedValue { get; set; } + public bool IsAnomaly { get; set; } + public Dictionary Dimensions { get; set; } + + public double Delta { get; set; } + + public Point(double value, double expectedValue, bool isAnomaly, Dictionary dimensions) + { + Value = value; + ExpectedValue = expectedValue; + IsAnomaly = isAnomaly; + Dimensions = dimensions; + Delta = (value - expectedValue) / expectedValue; + if (expectedValue == 0) + { + Delta = 0; + } + } + + public bool Equals(Point other) + 
{ + foreach (KeyValuePair item in Dimensions) + { + if (!other.Dimensions[item.Key].Equals(item.Value)) + { + return false; + } + } + return true; + } + + public override int GetHashCode() + { + return Dimensions.GetHashCode(); + } + } + + public sealed class MetricSlice + { + public DateTime TimeStamp { get; set; } + public List Points { get; set; } + + public MetricSlice(DateTime timeStamp, List points) + { + TimeStamp = timeStamp; + Points = points; + } + } + + public sealed class BestDimension + { + public string DimensionKey; + public double Entropy; + public Dictionary AnomalyDis; + public Dictionary PointDis; + + public BestDimension() { } + public static BestDimension CreateDefaultInstance() + { + BestDimension instance = new BestDimension(); + instance.AnomalyDis = new Dictionary(); + instance.PointDis = new Dictionary(); + return instance; + } + } + + public sealed class AnomalyCause + { + public string DimensionKey; + public List Anomalies; + + public AnomalyCause() { } + } + + public sealed class RootCauseItem : IEquatable + { + public double Score; + public string Path; + public Dictionary Dimension; + public AnomalyDirection Direction; + + public RootCauseItem(Dictionary rootCause) + { + Dimension = rootCause; + } + + public RootCauseItem(Dictionary rootCause, string path) + { + Dimension = rootCause; + Path = path; + } + public bool Equals(RootCauseItem other) + { + if (Dimension.Count == other.Dimension.Count) + { + foreach (KeyValuePair item in Dimension) + { + if (!other.Dimension[item.Key].Equals(item.Value)) + { + return false; + } + } + return true; + } + return false; + } + } + + public enum AnomalyDirection + { + /// + /// the value is larger than expected value. + /// + Up = 0, + /// + /// the value is lower than expected value. 
+ /// + Down = 1 + } + + public class RootCauseScore + { + public double Surprise; + public double ExplainaryScore; + + public RootCauseScore(double surprise, double explainaryScore) + { + Surprise = surprise; + ExplainaryScore = explainaryScore; + } + } + + public enum AggregateType + { + /// + /// Make the aggregate type as sum. + /// + Sum = 0, + /// + /// Make the aggregate type as average. + /// + Avg = 1, + /// + /// Make the aggregate type as min. + /// + Min = 2, + /// + /// Make the aggregate type as max. + /// + Max = 3 + } +} diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs index c70dd398ff..90bb142be0 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs @@ -11,6 +11,7 @@ using Microsoft.ML.Data; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Runtime; +using Microsoft.ML.TimeSeries; using Microsoft.ML.Transforms.TimeSeries; [assembly: LoadableClass(DTRootCauseLocalizationTransformer.Summary, typeof(IDataTransform), typeof(DTRootCauseLocalizationTransformer), typeof(DTRootCauseLocalizationTransformer.Options), typeof(SignatureDataTransform), @@ -27,198 +28,28 @@ namespace Microsoft.ML.Transforms.TimeSeries { - public sealed class RootCauseLocalizationInputTypeAttribute : DataViewTypeAttribute - { - /// - /// Create a root cause localizagin input type. - /// - public RootCauseLocalizationInputTypeAttribute() - { - } - - /// - /// Equal function. - /// - public override bool Equals(DataViewTypeAttribute other) - { - if (!(other is RootCauseLocalizationInputTypeAttribute otherAttribute)) - return false; - return true; - } - - /// - /// Produce the same hash code for all RootCauseLocalizationInputTypeAttribute. 
- /// - public override int GetHashCode() - { - return 0; - } - - public override void Register() - { - DataViewTypeManager.Register(new RootCauseLocalizationInputDataViewType(), typeof(RootCauseLocalizationInput), this); - } - } - - public sealed class RootCauseTypeAttribute : DataViewTypeAttribute - { - /// - /// Create an root cause type. - /// - public RootCauseTypeAttribute() - { - } - - /// - /// RootCauseTypeAttribute with the same type should equal. - /// - public override bool Equals(DataViewTypeAttribute other) - { - if (other is RootCauseTypeAttribute otherAttribute) - return true; - return false; - } - - /// - /// Produce the same hash code for all RootCauseTypeAttribute. - /// - public override int GetHashCode() - { - return 0; - } - - public override void Register() - { - DataViewTypeManager.Register(new RootCauseDataViewType(), typeof(RootCause), this); - } - } - - public sealed class RootCause - { - public List Items { get; set; } - } - - public sealed class RootCauseItems { - public double Score; - public List Path; - public Dictionary RootCause; - public AnomalyDirection Direction; - } - - public enum AnomalyDirection { - /// - /// the value is larger than expected value. - /// - Up = 0, - /// - /// the value is lower than expected value. 
- /// - Down = 1 - } - - public sealed class RootCauseLocalizationInput - { - public DateTime AnomalyTimestamp { get; set; } - - public Dictionary AnomalyDimensions { get; set; } - - public List Slices { get; set; } - - public DTRootCauseLocalizationEstimator.AggregateType AggType{ get; set; } - - public string AggSymbol { get; set; } - - public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, DTRootCauseLocalizationEstimator.AggregateType aggregateType, string aggregateSymbol) { - AnomalyTimestamp = anomalyTimestamp; - AnomalyDimensions = anomalyDimensions; - Slices = slices; - AggType = aggregateType; - AggSymbol = aggregateSymbol; - } - public void Dispose() - { - AnomalyDimensions = null; - Slices = null; - } - } - - public sealed class MetricSlice - { - public DateTime TimeStamp { get; set; } - public List Points { get; set; } - - public MetricSlice(DateTime timeStamp, List points) { - TimeStamp = timeStamp; - Points = points; - } - } - - public sealed class Point { - public double Value { get; set; } - public double ExpectedValue { get; set; } - public bool IsAnomaly { get; set; } - public Dictionary Dimensions{ get; set; } - } - - public sealed class RootCauseDataViewType : StructuredDataViewType - { - public RootCauseDataViewType() - : base(typeof(RootCause)) - { - } - - public override bool Equals(DataViewType other) - { - if (other == this) - return true; - if (!(other is RootCauseDataViewType tmp)) - return false; - return true; - } - - public override int GetHashCode() - { - return 0; - } - - public override string ToString() - { - return typeof(RootCauseDataViewType).Name; - } - } - - public sealed class RootCauseLocalizationInputDataViewType : StructuredDataViewType + /// + /// resulting from fitting an . 
+ /// + public sealed class DTRootCauseLocalizationTransformer : OneToOneTransformerBase { - public RootCauseLocalizationInputDataViewType() - : base(typeof(RootCauseLocalizationInput)) - { - } - - public override bool Equals(DataViewType other) - { - if (!(other is RootCauseLocalizationInputDataViewType tmp)) - return false; - return true; - } + internal const string Summary = "Localize root cause for anomaly."; + internal const string UserName = "DT Root Cause Localization Transform"; + internal const string LoaderSignature = "DTRootCauseLTransform"; - public override int GetHashCode() + private static VersionInfo GetVersionInfo() { - return 0; + return new VersionInfo( + modelSignature: "DTRCL", + verWrittenCur: 0x00010001, // Initial + verReadableCur: 0x00010001, + verWeCanReadBack: 0x00010001, + loaderSignature: LoaderSignature, + loaderAssemblyName: typeof(DTRootCauseLocalizationTransformer).Assembly.FullName); } - public override string ToString() - { - return typeof(RootCauseLocalizationInputDataViewType).Name; - } - } + private const string RegistrationName = "RootCauseLocalization"; - // REVIEW: Rewrite as LambdaTransform to simplify. - // REVIEW: Should it be separate transform or part of ImageResizerTransform? - /// - /// resulting from fitting an . 
- /// - public sealed class DTRootCauseLocalizationTransformer : OneToOneTransformerBase - { internal sealed class Column : OneToOneColumn { internal static Column Parse(string str) @@ -246,24 +77,6 @@ internal class Options : TransformInputBase } - internal const string Summary = "Localize root cause for anomaly."; - - internal const string UserName = "DT Root Cause Localization Transform"; - internal const string LoaderSignature = "DTRootCauseLTransform"; - - private static VersionInfo GetVersionInfo() - { - return new VersionInfo( - modelSignature: "DTRCL", - verWrittenCur: 0x00010001, // Initial - verReadableCur: 0x00010001, - verWeCanReadBack: 0x00010001, - loaderSignature: LoaderSignature, - loaderAssemblyName: typeof(DTRootCauseLocalizationTransformer).Assembly.FullName); - } - - private const string RegistrationName = "RootCauseLocalization"; - /// /// The input and output column pairs passed to this . /// @@ -278,10 +91,10 @@ private static VersionInfo GetVersionInfo() /// Weight for generating score. /// The name of the columns (first item of the tuple), and the name of the resulting output column (second item of the tuple). 
- internal DTRootCauseLocalizationTransformer(IHostEnvironment env,double beta = DTRootCauseLocalizationEstimator.Defaults.Beta, params (string outputColumnName, string inputColumnName)[] columns) + internal DTRootCauseLocalizationTransformer(IHostEnvironment env, double beta = DTRootCauseLocalizationEstimator.Defaults.Beta, params (string outputColumnName, string inputColumnName)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), columns) { - Host.CheckUserArg(beta >=0 && beta <= 1, nameof(Options.Beta), "Must be in [0,1]"); + Host.CheckUserArg(beta >= 0 && beta <= 1, nameof(Options.Beta), "Must be in [0,1]"); _beta = beta; } @@ -294,7 +107,7 @@ internal static IDataTransform Create(IHostEnvironment env, Options options, IDa env.CheckValue(input, nameof(input)); env.CheckValue(options.Columns, nameof(options.Columns)); - return new DTRootCauseLocalizationTransformer(env,options.Beta, options.Columns.Select(x => (x.Name, x.Source ?? x.Name)).ToArray()) + return new DTRootCauseLocalizationTransformer(env, options.Beta, options.Columns.Select(x => (x.Name, x.Source ?? 
x.Name)).ToArray()) .MakeDataTransform(input); } @@ -343,7 +156,7 @@ private protected override void CheckInputColumn(DataViewSchema inputSchema, int private sealed class Mapper : OneToOneMapperBase { - private DTRootCauseLocalizationTransformer _parent; + private readonly DTRootCauseLocalizationTransformer _parent; public Mapper(DTRootCauseLocalizationTransformer parent, DataViewSchema inputSchema) : base(parent.Host.Register(nameof(Mapper)), parent, inputSchema) @@ -392,18 +205,52 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func{ new RootCauseItems() }; - //todo- algorithms would be implememted here + LocalizeRootCauses(src, ref dst); }; return del; } + + private void CheckRootCauseInput(RootCauseLocalizationInput src, IHost host) + { + if (src.Slices.Count < 1) + { + throw host.Except($"Length of Slices must be larger than 0"); + } + + bool containsAnomalyTimestamp = false; + foreach (MetricSlice slice in src.Slices) + { + if (slice.TimeStamp.Equals(src.AnomalyTimestamp)) + { + containsAnomalyTimestamp = true; + } + } + if (!containsAnomalyTimestamp) + { + throw host.Except($"Has no points in the given anomaly timestamp"); + } + } + + private void LocalizeRootCauses(RootCauseLocalizationInput src, ref RootCause dst) + { + dst = new RootCause(); + dst.Items = new List { }; + + DimensionInfo dimensionInfo = DTRootCauseAnalyzer.SeperateDimension(src.AnomalyDimensions, src.AggSymbol); + if (dimensionInfo.AggDim.Count == 0) + { + return; + } + Dictionary subDim = DTRootCauseAnalyzer.GetSubDim(src.AnomalyDimensions, dimensionInfo.DetailDim); + List totalPoints = DTRootCauseAnalyzer.GetTotalPointsForAnomalyTimestamp(src, subDim); + + DTRootCauseAnalyzer.GetRootCauseList(src, ref dst, dimensionInfo, totalPoints, subDim); + DTRootCauseAnalyzer.UpdateRootCauseDirection(totalPoints,ref dst); + DTRootCauseAnalyzer.GetRootCauseScore(totalPoints, src.AnomalyDimensions, ref dst, _parent._beta); + } } } @@ -434,26 +281,6 @@ internal static class 
Defaults public const double Beta = 0.5; } - public enum AggregateType - { - /// - /// Make the aggregate type as sum. - /// - Sum = 0, - /// - /// Make the aggregate type as average. - /// - Avg = 1, - /// - /// Make the aggregate type as min. - /// - Min = 2, - /// - /// Make the aggregate type as max. - /// - Max = 3 - } - /// /// Localize root cause. /// @@ -461,8 +288,8 @@ public enum AggregateType /// The name of the columns (first item of the tuple), and the name of the resulting output column (second item of the tuple). /// The weight for generating score in output result. [BestFriend] - internal DTRootCauseLocalizationEstimator(IHostEnvironment env, double beta = Defaults.Beta,params(string outputColumnName, string inputColumnName)[] columns) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(DTRootCauseLocalizationEstimator)), new DTRootCauseLocalizationTransformer(env, beta,columns)) + internal DTRootCauseLocalizationEstimator(IHostEnvironment env, double beta = Defaults.Beta, params (string outputColumnName, string inputColumnName)[] columns) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(DTRootCauseLocalizationEstimator)), new DTRootCauseLocalizationTransformer(env, beta, columns)) { } diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs new file mode 100644 index 0000000000..1e5e3f881e --- /dev/null +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs @@ -0,0 +1,168 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using Microsoft.ML.Data; + +namespace Microsoft.ML.TimeSeries +{ + /// + /// Allows a member to be marked as a , primarily allowing one to set + /// root cause localization input. 
+ /// + public sealed class RootCauseLocalizationInputTypeAttribute : DataViewTypeAttribute + { + /// + /// Create a root cause localizagin input type. + /// + public RootCauseLocalizationInputTypeAttribute() + { + } + + /// + /// Equal function. + /// + public override bool Equals(DataViewTypeAttribute other) + { + if (!(other is RootCauseLocalizationInputTypeAttribute otherAttribute)) + return false; + return true; + } + + /// + /// Produce the same hash code for all RootCauseLocalizationInputTypeAttribute. + /// + public override int GetHashCode() + { + return 0; + } + + public override void Register() + { + DataViewTypeManager.Register(new RootCauseLocalizationInputDataViewType(), typeof(RootCauseLocalizationInput), this); + } + } + + /// + /// Allows a member to be marked as a , primarily allowing one to set + /// root cause result. + /// + public sealed class RootCauseTypeAttribute : DataViewTypeAttribute + { + /// + /// Create an root cause type. + /// + public RootCauseTypeAttribute() + { + } + + /// + /// RootCauseTypeAttribute with the same type should equal. + /// + public override bool Equals(DataViewTypeAttribute other) + { + if (other is RootCauseTypeAttribute otherAttribute) + return true; + return false; + } + + /// + /// Produce the same hash code for all RootCauseTypeAttribute. 
+ /// + public override int GetHashCode() + { + return 0; + } + + public override void Register() + { + DataViewTypeManager.Register(new RootCauseDataViewType(), typeof(RootCause), this); + } + } + + public sealed class RootCause + { + public List Items { get; set; } + } + + public sealed class RootCauseLocalizationInput + { + public DateTime AnomalyTimestamp { get; set; } + + public Dictionary AnomalyDimensions { get; set; } + + public List Slices { get; set; } + + public AggregateType AggType { get; set; } + + public string AggSymbol { get; set; } + + public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateType, string aggregateSymbol) + { + AnomalyTimestamp = anomalyTimestamp; + AnomalyDimensions = anomalyDimensions; + Slices = slices; + AggType = aggregateType; + AggSymbol = aggregateSymbol; + } + public void Dispose() + { + AnomalyDimensions = null; + Slices = null; + } + } + + public sealed class RootCauseDataViewType : StructuredDataViewType + { + public RootCauseDataViewType() + : base(typeof(RootCause)) + { + } + + public override bool Equals(DataViewType other) + { + if (other == this) + return true; + if (!(other is RootCauseDataViewType tmp)) + return false; + return true; + } + + public override int GetHashCode() + { + return 0; + } + + public override string ToString() + { + return typeof(RootCauseDataViewType).Name; + } + } + + public sealed class RootCauseLocalizationInputDataViewType : StructuredDataViewType + { + public RootCauseLocalizationInputDataViewType() + : base(typeof(RootCauseLocalizationInput)) + { + } + + public override bool Equals(DataViewType other) + { + if (!(other is RootCauseLocalizationInputDataViewType tmp)) + return false; + return true; + } + + public override int GetHashCode() + { + return 0; + } + + public override string ToString() + { + return typeof(RootCauseLocalizationInputDataViewType).Name; + } + } +} diff --git 
a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 51ea88c881..3329a35b27 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -1,14 +1,14 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. using System; using System.Collections.Generic; -using System.Drawing; using System.IO; using Microsoft.ML.Data; using Microsoft.ML.TestFramework; using Microsoft.ML.TestFramework.Attributes; +using Microsoft.ML.TimeSeries; using Microsoft.ML.Transforms.TimeSeries; using Xunit; using Xunit.Abstractions; @@ -92,6 +92,8 @@ private sealed class SrCnnAnomalyDetection public double[] Prediction { get; set; } } + private static string _aggSymbol = "##SUM##"; + [Fact] public void ChangeDetection() { @@ -474,7 +476,7 @@ public void SsaForecastPredictionEngine() // The forecasted results should be the same because the state of the models // is the same. 
Assert.Equal(result.Forecast, resultCopy.Forecast); - + } [Fact] @@ -522,13 +524,13 @@ private class RootCauseLocalizationData { [RootCauseLocalizationInputType] public RootCauseLocalizationInput Input { get; set; } - + public RootCauseLocalizationData() { Input = null; } - public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, DTRootCauseLocalizationEstimator.AggregateType aggregateteType, string aggregateSymbol) + public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateteType, string aggregateSymbol) { Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, aggregateteType, aggregateSymbol); } @@ -536,7 +538,7 @@ public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary() { new RootCauseLocalizationData(new DateTime(),new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM"), new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, DTRootCauseLocalizationEstimator.AggregateType.Avg, "AVG") }; + var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, AggregateType.Sum, "##SUM##") }; var ml = new MLContext(1); // Convert the list of root cause data to an IDataView object, which is consumable by ML.NET API. 
@@ -570,16 +572,103 @@ public void RootCauseLocalizationWithDT() foreach (var dataPoint in transformedDataPoints) { var rootCause = dataPoint.RootCause; - Assert.NotNull(rootCause); } var engine = ml.Model.CreatePredictionEngine(model); - var newRootCauseInput = new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() {new MetricSlice(new DateTime(), new List())}, DTRootCauseLocalizationEstimator.AggregateType.Sum, "SUM"); + DateTime timeStamp = GetCurrentTimestamp(); + var newRootCauseInput = new RootCauseLocalizationData(timeStamp, GetAnomalyDimension(), new List() { new MetricSlice(timeStamp, GetRootCauseLocalizationPoints()) }, AggregateType.Sum, _aggSymbol); var transformedRootCause = engine.Predict(newRootCauseInput); Assert.NotNull(transformedRootCause); - //todo - will add more tests here when onboarding mock data + Assert.Equal(1,(int)transformedRootCause.RootCause.Items.Count); + + Dictionary expectedDim = new Dictionary(); + expectedDim.Add("Country","UK"); + expectedDim.Add("DeviceType",_aggSymbol); + expectedDim.Add("DataCenter","DC1"); + + foreach (KeyValuePair pair in transformedRootCause.RootCause.Items[0].Dimension) { + Assert.Equal(expectedDim[pair.Key], pair.Value); + } } + + + private static List GetRootCauseLocalizationPoints() + { + List points = new List(); + + Dictionary dic1 = new Dictionary(); + dic1.Add("Country", "UK"); + dic1.Add("DeviceType", "Laptop"); + dic1.Add("DataCenter", "DC1"); + points.Add(new Point(200, 100, true, dic1)); + + Dictionary dic2 = new Dictionary(); + dic2.Add("Country", "UK"); + dic2.Add("DeviceType", "Mobile"); + dic2.Add("DataCenter", "DC1"); + points.Add(new Point(1000, 100, true, dic2)); + + Dictionary dic3 = new Dictionary(); + dic3.Add("Country", "UK"); + dic3.Add("DeviceType", _aggSymbol); + dic3.Add("DataCenter", "DC1"); + points.Add(new Point(1200, 200, true, dic3)); + + Dictionary dic4 = new Dictionary(); + dic4.Add("Country", "UK"); + dic4.Add("DeviceType", "Laptop"); + 
dic4.Add("DataCenter", "DC2"); + points.Add(new Point(100, 100, false, dic4)); + + Dictionary dic5 = new Dictionary(); + dic5.Add("Country", "UK"); + dic5.Add("DeviceType", "Mobile"); + dic5.Add("DataCenter", "DC2"); + points.Add(new Point(200, 200, false, dic5)); + + Dictionary dic6 = new Dictionary(); + dic6.Add("Country", "UK"); + dic6.Add("DeviceType", _aggSymbol); + dic6.Add("DataCenter", "DC2"); + points.Add(new Point(300, 300, false, dic6)); + + Dictionary dic7 = new Dictionary(); + dic7.Add("Country", "UK"); + dic7.Add("DeviceType", _aggSymbol); + dic7.Add("DataCenter", _aggSymbol); + points.Add(new Point(1500, 500, true, dic7)); + + Dictionary dic8 = new Dictionary(); + dic8.Add("Country", "UK"); + dic8.Add("DeviceType", "Laptop"); + dic8.Add("DataCenter", _aggSymbol); + points.Add(new Point(300, 200, true, dic8)); + + Dictionary dic9 = new Dictionary(); + dic9.Add("Country", "UK"); + dic9.Add("DeviceType", "Mobile"); + dic9.Add("DataCenter", _aggSymbol); + points.Add(new Point(1200, 300, true, dic9)); + + return points; + } + + private static Dictionary GetAnomalyDimension() + { + Dictionary dim = new Dictionary(); + dim.Add("Country", "UK"); + dim.Add("DeviceType", _aggSymbol); + dim.Add("DataCenter", _aggSymbol); + + return dim; + } + + private static DateTime GetCurrentTimestamp() + { + return new DateTime(); + } + } } From 8f97602def77d6a06f873c8bd1aff1f1744809fb Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Mon, 23 Mar 2020 16:43:05 +0800 Subject: [PATCH 12/49] print score, path and directions in sample --- .../TimeSeries/LocalizeRootCauseByDT.cs | 11 +- .../TimeSeries/LocalizeRootCauseEvaluation.cs | 409 ------------------ 2 files changed, 4 insertions(+), 416 deletions(-) delete mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs index 326d5f360d..b95d88418d 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs @@ -42,18 +42,15 @@ public static void Example() // Print the localization result. int count = 0; - foreach (RootCauseItem item in prediction.RootCause.Items) { + foreach (RootCauseItem item in prediction.RootCause.Items) + { count++; Console.WriteLine($"Root cause item #{count} ..."); - foreach (KeyValuePair pair in item.Dimension) { - Console.WriteLine($"{pair.Key} = {pair.Value}"); - } + Console.WriteLine($"Score: {item.Score}, Path: {item.Path}, Direction: {item.Direction}, Dimension:{String.Join(" ", item.Dimension)}"); } //Item #1 ... - //Country = UK - //DeviceType = ##SUM## - //DataCenter = DC1 + //Score: 1, Path: DataCenter, Direction: Up, Dimension:[Country, UK] [DeviceType, ##SUM##] [DataCenter, DC1] } private static List GetPoints() { diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs deleted file mode 100644 index 47163a49dc..0000000000 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs +++ /dev/null @@ -1,409 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Data; -using Microsoft.ML; -using Microsoft.ML.TimeSeries; -using Microsoft.ML.Transforms.TimeSeries; - -using Microsoft.VisualBasic.FileIO; - -namespace Samples.Dynamic.Transforms.TimeSeries -{ - public static class LocalizeRootCauseEvaluation - { - public static void Example() - { - Dictionary> rootNodeMap = GetAnomalyRootMap(); - Dictionary>> labeledRootCauseMap = GetLabeledRootCauseMap(); - - string aggSymbol = "##EMPTY##awqegp##"; - - int totalTp = 0; - int 
totalFp = 0; - int totalFn = 0; - int totalCount = 0; - - bool exactly = false; - - int totalRunTime = 0; - - foreach (KeyValuePair> item in rootNodeMap) - { - DateTime timeStamp = item.Key; - - DateTime filterTime = DateTime.ParseExact("2019-11-13 13:00:00,000", "yyyy-MM-dd HH:mm:ss,fff", - System.Globalization.CultureInfo.InvariantCulture); - - //if (timeStamp.CompareTo(filterTime).Equals(0)) - { - int seconds = Convert.ToInt32(timeStamp.Subtract(new DateTime(1970, 1, 1, 0, 0, 0, 0)).TotalSeconds); - string path = String.Format("D:/rootcause/Dataset_yaniv/raw_data_201908_202002/{0}.csv", seconds); - List points = GetPoints(path); - List slices = new List(); - slices.Add(new MetricSlice(timeStamp, points)); - - PredictionEngine engine = GetRootCausePredictionEngine(); - - var newRootCauseInput = new RootCauseLocalizationData(timeStamp, rootNodeMap[timeStamp], new List() { new MetricSlice(timeStamp, points) }, AggregateType.Sum, aggSymbol); - - List list = new List(); - int startTime = System.Environment.TickCount; - GetRootCause(list, newRootCauseInput, engine); - int endTime = System.Environment.TickCount; - int runTime = endTime - startTime; - totalRunTime += runTime; - - List> labeledRootCause = labeledRootCauseMap[timeStamp]; - List> detectedRootCause = ConvertRootCauseItemToDic(list); - RemoveAggSymbol(detectedRootCause, aggSymbol); - - Tuple evaluation = EvaluateRootCauseResult(detectedRootCause, labeledRootCause, exactly, timeStamp); - totalTp += evaluation.Item1; - totalFp += evaluation.Item2; - totalFn += evaluation.Item3; - totalCount++; - } - } - - double precision = (double)totalTp / (totalTp + totalFp); - double recall = (double)totalTp / (totalTp + totalFn); - double f1 = 2 * precision * recall / (precision + recall); - Console.WriteLine(String.Format("Total Count : {0}, TP: {1}, FP: {2}, FN: {3}", totalCount, totalTp, totalFp, totalFn)); - Console.WriteLine(String.Format("Precision : {0}, Recall: {1}, F1: {2}", precision, recall, f1)); - 
Console.WriteLine(String.Format("Mean calculation time is : {0} ms", (double)totalRunTime / totalCount)); - } - - private static Tuple EvaluateRootCauseResult(List> detectedRootCause, List> labeledRootCause, bool exactly, DateTime timeStamp) - { - int tp = 0; - int fp = 0; - int fn; - List labelSet = new List(); - foreach (Dictionary cause in detectedRootCause) - { - string tpCause = FindTruePositive(cause, labeledRootCause, exactly); - if (tpCause == null) - { - fp++; - Console.WriteLine(String.Format("FP : timestamp - {0}, detected root cause ", timeStamp)); - Console.WriteLine(string.Join(Environment.NewLine, cause)); - Console.WriteLine(" "); - } - else - { - tp++; - labelSet.Add(tpCause); - } - } - - fn = labeledRootCause.Count - labelSet.Count; - if (fn != 0) - { - List> nCause = GetFNegtiveCause(labeledRootCause, labelSet); - if (nCause.Count > 0) - { - Console.WriteLine(String.Format("FN : timestamp - {0}, labeled root cause", timeStamp)); - foreach (Dictionary cause in nCause) - { - Console.WriteLine(string.Join(Environment.NewLine, cause)); - Console.WriteLine("---------------------"); - } - - } - } - - return new Tuple(tp, fp, fn); - } - - private static List> GetFNegtiveCause(List> labelCauses, List labelSet) - { - List> causeList = new List>(); - foreach (Dictionary cause in labelCauses) - { - if (!labelSet.Contains(GetDicHashCode(cause))) - { - causeList.Add(cause); - } - } - return causeList; - } - - private static string FindTruePositive(Dictionary cause, List> labelCauses, bool exactly) - { - foreach (Dictionary label in labelCauses) - { - string id = GetDicHashCode(label); - int compare = CompareCause(cause, label); - if (compare == 0) - { - return id; - } - else if (!exactly && (compare == 1 || compare == 2)) - { - return id; - } - } - return null; - } - - - private static string GetDicHashCode(Dictionary dic) - { - return dic.GetHashCode().ToString(); - } - - private static int CompareCause(Dictionary detect, Dictionary label) - { - if 
(detect.Equals(label)) - { - return 0; - } - else if (DTRootCauseAnalyzer.ContainsAll(detect, label)) - { - return 1; - } - else if (DTRootCauseAnalyzer.ContainsAll(label, detect)) - { - return 2; - } - return 3; - } - private static List> ConvertRootCauseItemToDic(List items) - { - List> list = new List>(); - foreach (RootCauseItem item in items) - { - list.Add(item.Dimension); - } - return list; - } - - private static void RemoveAggSymbol(List> dimensions, string aggSymbol) - { - foreach (Dictionary dim in dimensions) - { - foreach (string key in dim.Keys) - { - if (dim[key].Equals(aggSymbol)) - { - dim.Remove(key); - } - } - } - } - - private static PredictionEngine GetRootCausePredictionEngine() - { - //// Create an root cause localizatiom input list from csv. - var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, AggregateType.Sum, "SUM") }; - - - var ml = new MLContext(1); - // Convert the list of root cause data to an IDataView object, which is consumable by ML.NET API. - var data = ml.Data.LoadFromEnumerable(rootCauseLocalizationData); - - // Create pipeline to localize root cause by decision tree. - var pipeline = ml.Transforms.LocalizeRootCauseByDT(nameof(RootCauseLocalizationTransformedData.RootCause), nameof(RootCauseLocalizationData.Input)); - - // Fit the model. - var model = pipeline.Fit(data); - - // Test path: input list -> IDataView -> Enumerable of RootCauseLocalizationInputs. - var transformedData = model.Transform(data); - - // Load input list in DataView back to Enumerable. 
- var transformedDataPoints = ml.Data.CreateEnumerable(transformedData, false); - - var engine = ml.Model.CreatePredictionEngine(model); - return engine; - } - - private static string _ocsDataCenter = "OCSDatacenter"; - private static string _appType = "AppType"; - private static string _releaseAudienceGroup = "Release_AudienceGroup"; - private static string _wacDatacenter = "WACDatacenter"; - private static string _requestType = "RequestType"; - private static string _statusCode = "StatusCode"; - - private static List _dimensionKeys = new List() { _ocsDataCenter, _appType, _releaseAudienceGroup, _wacDatacenter, _statusCode, _requestType }; - - private static Dictionary> GetAnomalyRootMap() - { - var anomalyRootData = GetDataTabletFromCSVFile("D:/rootcause/Dataset_yaniv/root_cause_201908_202002/anomaly_root.csv"); - - Dictionary> rootNodeMap = new Dictionary>(); - foreach (DataRow row in anomalyRootData.Rows) - { - // load the data, build the RootCauseInput, take care of empty value - long seconds = long.Parse(row["TimeStamp"].ToString()); - DateTime t = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddSeconds(seconds); - - Dictionary dimension = new Dictionary(); - foreach (string key in _dimensionKeys) - { - if (!row[key].ToString().Equals("")) - { - dimension.Add(key, row[key].ToString()); - } - } - - rootNodeMap.Add(t, dimension); - } - return rootNodeMap; - } - - private static Dictionary>> GetLabeledRootCauseMap() - { - var labeldRootCause = GetDataTabletFromCSVFile("D:/rootcause/Dataset_yaniv/root_cause_201908_202002/labeled_root_cause.csv"); - - Dictionary>> map = new Dictionary>>(); - foreach (DataRow row in labeldRootCause.Rows) - { - // load the data, build the labled result, take care of empty value - long seconds = long.Parse(row["TimeStamp"].ToString()); - DateTime t = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddSeconds(seconds); - - Dictionary dimension = new Dictionary(); - foreach (string key in _dimensionKeys) - { - if 
(!row[key].ToString().Equals("")) - { - dimension.Add(key, row[key].ToString()); - } - } - - if (map.ContainsKey(t)) - { - map[t].Add(dimension); - } - else - { - map.Add(t, new List>() { dimension }); - } - } - return map; - } - - private static List GetPoints(string path) - { - var inputData = GetDataTabletFromCSVFile(path); - - DateTime timeStamp = new DateTime(); - - List points = new List(); - foreach (DataRow row in inputData.Rows) - { - // load the data, build the RootCauseInput, take care of empty value - long seconds = long.Parse(row["TimeStamp"].ToString()); - timeStamp = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddSeconds(seconds); - double value = Double.Parse(row["Value"].ToString()); - double expectedValue = 0; - if (!row["ExpectedValue"].ToString().Equals("")) - { - expectedValue = Double.Parse(row["ExpectedValue"].ToString()); - } - bool isAnomaly = Boolean.Parse(row["IsAnomaly"].ToString()); - Dictionary dimension = new Dictionary(); - foreach (string key in _dimensionKeys) - { - if (!row[key].ToString().Equals("")) - { - dimension.Add(key, row[key].ToString()); - } - } - - points.Add(new Microsoft.ML.TimeSeries.Point(value, expectedValue, isAnomaly, dimension)); ; - } - - return points; - } - - private static void GetRootCause(List rootCauseList, RootCauseLocalizationData inputData, PredictionEngine engine) - { - RootCauseLocalizationTransformedData incrementalResult = engine.Predict(inputData); - - if (incrementalResult.RootCause.Items.Count == 0 || ( - incrementalResult.RootCause.Items.Count == 1 && incrementalResult.RootCause.Items[0].Dimension.Equals(inputData.Input.AnomalyDimensions) - )) - { - if (!rootCauseList.Contains(new RootCauseItem(inputData.Input.AnomalyDimensions))) - { - rootCauseList.Add(new RootCauseItem(inputData.Input.AnomalyDimensions)); - - } - return; - } - else - { - foreach (RootCauseItem item in incrementalResult.RootCause.Items) - { - RootCauseLocalizationData newData = new 
RootCauseLocalizationData(inputData.Input.AnomalyTimestamp, - item.Dimension, inputData.Input.Slices, inputData.Input.AggType, inputData.Input.AggSymbol); - GetRootCause(rootCauseList, newData, engine); - } - } - } - - private static DataTable GetDataTabletFromCSVFile(string filePath) - { - DataTable csvData = new DataTable(); - - - using (TextFieldParser csvReader = new TextFieldParser(filePath)) - { - csvReader.SetDelimiters(new string[] { "," }); - csvReader.HasFieldsEnclosedInQuotes = true; - string[] colFields = csvReader.ReadFields(); - foreach (string column in colFields) - { - DataColumn datecolumn = new DataColumn(column); - datecolumn.AllowDBNull = true; - csvData.Columns.Add(datecolumn); - } - - while (!csvReader.EndOfData) - { - string[] fieldData = csvReader.ReadFields(); - //Making empty value as null - for (int i = 0; i < fieldData.Length; i++) - { - if (fieldData[i] == "") - { - fieldData[i] = null; - } - } - csvData.Rows.Add(fieldData); - } - } - - return csvData; - } - - private class RootCauseLocalizationData - { - [RootCauseLocalizationInputType] - public RootCauseLocalizationInput Input { get; set; } - - public RootCauseLocalizationData() - { - Input = null; - } - - public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateteType, string aggregateSymbol) - { - Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, aggregateteType, aggregateSymbol); - } - } - - private class RootCauseLocalizationTransformedData - { - [RootCauseType()] - public RootCause RootCause { get; set; } - - public RootCauseLocalizationTransformedData() - { - RootCause = null; - } - } - } -} From 48123f44a096d0a3f0cf3f99dd350d26807dcab8 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Mon, 23 Mar 2020 16:51:37 +0800 Subject: [PATCH 13/49] merge with master --- .../TimeSeries/LocalizeRootCauseByDT.cs | 21 +++++++++---------- 1 file changed, 10 insertions(+), 11 deletions(-) diff 
--git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs index 326d5f360d..721fa2d86e 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs @@ -42,25 +42,23 @@ public static void Example() // Print the localization result. int count = 0; - foreach (RootCauseItem item in prediction.RootCause.Items) { + foreach (RootCauseItem item in prediction.RootCause.Items) + { count++; Console.WriteLine($"Root cause item #{count} ..."); - foreach (KeyValuePair pair in item.Dimension) { - Console.WriteLine($"{pair.Key} = {pair.Value}"); - } + Console.WriteLine($"Score: {item.Score}, Path: {item.Path}, Direction: {item.Direction}, Dimension:{String.Join(" ", item.Dimension)}"); } //Item #1 ... - //Country = UK - //DeviceType = ##SUM## - //DataCenter = DC1 + //Score: 1, Path: DataCenter, Direction: Up, Dimension:[Country, UK] [DeviceType, ##SUM##] [DataCenter, DC1] } - private static List GetPoints() { + private static List GetPoints() + { List points = new List(); Dictionary dic1 = new Dictionary(); - dic1.Add("Country","UK"); + dic1.Add("Country", "UK"); dic1.Add("DeviceType", "Laptop"); dic1.Add("DataCenter", "DC1"); points.Add(new Point(200, 100, true, dic1)); @@ -116,7 +114,8 @@ private static List GetPoints() { return points; } - private static Dictionary GetAnomalyDimension() { + private static Dictionary GetAnomalyDimension() + { Dictionary dim = new Dictionary(); dim.Add("Country", "UK"); dim.Add("DeviceType", AGG_SYMBOL); @@ -158,4 +157,4 @@ public RootCauseLocalizationTransformedData() } } } -} +} \ No newline at end of file From c47302fabdf0b31ce29f41958fc4746cc1833574 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Mon, 23 Mar 2020 17:25:15 +0800 Subject: [PATCH 14/49] extract root cause analyzer --- 
.../TimeSeries/LocalizeRootCauseByDT.cs | 10 +- .../DTRootCauseAnalyzer.cs | 120 +++++++++++------- .../DTRootCauseLocalization.cs | 16 +-- .../DTRootCauseLocalizationType.cs | 4 + .../TimeSeriesDirectApi.cs | 15 +-- 5 files changed, 91 insertions(+), 74 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs index b95d88418d..a2a61b6ea2 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs @@ -53,11 +53,12 @@ public static void Example() //Score: 1, Path: DataCenter, Direction: Up, Dimension:[Country, UK] [DeviceType, ##SUM##] [DataCenter, DC1] } - private static List GetPoints() { + private static List GetPoints() + { List points = new List(); Dictionary dic1 = new Dictionary(); - dic1.Add("Country","UK"); + dic1.Add("Country", "UK"); dic1.Add("DeviceType", "Laptop"); dic1.Add("DataCenter", "DC1"); points.Add(new Point(200, 100, true, dic1)); @@ -113,7 +114,8 @@ private static List GetPoints() { return points; } - private static Dictionary GetAnomalyDimension() { + private static Dictionary GetAnomalyDimension() + { Dictionary dim = new Dictionary(); dim.Add("Country", "UK"); dim.Add("DeviceType", AGG_SYMBOL); @@ -124,7 +126,7 @@ private static Dictionary GetAnomalyDimension() { private static DateTime GetTimestamp() { - return new DateTime(); + return new DateTime(2020, 3, 23, 0, 0, 0); } private class RootCauseLocalizationData diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs index b7a66582b3..c127b7067f 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs @@ -11,7 +11,31 @@ namespace Microsoft.ML.TimeSeries { public class 
DTRootCauseAnalyzer { - public static List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src, Dictionary subDim) + private RootCauseLocalizationInput _src; + private double _beta; + public DTRootCauseAnalyzer(RootCauseLocalizationInput src, double beta) + { + _src = src; + _beta = beta; + } + + public RootCause Analyze() + { + RootCause dst = new RootCause(); + DimensionInfo dimensionInfo = SeperateDimension(_src.AnomalyDimensions, _src.AggSymbol); + if (dimensionInfo.AggDim.Count == 0) + { + dst.Items.Add(new RootCauseItem(_src.AnomalyDimensions)); + } + Dictionary subDim = GetSubDim(_src.AnomalyDimensions, dimensionInfo.DetailDim); + List totalPoints = GetTotalPointsForAnomalyTimestamp(_src, subDim); + + GetRootCauseList(_src, ref dst, dimensionInfo, totalPoints, subDim); + UpdateRootCauseDirection(totalPoints, ref dst); + return dst; + } + + public List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src, Dictionary subDim) { List points = new List(); foreach (MetricSlice slice in src.Slices) @@ -22,15 +46,15 @@ public static List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizatio } } - List totalPoints = DTRootCauseAnalyzer.SelectPoints(points, subDim); + List totalPoints = SelectPoints(points, subDim); return totalPoints; } - public static void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, DimensionInfo dimensionInfo, List totalPoints, Dictionary subDim) + public void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, DimensionInfo dimensionInfo, List totalPoints, Dictionary subDim) { - PointTree pointTree = DTRootCauseAnalyzer.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType); - PointTree anomalyTree = DTRootCauseAnalyzer.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType, true); + PointTree pointTree = BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType); + PointTree anomalyTree = 
BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType, true); //which means there is no aggregation in the input anomaly dimension if (anomalyTree.ParentNode == null) @@ -54,23 +78,23 @@ public static void GetRootCauseList(RootCauseLocalizationInput src, ref RootCaus double totalEntropy = 1; if (anomalyTree.Leaves.Count > 0) { - totalEntropy = DTRootCauseAnalyzer.GetEntropy(pointTree.Leaves.Count, anomalyTree.Leaves.Count); + totalEntropy = GetEntropy(pointTree.Leaves.Count, anomalyTree.Leaves.Count); } if (totalEntropy > 0.9) { - rootCauses.AddRange(DTRootCauseAnalyzer.LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); + rootCauses.AddRange(LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); } else { - rootCauses.AddRange(DTRootCauseAnalyzer.LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); + rootCauses.AddRange(LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); } dst.Items = rootCauses; } } - public static DimensionInfo SeperateDimension(Dictionary dimensions, string aggSymbol) + public DimensionInfo SeperateDimension(Dictionary dimensions, string aggSymbol) { DimensionInfo info = DimensionInfo.CreateDefaultInstance(); foreach (KeyValuePair entry in dimensions) @@ -89,7 +113,7 @@ public static DimensionInfo SeperateDimension(Dictionary dimensi return info; } - protected static PointTree BuildPointTree(List pointList, List aggDims, Dictionary subDim, string aggSymbol, AggregateType aggType, bool filterByAnomaly = false) + protected PointTree BuildPointTree(List pointList, List aggDims, Dictionary subDim, string aggSymbol, AggregateType aggType, bool filterByAnomaly = false) { PointTree tree = PointTree.CreateDefaultInstance(); @@ -147,7 +171,7 @@ protected static PointTree BuildPointTree(List 
pointList, List ag return tree; } - private static PointTree CompleteTreeBottomUp(PointTree tree, AggregateType aggType, string aggSymbol, List aggDims) + private PointTree CompleteTreeBottomUp(PointTree tree, AggregateType aggType, string aggSymbol, List aggDims) { if (tree.Leaves.Count == 0) return tree; @@ -204,7 +228,7 @@ private static PointTree CompleteTreeBottomUp(PointTree tree, AggregateType aggT return tree; } - private static bool IsAggDimensionExisted(string key, string value, List points) + private bool IsAggDimensionExisted(string key, string value, List points) { foreach (Point p in points) { @@ -216,7 +240,7 @@ private static bool IsAggDimensionExisted(string key, string value, List return false; } - private static Point SimulateBottomUpValue(List leaves, string key, string keyValue, AggregateType type, string aggSymbol) + private Point SimulateBottomUpValue(List leaves, string key, string keyValue, AggregateType type, string aggSymbol) { Point p = null; @@ -255,7 +279,7 @@ private static Point SimulateBottomUpValue(List leaves, string key, strin return p; } - public static Dictionary GetSubDim(Dictionary dimension, List keyList) + public Dictionary GetSubDim(Dictionary dimension, List keyList) { Dictionary subDim = new Dictionary(); @@ -266,7 +290,7 @@ public static Dictionary GetSubDim(Dictionary di return subDim; } - protected static List SelectPoints(List points, Dictionary subDim) + protected List SelectPoints(List points, Dictionary subDim) { List list = new List(); @@ -285,7 +309,7 @@ protected static List SelectPoints(List points, Dictionary LocalizeRootCauseByDimension(List totalPoints, PointTree anomalyTree, PointTree pointTree, double totoalEntropy, Dictionary anomalyDimension) + protected List LocalizeRootCauseByDimension(List totalPoints, PointTree anomalyTree, PointTree pointTree, double totoalEntropy, Dictionary anomalyDimension) { var set = anomalyTree.ChildrenNodes.Keys; @@ -323,7 +347,7 @@ protected static List 
LocalizeRootCauseByDimension(List to } } - protected static double GetEntropy(int totalNum, int anomalyNum) + protected double GetEntropy(int totalNum, int anomalyNum) { double ratio = (double)anomalyNum / totalNum; if (ratio == 0 || ratio == 1) @@ -334,7 +358,7 @@ protected static double GetEntropy(int totalNum, int anomalyNum) return -(ratio * Log2(ratio) + (1 - ratio) * Log2(1 - ratio)); } - protected static Dictionary GetEntropyList(BestDimension best, List points) + protected Dictionary GetEntropyList(BestDimension best, List points) { Dictionary list = new Dictionary(); // need to update, change to children if necessary @@ -351,7 +375,7 @@ protected static Dictionary GetEntropyList(BestDimension best, L return list; } - protected static List GetTopAnomaly(List anomalyPoints, Point root, List totalPoints, string dimKey) + protected List GetTopAnomaly(List anomalyPoints, Point root, List totalPoints, string dimKey) { Dictionary pointDistribution = new Dictionary(); UpdateDistribution(pointDistribution, totalPoints, dimKey); @@ -392,7 +416,7 @@ protected static List GetTopAnomaly(List anomalyPoints, Point root return null; } - protected static BestDimension SelectBestDimension(List totalPoints, List anomalyPoints, List aggDim, double totalEntropy) + protected BestDimension SelectBestDimension(List totalPoints, List anomalyPoints, List aggDim, double totalEntropy) { Dictionary entroyGainMap = new Dictionary(); Dictionary entroyGainRatioMap = new Dictionary(); @@ -422,7 +446,7 @@ protected static BestDimension SelectBestDimension(List totalPoints, List return best; } - public static BestDimension SelectBestDimension(Dictionary> pointChildren, Dictionary> anomalyChildren, List aggDim, double totalEntropy) + public BestDimension SelectBestDimension(Dictionary> pointChildren, Dictionary> anomalyChildren, List aggDim, double totalEntropy) { Dictionary entroyGainMap = new Dictionary(); Dictionary entroyGainRatioMap = new Dictionary(); @@ -453,7 +477,7 @@ public static 
BestDimension SelectBestDimension(Dictionary> return best; } - private static BestDimension FindBestDimension(Dictionary entropyGainMap, Dictionary entropyGainRationMap, double meanGain) + private BestDimension FindBestDimension(Dictionary entropyGainMap, Dictionary entropyGainRationMap, double meanGain) { BestDimension best = null; foreach (KeyValuePair dimension in entropyGainMap) @@ -486,7 +510,7 @@ private static BestDimension FindBestDimension(Dictionary return best; } - public static Point FindPointByDimension(Dictionary dim, List points) + public Point FindPointByDimension(Dictionary dim, List points) { foreach (Point p in points) { @@ -508,11 +532,11 @@ public static Point FindPointByDimension(Dictionary dim, List points, ref RootCause dst) + public void UpdateRootCauseDirection(List points, ref RootCause dst) { foreach (RootCauseItem item in dst.Items) { - Point rootCausePoint = DTRootCauseAnalyzer.FindPointByDimension(item.Dimension, points); + Point rootCausePoint = FindPointByDimension(item.Dimension, points); if (rootCausePoint != null) { if (rootCausePoint.ExpectedValue < rootCausePoint.Value) @@ -528,12 +552,12 @@ public static void UpdateRootCauseDirection(List points, ref RootCause ds } } - public static void GetRootCauseScore(List points, Dictionary anomalyRoot, ref RootCause dst, double beta) + public void GetRootCauseScore(List points, Dictionary anomalyRoot, ref RootCause dst, double beta) { if (dst.Items.Count > 1) { //get surprise value and explanary power value - Point anomalyPoint = DTRootCauseAnalyzer.FindPointByDimension(anomalyRoot, points); + Point anomalyPoint = FindPointByDimension(anomalyRoot, points); double sumSurprise = 0; double sumEp = 0; @@ -541,7 +565,7 @@ public static void GetRootCauseScore(List points, Dictionary scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); @@ -554,40 +578,40 @@ public static void GetRootCauseScore(List points, Dictionary scores = GetSupriseAndExplainaryScore(rootCausePoint, 
anomalyPoint); - dst.Items[0].Score = DTRootCauseAnalyzer.GetFinalScore(scores.Item1, scores.Item2, beta); + dst.Items[0].Score = GetFinalScore(scores.Item1, scores.Item2, beta); } } } - private static double GetSurpriseScore(Point rootCausePoint, Point anomalyPoint) + private double GetSurpriseScore(Point rootCausePoint, Point anomalyPoint) { double p = rootCausePoint.ExpectedValue / anomalyPoint.ExpectedValue; double q = rootCausePoint.Value / anomalyPoint.Value; - double surprise = 0.5 * (p * DTRootCauseAnalyzer.Log2(2 * p / (p + q)) + q * DTRootCauseAnalyzer.Log2(2 * q / (p + q))); + double surprise = 0.5 * (p * Log2(2 * p / (p + q)) + q * Log2(2 * q / (p + q))); return surprise; } - private static double GetFinalScore(double surprise, double ep, double beta) + private double GetFinalScore(double surprise, double ep, double beta) { return Math.Max(1, beta * surprise + (1 - beta) * ep); } - private static Tuple GetSupriseAndExplainaryScore(Point rootCausePoint, Point anomalyPoint) + private Tuple GetSupriseAndExplainaryScore(Point rootCausePoint, Point anomalyPoint) { - double surprise = DTRootCauseAnalyzer.GetSurpriseScore(rootCausePoint, anomalyPoint); + double surprise = GetSurpriseScore(rootCausePoint, anomalyPoint); double ep = (rootCausePoint.Value - rootCausePoint.ExpectedValue) / (anomalyPoint.Value - anomalyPoint.ExpectedValue); return new Tuple(surprise, ep); @@ -599,7 +623,7 @@ private static Dictionary UpdateDimensionValue(Dictionary 2; } - private static bool ShouldSeperateAnomaly(double total, double parent, int totalSize, int size) + private bool ShouldSeperateAnomaly(double total, double parent, int totalSize, int size) { if (Math.Abs(total) < Math.Abs(parent) * 0.95) { @@ -624,7 +648,7 @@ private static bool ShouldSeperateAnomaly(double total, double parent, int total return size <= totalSize * 0.5; } - private static double GetDimensionEntropyGain(Dictionary pointDis, Dictionary anomalyDis, double totalEntropy) + private double 
GetDimensionEntropyGain(Dictionary pointDis, Dictionary anomalyDis, double totalEntropy) { int total = GetTotalNumber(pointDis); double entropy = 0; @@ -636,7 +660,7 @@ private static double GetDimensionEntropyGain(Dictionary pointDis, return totalEntropy - entropy; } - private static double GetDimensionInstrinsicValue(Dictionary pointDis, Dictionary anomalyDis, double totalEntropy) + private double GetDimensionInstrinsicValue(Dictionary pointDis, Dictionary anomalyDis, double totalEntropy) { double instrinsicValue = 0; @@ -648,7 +672,7 @@ private static double GetDimensionInstrinsicValue(Dictionary pointD return instrinsicValue; } - private static int GetTotalNumber(Dictionary distribution) + private int GetTotalNumber(Dictionary distribution) { int total = 0; foreach (int num in distribution.Values) @@ -658,7 +682,7 @@ private static int GetTotalNumber(Dictionary distribution) return total; } - private static void UpdateDistribution(Dictionary distribution, List points, string dimKey) + private void UpdateDistribution(Dictionary distribution, List points, string dimKey) { foreach (Point point in points) { @@ -671,12 +695,12 @@ private static void UpdateDistribution(Dictionary distribution, Lis } } - public static double Log2(double val) + public double Log2(double val) { return Math.Log(val) / Math.Log(2); } - public static bool ContainsAll(Dictionary bigDic, Dictionary smallDic) + public bool ContainsAll(Dictionary bigDic, Dictionary smallDic) { foreach (var item in smallDic) { @@ -693,12 +717,12 @@ public static bool ContainsAll(Dictionary bigDic, Dictionary { }; - - DimensionInfo dimensionInfo = DTRootCauseAnalyzer.SeperateDimension(src.AnomalyDimensions, src.AggSymbol); - if (dimensionInfo.AggDim.Count == 0) - { - return; - } - Dictionary subDim = DTRootCauseAnalyzer.GetSubDim(src.AnomalyDimensions, dimensionInfo.DetailDim); - List totalPoints = DTRootCauseAnalyzer.GetTotalPointsForAnomalyTimestamp(src, subDim); - - DTRootCauseAnalyzer.GetRootCauseList(src, 
ref dst, dimensionInfo, totalPoints, subDim); - DTRootCauseAnalyzer.UpdateRootCauseDirection(totalPoints,ref dst); - DTRootCauseAnalyzer.GetRootCauseScore(totalPoints, src.AnomalyDimensions, ref dst, _parent._beta); + DTRootCauseAnalyzer analyzer = new DTRootCauseAnalyzer(src, _parent._beta) ; + dst = analyzer.Analyze(); } } } diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs index 1e5e3f881e..4ecd41f17b 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs @@ -85,6 +85,10 @@ public override void Register() public sealed class RootCause { public List Items { get; set; } + public RootCause() + { + Items = new List(); + } } public sealed class RootCauseLocalizationInput diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 3329a35b27..0d82a3ab51 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -581,19 +581,19 @@ public void RootCauseLocalizationWithDT() var transformedRootCause = engine.Predict(newRootCauseInput); Assert.NotNull(transformedRootCause); - Assert.Equal(1,(int)transformedRootCause.RootCause.Items.Count); + Assert.Equal(1, (int)transformedRootCause.RootCause.Items.Count); Dictionary expectedDim = new Dictionary(); - expectedDim.Add("Country","UK"); - expectedDim.Add("DeviceType",_aggSymbol); - expectedDim.Add("DataCenter","DC1"); + expectedDim.Add("Country", "UK"); + expectedDim.Add("DeviceType", _aggSymbol); + expectedDim.Add("DataCenter", "DC1"); - foreach (KeyValuePair pair in transformedRootCause.RootCause.Items[0].Dimension) { + foreach (KeyValuePair pair in transformedRootCause.RootCause.Items[0].Dimension) + { Assert.Equal(expectedDim[pair.Key], pair.Value); } } - private static List GetRootCauseLocalizationPoints() 
{ List points = new List(); @@ -667,8 +667,7 @@ private static Dictionary GetAnomalyDimension() private static DateTime GetCurrentTimestamp() { - return new DateTime(); + return new DateTime(2020, 3, 23, 0, 0, 0); } - } } From b07ad2803bf117e012614260212cd22c0d60652d Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Tue, 24 Mar 2020 07:59:31 +0800 Subject: [PATCH 15/49] refine code --- .../DTRootCauseAnalyzer.cs | 160 +++--------------- .../DTRootCauseLocalizationType.cs | 119 +++++++++++++ 2 files changed, 139 insertions(+), 140 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs index c127b7067f..1a5c66b828 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs @@ -11,6 +11,8 @@ namespace Microsoft.ML.TimeSeries { public class DTRootCauseAnalyzer { + private static double _anomalyRatioThreshold = 0.5; + private RootCauseLocalizationInput _src; private double _beta; public DTRootCauseAnalyzer(RootCauseLocalizationInput src, double beta) @@ -23,15 +25,18 @@ public RootCause Analyze() { RootCause dst = new RootCause(); DimensionInfo dimensionInfo = SeperateDimension(_src.AnomalyDimensions, _src.AggSymbol); + //no aggregation dimension if (dimensionInfo.AggDim.Count == 0) { - dst.Items.Add(new RootCauseItem(_src.AnomalyDimensions)); + return dst; } Dictionary subDim = GetSubDim(_src.AnomalyDimensions, dimensionInfo.DetailDim); List totalPoints = GetTotalPointsForAnomalyTimestamp(_src, subDim); GetRootCauseList(_src, ref dst, dimensionInfo, totalPoints, subDim); UpdateRootCauseDirection(totalPoints, ref dst); + GetRootCauseScore(totalPoints, _src.AnomalyDimensions, ref dst, _beta); + return dst; } @@ -56,7 +61,7 @@ public void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, PointTree pointTree = BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType); PointTree anomalyTree = 
BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType, true); - //which means there is no aggregation in the input anomaly dimension + //which means there is no anomaly point with the anomaly dimension if (anomalyTree.ParentNode == null) { return; @@ -81,15 +86,7 @@ public void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, totalEntropy = GetEntropy(pointTree.Leaves.Count, anomalyTree.Leaves.Count); } - if (totalEntropy > 0.9) - { - rootCauses.AddRange(LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); - } - else - { - rootCauses.AddRange(LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); - } - + rootCauses.AddRange(LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); dst.Items = rootCauses; } } @@ -171,6 +168,7 @@ protected PointTree BuildPointTree(List pointList, List aggDims, return tree; } + private PointTree CompleteTreeBottomUp(PointTree tree, AggregateType aggType, string aggSymbol, List aggDims) { @@ -283,7 +281,7 @@ public Dictionary GetSubDim(Dictionary dimension { Dictionary subDim = new Dictionary(); - foreach (String dim in keyList) + foreach (string dim in keyList) { subDim.Add(dim, dimension[dim]); } @@ -434,7 +432,7 @@ protected BestDimension SelectBestDimension(List totalPoints, List dimension.Entropy = totalEntropy - gain; entroyGainMap.Add(dimension, gain); - double gainRatio = gain / GetDimensionInstrinsicValue(dimension.PointDis, dimension.AnomalyDis, totalEntropy); + double gainRatio = gain / GetDimensionInstrinsicValue(dimension.PointDis, dimension.AnomalyDis); entroyGainRatioMap.Add(dimension, gainRatio); sumGain += gain; @@ -464,7 +462,7 @@ public BestDimension SelectBestDimension(Dictionary> pointCh dimension.Entropy = totalEntropy - gain; entroyGainMap.Add(dimension, gain); - double gainRatio = gain / 
GetDimensionInstrinsicValue(dimension.PointDis, dimension.AnomalyDis, totalEntropy); + double gainRatio = gain / GetDimensionInstrinsicValue(dimension.PointDis, dimension.AnomalyDis); entroyGainRatioMap.Add(dimension, gainRatio); sumGain += gain; @@ -645,7 +643,7 @@ private bool ShouldSeperateAnomaly(double total, double parent, int totalSize, i return true; } - return size <= totalSize * 0.5; + return size <= totalSize * _anomalyRatioThreshold; } private double GetDimensionEntropyGain(Dictionary pointDis, Dictionary anomalyDis, double totalEntropy) @@ -660,7 +658,7 @@ private double GetDimensionEntropyGain(Dictionary pointDis, Diction return totalEntropy - entropy; } - private double GetDimensionInstrinsicValue(Dictionary pointDis, Dictionary anomalyDis, double totalEntropy) + private double GetDimensionInstrinsicValue(Dictionary pointDis, Dictionary anomalyDis) { double instrinsicValue = 0; @@ -772,58 +770,6 @@ public static PointTree CreateDefaultInstance() } } - public sealed class Point : IEquatable - { - public double Value { get; set; } - public double ExpectedValue { get; set; } - public bool IsAnomaly { get; set; } - public Dictionary Dimensions { get; set; } - - public double Delta { get; set; } - - public Point(double value, double expectedValue, bool isAnomaly, Dictionary dimensions) - { - Value = value; - ExpectedValue = expectedValue; - IsAnomaly = isAnomaly; - Dimensions = dimensions; - Delta = (value - expectedValue) / expectedValue; - if (expectedValue == 0) - { - Delta = 0; - } - } - - public bool Equals(Point other) - { - foreach (KeyValuePair item in Dimensions) - { - if (!other.Dimensions[item.Key].Equals(item.Value)) - { - return false; - } - } - return true; - } - - public override int GetHashCode() - { - return Dimensions.GetHashCode(); - } - } - - public sealed class MetricSlice - { - public DateTime TimeStamp { get; set; } - public List Points { get; set; } - - public MetricSlice(DateTime timeStamp, List points) - { - TimeStamp = 
timeStamp; - Points = points; - } - } - public sealed class BestDimension { public string DimensionKey; @@ -841,59 +787,13 @@ public static BestDimension CreateDefaultInstance() } } - public sealed class AnomalyCause - { - public string DimensionKey; - public List Anomalies; - - public AnomalyCause() { } - } - - public sealed class RootCauseItem : IEquatable - { - public double Score; - public string Path; - public Dictionary Dimension; - public AnomalyDirection Direction; - - public RootCauseItem(Dictionary rootCause) - { - Dimension = rootCause; - } - - public RootCauseItem(Dictionary rootCause, string path) - { - Dimension = rootCause; - Path = path; - } - public bool Equals(RootCauseItem other) - { - if (Dimension.Count == other.Dimension.Count) - { - foreach (KeyValuePair item in Dimension) - { - if (!other.Dimension[item.Key].Equals(item.Value)) - { - return false; - } - } - return true; - } - return false; - } - } + //public sealed class AnomalyCause + //{ + // public string DimensionKey; + // public List Anomalies; - public enum AnomalyDirection - { - /// - /// the value is larger than expected value. - /// - Up = 0, - /// - /// the value is lower than expected value. - /// - Down = 1 - } + // public AnomalyCause() { } + //} public class RootCauseScore { @@ -906,24 +806,4 @@ public RootCauseScore(double surprise, double explainaryScore) ExplainaryScore = explainaryScore; } } - - public enum AggregateType - { - /// - /// Make the aggregate type as sum. - /// - Sum = 0, - /// - /// Make the aggregate type as average. - /// - Avg = 1, - /// - /// Make the aggregate type as min. - /// - Min = 2, - /// - /// Make the aggregate type as max. 
- /// - Max = 3 - } } diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs index 4ecd41f17b..49d8252d05 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs @@ -169,4 +169,123 @@ public override string ToString() return typeof(RootCauseLocalizationInputDataViewType).Name; } } + + public enum AggregateType + { + /// + /// Make the aggregate type as sum. + /// + Sum = 0, + /// + /// Make the aggregate type as average. + /// + Avg = 1, + /// + /// Make the aggregate type as min. + /// + Min = 2, + /// + /// Make the aggregate type as max. + /// + Max = 3 + } + + public enum AnomalyDirection + { + /// + /// the value is larger than expected value. + /// + Up = 0, + /// + /// the value is lower than expected value. + /// + Down = 1 + } + + public sealed class RootCauseItem : IEquatable + { + public double Score; + public string Path; + public Dictionary Dimension; + public AnomalyDirection Direction; + + public RootCauseItem(Dictionary rootCause) + { + Dimension = rootCause; + } + + public RootCauseItem(Dictionary rootCause, string path) + { + Dimension = rootCause; + Path = path; + } + public bool Equals(RootCauseItem other) + { + if (Dimension.Count == other.Dimension.Count) + { + foreach (KeyValuePair item in Dimension) + { + if (!other.Dimension[item.Key].Equals(item.Value)) + { + return false; + } + } + return true; + } + return false; + } + } + + public sealed class MetricSlice + { + public DateTime TimeStamp { get; set; } + public List Points { get; set; } + + public MetricSlice(DateTime timeStamp, List points) + { + TimeStamp = timeStamp; + Points = points; + } + } + + public sealed class Point : IEquatable + { + public double Value { get; set; } + public double ExpectedValue { get; set; } + public bool IsAnomaly { get; set; } + public Dictionary Dimensions { get; set; } + + public double Delta { get; 
set; } + + public Point(double value, double expectedValue, bool isAnomaly, Dictionary dimensions) + { + Value = value; + ExpectedValue = expectedValue; + IsAnomaly = isAnomaly; + Dimensions = dimensions; + Delta = (value - expectedValue) / expectedValue; + if (expectedValue == 0) + { + Delta = 0; + } + } + + public bool Equals(Point other) + { + foreach (KeyValuePair item in Dimensions) + { + if (!other.Dimensions[item.Key].Equals(item.Value)) + { + return false; + } + } + return true; + } + + public override int GetHashCode() + { + return Dimensions.GetHashCode(); + } + } + } From c729877560a63593b6113f34dbc126ade5096b8c Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Tue, 24 Mar 2020 08:25:50 +0800 Subject: [PATCH 16/49] merge with master --- .../TimeSeries/LocalizeRootCauseByDT.cs | 2 +- .../DTRootCauseAnalyzer.cs | 120 +++++++++++------- .../DTRootCauseLocalization.cs | 16 +-- .../DTRootCauseLocalizationType.cs | 4 + .../TimeSeriesDirectApi.cs | 87 ++++++++++++- 5 files changed, 161 insertions(+), 68 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs index 721fa2d86e..067afdbf53 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs @@ -126,7 +126,7 @@ private static Dictionary GetAnomalyDimension() private static DateTime GetTimestamp() { - return new DateTime(); + return new DateTime(2020, 3, 23, 0, 0, 0); } private class RootCauseLocalizationData diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs index b7a66582b3..c127b7067f 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs @@ -11,7 +11,31 @@ namespace Microsoft.ML.TimeSeries { public 
class DTRootCauseAnalyzer { - public static List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src, Dictionary subDim) + private RootCauseLocalizationInput _src; + private double _beta; + public DTRootCauseAnalyzer(RootCauseLocalizationInput src, double beta) + { + _src = src; + _beta = beta; + } + + public RootCause Analyze() + { + RootCause dst = new RootCause(); + DimensionInfo dimensionInfo = SeperateDimension(_src.AnomalyDimensions, _src.AggSymbol); + if (dimensionInfo.AggDim.Count == 0) + { + dst.Items.Add(new RootCauseItem(_src.AnomalyDimensions)); + } + Dictionary subDim = GetSubDim(_src.AnomalyDimensions, dimensionInfo.DetailDim); + List totalPoints = GetTotalPointsForAnomalyTimestamp(_src, subDim); + + GetRootCauseList(_src, ref dst, dimensionInfo, totalPoints, subDim); + UpdateRootCauseDirection(totalPoints, ref dst); + return dst; + } + + public List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src, Dictionary subDim) { List points = new List(); foreach (MetricSlice slice in src.Slices) @@ -22,15 +46,15 @@ public static List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizatio } } - List totalPoints = DTRootCauseAnalyzer.SelectPoints(points, subDim); + List totalPoints = SelectPoints(points, subDim); return totalPoints; } - public static void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, DimensionInfo dimensionInfo, List totalPoints, Dictionary subDim) + public void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, DimensionInfo dimensionInfo, List totalPoints, Dictionary subDim) { - PointTree pointTree = DTRootCauseAnalyzer.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType); - PointTree anomalyTree = DTRootCauseAnalyzer.BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType, true); + PointTree pointTree = BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType); + PointTree anomalyTree = 
BuildPointTree(totalPoints, dimensionInfo.AggDim, subDim, src.AggSymbol, src.AggType, true); //which means there is no aggregation in the input anomaly dimension if (anomalyTree.ParentNode == null) @@ -54,23 +78,23 @@ public static void GetRootCauseList(RootCauseLocalizationInput src, ref RootCaus double totalEntropy = 1; if (anomalyTree.Leaves.Count > 0) { - totalEntropy = DTRootCauseAnalyzer.GetEntropy(pointTree.Leaves.Count, anomalyTree.Leaves.Count); + totalEntropy = GetEntropy(pointTree.Leaves.Count, anomalyTree.Leaves.Count); } if (totalEntropy > 0.9) { - rootCauses.AddRange(DTRootCauseAnalyzer.LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); + rootCauses.AddRange(LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); } else { - rootCauses.AddRange(DTRootCauseAnalyzer.LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); + rootCauses.AddRange(LocalizeRootCauseByDimension(pointTree.Leaves, anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions)); } dst.Items = rootCauses; } } - public static DimensionInfo SeperateDimension(Dictionary dimensions, string aggSymbol) + public DimensionInfo SeperateDimension(Dictionary dimensions, string aggSymbol) { DimensionInfo info = DimensionInfo.CreateDefaultInstance(); foreach (KeyValuePair entry in dimensions) @@ -89,7 +113,7 @@ public static DimensionInfo SeperateDimension(Dictionary dimensi return info; } - protected static PointTree BuildPointTree(List pointList, List aggDims, Dictionary subDim, string aggSymbol, AggregateType aggType, bool filterByAnomaly = false) + protected PointTree BuildPointTree(List pointList, List aggDims, Dictionary subDim, string aggSymbol, AggregateType aggType, bool filterByAnomaly = false) { PointTree tree = PointTree.CreateDefaultInstance(); @@ -147,7 +171,7 @@ protected static PointTree BuildPointTree(List 
pointList, List ag return tree; } - private static PointTree CompleteTreeBottomUp(PointTree tree, AggregateType aggType, string aggSymbol, List aggDims) + private PointTree CompleteTreeBottomUp(PointTree tree, AggregateType aggType, string aggSymbol, List aggDims) { if (tree.Leaves.Count == 0) return tree; @@ -204,7 +228,7 @@ private static PointTree CompleteTreeBottomUp(PointTree tree, AggregateType aggT return tree; } - private static bool IsAggDimensionExisted(string key, string value, List points) + private bool IsAggDimensionExisted(string key, string value, List points) { foreach (Point p in points) { @@ -216,7 +240,7 @@ private static bool IsAggDimensionExisted(string key, string value, List return false; } - private static Point SimulateBottomUpValue(List leaves, string key, string keyValue, AggregateType type, string aggSymbol) + private Point SimulateBottomUpValue(List leaves, string key, string keyValue, AggregateType type, string aggSymbol) { Point p = null; @@ -255,7 +279,7 @@ private static Point SimulateBottomUpValue(List leaves, string key, strin return p; } - public static Dictionary GetSubDim(Dictionary dimension, List keyList) + public Dictionary GetSubDim(Dictionary dimension, List keyList) { Dictionary subDim = new Dictionary(); @@ -266,7 +290,7 @@ public static Dictionary GetSubDim(Dictionary di return subDim; } - protected static List SelectPoints(List points, Dictionary subDim) + protected List SelectPoints(List points, Dictionary subDim) { List list = new List(); @@ -285,7 +309,7 @@ protected static List SelectPoints(List points, Dictionary LocalizeRootCauseByDimension(List totalPoints, PointTree anomalyTree, PointTree pointTree, double totoalEntropy, Dictionary anomalyDimension) + protected List LocalizeRootCauseByDimension(List totalPoints, PointTree anomalyTree, PointTree pointTree, double totoalEntropy, Dictionary anomalyDimension) { var set = anomalyTree.ChildrenNodes.Keys; @@ -323,7 +347,7 @@ protected static List 
LocalizeRootCauseByDimension(List to } } - protected static double GetEntropy(int totalNum, int anomalyNum) + protected double GetEntropy(int totalNum, int anomalyNum) { double ratio = (double)anomalyNum / totalNum; if (ratio == 0 || ratio == 1) @@ -334,7 +358,7 @@ protected static double GetEntropy(int totalNum, int anomalyNum) return -(ratio * Log2(ratio) + (1 - ratio) * Log2(1 - ratio)); } - protected static Dictionary GetEntropyList(BestDimension best, List points) + protected Dictionary GetEntropyList(BestDimension best, List points) { Dictionary list = new Dictionary(); // need to update, change to children if necessary @@ -351,7 +375,7 @@ protected static Dictionary GetEntropyList(BestDimension best, L return list; } - protected static List GetTopAnomaly(List anomalyPoints, Point root, List totalPoints, string dimKey) + protected List GetTopAnomaly(List anomalyPoints, Point root, List totalPoints, string dimKey) { Dictionary pointDistribution = new Dictionary(); UpdateDistribution(pointDistribution, totalPoints, dimKey); @@ -392,7 +416,7 @@ protected static List GetTopAnomaly(List anomalyPoints, Point root return null; } - protected static BestDimension SelectBestDimension(List totalPoints, List anomalyPoints, List aggDim, double totalEntropy) + protected BestDimension SelectBestDimension(List totalPoints, List anomalyPoints, List aggDim, double totalEntropy) { Dictionary entroyGainMap = new Dictionary(); Dictionary entroyGainRatioMap = new Dictionary(); @@ -422,7 +446,7 @@ protected static BestDimension SelectBestDimension(List totalPoints, List return best; } - public static BestDimension SelectBestDimension(Dictionary> pointChildren, Dictionary> anomalyChildren, List aggDim, double totalEntropy) + public BestDimension SelectBestDimension(Dictionary> pointChildren, Dictionary> anomalyChildren, List aggDim, double totalEntropy) { Dictionary entroyGainMap = new Dictionary(); Dictionary entroyGainRatioMap = new Dictionary(); @@ -453,7 +477,7 @@ public static 
BestDimension SelectBestDimension(Dictionary> return best; } - private static BestDimension FindBestDimension(Dictionary entropyGainMap, Dictionary entropyGainRationMap, double meanGain) + private BestDimension FindBestDimension(Dictionary entropyGainMap, Dictionary entropyGainRationMap, double meanGain) { BestDimension best = null; foreach (KeyValuePair dimension in entropyGainMap) @@ -486,7 +510,7 @@ private static BestDimension FindBestDimension(Dictionary return best; } - public static Point FindPointByDimension(Dictionary dim, List points) + public Point FindPointByDimension(Dictionary dim, List points) { foreach (Point p in points) { @@ -508,11 +532,11 @@ public static Point FindPointByDimension(Dictionary dim, List points, ref RootCause dst) + public void UpdateRootCauseDirection(List points, ref RootCause dst) { foreach (RootCauseItem item in dst.Items) { - Point rootCausePoint = DTRootCauseAnalyzer.FindPointByDimension(item.Dimension, points); + Point rootCausePoint = FindPointByDimension(item.Dimension, points); if (rootCausePoint != null) { if (rootCausePoint.ExpectedValue < rootCausePoint.Value) @@ -528,12 +552,12 @@ public static void UpdateRootCauseDirection(List points, ref RootCause ds } } - public static void GetRootCauseScore(List points, Dictionary anomalyRoot, ref RootCause dst, double beta) + public void GetRootCauseScore(List points, Dictionary anomalyRoot, ref RootCause dst, double beta) { if (dst.Items.Count > 1) { //get surprise value and explanary power value - Point anomalyPoint = DTRootCauseAnalyzer.FindPointByDimension(anomalyRoot, points); + Point anomalyPoint = FindPointByDimension(anomalyRoot, points); double sumSurprise = 0; double sumEp = 0; @@ -541,7 +565,7 @@ public static void GetRootCauseScore(List points, Dictionary scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); @@ -554,40 +578,40 @@ public static void GetRootCauseScore(List points, Dictionary scores = GetSupriseAndExplainaryScore(rootCausePoint, 
anomalyPoint); - dst.Items[0].Score = DTRootCauseAnalyzer.GetFinalScore(scores.Item1, scores.Item2, beta); + dst.Items[0].Score = GetFinalScore(scores.Item1, scores.Item2, beta); } } } - private static double GetSurpriseScore(Point rootCausePoint, Point anomalyPoint) + private double GetSurpriseScore(Point rootCausePoint, Point anomalyPoint) { double p = rootCausePoint.ExpectedValue / anomalyPoint.ExpectedValue; double q = rootCausePoint.Value / anomalyPoint.Value; - double surprise = 0.5 * (p * DTRootCauseAnalyzer.Log2(2 * p / (p + q)) + q * DTRootCauseAnalyzer.Log2(2 * q / (p + q))); + double surprise = 0.5 * (p * Log2(2 * p / (p + q)) + q * Log2(2 * q / (p + q))); return surprise; } - private static double GetFinalScore(double surprise, double ep, double beta) + private double GetFinalScore(double surprise, double ep, double beta) { return Math.Max(1, beta * surprise + (1 - beta) * ep); } - private static Tuple GetSupriseAndExplainaryScore(Point rootCausePoint, Point anomalyPoint) + private Tuple GetSupriseAndExplainaryScore(Point rootCausePoint, Point anomalyPoint) { - double surprise = DTRootCauseAnalyzer.GetSurpriseScore(rootCausePoint, anomalyPoint); + double surprise = GetSurpriseScore(rootCausePoint, anomalyPoint); double ep = (rootCausePoint.Value - rootCausePoint.ExpectedValue) / (anomalyPoint.Value - anomalyPoint.ExpectedValue); return new Tuple(surprise, ep); @@ -599,7 +623,7 @@ private static Dictionary UpdateDimensionValue(Dictionary 2; } - private static bool ShouldSeperateAnomaly(double total, double parent, int totalSize, int size) + private bool ShouldSeperateAnomaly(double total, double parent, int totalSize, int size) { if (Math.Abs(total) < Math.Abs(parent) * 0.95) { @@ -624,7 +648,7 @@ private static bool ShouldSeperateAnomaly(double total, double parent, int total return size <= totalSize * 0.5; } - private static double GetDimensionEntropyGain(Dictionary pointDis, Dictionary anomalyDis, double totalEntropy) + private double 
GetDimensionEntropyGain(Dictionary pointDis, Dictionary anomalyDis, double totalEntropy) { int total = GetTotalNumber(pointDis); double entropy = 0; @@ -636,7 +660,7 @@ private static double GetDimensionEntropyGain(Dictionary pointDis, return totalEntropy - entropy; } - private static double GetDimensionInstrinsicValue(Dictionary pointDis, Dictionary anomalyDis, double totalEntropy) + private double GetDimensionInstrinsicValue(Dictionary pointDis, Dictionary anomalyDis, double totalEntropy) { double instrinsicValue = 0; @@ -648,7 +672,7 @@ private static double GetDimensionInstrinsicValue(Dictionary pointD return instrinsicValue; } - private static int GetTotalNumber(Dictionary distribution) + private int GetTotalNumber(Dictionary distribution) { int total = 0; foreach (int num in distribution.Values) @@ -658,7 +682,7 @@ private static int GetTotalNumber(Dictionary distribution) return total; } - private static void UpdateDistribution(Dictionary distribution, List points, string dimKey) + private void UpdateDistribution(Dictionary distribution, List points, string dimKey) { foreach (Point point in points) { @@ -671,12 +695,12 @@ private static void UpdateDistribution(Dictionary distribution, Lis } } - public static double Log2(double val) + public double Log2(double val) { return Math.Log(val) / Math.Log(2); } - public static bool ContainsAll(Dictionary bigDic, Dictionary smallDic) + public bool ContainsAll(Dictionary bigDic, Dictionary smallDic) { foreach (var item in smallDic) { @@ -693,12 +717,12 @@ public static bool ContainsAll(Dictionary bigDic, Dictionary { }; - - DimensionInfo dimensionInfo = DTRootCauseAnalyzer.SeperateDimension(src.AnomalyDimensions, src.AggSymbol); - if (dimensionInfo.AggDim.Count == 0) - { - return; - } - Dictionary subDim = DTRootCauseAnalyzer.GetSubDim(src.AnomalyDimensions, dimensionInfo.DetailDim); - List totalPoints = DTRootCauseAnalyzer.GetTotalPointsForAnomalyTimestamp(src, subDim); - - DTRootCauseAnalyzer.GetRootCauseList(src, 
ref dst, dimensionInfo, totalPoints, subDim); - DTRootCauseAnalyzer.UpdateRootCauseDirection(totalPoints,ref dst); - DTRootCauseAnalyzer.GetRootCauseScore(totalPoints, src.AnomalyDimensions, ref dst, _parent._beta); + DTRootCauseAnalyzer analyzer = new DTRootCauseAnalyzer(src, _parent._beta) ; + dst = analyzer.Analyze(); } } } diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs index 1e5e3f881e..4ecd41f17b 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs @@ -85,6 +85,10 @@ public override void Register() public sealed class RootCause { public List Items { get; set; } + public RootCause() + { + Items = new List(); + } } public sealed class RootCauseLocalizationInput diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 3329a35b27..fd254f4937 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -581,18 +581,95 @@ public void RootCauseLocalizationWithDT() var transformedRootCause = engine.Predict(newRootCauseInput); Assert.NotNull(transformedRootCause); - Assert.Equal(1,(int)transformedRootCause.RootCause.Items.Count); + Assert.Equal(1, (int)transformedRootCause.RootCause.Items.Count); Dictionary expectedDim = new Dictionary(); - expectedDim.Add("Country","UK"); - expectedDim.Add("DeviceType",_aggSymbol); - expectedDim.Add("DataCenter","DC1"); + expectedDim.Add("Country", "UK"); + expectedDim.Add("DeviceType", _aggSymbol); + expectedDim.Add("DataCenter", "DC1"); - foreach (KeyValuePair pair in transformedRootCause.RootCause.Items[0].Dimension) { + foreach (KeyValuePair pair in transformedRootCause.RootCause.Items[0].Dimension) + { Assert.Equal(expectedDim[pair.Key], pair.Value); } } + private static List GetRootCauseLocalizationPoints() 
+ { + List points = new List(); + + Dictionary dic1 = new Dictionary(); + dic1.Add("Country", "UK"); + dic1.Add("DeviceType", "Laptop"); + dic1.Add("DataCenter", "DC1"); + points.Add(new Point(200, 100, true, dic1)); + + Dictionary dic2 = new Dictionary(); + dic2.Add("Country", "UK"); + dic2.Add("DeviceType", "Mobile"); + dic2.Add("DataCenter", "DC1"); + points.Add(new Point(1000, 100, true, dic2)); + + Dictionary dic3 = new Dictionary(); + dic3.Add("Country", "UK"); + dic3.Add("DeviceType", _aggSymbol); + dic3.Add("DataCenter", "DC1"); + points.Add(new Point(1200, 200, true, dic3)); + + Dictionary dic4 = new Dictionary(); + dic4.Add("Country", "UK"); + dic4.Add("DeviceType", "Laptop"); + dic4.Add("DataCenter", "DC2"); + points.Add(new Point(100, 100, false, dic4)); + + Dictionary dic5 = new Dictionary(); + dic5.Add("Country", "UK"); + dic5.Add("DeviceType", "Mobile"); + dic5.Add("DataCenter", "DC2"); + points.Add(new Point(200, 200, false, dic5)); + + Dictionary dic6 = new Dictionary(); + dic6.Add("Country", "UK"); + dic6.Add("DeviceType", _aggSymbol); + dic6.Add("DataCenter", "DC2"); + points.Add(new Point(300, 300, false, dic6)); + + Dictionary dic7 = new Dictionary(); + dic7.Add("Country", "UK"); + dic7.Add("DeviceType", _aggSymbol); + dic7.Add("DataCenter", _aggSymbol); + points.Add(new Point(1500, 500, true, dic7)); + + Dictionary dic8 = new Dictionary(); + dic8.Add("Country", "UK"); + dic8.Add("DeviceType", "Laptop"); + dic8.Add("DataCenter", _aggSymbol); + points.Add(new Point(300, 200, true, dic8)); + + Dictionary dic9 = new Dictionary(); + dic9.Add("Country", "UK"); + dic9.Add("DeviceType", "Mobile"); + dic9.Add("DataCenter", _aggSymbol); + points.Add(new Point(1200, 300, true, dic9)); + + return points; + } + + private static Dictionary GetAnomalyDimension() + { + Dictionary dim = new Dictionary(); + dim.Add("Country", "UK"); + dim.Add("DeviceType", _aggSymbol); + dim.Add("DataCenter", _aggSymbol); + + return dim; + } + + private static DateTime 
GetCurrentTimestamp() + { + return new DateTime(2020, 3, 23, 0, 0, 0); + } + private static List GetRootCauseLocalizationPoints() { From 0d43b0d242721a1a5b00fb65d804f653af961600 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Thu, 26 Mar 2020 16:48:57 +0800 Subject: [PATCH 17/49] add evaluatin --- .../TimeSeries/LocalizeRootCauseEvaluation.cs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs index 47163a49dc..7a664a4ab5 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs @@ -1,6 +1,7 @@ using System; using System.Collections.Generic; using System.Data; +using System.Globalization; using Microsoft.ML; using Microsoft.ML.TimeSeries; using Microsoft.ML.Transforms.TimeSeries; @@ -23,7 +24,7 @@ public static void Example() int totalFn = 0; int totalCount = 0; - bool exactly = false; + bool exactly = false; int totalRunTime = 0; @@ -31,7 +32,7 @@ public static void Example() { DateTime timeStamp = item.Key; - DateTime filterTime = DateTime.ParseExact("2019-11-13 13:00:00,000", "yyyy-MM-dd HH:mm:ss,fff", + DateTime filterTime = DateTime.ParseExact("2019-11-07 11:30:00,000", "yyyy-MM-dd HH:mm:ss,fff", System.Globalization.CultureInfo.InvariantCulture); //if (timeStamp.CompareTo(filterTime).Equals(0)) @@ -85,7 +86,8 @@ private static Tuple EvaluateRootCauseResult(List EvaluateRootCauseResult(List> nCause = GetFNegtiveCause(labeledRootCause, labelSet); if (nCause.Count > 0) { - Console.WriteLine(String.Format("FN : timestamp - {0}, labeled root cause", timeStamp)); + int seconds = Convert.ToInt32(timeStamp.Subtract(new DateTime(1970, 1, 1, 0, 0, 0, 0)).TotalSeconds); + Console.WriteLine(String.Format("FN : 
timestamp - {0} - {1} labeled root cause", timeStamp.ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ss'Z'", DateTimeFormatInfo.InvariantInfo), seconds)); foreach (Dictionary cause in nCause) { Console.WriteLine(string.Join(Environment.NewLine, cause)); @@ -154,7 +157,7 @@ private static string GetDicHashCode(Dictionary dic) private static int CompareCause(Dictionary detect, Dictionary label) { - if (detect.Equals(label)) + if (DTRootCauseAnalyzer.ContainsAll(detect, label) && DTRootCauseAnalyzer.ContainsAll(label, detect)) { return 0; } From 5778eed8ea12b8cfff4c053ac630bea065ded3c3 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Thu, 26 Mar 2020 21:55:14 +0800 Subject: [PATCH 18/49] some refine for code --- .../time-series-root-cause-localization-dt.md | 47 ++ .../TimeSeries/LocalizeRootCauseEvaluation.cs | 412 ------------------ .../Microsoft.ML.Samples.csproj | 3 +- docs/samples/Microsoft.ML.Samples/Program.cs | 16 +- .../DTRootCauseAnalyzer.cs | 11 +- .../DTRootCauseLocalization.cs | 15 +- .../DTRootCauseLocalizationType.cs | 5 - .../ExtensionsCatalog.cs | 2 +- .../TimeSeriesDirectApi.cs | 55 +-- 9 files changed, 95 insertions(+), 471 deletions(-) create mode 100644 docs/api-reference/time-series-root-cause-localization-dt.md delete mode 100644 docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs diff --git a/docs/api-reference/time-series-root-cause-localization-dt.md b/docs/api-reference/time-series-root-cause-localization-dt.md new file mode 100644 index 0000000000..16994b8c48 --- /dev/null +++ b/docs/api-reference/time-series-root-cause-localization-dt.md @@ -0,0 +1,47 @@ +At Mircosoft, we develop a decision tree based root cause localization method which helps to find out the root causes for an anomaly incident incrementally. 
+ +## Multi-Dimensional Root Cause Localization +It's a common case that one measure is collected with many dimensions (*e.g.*, Province, ISP) whose values are categorical (*e.g.*, Beijing or Shanghai for dimension Province). When a measure's value deviates from its expected value, this measure encounters anomalies. In such cases, operators would like to localize the root cause dimension combinations rapidly and accurately. Multi-dimensional root cause localization is critical to troubleshoot and mitigate such cases. + +## Algorithm + +The decision tree based root cause localization method is unsupervised, which means no training step is needed. It consists of the following major steps: +(1) Find the best dimension, which divides the anomalous and normal data, based on a decision tree according to entropy gain and entropy gain ratio. +(2) Find the top anomaly points for the selected best dimension. + +### Decision Tree + +The [decision tree](https://en.wikipedia.org/wiki/Decision_tree) algorithm chooses the highest information gain to split or construct a decision tree. We use it to choose the dimension which contributes the most to the anomaly. The following are some concepts used in decision trees. + +#### Information Entropy + +Information [entropy](https://en.wikipedia.org/wiki/Entropy_(information_theory)) is a measure of disorder or uncertainty. You can think of it as a measure of purity as well. The lower the value, the purer the data $D$. + +$$Ent(D) = - \sum_{k=1}^{|y|} p_k\log_2(p_k) $$ + +where $p_k$ represents the probability that an element of the dataset belongs to class $k$, and $|y|$ is the number of classes. In our case, there are only two classes: the anomaly points and the normal points. + +#### Information Gain +[Information gain](https://en.wikipedia.org/wiki/Information_gain_in_decision_trees) is a metric to measure the reduction of this disorder in our target class given additional information about it. 
Mathematically it can be written as: + +$$Gain(D, a) = Ent(D) - \sum_{v=1}^{|V|} \frac{|D^v|}{|D|} Ent(D^v) $$ + +where $Ent(D^v)$ is the entropy of the set of points in $D$ for which dimension $a$ is equal to $v$, $|D|$ is the total number of points in dataset $D$, and $|D^v|$ is the total number of points in dataset $D$ for which dimension $a$ is equal to $v$. + +For all aggregated dimensions, we calculate the information gain for each dimension. The greater the reduction in this uncertainty, the more information is gained about $D$ from dimension $a$. + +#### Entropy Gain Ratio + +Information gain is biased toward variables with a large number of distinct values. A modification is [information gain ratio](https://en.wikipedia.org/wiki/Information_gain_ratio), which reduces this bias. + +$$Ratio(D, a) = \frac{Gain(D,a)} {IV(a)} $$ + +where the intrinsic value (IV) is the entropy of the split (with respect to the dimension $a$ in focus). + +$$IV(a) = -\sum_{v=1}^{|V|}\frac{|D^v|} {|D|} \log_2 \frac{|D^v|} {|D|} $$ + +In our strategy, we first loop over all the aggregated dimensions to find the dimensions whose entropy gain is above the mean entropy gain ratio; then, from the filtered dimensions, we select the dimension with the highest entropy gain ratio as the best dimension. Meanwhile, dimensions for which the anomaly value count is only one are also included in the calculation. + +> [!Note] +> 1. Because the algorithm depends on the data you input, if the input points are incorrect or incomplete, the calculated result will be unexpected. +> 2. Currently, the algorithm localizes the root cause incrementally, which means at most one dimension, together with its values, is detected. If you want to find all the dimensions that contribute to the anomaly, you can call this API recursively. 
diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs deleted file mode 100644 index 7a664a4ab5..0000000000 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseEvaluation.cs +++ /dev/null @@ -1,412 +0,0 @@ -using System; -using System.Collections.Generic; -using System.Data; -using System.Globalization; -using Microsoft.ML; -using Microsoft.ML.TimeSeries; -using Microsoft.ML.Transforms.TimeSeries; - -using Microsoft.VisualBasic.FileIO; - -namespace Samples.Dynamic.Transforms.TimeSeries -{ - public static class LocalizeRootCauseEvaluation - { - public static void Example() - { - Dictionary> rootNodeMap = GetAnomalyRootMap(); - Dictionary>> labeledRootCauseMap = GetLabeledRootCauseMap(); - - string aggSymbol = "##EMPTY##awqegp##"; - - int totalTp = 0; - int totalFp = 0; - int totalFn = 0; - int totalCount = 0; - - bool exactly = false; - - int totalRunTime = 0; - - foreach (KeyValuePair> item in rootNodeMap) - { - DateTime timeStamp = item.Key; - - DateTime filterTime = DateTime.ParseExact("2019-11-07 11:30:00,000", "yyyy-MM-dd HH:mm:ss,fff", - System.Globalization.CultureInfo.InvariantCulture); - - //if (timeStamp.CompareTo(filterTime).Equals(0)) - { - int seconds = Convert.ToInt32(timeStamp.Subtract(new DateTime(1970, 1, 1, 0, 0, 0, 0)).TotalSeconds); - string path = String.Format("D:/rootcause/Dataset_yaniv/raw_data_201908_202002/{0}.csv", seconds); - List points = GetPoints(path); - List slices = new List(); - slices.Add(new MetricSlice(timeStamp, points)); - - PredictionEngine engine = GetRootCausePredictionEngine(); - - var newRootCauseInput = new RootCauseLocalizationData(timeStamp, rootNodeMap[timeStamp], new List() { new MetricSlice(timeStamp, points) }, AggregateType.Sum, aggSymbol); - - List list = new List(); - int startTime = System.Environment.TickCount; - 
GetRootCause(list, newRootCauseInput, engine); - int endTime = System.Environment.TickCount; - int runTime = endTime - startTime; - totalRunTime += runTime; - - List> labeledRootCause = labeledRootCauseMap[timeStamp]; - List> detectedRootCause = ConvertRootCauseItemToDic(list); - RemoveAggSymbol(detectedRootCause, aggSymbol); - - Tuple evaluation = EvaluateRootCauseResult(detectedRootCause, labeledRootCause, exactly, timeStamp); - totalTp += evaluation.Item1; - totalFp += evaluation.Item2; - totalFn += evaluation.Item3; - totalCount++; - } - } - - double precision = (double)totalTp / (totalTp + totalFp); - double recall = (double)totalTp / (totalTp + totalFn); - double f1 = 2 * precision * recall / (precision + recall); - Console.WriteLine(String.Format("Total Count : {0}, TP: {1}, FP: {2}, FN: {3}", totalCount, totalTp, totalFp, totalFn)); - Console.WriteLine(String.Format("Precision : {0}, Recall: {1}, F1: {2}", precision, recall, f1)); - Console.WriteLine(String.Format("Mean calculation time is : {0} ms", (double)totalRunTime / totalCount)); - } - - private static Tuple EvaluateRootCauseResult(List> detectedRootCause, List> labeledRootCause, bool exactly, DateTime timeStamp) - { - int tp = 0; - int fp = 0; - int fn; - List labelSet = new List(); - foreach (Dictionary cause in detectedRootCause) - { - string tpCause = FindTruePositive(cause, labeledRootCause, exactly); - if (tpCause == null) - { - fp++; - int seconds = Convert.ToInt32(timeStamp.Subtract(new DateTime(1970, 1, 1, 0, 0, 0, 0)).TotalSeconds); - Console.WriteLine(String.Format("FP : timestamp - {0}, - {1} detected root cause ", timeStamp.ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ss'Z'", DateTimeFormatInfo.InvariantInfo), seconds)); - Console.WriteLine(string.Join(Environment.NewLine, cause)); - Console.WriteLine(" "); - } - else - { - tp++; - labelSet.Add(tpCause); - } - } - - fn = labeledRootCause.Count - labelSet.Count; - if (fn != 0) - { - List> nCause = GetFNegtiveCause(labeledRootCause, labelSet); - 
if (nCause.Count > 0) - { - int seconds = Convert.ToInt32(timeStamp.Subtract(new DateTime(1970, 1, 1, 0, 0, 0, 0)).TotalSeconds); - Console.WriteLine(String.Format("FN : timestamp - {0} - {1} labeled root cause", timeStamp.ToString("yyyy'-'MM'-'dd'T'HH':'mm':'ss'Z'", DateTimeFormatInfo.InvariantInfo), seconds)); - foreach (Dictionary cause in nCause) - { - Console.WriteLine(string.Join(Environment.NewLine, cause)); - Console.WriteLine("---------------------"); - } - - } - } - - return new Tuple(tp, fp, fn); - } - - private static List> GetFNegtiveCause(List> labelCauses, List labelSet) - { - List> causeList = new List>(); - foreach (Dictionary cause in labelCauses) - { - if (!labelSet.Contains(GetDicHashCode(cause))) - { - causeList.Add(cause); - } - } - return causeList; - } - - private static string FindTruePositive(Dictionary cause, List> labelCauses, bool exactly) - { - foreach (Dictionary label in labelCauses) - { - string id = GetDicHashCode(label); - int compare = CompareCause(cause, label); - if (compare == 0) - { - return id; - } - else if (!exactly && (compare == 1 || compare == 2)) - { - return id; - } - } - return null; - } - - - private static string GetDicHashCode(Dictionary dic) - { - return dic.GetHashCode().ToString(); - } - - private static int CompareCause(Dictionary detect, Dictionary label) - { - if (DTRootCauseAnalyzer.ContainsAll(detect, label) && DTRootCauseAnalyzer.ContainsAll(label, detect)) - { - return 0; - } - else if (DTRootCauseAnalyzer.ContainsAll(detect, label)) - { - return 1; - } - else if (DTRootCauseAnalyzer.ContainsAll(label, detect)) - { - return 2; - } - return 3; - } - private static List> ConvertRootCauseItemToDic(List items) - { - List> list = new List>(); - foreach (RootCauseItem item in items) - { - list.Add(item.Dimension); - } - return list; - } - - private static void RemoveAggSymbol(List> dimensions, string aggSymbol) - { - foreach (Dictionary dim in dimensions) - { - foreach (string key in dim.Keys) - { - if 
(dim[key].Equals(aggSymbol)) - { - dim.Remove(key); - } - } - } - } - - private static PredictionEngine GetRootCausePredictionEngine() - { - //// Create an root cause localizatiom input list from csv. - var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, AggregateType.Sum, "SUM") }; - - - var ml = new MLContext(1); - // Convert the list of root cause data to an IDataView object, which is consumable by ML.NET API. - var data = ml.Data.LoadFromEnumerable(rootCauseLocalizationData); - - // Create pipeline to localize root cause by decision tree. - var pipeline = ml.Transforms.LocalizeRootCauseByDT(nameof(RootCauseLocalizationTransformedData.RootCause), nameof(RootCauseLocalizationData.Input)); - - // Fit the model. - var model = pipeline.Fit(data); - - // Test path: input list -> IDataView -> Enumerable of RootCauseLocalizationInputs. - var transformedData = model.Transform(data); - - // Load input list in DataView back to Enumerable. 
- var transformedDataPoints = ml.Data.CreateEnumerable(transformedData, false); - - var engine = ml.Model.CreatePredictionEngine(model); - return engine; - } - - private static string _ocsDataCenter = "OCSDatacenter"; - private static string _appType = "AppType"; - private static string _releaseAudienceGroup = "Release_AudienceGroup"; - private static string _wacDatacenter = "WACDatacenter"; - private static string _requestType = "RequestType"; - private static string _statusCode = "StatusCode"; - - private static List _dimensionKeys = new List() { _ocsDataCenter, _appType, _releaseAudienceGroup, _wacDatacenter, _statusCode, _requestType }; - - private static Dictionary> GetAnomalyRootMap() - { - var anomalyRootData = GetDataTabletFromCSVFile("D:/rootcause/Dataset_yaniv/root_cause_201908_202002/anomaly_root.csv"); - - Dictionary> rootNodeMap = new Dictionary>(); - foreach (DataRow row in anomalyRootData.Rows) - { - // load the data, build the RootCauseInput, take care of empty value - long seconds = long.Parse(row["TimeStamp"].ToString()); - DateTime t = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddSeconds(seconds); - - Dictionary dimension = new Dictionary(); - foreach (string key in _dimensionKeys) - { - if (!row[key].ToString().Equals("")) - { - dimension.Add(key, row[key].ToString()); - } - } - - rootNodeMap.Add(t, dimension); - } - return rootNodeMap; - } - - private static Dictionary>> GetLabeledRootCauseMap() - { - var labeldRootCause = GetDataTabletFromCSVFile("D:/rootcause/Dataset_yaniv/root_cause_201908_202002/labeled_root_cause.csv"); - - Dictionary>> map = new Dictionary>>(); - foreach (DataRow row in labeldRootCause.Rows) - { - // load the data, build the labled result, take care of empty value - long seconds = long.Parse(row["TimeStamp"].ToString()); - DateTime t = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddSeconds(seconds); - - Dictionary dimension = new Dictionary(); - foreach (string key in _dimensionKeys) - { - if 
(!row[key].ToString().Equals("")) - { - dimension.Add(key, row[key].ToString()); - } - } - - if (map.ContainsKey(t)) - { - map[t].Add(dimension); - } - else - { - map.Add(t, new List>() { dimension }); - } - } - return map; - } - - private static List GetPoints(string path) - { - var inputData = GetDataTabletFromCSVFile(path); - - DateTime timeStamp = new DateTime(); - - List points = new List(); - foreach (DataRow row in inputData.Rows) - { - // load the data, build the RootCauseInput, take care of empty value - long seconds = long.Parse(row["TimeStamp"].ToString()); - timeStamp = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Utc).AddSeconds(seconds); - double value = Double.Parse(row["Value"].ToString()); - double expectedValue = 0; - if (!row["ExpectedValue"].ToString().Equals("")) - { - expectedValue = Double.Parse(row["ExpectedValue"].ToString()); - } - bool isAnomaly = Boolean.Parse(row["IsAnomaly"].ToString()); - Dictionary dimension = new Dictionary(); - foreach (string key in _dimensionKeys) - { - if (!row[key].ToString().Equals("")) - { - dimension.Add(key, row[key].ToString()); - } - } - - points.Add(new Microsoft.ML.TimeSeries.Point(value, expectedValue, isAnomaly, dimension)); ; - } - - return points; - } - - private static void GetRootCause(List rootCauseList, RootCauseLocalizationData inputData, PredictionEngine engine) - { - RootCauseLocalizationTransformedData incrementalResult = engine.Predict(inputData); - - if (incrementalResult.RootCause.Items.Count == 0 || ( - incrementalResult.RootCause.Items.Count == 1 && incrementalResult.RootCause.Items[0].Dimension.Equals(inputData.Input.AnomalyDimensions) - )) - { - if (!rootCauseList.Contains(new RootCauseItem(inputData.Input.AnomalyDimensions))) - { - rootCauseList.Add(new RootCauseItem(inputData.Input.AnomalyDimensions)); - - } - return; - } - else - { - foreach (RootCauseItem item in incrementalResult.RootCause.Items) - { - RootCauseLocalizationData newData = new 
RootCauseLocalizationData(inputData.Input.AnomalyTimestamp, - item.Dimension, inputData.Input.Slices, inputData.Input.AggType, inputData.Input.AggSymbol); - GetRootCause(rootCauseList, newData, engine); - } - } - } - - private static DataTable GetDataTabletFromCSVFile(string filePath) - { - DataTable csvData = new DataTable(); - - - using (TextFieldParser csvReader = new TextFieldParser(filePath)) - { - csvReader.SetDelimiters(new string[] { "," }); - csvReader.HasFieldsEnclosedInQuotes = true; - string[] colFields = csvReader.ReadFields(); - foreach (string column in colFields) - { - DataColumn datecolumn = new DataColumn(column); - datecolumn.AllowDBNull = true; - csvData.Columns.Add(datecolumn); - } - - while (!csvReader.EndOfData) - { - string[] fieldData = csvReader.ReadFields(); - //Making empty value as null - for (int i = 0; i < fieldData.Length; i++) - { - if (fieldData[i] == "") - { - fieldData[i] = null; - } - } - csvData.Rows.Add(fieldData); - } - } - - return csvData; - } - - private class RootCauseLocalizationData - { - [RootCauseLocalizationInputType] - public RootCauseLocalizationInput Input { get; set; } - - public RootCauseLocalizationData() - { - Input = null; - } - - public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateteType, string aggregateSymbol) - { - Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, aggregateteType, aggregateSymbol); - } - } - - private class RootCauseLocalizationTransformedData - { - [RootCauseType()] - public RootCause RootCause { get; set; } - - public RootCauseLocalizationTransformedData() - { - RootCause = null; - } - } - } -} diff --git a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj index d0f3e2dff5..812114e7a5 100644 --- a/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj +++ 
b/docs/samples/Microsoft.ML.Samples/Microsoft.ML.Samples.csproj @@ -1,8 +1,7 @@  - - netcoreapp3.0 + netcoreapp2.1 Exe false diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index 930ccc57f3..6e65499862 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -13,17 +13,13 @@ internal static void RunAll() int samples = 0; foreach (var type in Assembly.GetExecutingAssembly().GetTypes()) { - if (type.Name.Equals("LocalizeRootCauseEvaluation")) - //if (type.Name.Equals("LocalizeRootCauseByDT")) - { - var sample = type.GetMethod("Example", BindingFlags.Public | BindingFlags.Static | BindingFlags.FlattenHierarchy); + var sample = type.GetMethod("Example", BindingFlags.Public | BindingFlags.Static | BindingFlags.FlattenHierarchy); - if (sample != null) - { - Console.WriteLine(type.Name); - sample.Invoke(null, null); - samples++; - } + if (sample != null) + { + Console.WriteLine(type.Name); + sample.Invoke(null, null); + samples++; } } diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs index e385bd9890..95130f2a12 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs @@ -562,11 +562,10 @@ private bool ShouldSeperateAnomaly(double total, double parent, int totalSize, i private double GetDimensionEntropyGain(Dictionary pointDis, Dictionary anomalyDis, double totalEntropy, bool isChildren = false) { - //todo - update int total = GetTotalNumber(pointDis); double entropy = 0; - if (!isChildren) + //if (!isChildren) { foreach (string key in anomalyDis.Keys) { @@ -575,10 +574,10 @@ private double GetDimensionEntropyGain(Dictionary pointDis, Diction } } - else - { - entropy = GetEntropy(pointDis.Count, anomalyDis.Count); - } + //else + //{ + // entropy = GetEntropy(pointDis.Count, anomalyDis.Count); + //} return totalEntropy - entropy; } 
diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs index 659d2ff664..0ef315afe5 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs @@ -144,6 +144,8 @@ private protected override void SaveModel(ModelSaveContext ctx) // *** Binary format *** // base.SaveColumns(ctx); + + ctx.Writer.Write((byte)_beta); } private protected override IRowMapper MakeRowMapper(DataViewSchema schema) => new Mapper(this, schema); @@ -191,11 +193,6 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func { - if (src != null) - { - src.Dispose(); - src = null; - } }; ValueGetter del = @@ -236,7 +233,7 @@ private void CheckRootCauseInput(RootCauseLocalizationInput src, IHost host) private void LocalizeRootCauses(RootCauseLocalizationInput src, ref RootCause dst) { - DTRootCauseAnalyzer analyzer = new DTRootCauseAnalyzer(src, _parent._beta) ; + DTRootCauseAnalyzer analyzer = new DTRootCauseAnalyzer(src, _parent._beta); dst = analyzer.Analyze(); } } @@ -257,6 +254,8 @@ private void LocalizeRootCauses(RootCauseLocalizationInput src, ref RootCause ds /// | Output column data type | | /// | Exportable to ONNX | No | /// + /// [!include[io](~/../docs/samples/docs/api-reference/time-series-root-cause-localization-dt.md)] + /// /// The resulting creates a new column, named as specified in the output column name parameters, and /// localize the root causes which contribute most to the anomaly. /// Check the See Also section for links to usage examples. @@ -275,8 +274,8 @@ internal static class Defaults /// Localize root cause. /// /// The estimator's local . - /// The name of the columns (first item of the tuple), and the name of the resulting output column (second item of the tuple). /// The weight for generating score in output result. + /// Pairs of columns to run the root cause localization. 
[BestFriend] internal DTRootCauseLocalizationEstimator(IHostEnvironment env, double beta = Defaults.Beta, params (string outputColumnName, string inputColumnName)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(DTRootCauseLocalizationEstimator)), new DTRootCauseLocalizationTransformer(env, beta, columns)) @@ -295,7 +294,7 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) { if (!inputSchema.TryFindColumn(colInfo.inputColumnName, out var col)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.inputColumnName); - if (!(col.ItemType is RootCauseLocalizationInputDataViewType) || col.Kind != SchemaShape.Column.VectorKind.Scalar) + if (!(col.ItemType is RootCauseLocalizationInputDataViewType)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.inputColumnName, new RootCauseLocalizationInputDataViewType().ToString(), col.GetTypeString()); result[colInfo.outputColumnName] = new SchemaShape.Column(colInfo.outputColumnName, col.Kind, col.ItemType, col.IsKey, col.Annotations); diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs index 573cbe229f..c8811a5651 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs @@ -111,11 +111,6 @@ public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary new SrCnnAnomalyEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, windowSize, backAddWindowSize, lookaheadWindowSize, averageingWindowSize, judgementWindowSize, threshold, inputColumnName); /// - /// Create , which localizes root causess using decision tree algorithm. + /// Create , which localizes root causes using decision tree algorithm. /// /// The transform's catalog. /// Name of the column resulting from the transformation of . 
diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 0d82a3ab51..edb7258553 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -94,6 +94,33 @@ private sealed class SrCnnAnomalyDetection private static string _aggSymbol = "##SUM##"; + private class RootCauseLocalizationData + { + [RootCauseLocalizationInputType] + public RootCauseLocalizationInput Input { get; set; } + + public RootCauseLocalizationData() + { + Input = null; + } + + public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateteType, string aggregateSymbol) + { + Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, aggregateteType, aggregateSymbol); + } + } + + private class RootCauseLocalizationTransformedData + { + [RootCauseType()] + public RootCause RootCause { get; set; } + + public RootCauseLocalizationTransformedData() + { + RootCause = null; + } + } + [Fact] public void ChangeDetection() { @@ -520,33 +547,6 @@ public void AnomalyDetectionWithSrCnn() } } - private class RootCauseLocalizationData - { - [RootCauseLocalizationInputType] - public RootCauseLocalizationInput Input { get; set; } - - public RootCauseLocalizationData() - { - Input = null; - } - - public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateteType, string aggregateSymbol) - { - Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, aggregateteType, aggregateSymbol); - } - } - - private class RootCauseLocalizationTransformedData - { - [RootCauseType()] - public RootCause RootCause { get; set; } - - public RootCauseLocalizationTransformedData() - { - RootCause = null; - } - } - [Fact] public void RootCauseLocalizationWithDT() { @@ -576,6 +576,7 @@ public void 
RootCauseLocalizationWithDT() } var engine = ml.Model.CreatePredictionEngine(model); + DateTime timeStamp = GetCurrentTimestamp(); var newRootCauseInput = new RootCauseLocalizationData(timeStamp, GetAnomalyDimension(), new List() { new MetricSlice(timeStamp, GetRootCauseLocalizationPoints()) }, AggregateType.Sum, _aggSymbol); var transformedRootCause = engine.Predict(newRootCauseInput); From c9ed044293cacd4443bdb0f21e92648e8047792e Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Fri, 27 Mar 2020 10:16:03 +0800 Subject: [PATCH 19/49] fix some typo --- .../DTRootCauseAnalyzer.cs | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs index 95130f2a12..0f8b923dff 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs @@ -565,29 +565,23 @@ private double GetDimensionEntropyGain(Dictionary pointDis, Diction int total = GetTotalNumber(pointDis); double entropy = 0; - //if (!isChildren) + foreach (string key in anomalyDis.Keys) { - foreach (string key in anomalyDis.Keys) - { - double dimEntropy = GetEntropy(pointDis[key], anomalyDis[key]); - entropy += dimEntropy * pointDis[key] / total; - } - + double dimEntropy = GetEntropy(pointDis[key], anomalyDis[key]); + entropy += dimEntropy * pointDis[key] / total; } - //else - //{ - // entropy = GetEntropy(pointDis.Count, anomalyDis.Count); - //} + return totalEntropy - entropy; } private double GetDimensionInstrinsicValue(Dictionary pointDis, Dictionary anomalyDis) { + int total = GetTotalNumber(pointDis); double instrinsicValue = 0; - foreach (string key in anomalyDis.Keys) + foreach (string key in pointDis.Keys) { - instrinsicValue -= Log2((double)anomalyDis[key] / pointDis[key]) * anomalyDis[key] / pointDis[key]; + instrinsicValue -= Log2((double)pointDis[key] / total) * (double)pointDis[key] / total; } return instrinsicValue; From 
e440f256046d8e517b9d1b6d769730ebe9104ac4 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Fri, 27 Mar 2020 10:18:41 +0800 Subject: [PATCH 20/49] remove unused code --- src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs index 0f8b923dff..78a88158e3 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs @@ -684,14 +684,6 @@ public static BestDimension CreateDefaultInstance() } } - //public sealed class AnomalyCause - //{ - // public string DimensionKey; - // public List Anomalies; - - // public AnomalyCause() { } - //} - public class RootCauseScore { public double Surprise; From feba6f4f1e9897af3bc1a33a2018fca80a9e3f8c Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Fri, 27 Mar 2020 11:03:30 +0800 Subject: [PATCH 21/49] reformat code --- .../TimeSeries/LocalizeRootCauseByDT.cs | 1 - .../DTRootCauseAnalyzer.cs | 187 +++++++++--------- .../DTRootCauseLocalizationType.cs | 1 - 3 files changed, 94 insertions(+), 95 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs index 067afdbf53..0c6be78ed4 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs @@ -1,6 +1,5 @@ using System; using System.Collections.Generic; -using System.Runtime.InteropServices.ComTypes; using Microsoft.ML; using Microsoft.ML.TimeSeries; diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs index 78a88158e3..3a6afc57c5 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs +++ 
b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs @@ -12,6 +12,8 @@ namespace Microsoft.ML.TimeSeries public class DTRootCauseAnalyzer { private static double _anomalyRatioThreshold = 0.5; + private static double _anomalyDeltaThreshold = 0.95; + private static double _anomalyPreDeltaThreshold = 2; private RootCauseLocalizationInput _src; private double _beta; @@ -39,7 +41,7 @@ public RootCause Analyze() return dst; } - public List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src, Dictionary subDim) + protected List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src, Dictionary subDim) { List points = new List(); foreach (MetricSlice slice in src.Slices) @@ -53,7 +55,7 @@ public List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput return points; } - public void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, DimensionInfo dimensionInfo, List totalPoints, Dictionary subDim, List aggDims) + protected void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, DimensionInfo dimensionInfo, List totalPoints, Dictionary subDim, List aggDims) { Tuple> pointInfo = BuildPointInfo(totalPoints, dimensionInfo.AggDims, subDim, src.AggSymbol, src.AggType); PointTree pointTree = pointInfo.Item1; @@ -90,7 +92,7 @@ public void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, } } - public DimensionInfo SeperateDimension(Dictionary dimensions, string aggSymbol) + protected DimensionInfo SeperateDimension(Dictionary dimensions, string aggSymbol) { DimensionInfo info = DimensionInfo.CreateDefaultInstance(); foreach (KeyValuePair entry in dimensions) @@ -142,52 +144,7 @@ protected Tuple> BuildPointInfo(List po return new Tuple>(pointTree, anomalyTree, uniquePointList); } - private void BuildTree(PointTree tree, List aggDims, Point point, string aggSymbol) - { - - if (aggDims.Count == 0) - { - tree.ParentNode = point; - tree.Leaves.Add(point); - } - else - { - int aggNum = 0; - string nextDim 
= null; - - foreach (string dim in aggDims) - { - if (IsAggregationDimension(point.Dimension[dim], aggSymbol)) - { - aggNum++; - } - else - { - nextDim = dim; - } - } - - if (aggNum == aggDims.Count) - { - tree.ParentNode = point; - } - else if (aggNum == aggDims.Count - 1) - { - if (!tree.ChildrenNodes.ContainsKey(nextDim)) - { - tree.ChildrenNodes.Add(nextDim, new List()); - } - tree.ChildrenNodes[nextDim].Add(point); - } - - if (aggNum == 0) - { - tree.Leaves.Add(point); - } - } - } - - public Dictionary GetSubDim(Dictionary dimension, List keyList) + protected Dictionary GetSubDim(Dictionary dimension, List keyList) { Dictionary subDim = new Dictionary(); @@ -352,7 +309,7 @@ protected BestDimension SelectBestDimension(List totalPoints, List return best; } - public BestDimension SelectBestDimension(Dictionary> pointChildren, Dictionary> anomalyChildren, List aggDim, double totalEntropy) + private BestDimension SelectBestDimension(Dictionary> pointChildren, Dictionary> anomalyChildren, List aggDim, double totalEntropy) { Dictionary entroyGainMap = new Dictionary(); Dictionary entroyGainRatioMap = new Dictionary(); @@ -372,7 +329,7 @@ public BestDimension SelectBestDimension(Dictionary> pointCh UpdateDistribution(dimension.AnomalyDis, anomalyChildren[dimKey], dimKey); } - double gain = GetDimensionEntropyGain(dimension.PointDis, dimension.AnomalyDis, totalEntropy, true); + double gain = GetDimensionEntropyGain(dimension.PointDis, dimension.AnomalyDis, totalEntropy); dimension.Entropy = totalEntropy - gain; entroyGainMap.Add(dimension, gain); @@ -389,40 +346,7 @@ public BestDimension SelectBestDimension(Dictionary> pointCh return best; } - private BestDimension FindBestDimension(Dictionary entropyGainMap, Dictionary entropyGainRationMap, double meanGain) - { - BestDimension best = null; - foreach (KeyValuePair dimension in entropyGainMap) - { - if (dimension.Key.AnomalyDis.Count == 1 || dimension.Value >= meanGain) - { - if (dimension.Key.AnomalyDis.Count > 1) - { 
- if (best == null || (best.AnomalyDis.Count != 1 && entropyGainRationMap[best].CompareTo(dimension.Value) < 0)) - { - best = dimension.Key; - } - } - else - { - if (best == null || best.AnomalyDis.Count > 1) - { - best = dimension.Key; - } - else - { - if (entropyGainRationMap[best].CompareTo(dimension.Value) < 0) - { - best = dimension.Key; - } - } - } - } - } - return best; - } - - public Point FindPointByDimension(Dictionary dim, List points) + private Point FindPointByDimension(Dictionary dim, List points) { foreach (Point p in points) { @@ -444,7 +368,7 @@ public Point FindPointByDimension(Dictionary dim, List po return null; } - public void UpdateRootCauseDirection(List points, ref RootCause dst) + private void UpdateRootCauseDirection(List points, ref RootCause dst) { foreach (RootCauseItem item in dst.Items) { @@ -460,11 +384,10 @@ public void UpdateRootCauseDirection(List points, ref RootCause dst) item.Direction = AnomalyDirection.Down; } } - } } - public void GetRootCauseScore(List points, Dictionary anomalyRoot, ref RootCause dst, double beta) + private void GetRootCauseScore(List points, Dictionary anomalyRoot, ref RootCause dst, double beta) { if (dst.Items.Count > 1) { @@ -497,7 +420,6 @@ public void GetRootCauseScore(List points, Dictionary ano else if (dst.Items.Count == 1) { Point rootCausePoint = FindPointByDimension(dst.Items[0].Dimension, points); - Point anomalyPoint = FindPointByDimension(anomalyRoot, points); if (anomalyPoint != null && rootCausePoint != null) { @@ -507,6 +429,84 @@ public void GetRootCauseScore(List points, Dictionary ano } } + private void BuildTree(PointTree tree, List aggDims, Point point, string aggSymbol) + { + + if (aggDims.Count == 0) + { + tree.ParentNode = point; + tree.Leaves.Add(point); + } + else + { + int aggNum = 0; + string nextDim = null; + + foreach (string dim in aggDims) + { + if (IsAggregationDimension(point.Dimension[dim], aggSymbol)) + { + aggNum++; + } + else + { + nextDim = dim; + } + } + + if 
(aggNum == aggDims.Count) + { + tree.ParentNode = point; + } + else if (aggNum == aggDims.Count - 1) + { + if (!tree.ChildrenNodes.ContainsKey(nextDim)) + { + tree.ChildrenNodes.Add(nextDim, new List()); + } + tree.ChildrenNodes[nextDim].Add(point); + } + + if (aggNum == 0) + { + tree.Leaves.Add(point); + } + } + } + + private BestDimension FindBestDimension(Dictionary entropyGainMap, Dictionary entropyGainRationMap, double meanGain) + { + BestDimension best = null; + foreach (KeyValuePair dimension in entropyGainMap) + { + if (dimension.Key.AnomalyDis.Count == 1 || dimension.Value >= meanGain) + { + if (dimension.Key.AnomalyDis.Count > 1) + { + if (best == null || (best.AnomalyDis.Count != 1 && entropyGainRationMap[best].CompareTo(dimension.Value) < 0)) + { + best = dimension.Key; + } + } + else + { + if (best == null || best.AnomalyDis.Count > 1) + { + best = dimension.Key; + } + else + { + if (entropyGainRationMap[best].CompareTo(dimension.Value) < 0) + { + best = dimension.Key; + } + } + } + } + } + return best; + } + private double GetSurpriseScore(Point rootCausePoint, Point anomalyPoint) { double p = rootCausePoint.ExpectedValue / anomalyPoint.ExpectedValue; @@ -528,6 +528,7 @@ private Tuple GetSupriseAndExplainaryScore(Point rootCausePoint, return new Tuple(surprise, ep); } + private static Dictionary UpdateDimensionValue(Dictionary dimension, string key, string value) { Dictionary newDim = new Dictionary(dimension); @@ -537,17 +538,17 @@ private static Dictionary UpdateDimensionValue(Dictionary 2; + return Math.Abs(pre) / Math.Abs(current) > _anomalyPreDeltaThreshold; } private bool ShouldSeperateAnomaly(double total, double parent, int totalSize, int size) { - if (Math.Abs(total) < Math.Abs(parent) * 0.95) + if (Math.Abs(total) < Math.Abs(parent) * _anomalyDeltaThreshold) { return false; } @@ -560,7 +561,7 @@ private bool ShouldSeperateAnomaly(double total, double parent, int totalSize, i return size <= totalSize * _anomalyRatioThreshold; } - private 
double GetDimensionEntropyGain(Dictionary pointDis, Dictionary anomalyDis, double totalEntropy, bool isChildren = false) + private double GetDimensionEntropyGain(Dictionary pointDis, Dictionary anomalyDis, double totalEntropy) { int total = GetTotalNumber(pointDis); double entropy = 0; diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs index c8811a5651..9447501a9f 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs @@ -282,5 +282,4 @@ public override int GetHashCode() return Dimension.GetHashCode(); } } - } From 686831cb1fc039c3919e0dde7599a746723356a1 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Fri, 27 Mar 2020 11:16:29 +0800 Subject: [PATCH 22/49] updates --- .../DTRootCauseAnalyzer.cs | 38 +++++-------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs index 3a6afc57c5..8e221f0b6c 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs @@ -32,7 +32,7 @@ public RootCause Analyze() { return dst; } - Dictionary subDim = GetSubDim(_src.AnomalyDimensions, dimensionInfo.DetailDim); + Dictionary subDim = GetSubDim(_src.AnomalyDimensions, dimensionInfo.DetailDims); List totalPoints = GetTotalPointsForAnomalyTimestamp(_src, subDim); GetRootCauseList(_src, ref dst, dimensionInfo, totalPoints, subDim, dimensionInfo.AggDims); UpdateRootCauseDirection(totalPoints, ref dst); @@ -104,7 +104,7 @@ protected DimensionInfo SeperateDimension(Dictionary dimensions, } else { - info.DetailDim.Add(key); + info.DetailDims.Add(key); } } @@ -113,11 +113,9 @@ protected DimensionInfo SeperateDimension(Dictionary dimensions, protected Tuple> BuildPointInfo(List pointList, List aggDims, Dictionary subDim, string aggSymbol, AggregateType 
aggType) { - - List uniquePointList = new List(); - PointTree pointTree = PointTree.CreateDefaultInstance(); PointTree anomalyTree = PointTree.CreateDefaultInstance(); + List uniquePointList = new List(); foreach (Point point in pointList) { @@ -155,36 +153,17 @@ protected Dictionary GetSubDim(Dictionary dimens return subDim; } - protected List SelectPoints(List points, Dictionary subDim) - { - List list = new List(); - - foreach (Point point in points) - { - if (ContainsAll(point.Dimension, subDim)) - { - //remove duplicated points - if (!list.Contains(point)) - { - list.Add(point); - } - } - } - - return list; - } - - protected List LocalizeRootCauseByDimension(PointTree anomalyTree, PointTree pointTree, double totoalEntropy, Dictionary anomalyDimension, List aggDims) + protected List LocalizeRootCauseByDimension(PointTree anomalyTree, PointTree pointTree, double totalEntropy, Dictionary anomalyDimension, List aggDims) { BestDimension best = null; if (anomalyTree.ChildrenNodes.Count == 0) { - best = SelectBestDimension(pointTree.Leaves, anomalyTree.Leaves, aggDims, totoalEntropy); + best = SelectBestDimension(pointTree.Leaves, anomalyTree.Leaves, aggDims, totalEntropy); } else { //has no leaves information, should calculate the entropy information according to the children nodes - best = SelectBestDimension(pointTree.ChildrenNodes, anomalyTree.ChildrenNodes, aggDims, totoalEntropy); + best = SelectBestDimension(pointTree.ChildrenNodes, anomalyTree.ChildrenNodes, aggDims, totalEntropy); } if (best == null) @@ -504,6 +483,7 @@ private BestDimension FindBestDimension(Dictionary entrop } } } + return best; } @@ -641,13 +621,13 @@ private bool IsAggregationDimension(string val, string aggSymbol) public class DimensionInfo { - public List DetailDim { get; set; } + public List DetailDims { get; set; } public List AggDims { get; set; } public static DimensionInfo CreateDefaultInstance() { DimensionInfo instance = new DimensionInfo(); - instance.DetailDim = new List(); + 
instance.DetailDims = new List(); instance.AggDims = new List(); return instance; } From ddc8a3600e07862d37f2c6f2c8a06557ed560c04 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Sun, 29 Mar 2020 21:36:35 +0800 Subject: [PATCH 23/49] update from review --- .../DTRootCauseLocalization.cs | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs index 0ef315afe5..149d9d88d8 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs @@ -35,12 +35,12 @@ public sealed class DTRootCauseLocalizationTransformer : OneToOneTransformerBase { internal const string Summary = "Localize root cause for anomaly."; internal const string UserName = "DT Root Cause Localization Transform"; - internal const string LoaderSignature = "DTRootCauseLTransform"; + internal const string LoaderSignature = "DTRootCauseTransform"; private static VersionInfo GetVersionInfo() { return new VersionInfo( - modelSignature: "DTRCL", + modelSignature: "DTROOTCL", verWrittenCur: 0x00010001, // Initial verReadableCur: 0x00010001, verWeCanReadBack: 0x00010001, @@ -124,6 +124,11 @@ private static DTRootCauseLocalizationTransformer Create(IHostEnvironment env, M private DTRootCauseLocalizationTransformer(IHost host, ModelLoadContext ctx) : base(host, ctx) { + var columnsLength = ColumnPairs.Length; + // *** Binary format *** + // + // double: beta + _beta = ctx.Reader.ReadByte(); } // Factory method for SignatureLoadDataTransform. 
@@ -145,7 +150,7 @@ private protected override void SaveModel(ModelSaveContext ctx) // base.SaveColumns(ctx); - ctx.Writer.Write((byte)_beta); + ctx.Writer.Write(_beta); } private protected override IRowMapper MakeRowMapper(DataViewSchema schema) => new Mapper(this, schema); @@ -190,10 +195,7 @@ protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func(input.Schema[ColMapNewToOld[iinfo]]); - disposer = - () => - { - }; + disposer = null; ValueGetter del = (ref RootCause dst) => @@ -294,10 +296,10 @@ public override SchemaShape GetOutputSchema(SchemaShape inputSchema) { if (!inputSchema.TryFindColumn(colInfo.inputColumnName, out var col)) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.inputColumnName); - if (!(col.ItemType is RootCauseLocalizationInputDataViewType)) + if (!(col.ItemType is RootCauseLocalizationInputDataViewType) || col.Kind != SchemaShape.Column.VectorKind.Scalar) throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.inputColumnName, new RootCauseLocalizationInputDataViewType().ToString(), col.GetTypeString()); - result[colInfo.outputColumnName] = new SchemaShape.Column(colInfo.outputColumnName, col.Kind, col.ItemType, col.IsKey, col.Annotations); + result[colInfo.outputColumnName] = new SchemaShape.Column(colInfo.outputColumnName, SchemaShape.Column.VectorKind.Scalar, new RootCauseDataViewType(), false); } return new SchemaShape(result.Values); From 475ee8a224b23933140f1ae8606dd1b63293ccaf Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Wed, 1 Apr 2020 11:06:13 +0800 Subject: [PATCH 24/49] read double for beta --- src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs index 149d9d88d8..a5a6449b0b 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs +++ 
b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs @@ -124,11 +124,11 @@ private static DTRootCauseLocalizationTransformer Create(IHostEnvironment env, M private DTRootCauseLocalizationTransformer(IHost host, ModelLoadContext ctx) : base(host, ctx) { - var columnsLength = ColumnPairs.Length; // *** Binary format *** // // double: beta - _beta = ctx.Reader.ReadByte(); + _beta = ctx.Reader.ReadDouble(); + Host.CheckDecode(_beta >= 0 && _beta <= 1); } // Factory method for SignatureLoadDataTransform. From 8d874cae57b3a4c874ea1962cec06f341269fa47 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Wed, 1 Apr 2020 15:21:15 +0800 Subject: [PATCH 25/49] remove SignatureDataTransform constructor --- .../DTRootCauseLocalization.cs | 27 +++++-------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs index a5a6449b0b..f6364a3caf 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs @@ -14,12 +14,6 @@ using Microsoft.ML.TimeSeries; using Microsoft.ML.Transforms.TimeSeries; -[assembly: LoadableClass(DTRootCauseLocalizationTransformer.Summary, typeof(IDataTransform), typeof(DTRootCauseLocalizationTransformer), typeof(DTRootCauseLocalizationTransformer.Options), typeof(SignatureDataTransform), - DTRootCauseLocalizationTransformer.UserName, "DTRootCauseLocalizationTransform", "DTRootCauseLocalization")] - -[assembly: LoadableClass(DTRootCauseLocalizationTransformer.Summary, typeof(IDataTransform), typeof(DTRootCauseLocalizationTransformer), null, typeof(SignatureLoadDataTransform), - DTRootCauseLocalizationTransformer.UserName, DTRootCauseLocalizationTransformer.LoaderSignature)] - [assembly: LoadableClass(typeof(DTRootCauseLocalizationTransformer), null, typeof(SignatureLoadModel), DTRootCauseLocalizationTransformer.UserName, DTRootCauseLocalizationTransformer.LoaderSignature)] 
@@ -69,8 +63,11 @@ internal bool TryUnparse(StringBuilder sb) internal class Options : TransformInputBase { - [Argument(ArgumentType.Multiple | ArgumentType.Required, HelpText = "New column definition(s) (optional form: name:src)", Name = "Column", ShortName = "col", SortOrder = 1)] - public Column[] Columns; + [Argument(ArgumentType.Required, HelpText = "The name of the source column.", ShortName = "src", SortOrder = 1, Purpose = SpecialPurpose.ColumnName)] + public string Source; + + [Argument(ArgumentType.Required, HelpText = "The name of the output column.", SortOrder = 2)] + public string Output; [Argument(ArgumentType.AtMostOnce, HelpText = "Weight for getting the score for the root cause item.", ShortName = "Beta", SortOrder = 2)] public double Beta = DTRootCauseLocalizationEstimator.Defaults.Beta; @@ -99,18 +96,6 @@ internal DTRootCauseLocalizationTransformer(IHostEnvironment env, double beta = _beta = beta; } - // Factory method for SignatureDataTransform. - internal static IDataTransform Create(IHostEnvironment env, Options options, IDataView input) - { - Contracts.CheckValue(env, nameof(env)); - env.CheckValue(options, nameof(options)); - env.CheckValue(input, nameof(input)); - env.CheckValue(options.Columns, nameof(options.Columns)); - - return new DTRootCauseLocalizationTransformer(env, options.Beta, options.Columns.Select(x => (x.Name, x.Source ?? x.Name)).ToArray()) - .MakeDataTransform(input); - } - // Factory method for SignatureLoadModel. 
private static DTRootCauseLocalizationTransformer Create(IHostEnvironment env, ModelLoadContext ctx) { @@ -149,7 +134,7 @@ private protected override void SaveModel(ModelSaveContext ctx) // *** Binary format *** // base.SaveColumns(ctx); - + // double: beta ctx.Writer.Write(_beta); } From 0674ab3fb01438251d08e00edd1ca8f05f61e679 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Wed, 1 Apr 2020 15:47:11 +0800 Subject: [PATCH 26/49] update --- src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs index f6364a3caf..5f6f008275 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs @@ -261,11 +261,12 @@ internal static class Defaults /// Localize root cause. /// /// The estimator's local . + /// Name of output column to run the root cause localization. + /// Name of input column to run the root cause localization. /// The weight for generating score in output result. - /// Pairs of columns to run the root cause localization. [BestFriend] - internal DTRootCauseLocalizationEstimator(IHostEnvironment env, double beta = Defaults.Beta, params (string outputColumnName, string inputColumnName)[] columns) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(DTRootCauseLocalizationEstimator)), new DTRootCauseLocalizationTransformer(env, beta, columns)) + internal DTRootCauseLocalizationEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName, double beta = Defaults.Beta) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(DTRootCauseLocalizationEstimator)), new DTRootCauseLocalizationTransformer(env, beta, new[] { (outputColumnName, inputColumnName ?? 
outputColumnName) })) { } From 4c5b8fba56e34d6b1d7c223de6825f3812d349b2 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Wed, 1 Apr 2020 21:11:24 +0800 Subject: [PATCH 27/49] update --- docs/samples/Microsoft.ML.Samples/Program.cs | 1 - .../DTRootCauseLocalizationType.cs | 25 ++++++++++++++++--- .../ExtensionsCatalog.cs | 6 ++--- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Program.cs b/docs/samples/Microsoft.ML.Samples/Program.cs index 6e65499862..4c46399421 100644 --- a/docs/samples/Microsoft.ML.Samples/Program.cs +++ b/docs/samples/Microsoft.ML.Samples/Program.cs @@ -23,7 +23,6 @@ internal static void RunAll() } } - Console.WriteLine("Number of samples that ran without any exception: " + samples); } } diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs index 9447501a9f..257a875207 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs @@ -111,6 +111,15 @@ public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, string aggregateSymbol) + { + AnomalyTimestamp = anomalyTimestamp; + AnomalyDimensions = anomalyDimensions; + Slices = slices; + AggType = AggregateType.Unknow; + AggSymbol = aggregateSymbol; + } } public sealed class RootCauseDataViewType : StructuredDataViewType @@ -170,19 +179,27 @@ public enum AggregateType /// /// Make the aggregate type as sum. /// - Sum = 0, + Unknow = 0, + /// + /// Make the aggregate type as sum. + /// + Sum = 1, /// /// Make the aggregate type as average. /// - Avg = 1, + Avg = 2, /// /// Make the aggregate type as min. /// - Min = 2, + Min = 3, /// /// Make the aggregate type as max. /// - Max = 3 + Max = 4, + /// + /// Make the aggregate type as count. 
+ /// + Count = 4 } public enum AnomalyDirection diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index 34258a08b8..6fed0c7114 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -143,7 +143,7 @@ public static SsaSpikeEstimator DetectSpikeBySsa(this TransformsCatalog catalog, /// /// public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog catalog, string outputColumnName, string inputColumnName, - int windowSize=64, int backAddWindowSize=5, int lookaheadWindowSize=5, int averageingWindowSize=3, int judgementWindowSize=21, double threshold=0.3) + int windowSize = 64, int backAddWindowSize = 5, int lookaheadWindowSize = 5, int averageingWindowSize = 3, int judgementWindowSize = 21, double threshold = 0.3) => new SrCnnAnomalyEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, windowSize, backAddWindowSize, lookaheadWindowSize, averageingWindowSize, judgementWindowSize, threshold, inputColumnName); /// @@ -160,8 +160,8 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog /// ]]> /// /// - public static DTRootCauseLocalizationEstimator LocalizeRootCauseByDT(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, double beta=0.5) - => new DTRootCauseLocalizationEstimator(CatalogUtils.GetEnvironment(catalog),beta, new[] { (outputColumnName, inputColumnName ?? outputColumnName) }); + public static DTRootCauseLocalizationEstimator LocalizeRootCauseByDT(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, double beta = 0.5) + => new DTRootCauseLocalizationEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName ?? outputColumnName, beta); /// /// Singular Spectrum Analysis (SSA) model for univariate time-series forecasting. 
From 08d607c47122f4a10f6c006dbad7498fe166dd50 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Thu, 2 Apr 2020 11:26:58 +0800 Subject: [PATCH 28/49] remove white space --- src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs index 257a875207..dd6074159f 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs @@ -111,7 +111,7 @@ public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, string aggregateSymbol) { AnomalyTimestamp = anomalyTimestamp; From c6882334509973bd3b4ddba3f7975ab05f9d30b8 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Tue, 7 Apr 2020 16:50:22 +0800 Subject: [PATCH 29/49] refine internal logic --- .../TimeSeries/LocalizeRootCauseByDT.cs | 26 +- .../DTRootCauseAnalyzer.cs | 272 +++++++----------- .../DTRootCauseLocalization.cs | 6 +- .../DTRootCauseLocalizationType.cs | 38 +-- .../ExtensionsCatalog.cs | 6 +- .../TimeSeriesDirectApi.cs | 46 ++- 6 files changed, 181 insertions(+), 213 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs index 0c6be78ed4..b5f62dad41 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs @@ -45,7 +45,7 @@ public static void Example() { count++; Console.WriteLine($"Root cause item #{count} ..."); - Console.WriteLine($"Score: {item.Score}, Path: {item.Path}, Direction: {item.Direction}, Dimension:{String.Join(" ", item.Dimension)}"); + Console.WriteLine($"Score: {item.Score}, Path: {String.Join(" ",item.Path)}, Direction: 
{item.Direction}, Dimension:{String.Join(" ", item.Dimension)}"); } //Item #1 ... @@ -56,55 +56,55 @@ private static List GetPoints() { List points = new List(); - Dictionary dic1 = new Dictionary(); + Dictionary dic1 = new Dictionary(); dic1.Add("Country", "UK"); dic1.Add("DeviceType", "Laptop"); dic1.Add("DataCenter", "DC1"); points.Add(new Point(200, 100, true, dic1)); - Dictionary dic2 = new Dictionary(); + Dictionary dic2 = new Dictionary(); dic2.Add("Country", "UK"); dic2.Add("DeviceType", "Mobile"); dic2.Add("DataCenter", "DC1"); points.Add(new Point(1000, 100, true, dic2)); - Dictionary dic3 = new Dictionary(); + Dictionary dic3 = new Dictionary(); dic3.Add("Country", "UK"); dic3.Add("DeviceType", AGG_SYMBOL); dic3.Add("DataCenter", "DC1"); points.Add(new Point(1200, 200, true, dic3)); - Dictionary dic4 = new Dictionary(); + Dictionary dic4 = new Dictionary(); dic4.Add("Country", "UK"); dic4.Add("DeviceType", "Laptop"); dic4.Add("DataCenter", "DC2"); points.Add(new Point(100, 100, false, dic4)); - Dictionary dic5 = new Dictionary(); + Dictionary dic5 = new Dictionary(); dic5.Add("Country", "UK"); dic5.Add("DeviceType", "Mobile"); dic5.Add("DataCenter", "DC2"); points.Add(new Point(200, 200, false, dic5)); - Dictionary dic6 = new Dictionary(); + Dictionary dic6 = new Dictionary(); dic6.Add("Country", "UK"); dic6.Add("DeviceType", AGG_SYMBOL); dic6.Add("DataCenter", "DC2"); points.Add(new Point(300, 300, false, dic6)); - Dictionary dic7 = new Dictionary(); + Dictionary dic7 = new Dictionary(); dic7.Add("Country", "UK"); dic7.Add("DeviceType", AGG_SYMBOL); dic7.Add("DataCenter", AGG_SYMBOL); points.Add(new Point(1500, 500, true, dic7)); - Dictionary dic8 = new Dictionary(); + Dictionary dic8 = new Dictionary(); dic8.Add("Country", "UK"); dic8.Add("DeviceType", "Laptop"); dic8.Add("DataCenter", AGG_SYMBOL); points.Add(new Point(300, 200, true, dic8)); - Dictionary dic9 = new Dictionary(); + Dictionary dic9 = new Dictionary(); dic9.Add("Country", "UK"); 
dic9.Add("DeviceType", "Mobile"); dic9.Add("DataCenter", AGG_SYMBOL); @@ -113,9 +113,9 @@ private static List GetPoints() return points; } - private static Dictionary GetAnomalyDimension() + private static Dictionary GetAnomalyDimension() { - Dictionary dim = new Dictionary(); + Dictionary dim = new Dictionary(); dim.Add("Country", "UK"); dim.Add("DeviceType", AGG_SYMBOL); dim.Add("DataCenter", AGG_SYMBOL); @@ -138,7 +138,7 @@ public RootCauseLocalizationData() Input = null; } - public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateType, string aggregateSymbol) + public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateType, string aggregateSymbol) { Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, aggregateType, aggregateSymbol); diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs index 8e221f0b6c..c6d184e36a 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs @@ -17,6 +17,7 @@ public class DTRootCauseAnalyzer private RootCauseLocalizationInput _src; private double _beta; + public DTRootCauseAnalyzer(RootCauseLocalizationInput src, double beta) { _src = src; @@ -24,24 +25,41 @@ public DTRootCauseAnalyzer(RootCauseLocalizationInput src, double beta) } public RootCause Analyze() + { + return AnalyzeOneLayer(_src); + } + + //This is a function for analyze one layer for root cause, we select one dimension with values who contributes the most to the anomaly. 
For full result, call this function recursively + private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src) { RootCause dst = new RootCause(); - DimensionInfo dimensionInfo = SeperateDimension(_src.AnomalyDimensions, _src.AggSymbol); + DimensionInfo dimensionInfo = SeperateDimension(src.AnomalyDimension, src.AggSymbol); //no aggregation dimension if (dimensionInfo.AggDims.Count == 0) { return dst; } - Dictionary subDim = GetSubDim(_src.AnomalyDimensions, dimensionInfo.DetailDims); - List totalPoints = GetTotalPointsForAnomalyTimestamp(_src, subDim); - GetRootCauseList(_src, ref dst, dimensionInfo, totalPoints, subDim, dimensionInfo.AggDims); - UpdateRootCauseDirection(totalPoints, ref dst); - GetRootCauseScore(totalPoints, _src.AnomalyDimensions, ref dst, _beta); + + Tuple> pointInfo = GetPointsInfo(src, dimensionInfo, src.AggSymbol); + PointTree pointTree = pointInfo.Item1; + PointTree anomalyTree = pointInfo.Item2; + Dictionary dimPointMapping = pointInfo.Item3; + + //which means there is no anomaly point with the anomaly dimension or no point under anomaly dimension + if (anomalyTree.ParentNode == null || dimPointMapping.Count == 0) + { + return dst; + } + + dst.Items = new List(); + dst.Items.AddRange(LocalizeRootCauseByDimension(anomalyTree, pointTree, src.AnomalyDimension, dimensionInfo.AggDims)); + + GetRootCauseDirectionAndScore(dimPointMapping, src.AnomalyDimension, dst, _beta); return dst; } - protected List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src, Dictionary subDim) + protected List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src) { List points = new List(); foreach (MetricSlice slice in src.Slices) @@ -55,47 +73,10 @@ protected List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInp return points; } - protected void GetRootCauseList(RootCauseLocalizationInput src, ref RootCause dst, DimensionInfo dimensionInfo, List totalPoints, Dictionary subDim, List aggDims) - { - Tuple> pointInfo = 
BuildPointInfo(totalPoints, dimensionInfo.AggDims, subDim, src.AggSymbol, src.AggType); - PointTree pointTree = pointInfo.Item1; - PointTree anomalyTree = pointInfo.Item2; - List uniquePoints = pointInfo.Item3; - - //which means there is no anomaly point with the anomaly dimension - if (anomalyTree.ParentNode == null) - { - return; - } - - List rootCauses = new List(); - // no point under anomaly dimension - if (uniquePoints.Count == 0) - { - if (anomalyTree.Leaves.Count != 0) - { - throw new Exception("point leaves not match with anomaly leaves"); - } - - return; - } - else - { - double totalEntropy = 1; - if (anomalyTree.Leaves.Count > 0) - { - totalEntropy = GetEntropy(pointTree.Leaves.Count, anomalyTree.Leaves.Count); - } - - rootCauses.AddRange(LocalizeRootCauseByDimension(anomalyTree, pointTree, totalEntropy, src.AnomalyDimensions, aggDims)); - dst.Items = rootCauses; - } - } - - protected DimensionInfo SeperateDimension(Dictionary dimensions, string aggSymbol) + protected DimensionInfo SeperateDimension(Dictionary dimensions, string aggSymbol) { DimensionInfo info = DimensionInfo.CreateDefaultInstance(); - foreach (KeyValuePair entry in dimensions) + foreach (KeyValuePair entry in dimensions) { string key = entry.Key; if (aggSymbol.Equals(entry.Value)) @@ -111,41 +92,42 @@ protected DimensionInfo SeperateDimension(Dictionary dimensions, return info; } - protected Tuple> BuildPointInfo(List pointList, List aggDims, Dictionary subDim, string aggSymbol, AggregateType aggType) + protected Tuple> GetPointsInfo(RootCauseLocalizationInput src, DimensionInfo dimensionInfo, string aggSymbol) { PointTree pointTree = PointTree.CreateDefaultInstance(); PointTree anomalyTree = PointTree.CreateDefaultInstance(); - List uniquePointList = new List(); + Dictionary dimPointMapping = new Dictionary(); - foreach (Point point in pointList) + List totalPoints = GetTotalPointsForAnomalyTimestamp(src); + Dictionary subDim = GetSubDim(src.AnomalyDimension, dimensionInfo.DetailDims); 
+ + foreach (Point point in totalPoints) { if (ContainsAll(point.Dimension, subDim)) { - //remove duplicated points - if (!uniquePointList.Contains(point)) + if (!dimPointMapping.ContainsKey(GetDicCode(point.Dimension))) { - uniquePointList.Add(point); + dimPointMapping.Add(GetDicCode(point.Dimension), point); bool isValidPoint = point.IsAnomaly == true; if (ContainsAll(point.Dimension, subDim)) { - BuildTree(pointTree, aggDims, point, aggSymbol); + BuildTree(pointTree, dimensionInfo.AggDims, point, aggSymbol); if (isValidPoint) { - BuildTree(anomalyTree, aggDims, point, aggSymbol); + BuildTree(anomalyTree, dimensionInfo.AggDims, point, aggSymbol); } } } } } - return new Tuple>(pointTree, anomalyTree, uniquePointList); + return new Tuple>(pointTree, anomalyTree, dimPointMapping); } - protected Dictionary GetSubDim(Dictionary dimension, List keyList) + protected Dictionary GetSubDim(Dictionary dimension, List keyList) { - Dictionary subDim = new Dictionary(); - + Dictionary subDim = new Dictionary(); foreach (string dim in keyList) { subDim.Add(dim, dimension[dim]); @@ -153,17 +135,18 @@ protected Dictionary GetSubDim(Dictionary dimens return subDim; } - protected List LocalizeRootCauseByDimension(PointTree anomalyTree, PointTree pointTree, double totalEntropy, Dictionary anomalyDimension, List aggDims) + protected List LocalizeRootCauseByDimension(PointTree anomalyTree, PointTree pointTree, Dictionary anomalyDimension, List aggDims) { BestDimension best = null; if (anomalyTree.ChildrenNodes.Count == 0) { - best = SelectBestDimension(pointTree.Leaves, anomalyTree.Leaves, aggDims, totalEntropy); + //has no children node information, should use the leaves node(whose point has no aggrgated dimensions)information + best = SelectBestDimension(pointTree.Leaves, anomalyTree.Leaves, aggDims); } else { //has no leaves information, should calculate the entropy information according to the children nodes - best = SelectBestDimension(pointTree.ChildrenNodes, 
anomalyTree.ChildrenNodes, aggDims, totalEntropy); + best = SelectBestDimension(pointTree.ChildrenNodes, anomalyTree.ChildrenNodes, aggDims); } if (best == null) @@ -192,7 +175,7 @@ protected List LocalizeRootCauseByDimension(PointTree anomalyTree if (children == null) { //As the cause couldn't be found, the root cause should be itself - return new List() { new RootCauseItem(anomalyDimension, best.DimensionKey) }; + return new List() { new RootCauseItem(anomalyDimension) }; } else { @@ -200,7 +183,7 @@ protected List LocalizeRootCauseByDimension(PointTree anomalyTree // For the found causes, we return the result foreach (Point anomaly in children) { - causes.Add(new RootCauseItem(UpdateDimensionValue(anomalyDimension, best.DimensionKey, anomaly.Dimension[best.DimensionKey]), best.DimensionKey)); + causes.Add(new RootCauseItem(UpdateDimensionValue(anomalyDimension, best.DimensionKey, anomaly.Dimension[best.DimensionKey]), new List() { best.DimensionKey })); } return causes; } @@ -258,8 +241,9 @@ protected List GetTopAnomaly(List anomalyPoints, Point root, List< return null; } - protected BestDimension SelectBestDimension(List totalPoints, List anomalyPoints, List aggDim, double totalEntropy) + protected BestDimension SelectBestDimension(List totalPoints, List anomalyPoints, List aggDim) { + double totalEntropy = GetEntropy(totalPoints.Count, anomalyPoints.Count); Dictionary entroyGainMap = new Dictionary(); Dictionary entroyGainRatioMap = new Dictionary(); double sumGain = 0; @@ -272,11 +256,11 @@ protected BestDimension SelectBestDimension(List totalPoints, List UpdateDistribution(dimension.PointDis, totalPoints, dimKey); UpdateDistribution(dimension.AnomalyDis, anomalyPoints, dimKey); - double gain = GetDimensionEntropyGain(dimension.PointDis, dimension.AnomalyDis, totalEntropy); - dimension.Entropy = totalEntropy - gain; + double relativeEntropy = GetDimensionEntropy(dimension.PointDis, dimension.AnomalyDis); + double gain = totalEntropy - relativeEntropy; 
entroyGainMap.Add(dimension, gain); - double gainRatio = gain / GetDimensionInstrinsicValue(dimension.PointDis, dimension.AnomalyDis); + double gainRatio = gain / GetDimensionInstrinsicValue(dimension.PointDis); entroyGainRatioMap.Add(dimension, gainRatio); sumGain += gain; @@ -288,10 +272,10 @@ protected BestDimension SelectBestDimension(List totalPoints, List return best; } - private BestDimension SelectBestDimension(Dictionary> pointChildren, Dictionary> anomalyChildren, List aggDim, double totalEntropy) + private BestDimension SelectBestDimension(Dictionary> pointChildren, Dictionary> anomalyChildren, List aggDim) { - Dictionary entroyGainMap = new Dictionary(); - Dictionary entroyGainRatioMap = new Dictionary(); + Dictionary entropyGainMap = new Dictionary(); + Dictionary entropyGainRatioMap = new Dictionary(); double sumGain = 0; foreach (string dimKey in aggDim) @@ -308,84 +292,54 @@ private BestDimension SelectBestDimension(Dictionary> pointC UpdateDistribution(dimension.AnomalyDis, anomalyChildren[dimKey], dimKey); } - double gain = GetDimensionEntropyGain(dimension.PointDis, dimension.AnomalyDis, totalEntropy); - dimension.Entropy = totalEntropy - gain; - entroyGainMap.Add(dimension, gain); + double entropy = GetEntropy(dimension.PointDis.Count, dimension.AnomalyDis.Count); + entropyGainMap.Add(dimension, entropy); - double gainRatio = gain / GetDimensionInstrinsicValue(dimension.PointDis, dimension.AnomalyDis); - entroyGainRatioMap.Add(dimension, gainRatio); + double gainRatio = entropy / GetDimensionInstrinsicValue(dimension.PointDis); + entropyGainRatioMap.Add(dimension, gainRatio); - sumGain += gain; + sumGain += entropy; } - double meanGain = sumGain / aggDim.Count(); - - BestDimension best = FindBestDimension(entroyGainMap, entroyGainRatioMap, meanGain); + double meanGain = sumGain / aggDim.Count; + BestDimension best = FindBestDimension(entropyGainMap, entropyGainRatioMap, meanGain, false); return best; } - private Point 
FindPointByDimension(Dictionary dim, List points) + private AnomalyDirection GetRootCauseDirection(Point rootCausePoint) { - foreach (Point p in points) + if (rootCausePoint.ExpectedValue < rootCausePoint.Value) { - bool isEqual = true; - foreach (KeyValuePair item in p.Dimension) - { - if (!dim[item.Key].Equals(item.Value)) - { - isEqual = false; - } - } - - if (isEqual) - { - return p; - } + return AnomalyDirection.Up; } - - return null; - } - - private void UpdateRootCauseDirection(List points, ref RootCause dst) - { - foreach (RootCauseItem item in dst.Items) + else { - Point rootCausePoint = FindPointByDimension(item.Dimension, points); - if (rootCausePoint != null) - { - if (rootCausePoint.ExpectedValue < rootCausePoint.Value) - { - item.Direction = AnomalyDirection.Up; - } - else - { - item.Direction = AnomalyDirection.Down; - } - } + return AnomalyDirection.Down; } } - private void GetRootCauseScore(List points, Dictionary anomalyRoot, ref RootCause dst, double beta) + private void GetRootCauseDirectionAndScore(Dictionary dimPointMapping, Dictionary anomalyRoot, RootCause dst, double beta) { + Point anomalyPoint = dimPointMapping[GetDicCode(anomalyRoot)]; if (dst.Items.Count > 1) { //get surprise value and explanary power value - Point anomalyPoint = FindPointByDimension(anomalyRoot, points); - double sumSurprise = 0; double sumEp = 0; List scoreList = new List(); foreach (RootCauseItem item in dst.Items) { - Point rootCausePoint = FindPointByDimension(item.Dimension, points); + Point rootCausePoint = dimPointMapping[GetDicCode(item.Dimension)]; if (anomalyPoint != null && rootCausePoint != null) { Tuple scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); scoreList.Add(new RootCauseScore(scores.Item1, scores.Item2)); sumSurprise += scores.Item1; sumEp += Math.Abs(scores.Item2); + + item.Direction = GetRootCauseDirection(rootCausePoint); } } @@ -398,71 +352,67 @@ private void GetRootCauseScore(List points, Dictionary an } else if 
(dst.Items.Count == 1) { - Point rootCausePoint = FindPointByDimension(dst.Items[0].Dimension, points); - Point anomalyPoint = FindPointByDimension(anomalyRoot, points); + Point rootCausePoint = dimPointMapping[GetDicCode(dst.Items[0].Dimension)]; if (anomalyPoint != null && rootCausePoint != null) { Tuple scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); dst.Items[0].Score = GetFinalScore(scores.Item1, scores.Item2, beta); + dst.Items[0].Direction = GetRootCauseDirection(rootCausePoint); } } } + private static string GetDicCode(Dictionary dic) + { + return string.Join(";", dic.Select(x => x.Key + "=" + (string)x.Value).ToArray()); + } + private void BuildTree(PointTree tree, List aggDims, Point point, string aggSymbol) { + int aggNum = 0; + string nextDim = null; - if (aggDims.Count == 0) + foreach (string dim in aggDims) { - tree.ParentNode = point; - tree.Leaves.Add(point); - } - else - { - int aggNum = 0; - string nextDim = null; - - foreach (string dim in aggDims) + if (IsAggregationDimension((string)point.Dimension[dim], aggSymbol)) { - if (IsAggregationDimension(point.Dimension[dim], aggSymbol)) - { - aggNum++; - } - else - { - nextDim = dim; - } + aggNum++; } - - if (aggNum == aggDims.Count) + else { - tree.ParentNode = point; - } - else if (aggNum == aggDims.Count - 1) - { - if (!tree.ChildrenNodes.ContainsKey(nextDim)) - { - tree.ChildrenNodes.Add(nextDim, new List()); - } - tree.ChildrenNodes[nextDim].Add(point); + nextDim = dim; } + } - if (aggNum == 0) + if (aggNum == aggDims.Count) + { + tree.ParentNode = point; + } + else if (aggNum == aggDims.Count - 1) + { + if (!tree.ChildrenNodes.ContainsKey(nextDim)) { - tree.Leaves.Add(point); + tree.ChildrenNodes.Add(nextDim, new List()); } + tree.ChildrenNodes[nextDim].Add(point); + } + + if (aggNum == 0) + { + tree.Leaves.Add(point); } } - private BestDimension FindBestDimension(Dictionary entropyGainMap, Dictionary entropyGainRationMap, double meanGain) + private BestDimension 
FindBestDimension(Dictionary entropyGainMap, Dictionary entropyGainRationMap, double meanGain, bool isLeavesLevel = true) { BestDimension best = null; foreach (KeyValuePair dimension in entropyGainMap) { - if (dimension.Key.AnomalyDis.Count == 1 || dimension.Value >= meanGain) + if (dimension.Key.AnomalyDis.Count == 1 || dimension.Value <= meanGain) { if (dimension.Key.AnomalyDis.Count > 1) { - if (best == null || (best.AnomalyDis.Count != 1 && entropyGainRationMap[best].CompareTo(dimension.Value) < 0)) + if (best == null || (best.AnomalyDis.Count != 1 && (isLeavesLevel ? entropyGainRationMap[best].CompareTo(dimension.Value) <= 0 : entropyGainRationMap[best].CompareTo(dimension.Value) >= 0))) { best = dimension.Key; } @@ -475,7 +425,7 @@ private BestDimension FindBestDimension(Dictionary entrop } else { - if (entropyGainRationMap[best].CompareTo(dimension.Value) < 0) + if ((isLeavesLevel ? entropyGainRationMap[best].CompareTo(dimension.Value) <= 0 : entropyGainRationMap[best].CompareTo(dimension.Value) >= 0)) { best = dimension.Key; } @@ -509,9 +459,9 @@ private Tuple GetSupriseAndExplainaryScore(Point rootCausePoint, return new Tuple(surprise, ep); } - private static Dictionary UpdateDimensionValue(Dictionary dimension, string key, string value) + private static Dictionary UpdateDimensionValue(Dictionary dimension, string key, Object value) { - Dictionary newDim = new Dictionary(dimension); + Dictionary newDim = new Dictionary(dimension); newDim[key] = value; return newDim; } @@ -541,7 +491,7 @@ private bool ShouldSeperateAnomaly(double total, double parent, int totalSize, i return size <= totalSize * _anomalyRatioThreshold; } - private double GetDimensionEntropyGain(Dictionary pointDis, Dictionary anomalyDis, double totalEntropy) + private double GetDimensionEntropy(Dictionary pointDis, Dictionary anomalyDis) { int total = GetTotalNumber(pointDis); double entropy = 0; @@ -552,10 +502,10 @@ private double GetDimensionEntropyGain(Dictionary pointDis, Diction 
entropy += dimEntropy * pointDis[key] / total; } - return totalEntropy - entropy; + return entropy; } - private double GetDimensionInstrinsicValue(Dictionary pointDis, Dictionary anomalyDis) + private double GetDimensionInstrinsicValue(Dictionary pointDis) { int total = GetTotalNumber(pointDis); double instrinsicValue = 0; @@ -582,7 +532,7 @@ private void UpdateDistribution(Dictionary distribution, List bigDic, Dictionary smallDic) + public static bool ContainsAll(Dictionary bigDic, Dictionary smallDic) { foreach (var item in smallDic) { diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs index 5f6f008275..57becd0524 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs @@ -237,13 +237,13 @@ private void LocalizeRootCauses(RootCauseLocalizationInput src, ref RootCause ds /// | | | /// | -- | -- | /// | Does this estimator need to look at the data to train its parameters? | No | - /// | Input column data type | | - /// | Output column data type | | + /// | Input column data type | | + /// | Output column data type | | /// | Exportable to ONNX | No | /// /// [!include[io](~/../docs/samples/docs/api-reference/time-series-root-cause-localization-dt.md)] /// - /// The resulting creates a new column, named as specified in the output column name parameters, and + /// The resulting creates a new column, named as specified in the output column name parameters, and /// localize the root causes which contribute most to the anomaly. /// Check the See Also section for links to usage examples. 
/// ]]> diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs index dd6074159f..7ef1c60eb5 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs @@ -95,7 +95,7 @@ public sealed class RootCauseLocalizationInput { public DateTime AnomalyTimestamp { get; set; } - public Dictionary AnomalyDimensions { get; set; } + public Dictionary AnomalyDimension { get; set; } public List Slices { get; set; } @@ -103,21 +103,21 @@ public sealed class RootCauseLocalizationInput public string AggSymbol { get; set; } - public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateType, string aggregateSymbol) + public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimension, List slices, AggregateType aggregateType, string aggregateSymbol) { AnomalyTimestamp = anomalyTimestamp; - AnomalyDimensions = anomalyDimensions; + AnomalyDimension = anomalyDimension; Slices = slices; AggType = aggregateType; AggSymbol = aggregateSymbol; } - public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, string aggregateSymbol) + public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimension, List slices, string aggregateSymbol) { AnomalyTimestamp = anomalyTimestamp; - AnomalyDimensions = anomalyDimensions; + AnomalyDimension = anomalyDimension; Slices = slices; - AggType = AggregateType.Unknow; + AggType = AggregateType.Unknown; AggSymbol = aggregateSymbol; } } @@ -179,7 +179,7 @@ public enum AggregateType /// /// Make the aggregate type as sum. /// - Unknow = 0, + Unknown = 0, /// /// Make the aggregate type as sum. 
/// @@ -217,16 +217,17 @@ public enum AnomalyDirection public sealed class RootCauseItem : IEquatable { public double Score; - public string Path; - public Dictionary Dimension; + public List Path; + public Dictionary Dimension; public AnomalyDirection Direction; - public RootCauseItem(Dictionary rootCause) + public RootCauseItem(Dictionary rootCause) { Dimension = rootCause; + Path = new List(); } - public RootCauseItem(Dictionary rootCause, string path) + public RootCauseItem(Dictionary rootCause, List path) { Dimension = rootCause; Path = path; @@ -235,7 +236,7 @@ public bool Equals(RootCauseItem other) { if (Dimension.Count == other.Dimension.Count) { - foreach (KeyValuePair item in Dimension) + foreach (KeyValuePair item in Dimension) { if (!other.Dimension[item.Key].Equals(item.Value)) { @@ -265,26 +266,25 @@ public sealed class Point : IEquatable public double Value { get; set; } public double ExpectedValue { get; set; } public bool IsAnomaly { get; set; } - public Dictionary Dimension { get; set; } - + public Dictionary Dimension { get; set; } public double Delta { get; set; } - public Point( Dictionary dimensions) + public Point(Dictionary dimension) { - Dimension = dimensions; + Dimension = dimension; } - public Point(double value, double expectedValue, bool isAnomaly, Dictionary dimensions) + public Point(double value, double expectedValue, bool isAnomaly, Dictionary dimension) { Value = value; ExpectedValue = expectedValue; IsAnomaly = isAnomaly; - Dimension = dimensions; + Dimension = dimension; Delta = value - expectedValue; } public bool Equals(Point other) { - foreach (KeyValuePair item in Dimension) + foreach (KeyValuePair item in Dimension) { if (!other.Dimension[item.Key].Equals(item.Value)) { diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index 6fed0c7114..6e591606b8 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ 
b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -150,8 +150,10 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog /// Create , which localizes root causes using decision tree algorithm. /// /// The transform's catalog. - /// Name of the column resulting from the transformation of . - /// Name of column to transform. + /// Name of the column resulting from the transformation of . + /// The column data is an instance of . + /// Name of the input column. + /// The column data is an instance of . /// The weight parameter in score. The range of the parameter should be in [0,1]. /// /// diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index edb7258553..cbca2157df 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -8,6 +8,7 @@ using Microsoft.ML.Data; using Microsoft.ML.TestFramework; using Microsoft.ML.TestFramework.Attributes; +using Microsoft.ML.TestFrameworkCommon; using Microsoft.ML.TimeSeries; using Microsoft.ML.Transforms.TimeSeries; using Xunit; @@ -104,7 +105,7 @@ public RootCauseLocalizationData() Input = null; } - public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateteType, string aggregateSymbol) + public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateteType, string aggregateSymbol) { Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, aggregateteType, aggregateSymbol); } @@ -551,7 +552,7 @@ public void AnomalyDetectionWithSrCnn() public void RootCauseLocalizationWithDT() { // Create an root cause localizatiom input list. 
- var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, AggregateType.Sum, "##SUM##") }; + var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, AggregateType.Sum, _aggSymbol) }; var ml = new MLContext(1); // Convert the list of root cause data to an IDataView object, which is consumable by ML.NET API. @@ -584,70 +585,85 @@ public void RootCauseLocalizationWithDT() Assert.NotNull(transformedRootCause); Assert.Equal(1, (int)transformedRootCause.RootCause.Items.Count); - Dictionary expectedDim = new Dictionary(); + Dictionary expectedDim = new Dictionary(); expectedDim.Add("Country", "UK"); expectedDim.Add("DeviceType", _aggSymbol); expectedDim.Add("DataCenter", "DC1"); - foreach (KeyValuePair pair in transformedRootCause.RootCause.Items[0].Dimension) + foreach (KeyValuePair pair in transformedRootCause.RootCause.Items[0].Dimension) { Assert.Equal(expectedDim[pair.Key], pair.Value); } + + var dummyData = ml.Data.LoadFromEnumerable(new List() { "Test"}); + + //Create path + var modelPath = "temp.zip"; + //Save model to a file + ml.Model.Save(model, dummyData.Schema, modelPath); + + //Load model from a file + ITransformer serializedModel; + using (var file = File.OpenRead(modelPath)) + { + serializedModel = ml.Model.Load(file, out var serializedSchema); + TestCommon.CheckSameSchemas(dummyData.Schema, serializedSchema); + } } private static List GetRootCauseLocalizationPoints() { List points = new List(); - Dictionary dic1 = new Dictionary(); + Dictionary dic1 = new Dictionary(); dic1.Add("Country", "UK"); dic1.Add("DeviceType", "Laptop"); dic1.Add("DataCenter", "DC1"); points.Add(new Point(200, 100, true, dic1)); - Dictionary dic2 = new Dictionary(); + Dictionary dic2 = new Dictionary(); dic2.Add("Country", "UK"); dic2.Add("DeviceType", 
"Mobile"); dic2.Add("DataCenter", "DC1"); points.Add(new Point(1000, 100, true, dic2)); - Dictionary dic3 = new Dictionary(); + Dictionary dic3 = new Dictionary(); dic3.Add("Country", "UK"); dic3.Add("DeviceType", _aggSymbol); dic3.Add("DataCenter", "DC1"); points.Add(new Point(1200, 200, true, dic3)); - Dictionary dic4 = new Dictionary(); + Dictionary dic4 = new Dictionary(); dic4.Add("Country", "UK"); dic4.Add("DeviceType", "Laptop"); dic4.Add("DataCenter", "DC2"); points.Add(new Point(100, 100, false, dic4)); - Dictionary dic5 = new Dictionary(); + Dictionary dic5 = new Dictionary(); dic5.Add("Country", "UK"); dic5.Add("DeviceType", "Mobile"); dic5.Add("DataCenter", "DC2"); points.Add(new Point(200, 200, false, dic5)); - Dictionary dic6 = new Dictionary(); + Dictionary dic6 = new Dictionary(); dic6.Add("Country", "UK"); dic6.Add("DeviceType", _aggSymbol); dic6.Add("DataCenter", "DC2"); points.Add(new Point(300, 300, false, dic6)); - Dictionary dic7 = new Dictionary(); + Dictionary dic7 = new Dictionary(); dic7.Add("Country", "UK"); dic7.Add("DeviceType", _aggSymbol); dic7.Add("DataCenter", _aggSymbol); points.Add(new Point(1500, 500, true, dic7)); - Dictionary dic8 = new Dictionary(); + Dictionary dic8 = new Dictionary(); dic8.Add("Country", "UK"); dic8.Add("DeviceType", "Laptop"); dic8.Add("DataCenter", _aggSymbol); points.Add(new Point(300, 200, true, dic8)); - Dictionary dic9 = new Dictionary(); + Dictionary dic9 = new Dictionary(); dic9.Add("Country", "UK"); dic9.Add("DeviceType", "Mobile"); dic9.Add("DataCenter", _aggSymbol); @@ -656,9 +672,9 @@ private static List GetRootCauseLocalizationPoints() return points; } - private static Dictionary GetAnomalyDimension() + private static Dictionary GetAnomalyDimension() { - Dictionary dim = new Dictionary(); + Dictionary dim = new Dictionary(); dim.Add("Country", "UK"); dim.Add("DeviceType", _aggSymbol); dim.Add("DataCenter", _aggSymbol); From 98637db8a20ed2112f4f3015fd757e0b87e1debe Mon Sep 17 00:00:00 2001 From: 
suxi-ms Date: Wed, 8 Apr 2020 09:26:32 +0800 Subject: [PATCH 30/49] update --- .../DTRootCauseAnalyzer.cs | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs index c6d184e36a..179ca2da16 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs @@ -46,7 +46,11 @@ private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src) Dictionary dimPointMapping = pointInfo.Item3; //which means there is no anomaly point with the anomaly dimension or no point under anomaly dimension - if (anomalyTree.ParentNode == null || dimPointMapping.Count == 0) + if (anomalyTree.ParentNode == null) + { + return dst; + } + if (dimPointMapping.Count == 0) { return dst; } @@ -54,7 +58,7 @@ private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src) dst.Items = new List(); dst.Items.AddRange(LocalizeRootCauseByDimension(anomalyTree, pointTree, src.AnomalyDimension, dimensionInfo.AggDims)); - GetRootCauseDirectionAndScore(dimPointMapping, src.AnomalyDimension, dst, _beta); + GetRootCauseDirectionAndScore(dimPointMapping, src.AnomalyDimension, dst, _beta, pointTree); return dst; } @@ -319,9 +323,9 @@ private AnomalyDirection GetRootCauseDirection(Point rootCausePoint) } } - private void GetRootCauseDirectionAndScore(Dictionary dimPointMapping, Dictionary anomalyRoot, RootCause dst, double beta) + private void GetRootCauseDirectionAndScore(Dictionary dimPointMapping, Dictionary anomalyRoot, RootCause dst, double beta, PointTree pointTree) { - Point anomalyPoint = dimPointMapping[GetDicCode(anomalyRoot)]; + Point anomalyPoint = GetPointByDimenstion(dimPointMapping, anomalyRoot, _src.AggType, _src.AggSymbol, pointTree); if (dst.Items.Count > 1) { //get surprise value and explanary power value @@ -331,7 +335,7 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap foreach 
(RootCauseItem item in dst.Items) { - Point rootCausePoint = dimPointMapping[GetDicCode(item.Dimension)]; + Point rootCausePoint = GetPointByDimenstion(dimPointMapping, item.Dimension, _src.AggType, _src.AggSymbol, pointTree); if (anomalyPoint != null && rootCausePoint != null) { Tuple scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); @@ -352,7 +356,7 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap } else if (dst.Items.Count == 1) { - Point rootCausePoint = dimPointMapping[GetDicCode(dst.Items[0].Dimension)]; + Point rootCausePoint = GetPointByDimenstion(dimPointMapping, dst.Items[0].Dimension, _src.AggType, _src.AggSymbol, pointTree); if (anomalyPoint != null && rootCausePoint != null) { Tuple scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); @@ -362,6 +366,15 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap } } + private Point GetPointByDimenstion(Dictionary dimPointMapping, Dictionary dimension, AggregateType aggType, String aggSymbol, PointTree pointTree) + { + if (dimPointMapping.ContainsKey(GetDicCode(dimension))) + { + return dimPointMapping[GetDicCode(dimension)]; + } + return null; + } + private static string GetDicCode(Dictionary dic) { return string.Join(";", dic.Select(x => x.Key + "=" + (string)x.Value).ToArray()); From 4ff2ed1b13d7c22a2d010ec787bb85acdcefba8f Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Wed, 8 Apr 2020 14:41:55 +0800 Subject: [PATCH 31/49] update --- .../DTRootCauseAnalyzer.cs | 61 ++++++++----------- .../DTRootCauseLocalizationType.cs | 2 +- 2 files changed, 26 insertions(+), 37 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs index 179ca2da16..e2007b37d3 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs @@ -33,13 +33,9 @@ public RootCause Analyze() private RootCause 
AnalyzeOneLayer(RootCauseLocalizationInput src) { RootCause dst = new RootCause(); - DimensionInfo dimensionInfo = SeperateDimension(src.AnomalyDimension, src.AggSymbol); - //no aggregation dimension - if (dimensionInfo.AggDims.Count == 0) - { - return dst; - } + dst.Items = new List(); + DimensionInfo dimensionInfo = SeperateDimension(src.AnomalyDimension, src.AggSymbol); Tuple> pointInfo = GetPointsInfo(src, dimensionInfo, src.AggSymbol); PointTree pointTree = pointInfo.Item1; PointTree anomalyTree = pointInfo.Item2; @@ -55,9 +51,7 @@ private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src) return dst; } - dst.Items = new List(); dst.Items.AddRange(LocalizeRootCauseByDimension(anomalyTree, pointTree, src.AnomalyDimension, dimensionInfo.AggDims)); - GetRootCauseDirectionAndScore(dimPointMapping, src.AnomalyDimension, dst, _beta, pointTree); return dst; @@ -144,7 +138,7 @@ protected List LocalizeRootCauseByDimension(PointTree anomalyTree BestDimension best = null; if (anomalyTree.ChildrenNodes.Count == 0) { - //has no children node information, should use the leaves node(whose point has no aggrgated dimensions)information + //has no children node information, should use the leaves node(whose point has no aggrgated dimensions) information best = SelectBestDimension(pointTree.Leaves, anomalyTree.Leaves, aggDims); } else @@ -161,19 +155,13 @@ protected List LocalizeRootCauseByDimension(PointTree anomalyTree List children = null; if (anomalyTree.ChildrenNodes.ContainsKey(best.DimensionKey)) { - children = GetTopAnomaly(anomalyTree.ChildrenNodes[best.DimensionKey], anomalyTree.ParentNode, pointTree.ChildrenNodes[best.DimensionKey].Count > 0 ? pointTree.ChildrenNodes[best.DimensionKey] : pointTree.Leaves, best.DimensionKey); + //Use children node information to get top anomalies + children = GetTopAnomaly(anomalyTree.ChildrenNodes[best.DimensionKey], anomalyTree.ParentNode, pointTree.ChildrenNodes[best.DimensionKey].Count > 0 ? 
pointTree.ChildrenNodes[best.DimensionKey] : pointTree.Leaves, best.DimensionKey, !(pointTree.ChildrenNodes[best.DimensionKey].Count > 0)); } else { - if (best.AnomalyDis.Count > 0) - { - children = new List(); - foreach (string dimValue in best.AnomalyDis.Keys.ToArray()) - { - Point p = new Point(UpdateDimensionValue(anomalyDimension, best.DimensionKey, dimValue)); - children.Add(p); - } - } + //Use leaves node informatin to get top anomalies + children = GetTopAnomaly(anomalyTree.Leaves, anomalyTree.ParentNode, pointTree.Leaves, best.DimensionKey, true); } if (children == null) @@ -204,7 +192,7 @@ protected double GetEntropy(int totalNum, int anomalyNum) return -(ratio * Log2(ratio) + (1 - ratio) * Log2(1 - ratio)); } - protected List GetTopAnomaly(List anomalyPoints, Point root, List totalPoints, string dimKey) + protected List GetTopAnomaly(List anomalyPoints, Point root, List totalPoints, string dimKey, bool isLeaveslevel = false) { Dictionary pointDistribution = new Dictionary(); UpdateDistribution(pointDistribution, totalPoints, dimKey); @@ -236,7 +224,7 @@ protected List GetTopAnomaly(List anomalyPoints, Point root, List< preDelta = anomaly.Delta; } - int pointSize = GetTotalNumber(pointDistribution); + int pointSize = isLeaveslevel ? 
pointDistribution.Count : GetTotalNumber(pointDistribution); if (ShouldSeperateAnomaly(delta, root.Delta, pointSize, causeList.Count)) { return causeList; @@ -245,6 +233,7 @@ protected List GetTopAnomaly(List anomalyPoints, Point root, List< return null; } + //Use leaves point information to select best dimension protected BestDimension SelectBestDimension(List totalPoints, List anomalyPoints, List aggDim) { double totalEntropy = GetEntropy(totalPoints.Count, anomalyPoints.Count); @@ -276,10 +265,11 @@ protected BestDimension SelectBestDimension(List totalPoints, List return best; } + //Use children point information to select best dimension private BestDimension SelectBestDimension(Dictionary> pointChildren, Dictionary> anomalyChildren, List aggDim) { - Dictionary entropyGainMap = new Dictionary(); - Dictionary entropyGainRatioMap = new Dictionary(); + Dictionary entropyMap = new Dictionary(); + Dictionary entropyRatioMap = new Dictionary(); double sumGain = 0; foreach (string dimKey in aggDim) @@ -297,17 +287,17 @@ private BestDimension SelectBestDimension(Dictionary> pointC } double entropy = GetEntropy(dimension.PointDis.Count, dimension.AnomalyDis.Count); - entropyGainMap.Add(dimension, entropy); + entropyMap.Add(dimension, entropy); double gainRatio = entropy / GetDimensionInstrinsicValue(dimension.PointDis); - entropyGainRatioMap.Add(dimension, gainRatio); + entropyRatioMap.Add(dimension, gainRatio); sumGain += entropy; } double meanGain = sumGain / aggDim.Count; - BestDimension best = FindBestDimension(entropyGainMap, entropyGainRatioMap, meanGain, false); + BestDimension best = FindBestDimension(entropyMap, entropyRatioMap, meanGain, false); return best; } @@ -325,7 +315,7 @@ private AnomalyDirection GetRootCauseDirection(Point rootCausePoint) private void GetRootCauseDirectionAndScore(Dictionary dimPointMapping, Dictionary anomalyRoot, RootCause dst, double beta, PointTree pointTree) { - Point anomalyPoint = GetPointByDimenstion(dimPointMapping, 
anomalyRoot, _src.AggType, _src.AggSymbol, pointTree); + Point anomalyPoint = GetPointByDimenstion(dimPointMapping, anomalyRoot); if (dst.Items.Count > 1) { //get surprise value and explanary power value @@ -335,7 +325,7 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap foreach (RootCauseItem item in dst.Items) { - Point rootCausePoint = GetPointByDimenstion(dimPointMapping, item.Dimension, _src.AggType, _src.AggSymbol, pointTree); + Point rootCausePoint = GetPointByDimenstion(dimPointMapping, item.Dimension); if (anomalyPoint != null && rootCausePoint != null) { Tuple scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); @@ -356,7 +346,7 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap } else if (dst.Items.Count == 1) { - Point rootCausePoint = GetPointByDimenstion(dimPointMapping, dst.Items[0].Dimension, _src.AggType, _src.AggSymbol, pointTree); + Point rootCausePoint = GetPointByDimenstion(dimPointMapping, dst.Items[0].Dimension); if (anomalyPoint != null && rootCausePoint != null) { Tuple scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); @@ -366,7 +356,7 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap } } - private Point GetPointByDimenstion(Dictionary dimPointMapping, Dictionary dimension, AggregateType aggType, String aggSymbol, PointTree pointTree) + private Point GetPointByDimenstion(Dictionary dimPointMapping, Dictionary dimension) { if (dimPointMapping.ContainsKey(GetDicCode(dimension))) { @@ -416,16 +406,16 @@ private void BuildTree(PointTree tree, List aggDims, Point point, string } } - private BestDimension FindBestDimension(Dictionary entropyGainMap, Dictionary entropyGainRationMap, double meanGain, bool isLeavesLevel = true) + private BestDimension FindBestDimension(Dictionary valueMap, Dictionary valueRatioMap, double meanGain, bool isLeavesLevel = true) { BestDimension best = null; - foreach (KeyValuePair dimension in entropyGainMap) + foreach 
(KeyValuePair dimension in valueMap) { - if (dimension.Key.AnomalyDis.Count == 1 || dimension.Value <= meanGain) + if (dimension.Key.AnomalyDis.Count == 1 || (isLeavesLevel ? dimension.Value >= meanGain : dimension.Value <= meanGain)) { if (dimension.Key.AnomalyDis.Count > 1) { - if (best == null || (best.AnomalyDis.Count != 1 && (isLeavesLevel ? entropyGainRationMap[best].CompareTo(dimension.Value) <= 0 : entropyGainRationMap[best].CompareTo(dimension.Value) >= 0))) + if (best == null || (best.AnomalyDis.Count != 1 && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0))) { best = dimension.Key; } @@ -438,7 +428,7 @@ private BestDimension FindBestDimension(Dictionary entrop } else { - if ((isLeavesLevel ? entropyGainRationMap[best].CompareTo(dimension.Value) <= 0 : entropyGainRationMap[best].CompareTo(dimension.Value) >= 0)) + if ((isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0)) { best = dimension.Key; } @@ -614,7 +604,6 @@ public static PointTree CreateDefaultInstance() public sealed class BestDimension { public string DimensionKey; - public double Entropy; public Dictionary AnomalyDis; public Dictionary PointDis; diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs index 7ef1c60eb5..8759fb9d03 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs @@ -199,7 +199,7 @@ public enum AggregateType /// /// Make the aggregate type as count. 
/// - Count = 4 + Count = 5 } public enum AnomalyDirection From c22ad50ea32125dc905f22ad2e541d704d1eb88c Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Mon, 13 Apr 2020 10:48:22 +0800 Subject: [PATCH 32/49] updated test --- .../TimeSeriesDirectApi.cs | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index cbca2157df..5696c032f9 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -598,7 +598,7 @@ public void RootCauseLocalizationWithDT() var dummyData = ml.Data.LoadFromEnumerable(new List() { "Test"}); //Create path - var modelPath = "temp.zip"; + var modelPath = "DTRootCauseLocalizationModel.zip"; //Save model to a file ml.Model.Save(model, dummyData.Schema, modelPath); @@ -608,6 +608,18 @@ public void RootCauseLocalizationWithDT() { serializedModel = ml.Model.Load(file, out var serializedSchema); TestCommon.CheckSameSchemas(dummyData.Schema, serializedSchema); + + var serializedEngine = ml.Model.CreatePredictionEngine(serializedModel); + var returnedRootCause = serializedEngine.Predict(newRootCauseInput); + + Assert.NotNull(returnedRootCause); + Assert.Equal(1, (int)returnedRootCause.RootCause.Items.Count); + + foreach (KeyValuePair pair in returnedRootCause.RootCause.Items[0].Dimension) + { + Assert.Equal(expectedDim[pair.Key], pair.Value); + } + DeleteOutputPath(modelPath); } } From ea7ddbe5cb1f7ff97e0eadb8060037464b6bc0d8 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Wed, 15 Apr 2020 12:17:34 +0800 Subject: [PATCH 33/49] update score --- .../TimeSeries/LocalizeRootCauseByDT.cs | 2 +- .../DTRootCauseAnalyzer.cs | 113 +++++++++++++++--- 2 files changed, 100 insertions(+), 15 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs 
b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs index b5f62dad41..2d8f0a4e83 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs @@ -49,7 +49,7 @@ public static void Example() } //Item #1 ... - //Score: 1, Path: DataCenter, Direction: Up, Dimension:[Country, UK] [DeviceType, ##SUM##] [DataCenter, DC1] + //Score: 0.26670448876705927, Path: DataCenter, Direction: Up, Dimension:[Country, UK] [DeviceType, ##SUM##] [DataCenter, DC1] } private static List GetPoints() diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs index e2007b37d3..140b77158f 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs @@ -52,7 +52,7 @@ private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src) } dst.Items.AddRange(LocalizeRootCauseByDimension(anomalyTree, pointTree, src.AnomalyDimension, dimensionInfo.AggDims)); - GetRootCauseDirectionAndScore(dimPointMapping, src.AnomalyDimension, dst, _beta, pointTree); + GetRootCauseDirectionAndScore(dimPointMapping, src.AnomalyDimension, dst, _beta, pointTree, src.AggType, src.AggSymbol); return dst; } @@ -313,9 +313,9 @@ private AnomalyDirection GetRootCauseDirection(Point rootCausePoint) } } - private void GetRootCauseDirectionAndScore(Dictionary dimPointMapping, Dictionary anomalyRoot, RootCause dst, double beta, PointTree pointTree) + private void GetRootCauseDirectionAndScore(Dictionary dimPointMapping, Dictionary anomalyRoot, RootCause dst, double beta, PointTree pointTree, AggregateType aggType, string aggSymbol) { - Point anomalyPoint = GetPointByDimenstion(dimPointMapping, anomalyRoot); + Point anomalyPoint = GetPointByDimenstion(dimPointMapping, anomalyRoot, pointTree, aggType, aggSymbol); if (dst.Items.Count > 1) { //get 
surprise value and explanary power value @@ -325,7 +325,7 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap foreach (RootCauseItem item in dst.Items) { - Point rootCausePoint = GetPointByDimenstion(dimPointMapping, item.Dimension); + Point rootCausePoint = GetPointByDimenstion(dimPointMapping, item.Dimension, pointTree, aggType, aggSymbol); if (anomalyPoint != null && rootCausePoint != null) { Tuple scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); @@ -337,16 +337,16 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap } } - //normalize and get final score + //get final score for (int i = 0; i < scoreList.Count; i++) { - dst.Items[i].Score = GetFinalScore(scoreList[i].Surprise / sumSurprise, Math.Abs(scoreList[i].ExplainaryScore) / sumEp, beta); + dst.Items[i].Score = GetFinalScore(scoreList[i].Surprise, Math.Abs(scoreList[i].ExplainaryScore), beta); } } else if (dst.Items.Count == 1) { - Point rootCausePoint = GetPointByDimenstion(dimPointMapping, dst.Items[0].Dimension); + Point rootCausePoint = GetPointByDimenstion(dimPointMapping, dst.Items[0].Dimension, pointTree, aggType, aggSymbol); if (anomalyPoint != null && rootCausePoint != null) { Tuple scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); @@ -356,13 +356,46 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap } } - private Point GetPointByDimenstion(Dictionary dimPointMapping, Dictionary dimension) + private Point GetPointByDimenstion(Dictionary dimPointMapping, Dictionary dimension, PointTree pointTree, AggregateType aggType, string aggSymbol) { if (dimPointMapping.ContainsKey(GetDicCode(dimension))) { return dimPointMapping[GetDicCode(dimension)]; } - return null; + + int count = 0; + Point p = new Point(dimension); + DimensionInfo dimensionInfo = SeperateDimension(dimension, aggSymbol); + Dictionary subDim = GetSubDim(dimension, dimensionInfo.DetailDims); + + foreach (Point leave in pointTree.Leaves) + { + if 
(ContainsAll(leave.Dimension, subDim)) + { + count++; + + p.Value = +leave.Value; + p.ExpectedValue = +leave.ExpectedValue; + p.Delta = +leave.Delta; + } + + } + if (aggType.Equals(AggregateType.Avg)) + { + p.Value = p.Value / count; + p.ExpectedValue = p.ExpectedValue / count; + p.Delta = p.Delta / count; + } + + if (count > 0) + { + return p; + + } + else + { + return null; + } } private static string GetDicCode(Dictionary dic) @@ -442,22 +475,70 @@ private BestDimension FindBestDimension(Dictionary valueM private double GetSurpriseScore(Point rootCausePoint, Point anomalyPoint) { - double p = rootCausePoint.ExpectedValue / anomalyPoint.ExpectedValue; - double q = rootCausePoint.Value / anomalyPoint.Value; - double surprise = 0.5 * (p * Log2(2 * p / (p + q)) + q * Log2(2 * q / (p + q))); + double p; + double q; + + if (anomalyPoint.ExpectedValue == 0) + { + p = 0; + } + else + { + p = rootCausePoint.ExpectedValue / anomalyPoint.ExpectedValue; + } + + if (anomalyPoint.Value == 0) + { + q = 0; + } + else + { + q = rootCausePoint.Value / anomalyPoint.Value; + } + + double surprise = 0; + + if (p == 0) + { + surprise = 0.5 * (q * Log2(2 * q / (p + q))); + } + else if (q == 0) + { + surprise = 0.5 * (p * Log2(2 * p / (p + q))); + } + else + { + surprise = 0.5 * (p * Log2(2 * p / (p + q)) + q * Log2(2 * q / (p + q))); + } return surprise; } private double GetFinalScore(double surprise, double ep, double beta) { - return Math.Max(1, beta * surprise + (1 - beta) * ep); + double a = 0; + double b = 0; + if (surprise == 0) + { + a = 0; + } + if (ep == 0) + { + b = 0; + } + else + { + a = (1 - Math.Pow(2, -surprise)); + b = (1 - Math.Pow(2, -ep)); + } + return beta * a + (1 - beta) * b; } private Tuple GetSupriseAndExplainaryScore(Point rootCausePoint, Point anomalyPoint) { double surprise = GetSurpriseScore(rootCausePoint, anomalyPoint); - double ep = (rootCausePoint.Value - rootCausePoint.ExpectedValue) / (anomalyPoint.Value - anomalyPoint.ExpectedValue); + + double ep = 
anomalyPoint.Value - anomalyPoint.ExpectedValue == 0 ? 0 : Math.Abs((rootCausePoint.Value - rootCausePoint.ExpectedValue) / (anomalyPoint.Value - anomalyPoint.ExpectedValue)); return new Tuple(surprise, ep); } @@ -546,6 +627,10 @@ private void UpdateDistribution(Dictionary distribution, List Date: Fri, 17 Apr 2020 14:24:48 +0800 Subject: [PATCH 34/49] update variable name --- src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs index 140b77158f..c6ccc3dfa8 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs @@ -315,24 +315,19 @@ private AnomalyDirection GetRootCauseDirection(Point rootCausePoint) private void GetRootCauseDirectionAndScore(Dictionary dimPointMapping, Dictionary anomalyRoot, RootCause dst, double beta, PointTree pointTree, AggregateType aggType, string aggSymbol) { - Point anomalyPoint = GetPointByDimenstion(dimPointMapping, anomalyRoot, pointTree, aggType, aggSymbol); + Point anomalyPoint = GetPointByDimension(dimPointMapping, anomalyRoot, pointTree, aggType, aggSymbol); if (dst.Items.Count > 1) { //get surprise value and explanary power value - double sumSurprise = 0; - double sumEp = 0; List scoreList = new List(); foreach (RootCauseItem item in dst.Items) { - Point rootCausePoint = GetPointByDimenstion(dimPointMapping, item.Dimension, pointTree, aggType, aggSymbol); + Point rootCausePoint = GetPointByDimension(dimPointMapping, item.Dimension, pointTree, aggType, aggSymbol); if (anomalyPoint != null && rootCausePoint != null) { Tuple scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); scoreList.Add(new RootCauseScore(scores.Item1, scores.Item2)); - sumSurprise += scores.Item1; - sumEp += Math.Abs(scores.Item2); - item.Direction = GetRootCauseDirection(rootCausePoint); } } @@ -346,7 
+341,7 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap } else if (dst.Items.Count == 1) { - Point rootCausePoint = GetPointByDimenstion(dimPointMapping, dst.Items[0].Dimension, pointTree, aggType, aggSymbol); + Point rootCausePoint = GetPointByDimension(dimPointMapping, dst.Items[0].Dimension, pointTree, aggType, aggSymbol); if (anomalyPoint != null && rootCausePoint != null) { Tuple scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); @@ -356,7 +351,7 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap } } - private Point GetPointByDimenstion(Dictionary dimPointMapping, Dictionary dimension, PointTree pointTree, AggregateType aggType, string aggSymbol) + private Point GetPointByDimension(Dictionary dimPointMapping, Dictionary dimension, PointTree pointTree, AggregateType aggType, string aggSymbol) { if (dimPointMapping.ContainsKey(GetDicCode(dimension))) { From 8d17c3cdb57f0594318a5be3bf915294f4d4aa1b Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Tue, 21 Apr 2020 17:38:18 +0800 Subject: [PATCH 35/49] add some comments --- .../DTRootCauseAnalyzer.cs | 22 ++++++++++++++----- .../DTRootCauseLocalizationType.cs | 6 +++++ 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs index c6ccc3dfa8..e7e6f4e671 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs @@ -237,7 +237,7 @@ protected List GetTopAnomaly(List anomalyPoints, Point root, List< protected BestDimension SelectBestDimension(List totalPoints, List anomalyPoints, List aggDim) { double totalEntropy = GetEntropy(totalPoints.Count, anomalyPoints.Count); - Dictionary entroyGainMap = new Dictionary(); + SortedDictionary entroyGainMap = new SortedDictionary(); Dictionary entroyGainRatioMap = new Dictionary(); double sumGain = 0; @@ -268,7 +268,7 @@ protected BestDimension 
SelectBestDimension(List totalPoints, List //Use children point information to select best dimension private BestDimension SelectBestDimension(Dictionary> pointChildren, Dictionary> anomalyChildren, List aggDim) { - Dictionary entropyMap = new Dictionary(); + SortedDictionary entropyMap = new SortedDictionary(); Dictionary entropyRatioMap = new Dictionary(); double sumGain = 0; @@ -434,7 +434,7 @@ private void BuildTree(PointTree tree, List aggDims, Point point, string } } - private BestDimension FindBestDimension(Dictionary valueMap, Dictionary valueRatioMap, double meanGain, bool isLeavesLevel = true) + private BestDimension FindBestDimension(SortedDictionary valueMap, Dictionary valueRatioMap, double meanGain, bool isLeavesLevel = true) { BestDimension best = null; foreach (KeyValuePair dimension in valueMap) @@ -443,7 +443,7 @@ private BestDimension FindBestDimension(Dictionary valueM { if (dimension.Key.AnomalyDis.Count > 1) { - if (best == null || (best.AnomalyDis.Count != 1 && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0))) + if (best == null || (!Double.IsNaN(valueRatioMap[best]) && (best.AnomalyDis.Count != 1 && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0)))) { best = dimension.Key; } @@ -456,7 +456,7 @@ private BestDimension FindBestDimension(Dictionary valueM } else { - if ((isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0)) + if (!Double.IsNaN(valueRatioMap[best]) && (isLeavesLevel ? 
valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0)) { best = dimension.Key; } @@ -681,7 +681,7 @@ public static PointTree CreateDefaultInstance() } } - public sealed class BestDimension + public sealed class BestDimension: IComparable { public string DimensionKey; public Dictionary AnomalyDis; @@ -695,6 +695,16 @@ public static BestDimension CreateDefaultInstance() instance.PointDis = new Dictionary(); return instance; } + + public int CompareTo(object obj) { + if (obj == null) return 1; + + BestDimension other = obj as BestDimension; + if (other != null) + return DimensionKey.CompareTo(other.DimensionKey); + else + throw new ArgumentException("Object is not a BestDimension"); + } } public class RootCauseScore diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs index 8759fb9d03..8f92110939 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs @@ -93,12 +93,16 @@ public RootCause() public sealed class RootCauseLocalizationInput { + //When the anomaly incident occurs public DateTime AnomalyTimestamp { get; set; } + //Point with the anomaly dimension must exist in the slice list at the anomaly timestamp, or the libary will not calculate the root cause public Dictionary AnomalyDimension { get; set; } + //A list of points at different timestamp. If the slices don't contain point data corresponding to the anomaly timestamp, the root cause localization alogorithm will not calculate the root cause as no information at the anomaly timestamp is provided. 
public List Slices { get; set; } + //The aggregated symbol in the AnomalyDimension and point dimension should be consistent public AggregateType AggType { get; set; } public string AggSymbol { get; set; } @@ -217,6 +221,7 @@ public enum AnomalyDirection public sealed class RootCauseItem : IEquatable { public double Score; + //Path is a list of the dimension key that the libary selected for you. In this root cause localization library, for one time call for the library, the path will be obtained and the length of path list will always be 1. Different RootCauseItem obtained from one library call will have the same path as it is the best dimension selected for the input. public List Path; public Dictionary Dimension; public AnomalyDirection Direction; @@ -266,6 +271,7 @@ public sealed class Point : IEquatable public double Value { get; set; } public double ExpectedValue { get; set; } public bool IsAnomaly { get; set; } + //The value for this dictionary is an object, when the Dimension is used, the equals function for the Object will be used. If you have a customized class, you need to define the Equals function. 
public Dictionary Dimension { get; set; } public double Delta { get; set; } From 66b614a3da6568d8c24f66a2558e58b91b09e7da Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Thu, 23 Apr 2020 14:55:41 +0800 Subject: [PATCH 36/49] refine internal function --- src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs index e7e6f4e671..fd72ab09d6 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs @@ -36,7 +36,7 @@ private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src) dst.Items = new List(); DimensionInfo dimensionInfo = SeperateDimension(src.AnomalyDimension, src.AggSymbol); - Tuple> pointInfo = GetPointsInfo(src, dimensionInfo, src.AggSymbol); + Tuple> pointInfo = GetPointsInfo(src, dimensionInfo); PointTree pointTree = pointInfo.Item1; PointTree anomalyTree = pointInfo.Item2; Dictionary dimPointMapping = pointInfo.Item3; @@ -90,7 +90,7 @@ protected DimensionInfo SeperateDimension(Dictionary dimensions, return info; } - protected Tuple> GetPointsInfo(RootCauseLocalizationInput src, DimensionInfo dimensionInfo, string aggSymbol) + protected Tuple> GetPointsInfo(RootCauseLocalizationInput src, DimensionInfo dimensionInfo) { PointTree pointTree = PointTree.CreateDefaultInstance(); PointTree anomalyTree = PointTree.CreateDefaultInstance(); @@ -109,11 +109,11 @@ protected Tuple> GetPointsInfo(R bool isValidPoint = point.IsAnomaly == true; if (ContainsAll(point.Dimension, subDim)) { - BuildTree(pointTree, dimensionInfo.AggDims, point, aggSymbol); + BuildTree(pointTree, dimensionInfo.AggDims, point, src.AggSymbol); if (isValidPoint) { - BuildTree(anomalyTree, dimensionInfo.AggDims, point, aggSymbol); + BuildTree(anomalyTree, dimensionInfo.AggDims, point, src.AggSymbol); } } } @@ -681,7 +681,7 @@ public static PointTree 
CreateDefaultInstance() } } - public sealed class BestDimension: IComparable + public sealed class BestDimension : IComparable { public string DimensionKey; public Dictionary AnomalyDis; @@ -696,7 +696,8 @@ public static BestDimension CreateDefaultInstance() return instance; } - public int CompareTo(object obj) { + public int CompareTo(object obj) + { if (obj == null) return 1; BestDimension other = obj as BestDimension; From 12e7e18a8088ff0e5ce8e9f8c594913294c084f6 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Fri, 24 Apr 2020 15:16:21 +0800 Subject: [PATCH 37/49] handle for infinity and nan --- .../DTRootCauseAnalyzer.cs | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs index fd72ab09d6..90b96a3d9a 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs @@ -197,7 +197,7 @@ protected List GetTopAnomaly(List anomalyPoints, Point root, List< Dictionary pointDistribution = new Dictionary(); UpdateDistribution(pointDistribution, totalPoints, dimKey); - anomalyPoints.OrderBy(x => x.Delta); + anomalyPoints = anomalyPoints.OrderBy(x => x.Delta).ToList(); if (root.Delta > 0) { @@ -251,9 +251,15 @@ protected BestDimension SelectBestDimension(List totalPoints, List double relativeEntropy = GetDimensionEntropy(dimension.PointDis, dimension.AnomalyDis); double gain = totalEntropy - relativeEntropy; + if (Double.IsNaN(gain)) { + gain = 0; + } entroyGainMap.Add(dimension, gain); double gainRatio = gain / GetDimensionInstrinsicValue(dimension.PointDis); + if (Double.IsInfinity(gainRatio)) { + gainRatio = 0; + } entroyGainRatioMap.Add(dimension, gainRatio); sumGain += gain; @@ -287,9 +293,16 @@ private BestDimension SelectBestDimension(Dictionary> pointC } double entropy = GetEntropy(dimension.PointDis.Count, dimension.AnomalyDis.Count); + if (Double.IsNaN(entropy)) { + entropy = 
Double.MaxValue; + } entropyMap.Add(dimension, entropy); double gainRatio = entropy / GetDimensionInstrinsicValue(dimension.PointDis); + + if (Double.IsInfinity(gainRatio)) { + gainRatio = 0; + } entropyRatioMap.Add(dimension, gainRatio); sumGain += entropy; From e2136159c03d8c91146be94f676f28660c9ab0a1 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Sun, 26 Apr 2020 10:45:32 +0800 Subject: [PATCH 38/49] rename the algorithm by removing DT --- ...=> time-series-root-cause-localization.md} | 0 ...eRootCauseByDT.cs => LocalizeRootCause.cs} | 12 ++--- .../ExtensionsCatalog.cs | 8 +-- ...tCauseAnalyzer.cs => RootCauseAnalyzer.cs} | 4 +- ...calization.cs => RootCauseLocalization.cs} | 50 +++++++++---------- ...onType.cs => RootCauseLocalizationType.cs} | 0 .../TimeSeriesDirectApi.cs | 6 +-- 7 files changed, 40 insertions(+), 40 deletions(-) rename docs/api-reference/{time-series-root-cause-localization-dt.md => time-series-root-cause-localization.md} (100%) rename docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/{LocalizeRootCauseByDT.cs => LocalizeRootCause.cs} (92%) rename src/Microsoft.ML.TimeSeries/{DTRootCauseAnalyzer.cs => RootCauseAnalyzer.cs} (99%) rename src/Microsoft.ML.TimeSeries/{DTRootCauseLocalization.cs => RootCauseLocalization.cs} (81%) rename src/Microsoft.ML.TimeSeries/{DTRootCauseLocalizationType.cs => RootCauseLocalizationType.cs} (100%) diff --git a/docs/api-reference/time-series-root-cause-localization-dt.md b/docs/api-reference/time-series-root-cause-localization.md similarity index 100% rename from docs/api-reference/time-series-root-cause-localization-dt.md rename to docs/api-reference/time-series-root-cause-localization.md diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs similarity index 92% rename from docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs 
rename to docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs index 2d8f0a4e83..f51098c2e3 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCauseByDT.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs @@ -5,7 +5,7 @@ namespace Samples.Dynamic { - public static class LocalizeRootCauseByDT + public static class LocalizeRootCause { private static string AGG_SYMBOL = "##SUM##"; public static void Example() @@ -14,17 +14,17 @@ public static void Example() // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); - // Create an empty list as the dataset. The 'DTRootCauseLocalization' API does not - // require training data as the estimator ('DTRootCauseLocalizationEstimator') - // created by 'DTRootCauseLocalization' API is not a trainable estimator. The + // Create an empty list as the dataset. The 'RootCauseLocalization' API does not + // require training data as the estimator ('RootCauseLocalizationEstimator') + // created by 'RootCauseLocalization' API is not a trainable estimator. The // empty list is only needed to pass input schema to the pipeline. var emptySamples = new List(); // Convert sample list to an empty IDataView. var emptyDataView = mlContext.Data.LoadFromEnumerable(emptySamples); - // A pipeline for localizeing root cause. - var localizePipeline = mlContext.Transforms.LocalizeRootCauseByDT(nameof(RootCauseLocalizationTransformedData.RootCause), nameof(RootCauseLocalizationData.Input)); + // A pipeline for localizing root cause. + var localizePipeline = mlContext.Transforms.LocalizeRootCause(nameof(RootCauseLocalizationTransformedData.RootCause), nameof(RootCauseLocalizationData.Input)); // Fit to data. 
var localizeTransformer = localizePipeline.Fit(emptyDataView); diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index 6e591606b8..b41b4bbea5 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -147,7 +147,7 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog => new SrCnnAnomalyEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, windowSize, backAddWindowSize, lookaheadWindowSize, averageingWindowSize, judgementWindowSize, threshold, inputColumnName); /// - /// Create , which localizes root causes using decision tree algorithm. + /// Create , which localizes root causes using decision tree algorithm. /// /// The transform's catalog. /// Name of the column resulting from the transformation of . @@ -158,12 +158,12 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog /// /// /// /// /// - public static DTRootCauseLocalizationEstimator LocalizeRootCauseByDT(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, double beta = 0.5) - => new DTRootCauseLocalizationEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName ?? outputColumnName, beta); + public static RootCauseLocalizationEstimator LocalizeRootCause(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, double beta = 0.5) + => new RootCauseLocalizationEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName ?? outputColumnName, beta); /// /// Singular Spectrum Analysis (SSA) model for univariate time-series forecasting. 
diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs similarity index 99% rename from src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs rename to src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs index 90b96a3d9a..339ee4d9cd 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs @@ -9,7 +9,7 @@ namespace Microsoft.ML.TimeSeries { - public class DTRootCauseAnalyzer + public class RootCauseAnalyzer { private static double _anomalyRatioThreshold = 0.5; private static double _anomalyDeltaThreshold = 0.95; @@ -18,7 +18,7 @@ public class DTRootCauseAnalyzer private RootCauseLocalizationInput _src; private double _beta; - public DTRootCauseAnalyzer(RootCauseLocalizationInput src, double beta) + public RootCauseAnalyzer(RootCauseLocalizationInput src, double beta) { _src = src; _beta = beta; diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs b/src/Microsoft.ML.TimeSeries/RootCauseLocalization.cs similarity index 81% rename from src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs rename to src/Microsoft.ML.TimeSeries/RootCauseLocalization.cs index 57becd0524..eea280f1e4 100644 --- a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalization.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseLocalization.cs @@ -14,32 +14,32 @@ using Microsoft.ML.TimeSeries; using Microsoft.ML.Transforms.TimeSeries; -[assembly: LoadableClass(typeof(DTRootCauseLocalizationTransformer), null, typeof(SignatureLoadModel), - DTRootCauseLocalizationTransformer.UserName, DTRootCauseLocalizationTransformer.LoaderSignature)] +[assembly: LoadableClass(typeof(RootCauseLocalizationTransformer), null, typeof(SignatureLoadModel), + RootCauseLocalizationTransformer.UserName, RootCauseLocalizationTransformer.LoaderSignature)] -[assembly: LoadableClass(typeof(IRowMapper), typeof(DTRootCauseLocalizationTransformer), null, typeof(SignatureLoadRowMapper), - 
DTRootCauseLocalizationTransformer.UserName, DTRootCauseLocalizationTransformer.LoaderSignature)] +[assembly: LoadableClass(typeof(IRowMapper), typeof(RootCauseLocalizationTransformer), null, typeof(SignatureLoadRowMapper), + RootCauseLocalizationTransformer.UserName, RootCauseLocalizationTransformer.LoaderSignature)] namespace Microsoft.ML.Transforms.TimeSeries { /// - /// resulting from fitting an . + /// resulting from fitting an . /// - public sealed class DTRootCauseLocalizationTransformer : OneToOneTransformerBase + public sealed class RootCauseLocalizationTransformer : OneToOneTransformerBase { internal const string Summary = "Localize root cause for anomaly."; - internal const string UserName = "DT Root Cause Localization Transform"; - internal const string LoaderSignature = "DTRootCauseTransform"; + internal const string UserName = "Root Cause Localization Transform"; + internal const string LoaderSignature = "RootCauseTransform"; private static VersionInfo GetVersionInfo() { return new VersionInfo( - modelSignature: "DTROOTCL", + modelSignature: "ROOTCAUSE", verWrittenCur: 0x00010001, // Initial verReadableCur: 0x00010001, verWeCanReadBack: 0x00010001, loaderSignature: LoaderSignature, - loaderAssemblyName: typeof(DTRootCauseLocalizationTransformer).Assembly.FullName); + loaderAssemblyName: typeof(RootCauseLocalizationTransformer).Assembly.FullName); } private const string RegistrationName = "RootCauseLocalization"; @@ -70,7 +70,7 @@ internal class Options : TransformInputBase public string Output; [Argument(ArgumentType.AtMostOnce, HelpText = "Weight for getting the score for the root cause item.", ShortName = "Beta", SortOrder = 2)] - public double Beta = DTRootCauseLocalizationEstimator.Defaults.Beta; + public double Beta = RootCauseLocalizationEstimator.Defaults.Beta; } @@ -88,7 +88,7 @@ internal class Options : TransformInputBase /// Weight for generating score. 
/// The name of the columns (first item of the tuple), and the name of the resulting output column (second item of the tuple). - internal DTRootCauseLocalizationTransformer(IHostEnvironment env, double beta = DTRootCauseLocalizationEstimator.Defaults.Beta, params (string outputColumnName, string inputColumnName)[] columns) + internal RootCauseLocalizationTransformer(IHostEnvironment env, double beta = RootCauseLocalizationEstimator.Defaults.Beta, params (string outputColumnName, string inputColumnName)[] columns) : base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), columns) { Host.CheckUserArg(beta >= 0 && beta <= 1, nameof(Options.Beta), "Must be in [0,1]"); @@ -97,16 +97,16 @@ internal DTRootCauseLocalizationTransformer(IHostEnvironment env, double beta = } // Factory method for SignatureLoadModel. - private static DTRootCauseLocalizationTransformer Create(IHostEnvironment env, ModelLoadContext ctx) + private static RootCauseLocalizationTransformer Create(IHostEnvironment env, ModelLoadContext ctx) { Contracts.CheckValue(env, nameof(env)); var host = env.Register(RegistrationName); host.CheckValue(ctx, nameof(ctx)); ctx.CheckAtModel(GetVersionInfo()); - return new DTRootCauseLocalizationTransformer(host, ctx); + return new RootCauseLocalizationTransformer(host, ctx); } - private DTRootCauseLocalizationTransformer(IHost host, ModelLoadContext ctx) + private RootCauseLocalizationTransformer(IHost host, ModelLoadContext ctx) : base(host, ctx) { // *** Binary format *** @@ -148,9 +148,9 @@ private protected override void CheckInputColumn(DataViewSchema inputSchema, int private sealed class Mapper : OneToOneMapperBase { - private readonly DTRootCauseLocalizationTransformer _parent; + private readonly RootCauseLocalizationTransformer _parent; - public Mapper(DTRootCauseLocalizationTransformer parent, DataViewSchema inputSchema) + public Mapper(RootCauseLocalizationTransformer parent, DataViewSchema inputSchema) : 
base(parent.Host.Register(nameof(Mapper)), parent, inputSchema) { _parent = parent; @@ -220,19 +220,19 @@ private void CheckRootCauseInput(RootCauseLocalizationInput src, IHost host) private void LocalizeRootCauses(RootCauseLocalizationInput src, ref RootCause dst) { - DTRootCauseAnalyzer analyzer = new DTRootCauseAnalyzer(src, _parent._beta); + RootCauseAnalyzer analyzer = new RootCauseAnalyzer(src, _parent._beta); dst = analyzer.Analyze(); } } } /// - /// for the . + /// for the . /// /// /// | /// | Exportable to ONNX | No | /// - /// [!include[io](~/../docs/samples/docs/api-reference/time-series-root-cause-localization-dt.md)] + /// [!include[io](~/../docs/samples/docs/api-reference/time-series-root-cause-localization.md)] /// /// The resulting creates a new column, named as specified in the output column name parameters, and /// localize the root causes which contribute most to the anomaly. @@ -249,8 +249,8 @@ private void LocalizeRootCauses(RootCauseLocalizationInput src, ref RootCause ds /// ]]> /// /// - /// - public sealed class DTRootCauseLocalizationEstimator : TrivialEstimator + /// + public sealed class RootCauseLocalizationEstimator : TrivialEstimator { internal static class Defaults { @@ -265,8 +265,8 @@ internal static class Defaults /// Name of input column to run the root cause localization. /// The weight for generating score in output result. [BestFriend] - internal DTRootCauseLocalizationEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName, double beta = Defaults.Beta) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(DTRootCauseLocalizationEstimator)), new DTRootCauseLocalizationTransformer(env, beta, new[] { (outputColumnName, inputColumnName ?? 
outputColumnName) })) + internal RootCauseLocalizationEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName, double beta = Defaults.Beta) + : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(RootCauseLocalizationEstimator)), new RootCauseLocalizationTransformer(env, beta, new[] { (outputColumnName, inputColumnName ?? outputColumnName) })) { } diff --git a/src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs similarity index 100% rename from src/Microsoft.ML.TimeSeries/DTRootCauseLocalizationType.cs rename to src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 5696c032f9..1e3817350b 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -549,7 +549,7 @@ public void AnomalyDetectionWithSrCnn() } [Fact] - public void RootCauseLocalizationWithDT() + public void RootCauseLocalization() { // Create an root cause localizatiom input list. var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, AggregateType.Sum, _aggSymbol) }; @@ -559,7 +559,7 @@ public void RootCauseLocalizationWithDT() var data = ml.Data.LoadFromEnumerable(rootCauseLocalizationData); // Create pipeline to localize root cause by decision tree. - var pipeline = ml.Transforms.LocalizeRootCauseByDT(nameof(RootCauseLocalizationTransformedData.RootCause), nameof(RootCauseLocalizationData.Input)); + var pipeline = ml.Transforms.LocalizeRootCause(nameof(RootCauseLocalizationTransformedData.RootCause), nameof(RootCauseLocalizationData.Input)); // Fit the model. 
var model = pipeline.Fit(data); @@ -598,7 +598,7 @@ public void RootCauseLocalizationWithDT() var dummyData = ml.Data.LoadFromEnumerable(new List() { "Test"}); //Create path - var modelPath = "DTRootCauseLocalizationModel.zip"; + var modelPath = "RootCauseLocalizationModel.zip"; //Save model to a file ml.Model.Save(model, dummyData.Schema, modelPath); From 30915cd3e210407f992011d186903d8a11898fae Mon Sep 17 00:00:00 2001 From: Shakira Sun <61308292+suxi-ms@users.noreply.github.com> Date: Mon, 27 Apr 2020 09:58:54 +0800 Subject: [PATCH 39/49] Update src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs Co-Authored-By: Justin Ormont --- src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs index 339ee4d9cd..5518087ea6 100644 --- a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs @@ -349,7 +349,6 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap for (int i = 0; i < scoreList.Count; i++) { dst.Items[i].Score = GetFinalScore(scoreList[i].Surprise, Math.Abs(scoreList[i].ExplainaryScore), beta); - } } else if (dst.Items.Count == 1) From fda4ec7853055d5b08f77f8331bfcb756952d4f3 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Mon, 27 Apr 2020 10:27:45 +0800 Subject: [PATCH 40/49] fix type --- src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs | 5 +++-- src/Microsoft.ML.TimeSeries/RootCauseLocalization.cs | 2 +- src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs index 339ee4d9cd..8962c3d7e6 100644 --- a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs @@ -349,7 +349,6 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap for (int 
i = 0; i < scoreList.Count; i++) { dst.Items[i].Score = GetFinalScore(scoreList[i].Surprise, Math.Abs(scoreList[i].ExplainaryScore), beta); - } } else if (dst.Items.Count == 1) @@ -398,7 +397,6 @@ private Point GetPointByDimension(Dictionary dimPointMapping, Dic if (count > 0) { return p; - } else { @@ -539,6 +537,7 @@ private double GetFinalScore(double surprise, double ep, double beta) a = (1 - Math.Pow(2, -surprise)); b = (1 - Math.Pow(2, -ep)); } + return beta * a + (1 - beta) * b; } @@ -639,6 +638,7 @@ public double Log2(double val) { return 0; } + return Math.Log(val) / Math.Log(2); } @@ -656,6 +656,7 @@ public static bool ContainsAll(Dictionary bigDic, Dictionary creates a new column, named as specified in the output column name parameters, and + /// The resulting creates a new column, named as specified in the output column name parameters, and /// localize the root causes which contribute most to the anomaly. /// Check the See Also section for links to usage examples. /// ]]> diff --git a/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs index 8f92110939..78a48e5d57 100644 --- a/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs @@ -15,7 +15,7 @@ namespace Microsoft.ML.TimeSeries public sealed class RootCauseLocalizationInputTypeAttribute : DataViewTypeAttribute { /// - /// Create a root cause localizagin input type. + /// Create a root cause localizing input type. 
/// public RootCauseLocalizationInputTypeAttribute() { From 620ef58b53c60a068f98de0f085993eae06ddf29 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Mon, 27 Apr 2020 17:06:28 +0800 Subject: [PATCH 41/49] add an else branch when delta is negative --- src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs index 8962c3d7e6..5f29d3645a 100644 --- a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs @@ -203,7 +203,9 @@ protected List GetTopAnomaly(List anomalyPoints, Point root, List< { anomalyPoints.Reverse(); } - + else { + anomalyPoints = anomalyPoints.FindAll(x => x.Delta < 0); + } if (anomalyPoints.Count == 1) { return anomalyPoints; From 7f89feaacec54e795bbb59cdb4fdb994d73e1cba Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Tue, 28 Apr 2020 14:25:50 +0800 Subject: [PATCH 42/49] update model signature --- src/Microsoft.ML.TimeSeries/RootCauseLocalization.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Microsoft.ML.TimeSeries/RootCauseLocalization.cs b/src/Microsoft.ML.TimeSeries/RootCauseLocalization.cs index d90ce65d96..bfbc4af904 100644 --- a/src/Microsoft.ML.TimeSeries/RootCauseLocalization.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseLocalization.cs @@ -34,7 +34,7 @@ public sealed class RootCauseLocalizationTransformer : OneToOneTransformerBase private static VersionInfo GetVersionInfo() { return new VersionInfo( - modelSignature: "ROOTCAUSE", + modelSignature: "ROOTCAUS", verWrittenCur: 0x00010001, // Initial verReadableCur: 0x00010001, verWeCanReadBack: 0x00010001, From 42dcbc2208534de54712ed2295e2806d3b89e3f4 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Thu, 7 May 2020 15:47:10 +0800 Subject: [PATCH 43/49] update rca interface by removing transformer --- .../time-series-root-cause-localization.md | 22 +- 
.../TimeSeries/LocalizeRootCause.cs | 56 +--- .../ExtensionsCatalog.cs | 52 +++- .../RootCauseAnalyzer.cs | 33 +- .../RootCauseLocalization.cs | 294 ------------------ .../RootCauseLocalizationType.cs | 20 +- .../TimeSeriesDirectApi.cs | 118 ++----- 7 files changed, 116 insertions(+), 479 deletions(-) delete mode 100644 src/Microsoft.ML.TimeSeries/RootCauseLocalization.cs diff --git a/docs/api-reference/time-series-root-cause-localization.md b/docs/api-reference/time-series-root-cause-localization.md index 16994b8c48..3eecc838e2 100644 --- a/docs/api-reference/time-series-root-cause-localization.md +++ b/docs/api-reference/time-series-root-cause-localization.md @@ -1,13 +1,15 @@ -At Mircosoft, we develop a decision tree based root cause localization method which helps to find out the root causes for an anomaly incident incrementally. +At Microsoft, we develop a decision tree based root cause localization method which helps to find out the root causes for an anomaly incident at a specific timestamp incrementally. ## Multi-Dimensional Root Cause Localization -It's a common case that one measure are collected with many dimensions (*e.g.*, Province, ISP) whose values are categorical(*e.g.*, Beijing or Shanghai for dimension Province). When a measure's value deviates from its expected value, this measure encounters anomalies. In such case, operators would like to localize the root cause dimension combinations rapidly and accurately. Multi-dimensional root cause localization is critical to troubleshoot and mitigate such case. ## Algorithm -The decision based root cause localization method is unsupervised, which means training step is no needed. It consists of the following major steps: -(1) Find best dimension which divides the anomaly and unanomaly data based on decision tree according to entropy gain and entropy gain ratio. -(2) Find the top anomaly points for the selected best dimension. +It's a common case that one measure is collected with many dimensions (*e.g.*, Province, ISP) whose values are categorical (*e.g.*, Beijing or Shanghai for dimension Province). When a measure's value deviates from its expected value, this measure encounters anomalies. In such a case, operators would like to localize the root cause dimension combinations rapidly and accurately.
Multi-dimensional root cause localization is critical to troubleshoot and mitigate such cases. ## Algorithm +The decision tree based root cause localization method is unsupervised, which means a training step is not needed. It consists of the following major steps: + +(1) Find the best dimension which divides the anomalous and regular data based on a decision tree according to entropy gain and entropy gain ratio. + +(2) Find the top anomaly points which contribute the most to the anomaly incident given the selected best dimension. ### Decision Tree @@ -15,11 +17,11 @@ The decision based root cause localization method is unsupervised, which means t #### Information Entropy -Information [entropy](https://en.wikipedia.org/wiki/Entropy_(information_theory)) is a measure of disorder or uncertainty. You can think of it as a measure of purity as well.The less the value , the more pure of data D. +Information [entropy](https://en.wikipedia.org/wiki/Entropy_(information_theory)) is a measure of disorder or uncertainty. You can think of it as a measure of purity as well. The smaller the value, the purer the data D. $$Ent(D) = - \sum_{k=1}^{|y|} p_k\log_2(p_k) $$ -where $p_k$ represents the probability of an element in dataset. In our case, there are only two classed, the anomaly points and the normaly points. $|y|$ is the count of total anomalies. +where $p_k$ is the proportion of elements in dataset $D$ that belong to class $k$, and $|y|$ is the number of classes. In our case, there are only two classes, the anomalous points and the regular points, so $|y| = 2$.
#### Information Gain [Information gain](https://en.wikipedia.org/wiki/Information_gain_in_decision_trees) is a metric to measure the reduction of this disorder in our target class given additional information about it. Mathematically it can be written as: @@ -36,12 +38,12 @@ Information gain is biased toward variables with large number of distinct values $$Ratio(D, a) = \frac{Gain(D,a)} {IV(a)} $$ -where intrinsic value(IV) is the entropy of split (with respect to dimension $a$ on focus). +where the intrinsic value ($IV$) is the entropy of the split (with respect to the dimension $a$ in focus). $$IV(a) = -\sum_{v=1}^V\frac{|D^v|} {|D|} \log_2 \frac{|D^v|} {|D|} $$ -In out strategy, firstly, for all the aggration dimensions, we loop all the dimensions to find the dimension who's entropy gain is above mean entropy gain ration, then from the filtered dimensions, we select the dimension with highest entropy ratio as the best dimension. In the meanwhile, dimensions for which the anomaly value count is only one, we include it when calculation. +In our strategy, we first loop over all the aggregated dimensions to find those whose entropy gain is above the mean entropy gain; then, from the filtered dimensions, we select the one with the highest entropy gain ratio as the best dimension. Meanwhile, dimensions for which the anomaly value count is only one are also included in the calculation. > [!Note] > 1. As our algorithm depends on the data you input, so if the input points is incorrect or incomplete, the calculated result will be unexpected. -> 2. Currently, the algorithm localize the root cause incrementally, which means at most one dimension with the values are detected. If you want to find out all the dimension that contributes to the anomaly, you can call this API recursively. +> 2. Currently, the algorithm localizes the root cause incrementally, which means at most one dimension, together with its values, is detected.
If you want to find out all the dimensions that contribute to the anomaly, you can call this API recursively by updating the anomaly incident with the fixed dimension value. diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs index f51098c2e3..572f9a58be 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs @@ -14,34 +14,16 @@ public static void Example() // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); - // Create an empty list as the dataset. The 'RootCauseLocalization' API does not - // require training data as the estimator ('RootCauseLocalizationEstimator') - // created by 'RootCauseLocalization' API is not a trainable estimator. The - // empty list is only needed to pass input schema to the pipeline. - var emptySamples = new List(); - - // Convert sample list to an empty IDataView. - var emptyDataView = mlContext.Data.LoadFromEnumerable(emptySamples); - - // A pipeline for localizing root cause. - var localizePipeline = mlContext.Transforms.LocalizeRootCause(nameof(RootCauseLocalizationTransformedData.RootCause), nameof(RootCauseLocalizationData.Input)); - - // Fit to data. - var localizeTransformer = localizePipeline.Fit(emptyDataView); - - // Create the prediction engine to get the root cause result from the input data. - var predictionEngine = mlContext.Model.CreatePredictionEngine(localizeTransformer); - - // Call the prediction API. + // Create a root cause localization input instance.
DateTime timestamp = GetTimestamp(); - var data = new RootCauseLocalizationData(timestamp, GetAnomalyDimension(), new List() { new MetricSlice(timestamp, GetPoints()) }, AggregateType.Sum, AGG_SYMBOL); + var data = new RootCauseLocalizationInput(timestamp, GetAnomalyDimension(), new List() { new MetricSlice(timestamp, GetPoints()) }, AggregateType.Sum, AGG_SYMBOL); - var prediction = predictionEngine.Predict(data); + // Get the root cause localization result + RootCause prediction = mlContext.AnomalyDetection.LocalizeRootCause(data); // Print the localization result. int count = 0; - foreach (RootCauseItem item in prediction.RootCause.Items) + foreach (RootCauseItem item in prediction.Items) { count++; Console.WriteLine($"Root cause item #{count} ..."); @@ -127,33 +109,5 @@ private static DateTime GetTimestamp() { return new DateTime(2020, 3, 23, 0, 0, 0); } - - private class RootCauseLocalizationData - { - [RootCauseLocalizationInputType] - public RootCauseLocalizationInput Input { get; set; } - - public RootCauseLocalizationData() - { - Input = null; - } - - public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateType, string aggregateSymbol) - { - Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, aggregateType, - aggregateSymbol); - } - } - - private class RootCauseLocalizationTransformedData - { - [RootCauseType()] - public RootCause RootCause { get; set; } - - public RootCauseLocalizationTransformedData() - { - RootCause = null; - } - } } } \ No newline at end of file diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index b41b4bbea5..3bcb695bea 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -2,7 +2,10 @@ // The .NET Foundation licenses this file to you under the MIT license. 
// See the LICENSE file in the project root for more information. +using System; +using System.Reflection; using Microsoft.ML.Data; +using Microsoft.ML.TimeSeries; using Microsoft.ML.Transforms.TimeSeries; namespace Microsoft.ML @@ -147,14 +150,11 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog => new SrCnnAnomalyEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, windowSize, backAddWindowSize, lookaheadWindowSize, averageingWindowSize, judgementWindowSize, threshold, inputColumnName); /// - /// Create , which localizes root causes using decision tree algorithm. + /// Create , which localizes root causes using decision tree algorithm. /// - /// The transform's catalog. - /// Name of the column resulting from the transformation of . - /// The column data is an instance of . - /// Name of the input column. - /// The column data is an instance of . - /// The weight parameter in score. The range of the parameter should be in [0,1]. + /// The anomaly detection catalog. + /// Root cause's input. The data is an instance of . + /// Beta is a weight parameter for user to choose. It is used when score is calculated for each root cause item. The range of beta should be in [0,1]. For a larger beta, root cause point which has a large difference between value and expected value will get a high score. On the contrary, for a small beta, root cause items which has a high relative change will get a high score. /// /// /// /// /// - public static RootCauseLocalizationEstimator LocalizeRootCause(this TransformsCatalog catalog, string outputColumnName, string inputColumnName = null, double beta = 0.5) - => new RootCauseLocalizationEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, inputColumnName ?? 
outputColumnName, beta); + public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog, RootCauseLocalizationInput src, double beta = 0.5) + { + //check the root cause input + CheckRootCauseInput(src); + + //check beta + if (beta < 0 || beta > 1) { + throw new ArgumentException("Beta must be in [0,1]"); + } + + //find out the root cause + RootCauseAnalyzer analyzer = new RootCauseAnalyzer(src, beta); + RootCause dst = analyzer.Analyze(); + return dst; + } + + private static void CheckRootCauseInput(RootCauseLocalizationInput src) + { + if (src.Slices.Count < 1) + { + throw new ArgumentException("Length of Slices must be larger than 0"); + } + + bool containsAnomalyTimestamp = false; + foreach (MetricSlice slice in src.Slices) + { + if (slice.TimeStamp.Equals(src.AnomalyTimestamp)) + { + containsAnomalyTimestamp = true; + } + } + if (!containsAnomalyTimestamp) + { + throw new ArgumentException("Has no points in the given anomaly timestamp"); + } + } /// /// Singular Spectrum Analysis (SSA) model for univariate time-series forecasting. 
diff --git a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs index 5f29d3645a..7701d3aeb5 100644 --- a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs @@ -71,7 +71,7 @@ protected List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInp return points; } - protected DimensionInfo SeperateDimension(Dictionary dimensions, string aggSymbol) + protected DimensionInfo SeperateDimension(Dictionary dimensions, Object aggSymbol) { DimensionInfo info = DimensionInfo.CreateDefaultInstance(); foreach (KeyValuePair entry in dimensions) @@ -322,13 +322,16 @@ private AnomalyDirection GetRootCauseDirection(Point rootCausePoint) { return AnomalyDirection.Up; } - else + else if (rootCausePoint.ExpectedValue > rootCausePoint.Value) { return AnomalyDirection.Down; } + else { + return AnomalyDirection.Same; + } } - private void GetRootCauseDirectionAndScore(Dictionary dimPointMapping, Dictionary anomalyRoot, RootCause dst, double beta, PointTree pointTree, AggregateType aggType, string aggSymbol) + private void GetRootCauseDirectionAndScore(Dictionary dimPointMapping, Dictionary anomalyRoot, RootCause dst, double beta, PointTree pointTree, AggregateType aggType, Object aggSymbol) { Point anomalyPoint = GetPointByDimension(dimPointMapping, anomalyRoot, pointTree, aggType, aggSymbol); if (dst.Items.Count > 1) @@ -350,7 +353,13 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap //get final score for (int i = 0; i < scoreList.Count; i++) { - dst.Items[i].Score = GetFinalScore(scoreList[i].Surprise, Math.Abs(scoreList[i].ExplainaryScore), beta); + if (aggType.Equals(AggregateType.Max) || aggType.Equals(AggregateType.Min)) + { + dst.Items[i].Score = 1; + } + else { + dst.Items[i].Score = GetFinalScore(scoreList[i].Surprise, Math.Abs(scoreList[i].ExplainaryScore), beta); + } } } else if (dst.Items.Count == 1) @@ -359,13 +368,19 @@ private void 
GetRootCauseDirectionAndScore(Dictionary dimPointMap if (anomalyPoint != null && rootCausePoint != null) { Tuple scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); - dst.Items[0].Score = GetFinalScore(scores.Item1, scores.Item2, beta); + if (aggType.Equals(AggregateType.Max) || aggType.Equals(AggregateType.Min)) + { + dst.Items[0].Score = 1; + } + else { + dst.Items[0].Score = GetFinalScore(scores.Item1, scores.Item2, beta); + } dst.Items[0].Direction = GetRootCauseDirection(rootCausePoint); } } } - private Point GetPointByDimension(Dictionary dimPointMapping, Dictionary dimension, PointTree pointTree, AggregateType aggType, string aggSymbol) + private Point GetPointByDimension(Dictionary dimPointMapping, Dictionary dimension, PointTree pointTree, AggregateType aggType, Object aggSymbol) { if (dimPointMapping.ContainsKey(GetDicCode(dimension))) { @@ -411,14 +426,14 @@ private static string GetDicCode(Dictionary dic) return string.Join(";", dic.Select(x => x.Key + "=" + (string)x.Value).ToArray()); } - private void BuildTree(PointTree tree, List aggDims, Point point, string aggSymbol) + private void BuildTree(PointTree tree, List aggDims, Point point, Object aggSymbol) { int aggNum = 0; string nextDim = null; foreach (string dim in aggDims) { - if (IsAggregationDimension((string)point.Dimension[dim], aggSymbol)) + if (IsAggregationDimension(point.Dimension[dim], aggSymbol)) { aggNum++; } @@ -662,7 +677,7 @@ public static bool ContainsAll(Dictionary bigDic, Dictionary - /// resulting from fitting an . 
- /// - public sealed class RootCauseLocalizationTransformer : OneToOneTransformerBase - { - internal const string Summary = "Localize root cause for anomaly."; - internal const string UserName = "Root Cause Localization Transform"; - internal const string LoaderSignature = "RootCauseTransform"; - - private static VersionInfo GetVersionInfo() - { - return new VersionInfo( - modelSignature: "ROOTCAUS", - verWrittenCur: 0x00010001, // Initial - verReadableCur: 0x00010001, - verWeCanReadBack: 0x00010001, - loaderSignature: LoaderSignature, - loaderAssemblyName: typeof(RootCauseLocalizationTransformer).Assembly.FullName); - } - - private const string RegistrationName = "RootCauseLocalization"; - - internal sealed class Column : OneToOneColumn - { - internal static Column Parse(string str) - { - var res = new Column(); - if (res.TryParse(str)) - return res; - return null; - } - - internal bool TryUnparse(StringBuilder sb) - { - Contracts.AssertValue(sb); - return TryUnparseCore(sb); - } - } - - internal class Options : TransformInputBase - { - [Argument(ArgumentType.Required, HelpText = "The name of the source column.", ShortName = "src", SortOrder = 1, Purpose = SpecialPurpose.ColumnName)] - public string Source; - - [Argument(ArgumentType.Required, HelpText = "The name of the output column.", SortOrder = 2)] - public string Output; - - [Argument(ArgumentType.AtMostOnce, HelpText = "Weight for getting the score for the root cause item.", ShortName = "Beta", SortOrder = 2)] - public double Beta = RootCauseLocalizationEstimator.Defaults.Beta; - - } - - /// - /// The input and output column pairs passed to this . - /// - internal IReadOnlyCollection<(string outputColumnName, string inputColumnName)> Columns => ColumnPairs.AsReadOnly(); - - private readonly double _beta; - - /// - /// Localization root cause for multi-dimensional anomaly. - /// - /// The estimator's local . - /// Weight for generating score. 
- /// The name of the columns (first item of the tuple), and the name of the resulting output column (second item of the tuple). - - internal RootCauseLocalizationTransformer(IHostEnvironment env, double beta = RootCauseLocalizationEstimator.Defaults.Beta, params (string outputColumnName, string inputColumnName)[] columns) - : base(Contracts.CheckRef(env, nameof(env)).Register(RegistrationName), columns) - { - Host.CheckUserArg(beta >= 0 && beta <= 1, nameof(Options.Beta), "Must be in [0,1]"); - - _beta = beta; - } - - // Factory method for SignatureLoadModel. - private static RootCauseLocalizationTransformer Create(IHostEnvironment env, ModelLoadContext ctx) - { - Contracts.CheckValue(env, nameof(env)); - var host = env.Register(RegistrationName); - host.CheckValue(ctx, nameof(ctx)); - ctx.CheckAtModel(GetVersionInfo()); - return new RootCauseLocalizationTransformer(host, ctx); - } - - private RootCauseLocalizationTransformer(IHost host, ModelLoadContext ctx) - : base(host, ctx) - { - // *** Binary format *** - // - // double: beta - _beta = ctx.Reader.ReadDouble(); - Host.CheckDecode(_beta >= 0 && _beta <= 1); - } - - // Factory method for SignatureLoadDataTransform. - private static IDataTransform Create(IHostEnvironment env, ModelLoadContext ctx, IDataView input) - => Create(env, ctx).MakeDataTransform(input); - - // Factory method for SignatureLoadRowMapper. 
- private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, DataViewSchema inputSchema) - => Create(env, ctx).MakeRowMapper(inputSchema); - - private protected override void SaveModel(ModelSaveContext ctx) - { - Host.CheckValue(ctx, nameof(ctx)); - - ctx.CheckAtModel(); - ctx.SetVersionInfo(GetVersionInfo()); - - // *** Binary format *** - // - base.SaveColumns(ctx); - // double: beta - ctx.Writer.Write(_beta); - } - - private protected override IRowMapper MakeRowMapper(DataViewSchema schema) => new Mapper(this, schema); - - private protected override void CheckInputColumn(DataViewSchema inputSchema, int col, int srcCol) - { - if (!(inputSchema[srcCol].Type is RootCauseLocalizationInputDataViewType)) - throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", ColumnPairs[col].inputColumnName, "RootCauseLocalizationInputDataViewType", inputSchema[srcCol].Type.ToString()); - } - - private sealed class Mapper : OneToOneMapperBase - { - private readonly RootCauseLocalizationTransformer _parent; - - public Mapper(RootCauseLocalizationTransformer parent, DataViewSchema inputSchema) - : base(parent.Host.Register(nameof(Mapper)), parent, inputSchema) - { - _parent = parent; - } - - protected override DataViewSchema.DetachedColumn[] GetOutputColumnsCore() - { - var result = new DataViewSchema.DetachedColumn[_parent.ColumnPairs.Length]; - for (int i = 0; i < _parent.ColumnPairs.Length; i++) - { - InputSchema.TryGetColumnIndex(_parent.ColumnPairs[i].inputColumnName, out int colIndex); - Host.Assert(colIndex >= 0); - - DataViewType type; - type = new RootCauseDataViewType(); - - result[i] = new DataViewSchema.DetachedColumn(_parent.ColumnPairs[i].outputColumnName, type, null); - } - return result; - } - - protected override Delegate MakeGetter(DataViewRow input, int iinfo, Func activeOutput, out Action disposer) - { - Contracts.AssertValue(input); - Contracts.Assert(0 <= iinfo && iinfo < _parent.ColumnPairs.Length); - - var src = 
default(RootCauseLocalizationInput); - var getSrc = input.GetGetter(input.Schema[ColMapNewToOld[iinfo]]); - - disposer = null; - - ValueGetter del = - (ref RootCause dst) => - { - getSrc(ref src); - if (src == null) - return; - - CheckRootCauseInput(src, Host); - - LocalizeRootCauses(src, ref dst); - }; - - return del; - } - - private void CheckRootCauseInput(RootCauseLocalizationInput src, IHost host) - { - if (src.Slices.Count < 1) - { - throw host.Except($"Length of Slices must be larger than 0"); - } - - bool containsAnomalyTimestamp = false; - foreach (MetricSlice slice in src.Slices) - { - if (slice.TimeStamp.Equals(src.AnomalyTimestamp)) - { - containsAnomalyTimestamp = true; - } - } - if (!containsAnomalyTimestamp) - { - throw host.Except($"Has no points in the given anomaly timestamp"); - } - } - - private void LocalizeRootCauses(RootCauseLocalizationInput src, ref RootCause dst) - { - RootCauseAnalyzer analyzer = new RootCauseAnalyzer(src, _parent._beta); - dst = analyzer.Analyze(); - } - } - } - - /// - /// for the . - /// - /// - /// | - /// | Output column data type | | - /// | Exportable to ONNX | No | - /// - /// [!include[io](~/../docs/samples/docs/api-reference/time-series-root-cause-localization.md)] - /// - /// The resulting creates a new column, named as specified in the output column name parameters, and - /// localize the root causes which contribute most to the anomaly. - /// Check the See Also section for links to usage examples. - /// ]]> - /// - /// - /// - public sealed class RootCauseLocalizationEstimator : TrivialEstimator - { - internal static class Defaults - { - public const double Beta = 0.5; - } - - /// - /// Localize root cause. - /// - /// The estimator's local . - /// Name of output column to run the root cause localization. - /// Name of input column to run the root cause localization. - /// The weight for generating score in output result. 
- [BestFriend] - internal RootCauseLocalizationEstimator(IHostEnvironment env, string outputColumnName, string inputColumnName, double beta = Defaults.Beta) - : base(Contracts.CheckRef(env, nameof(env)).Register(nameof(RootCauseLocalizationEstimator)), new RootCauseLocalizationTransformer(env, beta, new[] { (outputColumnName, inputColumnName ?? outputColumnName) })) - { - } - - /// - /// Returns the of the schema which will be produced by the transformer. - /// Used for schema propagation and verification in a pipeline. - /// - public override SchemaShape GetOutputSchema(SchemaShape inputSchema) - { - Host.CheckValue(inputSchema, nameof(inputSchema)); - var result = inputSchema.ToDictionary(x => x.Name); - foreach (var colInfo in Transformer.Columns) - { - if (!inputSchema.TryFindColumn(colInfo.inputColumnName, out var col)) - throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.inputColumnName); - if (!(col.ItemType is RootCauseLocalizationInputDataViewType) || col.Kind != SchemaShape.Column.VectorKind.Scalar) - throw Host.ExceptSchemaMismatch(nameof(inputSchema), "input", colInfo.inputColumnName, new RootCauseLocalizationInputDataViewType().ToString(), col.GetTypeString()); - - result[colInfo.outputColumnName] = new SchemaShape.Column(colInfo.outputColumnName, SchemaShape.Column.VectorKind.Scalar, new RootCauseDataViewType(), false); - } - - return new SchemaShape(result.Values); - } - } -} diff --git a/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs index 78a48e5d57..c165ac3131 100644 --- a/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs @@ -105,9 +105,9 @@ public sealed class RootCauseLocalizationInput //The aggregated symbol in the AnomalyDimension and point dimension should be consistent public AggregateType AggType { get; set; } - public string AggSymbol { get; set; } + public Object AggSymbol { get; 
set; } - public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimension, List slices, AggregateType aggregateType, string aggregateSymbol) + public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimension, List slices, AggregateType aggregateType, Object aggregateSymbol) { AnomalyTimestamp = anomalyTimestamp; AnomalyDimension = anomalyDimension; @@ -181,11 +181,11 @@ public override string ToString() public enum AggregateType { /// - /// Make the aggregate type as sum. + /// Make the aggregate type as unknown type. /// Unknown = 0, /// - /// Make the aggregate type as sum. + /// Make the aggregate type as summation. /// Sum = 1, /// @@ -199,11 +199,7 @@ public enum AggregateType /// /// Make the aggregate type as max. /// - Max = 4, - /// - /// Make the aggregate type as count. - /// - Count = 5 + Max = 4 } public enum AnomalyDirection @@ -215,7 +211,11 @@ public enum AnomalyDirection /// /// the value is lower than expected value. /// - Down = 1 + Down = 1, + /// + /// the value is the same as expected value. 
+ /// + Same = 2 } public sealed class RootCauseItem : IEquatable diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 1e3817350b..f605af9244 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -93,34 +93,8 @@ private sealed class SrCnnAnomalyDetection public double[] Prediction { get; set; } } - private static string _aggSymbol = "##SUM##"; + private static Object _rootCauseAggSymbol = "##SUM##"; - private class RootCauseLocalizationData - { - [RootCauseLocalizationInputType] - public RootCauseLocalizationInput Input { get; set; } - - public RootCauseLocalizationData() - { - Input = null; - } - - public RootCauseLocalizationData(DateTime anomalyTimestamp, Dictionary anomalyDimensions, List slices, AggregateType aggregateteType, string aggregateSymbol) - { - Input = new RootCauseLocalizationInput(anomalyTimestamp, anomalyDimensions, slices, aggregateteType, aggregateSymbol); - } - } - - private class RootCauseLocalizationTransformedData - { - [RootCauseType()] - public RootCause RootCause { get; set; } - - public RootCauseLocalizationTransformedData() - { - RootCause = null; - } - } [Fact] public void ChangeDetection() @@ -551,76 +525,28 @@ public void AnomalyDetectionWithSrCnn() [Fact] public void RootCauseLocalization() { - // Create an root cause localizatiom input list. 
- var rootCauseLocalizationData = new List() { new RootCauseLocalizationData(new DateTime(), new Dictionary(), new List() { new MetricSlice(new DateTime(), new List()) }, AggregateType.Sum, _aggSymbol) }; + // Create an root cause localizatiom input + var rootCauseLocalizationInput = new RootCauseLocalizationInput(GetRootCauseTimestamp(), GetRootCauseAnomalyDimension(), new List() { new MetricSlice(GetRootCauseTimestamp(), GetRootCauseLocalizationPoints()) }, AggregateType.Sum, _rootCauseAggSymbol); var ml = new MLContext(1); - // Convert the list of root cause data to an IDataView object, which is consumable by ML.NET API. - var data = ml.Data.LoadFromEnumerable(rootCauseLocalizationData); - - // Create pipeline to localize root cause by decision tree. - var pipeline = ml.Transforms.LocalizeRootCause(nameof(RootCauseLocalizationTransformedData.RootCause), nameof(RootCauseLocalizationData.Input)); - - // Fit the model. - var model = pipeline.Fit(data); + RootCause rootCause = ml.AnomalyDetection.LocalizeRootCause(rootCauseLocalizationInput); - // Test path: input list -> IDataView -> Enumerable of RootCauseLocalizationInputs. - var transformedData = model.Transform(data); - - // Load input list in DataView back to Enumerable. 
- var transformedDataPoints = ml.Data.CreateEnumerable(transformedData, false); - - foreach (var dataPoint in transformedDataPoints) - { - var rootCause = dataPoint.RootCause; - Assert.NotNull(rootCause); - } - - var engine = ml.Model.CreatePredictionEngine(model); - - DateTime timeStamp = GetCurrentTimestamp(); - var newRootCauseInput = new RootCauseLocalizationData(timeStamp, GetAnomalyDimension(), new List() { new MetricSlice(timeStamp, GetRootCauseLocalizationPoints()) }, AggregateType.Sum, _aggSymbol); - var transformedRootCause = engine.Predict(newRootCauseInput); - - Assert.NotNull(transformedRootCause); - Assert.Equal(1, (int)transformedRootCause.RootCause.Items.Count); + Assert.NotNull(rootCause); + Assert.Equal(1, (int)rootCause.Items.Count); + Assert.Equal(3, (int)rootCause.Items[0].Dimension.Count); + Assert.Equal(AnomalyDirection.Up, rootCause.Items[0].Direction); + Assert.Equal(1, (int)rootCause.Items[0].Path.Count); + Assert.Equal("DataCenter", rootCause.Items[0].Path[0]); Dictionary expectedDim = new Dictionary(); expectedDim.Add("Country", "UK"); - expectedDim.Add("DeviceType", _aggSymbol); + expectedDim.Add("DeviceType", _rootCauseAggSymbol); expectedDim.Add("DataCenter", "DC1"); - foreach (KeyValuePair pair in transformedRootCause.RootCause.Items[0].Dimension) + foreach (KeyValuePair pair in rootCause.Items[0].Dimension) { Assert.Equal(expectedDim[pair.Key], pair.Value); } - - var dummyData = ml.Data.LoadFromEnumerable(new List() { "Test"}); - - //Create path - var modelPath = "RootCauseLocalizationModel.zip"; - //Save model to a file - ml.Model.Save(model, dummyData.Schema, modelPath); - - //Load model from a file - ITransformer serializedModel; - using (var file = File.OpenRead(modelPath)) - { - serializedModel = ml.Model.Load(file, out var serializedSchema); - TestCommon.CheckSameSchemas(dummyData.Schema, serializedSchema); - - var serializedEngine = ml.Model.CreatePredictionEngine(serializedModel); - var returnedRootCause = 
serializedEngine.Predict(newRootCauseInput); - - Assert.NotNull(returnedRootCause); - Assert.Equal(1, (int)returnedRootCause.RootCause.Items.Count); - - foreach (KeyValuePair pair in returnedRootCause.RootCause.Items[0].Dimension) - { - Assert.Equal(expectedDim[pair.Key], pair.Value); - } - DeleteOutputPath(modelPath); - } } private static List GetRootCauseLocalizationPoints() @@ -641,7 +567,7 @@ private static List GetRootCauseLocalizationPoints() Dictionary dic3 = new Dictionary(); dic3.Add("Country", "UK"); - dic3.Add("DeviceType", _aggSymbol); + dic3.Add("DeviceType", _rootCauseAggSymbol); dic3.Add("DataCenter", "DC1"); points.Add(new Point(1200, 200, true, dic3)); @@ -659,42 +585,42 @@ private static List GetRootCauseLocalizationPoints() Dictionary dic6 = new Dictionary(); dic6.Add("Country", "UK"); - dic6.Add("DeviceType", _aggSymbol); + dic6.Add("DeviceType", _rootCauseAggSymbol); dic6.Add("DataCenter", "DC2"); points.Add(new Point(300, 300, false, dic6)); Dictionary dic7 = new Dictionary(); dic7.Add("Country", "UK"); - dic7.Add("DeviceType", _aggSymbol); - dic7.Add("DataCenter", _aggSymbol); + dic7.Add("DeviceType", _rootCauseAggSymbol); + dic7.Add("DataCenter", _rootCauseAggSymbol); points.Add(new Point(1500, 500, true, dic7)); Dictionary dic8 = new Dictionary(); dic8.Add("Country", "UK"); dic8.Add("DeviceType", "Laptop"); - dic8.Add("DataCenter", _aggSymbol); + dic8.Add("DataCenter", _rootCauseAggSymbol); points.Add(new Point(300, 200, true, dic8)); Dictionary dic9 = new Dictionary(); dic9.Add("Country", "UK"); dic9.Add("DeviceType", "Mobile"); - dic9.Add("DataCenter", _aggSymbol); + dic9.Add("DataCenter", _rootCauseAggSymbol); points.Add(new Point(1200, 300, true, dic9)); return points; } - private static Dictionary GetAnomalyDimension() + private static Dictionary GetRootCauseAnomalyDimension() { Dictionary dim = new Dictionary(); dim.Add("Country", "UK"); - dim.Add("DeviceType", _aggSymbol); - dim.Add("DataCenter", _aggSymbol); + dim.Add("DeviceType", 
_rootCauseAggSymbol); + dim.Add("DataCenter", _rootCauseAggSymbol); return dim; } - private static DateTime GetCurrentTimestamp() + private static DateTime GetRootCauseTimestamp() { return new DateTime(2020, 3, 23, 0, 0, 0); } From 9893fadd44e7c5ebd7ad714a7e2b467b36ba15b0 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Thu, 7 May 2020 16:45:27 +0800 Subject: [PATCH 44/49] add more documents --- .../time-series-root-cause-surprise-score.md | 6 ++++++ .../RootCauseAnalyzer.cs | 21 ++++++++++++++----- .../RootCauseLocalizationType.cs | 3 +++ 3 files changed, 25 insertions(+), 5 deletions(-) create mode 100644 docs/api-reference/time-series-root-cause-surprise-score.md diff --git a/docs/api-reference/time-series-root-cause-surprise-score.md b/docs/api-reference/time-series-root-cause-surprise-score.md new file mode 100644 index 0000000000..b3b5ccb339 --- /dev/null +++ b/docs/api-reference/time-series-root-cause-surprise-score.md @@ -0,0 +1,6 @@ +Surprise score is used to capture the relative change for the root cause item. +$$S_i(m) = 0.5( p_i\log_2(\frac{2p_i} {p_i+q_i}) + q_i \log_2(\frac{2q_i}{p_i+q_i}) )$$ +$$p_i(m)= \frac{F_i(m)} {F(m)} $$ +$$q_i(m)= \frac{A_i(m)} {A(m)} $$ +where $F_i$ is the forecasted value for root cause item $i$, $A_i$ is the actual value for root cause item $i$, $F$ is the forecasted value for the anomaly point and $A$ is the actual value for the anomaly point. 
+For details of the surprise score, refer to [this document](https://www.usenix.org/system/files/conference/nsdi14/nsdi14-paper-bhagwan.pdf) diff --git a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs index 7701d3aeb5..6dec5eef90 100644 --- a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs @@ -344,7 +344,7 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap Point rootCausePoint = GetPointByDimension(dimPointMapping, item.Dimension, pointTree, aggType, aggSymbol); if (anomalyPoint != null && rootCausePoint != null) { - Tuple scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); + Tuple scores = GetSupriseAndExplanatoryScore(rootCausePoint, anomalyPoint); scoreList.Add(new RootCauseScore(scores.Item1, scores.Item2)); item.Direction = GetRootCauseDirection(rootCausePoint); } @@ -367,7 +367,7 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap Point rootCausePoint = GetPointByDimension(dimPointMapping, dst.Items[0].Dimension, pointTree, aggType, aggSymbol); if (anomalyPoint != null && rootCausePoint != null) { - Tuple scores = GetSupriseAndExplainaryScore(rootCausePoint, anomalyPoint); + Tuple scores = GetSupriseAndExplanatoryScore(rootCausePoint, anomalyPoint); if (aggType.Equals(AggregateType.Max) || aggType.Equals(AggregateType.Min)) { dst.Items[0].Score = 1; @@ -496,6 +496,17 @@ private BestDimension FindBestDimension(SortedDictionary return best; } + /// + /// Calculate the surprise score according to root cause point and anomaly point + /// + /// A point which has been detected as root cause + /// The anomaly point + /// + /// + /// [!include[io](~/../docs/samples/docs/api-reference/time-series-root-cause-surprise-score.md)] + /// + /// + /// Surprise score private double GetSurpriseScore(Point rootCausePoint, Point anomalyPoint) { double p; @@ -558,7 +569,7 @@ private double GetFinalScore(double surprise, 
double ep, double beta) return beta * a + (1 - beta) * b; } - private Tuple GetSupriseAndExplainaryScore(Point rootCausePoint, Point anomalyPoint) + private Tuple GetSupriseAndExplanatoryScore(Point rootCausePoint, Point anomalyPoint) { double surprise = GetSurpriseScore(rootCausePoint, anomalyPoint); @@ -649,7 +660,7 @@ private void UpdateDistribution(Dictionary distribution, List bigDic, Dictionary smallDic) + private static bool ContainsAll(Dictionary bigDic, Dictionary smallDic) { foreach (var item in smallDic) { diff --git a/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs index c165ac3131..8e7de7d1aa 100644 --- a/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs @@ -220,10 +220,13 @@ public enum AnomalyDirection public sealed class RootCauseItem : IEquatable { + //The score is a value to evaluate the contribution to the anomaly incident. The range is between [0,1]. The larger the score, the root cause contributes the most to the anomaly. The parameter beta has an influence on this score. For how the score is calculated, you can refer to the source code. public double Score; //Path is a list of the dimension key that the libary selected for you. In this root cause localization library, for one time call for the library, the path will be obtained and the length of path list will always be 1. Different RootCauseItem obtained from one library call will have the same path as it is the best dimension selected for the input. 
public List Path; + //The dimension for the detected root cause point public Dictionary Dimension; + //The direction for the detected root cause point public AnomalyDirection Direction; public RootCauseItem(Dictionary rootCause) From c831e439710bc401bac287221a7081a685e6fac9 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Fri, 8 May 2020 21:12:21 +0800 Subject: [PATCH 45/49] update --- .../TimeSeries/LocalizeRootCause.cs | 4 +- .../ExtensionsCatalog.cs | 16 +-- .../RootCauseAnalyzer.cs | 121 ++++++++---------- 3 files changed, 65 insertions(+), 76 deletions(-) diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs index 572f9a58be..3879159491 100644 --- a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/LocalizeRootCause.cs @@ -14,11 +14,11 @@ public static void Example() // exception tracking and logging, as well as the source of randomness. var mlContext = new MLContext(); - // Create an root cause localizatin input instance. + // Create an root cause localization input instance. DateTime timestamp = GetTimestamp(); var data = new RootCauseLocalizationInput(timestamp, GetAnomalyDimension(), new List() { new MetricSlice(timestamp, GetPoints()) }, AggregateType.Sum, AGG_SYMBOL); - // Get the root cause localization result + // Get the root cause localization result. RootCause prediction = mlContext.AnomalyDetection.LocalizeRootCause(data); // Print the localization result. 
diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index 3bcb695bea..da711f0ea6 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -5,6 +5,7 @@ using System; using System.Reflection; using Microsoft.ML.Data; +using Microsoft.ML.Runtime; using Microsoft.ML.TimeSeries; using Microsoft.ML.Transforms.TimeSeries; @@ -164,12 +165,14 @@ public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog /// public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog, RootCauseLocalizationInput src, double beta = 0.5) { + IHostEnvironment host = CatalogUtils.GetEnvironment(catalog); + //check the root cause input - CheckRootCauseInput(src); + CheckRootCauseInput(host, src); //check beta if (beta < 0 || beta > 1) { - throw new ArgumentException("Beta must be in [0,1]"); + host.CheckUserArg(beta >= 0 && beta <= 1, nameof(beta), "Must be in [0,1]"); } //find out the root cause @@ -178,11 +181,11 @@ public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog, return dst; } - private static void CheckRootCauseInput(RootCauseLocalizationInput src) + private static void CheckRootCauseInput(IHostEnvironment host, RootCauseLocalizationInput src) { if (src.Slices.Count < 1) { - throw new ArgumentException("Length of Slices must be larger than 0"); + host.CheckUserArg(src.Slices.Count > 1 , nameof(src.Slices), "Must has more than one item"); } bool containsAnomalyTimestamp = false; @@ -193,10 +196,7 @@ private static void CheckRootCauseInput(RootCauseLocalizationInput src) containsAnomalyTimestamp = true; } } - if (!containsAnomalyTimestamp) - { - throw new ArgumentException("Has no points in the given anomaly timestamp"); - } + host.CheckUserArg(containsAnomalyTimestamp, nameof(src.Slices), "Has no points in the given anomaly timestamp"); } /// diff --git a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs 
b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs index 6dec5eef90..6672bd4b55 100644 --- a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs @@ -5,6 +5,7 @@ using System; using System.Collections.Generic; using System.Linq; +using System.Runtime.CompilerServices; using Microsoft.ML.Internal.Utilities; namespace Microsoft.ML.TimeSeries @@ -29,7 +30,7 @@ public RootCause Analyze() return AnalyzeOneLayer(_src); } - //This is a function for analyze one layer for root cause, we select one dimension with values who contributes the most to the anomaly. For full result, call this function recursively + //This is a function for analyze one layer for root cause, we select one dimension with values who contributes the most to the anomaly. private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src) { RootCause dst = new RootCause(); @@ -42,11 +43,7 @@ private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src) Dictionary dimPointMapping = pointInfo.Item3; //which means there is no anomaly point with the anomaly dimension or no point under anomaly dimension - if (anomalyTree.ParentNode == null) - { - return dst; - } - if (dimPointMapping.Count == 0) + if (anomalyTree.ParentNode == null || dimPointMapping.Count == 0) { return dst; } @@ -59,16 +56,8 @@ private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src) protected List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src) { - List points = new List(); - foreach (MetricSlice slice in src.Slices) - { - if (slice.TimeStamp.Equals(src.AnomalyTimestamp)) - { - points = slice.Points; - } - } - - return points; + MetricSlice slice = src.Slices.Single(slice => slice.TimeStamp.Equals(src.AnomalyTimestamp)); + return slice.Points; } protected DimensionInfo SeperateDimension(Dictionary dimensions, Object aggSymbol) @@ -92,8 +81,8 @@ protected DimensionInfo SeperateDimension(Dictionary dimensions, protected Tuple> 
GetPointsInfo(RootCauseLocalizationInput src, DimensionInfo dimensionInfo) { - PointTree pointTree = PointTree.CreateDefaultInstance(); - PointTree anomalyTree = PointTree.CreateDefaultInstance(); + PointTree pointTree = new PointTree(); + PointTree anomalyTree = new PointTree(); Dictionary dimPointMapping = new Dictionary(); List totalPoints = GetTotalPointsForAnomalyTimestamp(src); @@ -125,12 +114,8 @@ protected Tuple> GetPointsInfo(R protected Dictionary GetSubDim(Dictionary dimension, List keyList) { - Dictionary subDim = new Dictionary(); - foreach (string dim in keyList) - { - subDim.Add(dim, dimension[dim]); - } - return subDim; + return new Dictionary(keyList.Select(dim => new KeyValuePair(dim, dimension[dim])).ToDictionary(kvp => kvp.Key, kvp => kvp.Value)); + } protected List LocalizeRootCauseByDimension(PointTree anomalyTree, PointTree pointTree, Dictionary anomalyDimension, List aggDims) @@ -203,7 +188,8 @@ protected List GetTopAnomaly(List anomalyPoints, Point root, List< { anomalyPoints.Reverse(); } - else { + else + { anomalyPoints = anomalyPoints.FindAll(x => x.Delta < 0); } if (anomalyPoints.Count == 1) @@ -245,7 +231,7 @@ protected BestDimension SelectBestDimension(List totalPoints, List foreach (string dimKey in aggDim) { - BestDimension dimension = BestDimension.CreateDefaultInstance(); + BestDimension dimension = new BestDimension(); dimension.DimensionKey = dimKey; UpdateDistribution(dimension.PointDis, totalPoints, dimKey); @@ -253,13 +239,15 @@ protected BestDimension SelectBestDimension(List totalPoints, List double relativeEntropy = GetDimensionEntropy(dimension.PointDis, dimension.AnomalyDis); double gain = totalEntropy - relativeEntropy; - if (Double.IsNaN(gain)) { + if (Double.IsNaN(gain)) + { gain = 0; } entroyGainMap.Add(dimension, gain); double gainRatio = gain / GetDimensionInstrinsicValue(dimension.PointDis); - if (Double.IsInfinity(gainRatio)) { + if (Double.IsInfinity(gainRatio)) + { gainRatio = 0; } 
entroyGainRatioMap.Add(dimension, gainRatio); @@ -282,7 +270,7 @@ private BestDimension SelectBestDimension(Dictionary> pointC foreach (string dimKey in aggDim) { - BestDimension dimension = BestDimension.CreateDefaultInstance(); + BestDimension dimension = new BestDimension(); dimension.DimensionKey = dimKey; if (pointChildren.ContainsKey(dimKey)) @@ -295,14 +283,16 @@ private BestDimension SelectBestDimension(Dictionary> pointC } double entropy = GetEntropy(dimension.PointDis.Count, dimension.AnomalyDis.Count); - if (Double.IsNaN(entropy)) { + if (Double.IsNaN(entropy)) + { entropy = Double.MaxValue; } entropyMap.Add(dimension, entropy); double gainRatio = entropy / GetDimensionInstrinsicValue(dimension.PointDis); - if (Double.IsInfinity(gainRatio)) { + if (Double.IsInfinity(gainRatio)) + { gainRatio = 0; } entropyRatioMap.Add(dimension, gainRatio); @@ -326,7 +316,8 @@ private AnomalyDirection GetRootCauseDirection(Point rootCausePoint) { return AnomalyDirection.Down; } - else { + else + { return AnomalyDirection.Same; } } @@ -357,7 +348,8 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap { dst.Items[i].Score = 1; } - else { + else + { dst.Items[i].Score = GetFinalScore(scoreList[i].Surprise, Math.Abs(scoreList[i].ExplainaryScore), beta); } } @@ -372,7 +364,8 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap { dst.Items[0].Score = 1; } - else { + else + { dst.Items[0].Score = GetFinalScore(scores.Item1, scores.Item2, beta); } dst.Items[0].Direction = GetRootCauseDirection(rootCausePoint); @@ -469,25 +462,33 @@ private BestDimension FindBestDimension(SortedDictionary { if (dimension.Key.AnomalyDis.Count == 1 || (isLeavesLevel ? dimension.Value >= meanGain : dimension.Value <= meanGain)) { - if (dimension.Key.AnomalyDis.Count > 1) + if (best == null) { - if (best == null || (!Double.IsNaN(valueRatioMap[best]) && (best.AnomalyDis.Count != 1 && (isLeavesLevel ? 
valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0)))) - { - best = dimension.Key; - } + best = dimension.Key; } else { - if (best == null || best.AnomalyDis.Count > 1) + bool isRatioNan = Double.IsNaN(valueRatioMap[best]); + if (dimension.Key.AnomalyDis.Count > 1) { - best = dimension.Key; + if (!isRatioNan && (best.AnomalyDis.Count != 1 && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0))) + { + best = dimension.Key; + } } else { - if (!Double.IsNaN(valueRatioMap[best]) && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0)) + if (best.AnomalyDis.Count > 1) { best = dimension.Key; } + else + { + if (!isRatioNan && (isLeavesLevel ? valueRatioMap[best].CompareTo(dimension.Value) <= 0 : valueRatioMap[best].CompareTo(dimension.Value) >= 0)) + { + best = dimension.Key; + } + } } } } @@ -660,26 +661,19 @@ private void UpdateDistribution(Dictionary distribution, List Double.IsNaN(val) ? 
0 : Math.Log(val) / Math.Log(2); - return Math.Log(val) / Math.Log(2); - } - - private static bool ContainsAll(Dictionary bigDic, Dictionary smallDic) + private static bool ContainsAll(Dictionary bigDictionary, Dictionary smallDictionary) { - foreach (var item in smallDic) + foreach (var item in smallDictionary) { - if (!bigDic.ContainsKey(item.Key)) + if (!bigDictionary.ContainsKey(item.Key)) { return false; } - if (bigDic.ContainsKey(item.Key) && !bigDic[item.Key].Equals(smallDic[item.Key])) + if (bigDictionary.ContainsKey(item.Key) && !bigDictionary[item.Key].Equals(smallDictionary[item.Key])) { return false; } @@ -714,12 +708,10 @@ public class PointTree public Dictionary> ChildrenNodes; public List Leaves; - public static PointTree CreateDefaultInstance() + public PointTree() { - PointTree instance = new PointTree(); - instance.Leaves = new List(); - instance.ChildrenNodes = new Dictionary>(); - return instance; + Leaves = new List(); + ChildrenNodes = new Dictionary>(); } } @@ -729,13 +721,10 @@ public sealed class BestDimension : IComparable public Dictionary AnomalyDis; public Dictionary PointDis; - public BestDimension() { } - public static BestDimension CreateDefaultInstance() + public BestDimension() { - BestDimension instance = new BestDimension(); - instance.AnomalyDis = new Dictionary(); - instance.PointDis = new Dictionary(); - return instance; + AnomalyDis = new Dictionary(); + PointDis = new Dictionary(); } public int CompareTo(object obj) From 16f5b334da8455e5ace6fa17e245364168c72c76 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Sat, 9 May 2020 10:43:18 +0800 Subject: [PATCH 46/49] update --- src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs | 11 +++-------- src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs | 8 +------- 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index da711f0ea6..2cc2aa3b0a 100644 --- 
a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -171,22 +171,17 @@ public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog, CheckRootCauseInput(host, src); //check beta - if (beta < 0 || beta > 1) { - host.CheckUserArg(beta >= 0 && beta <= 1, nameof(beta), "Must be in [0,1]"); - } + host.CheckUserArg(beta >= 0 && beta <= 1, nameof(beta), "Must be in [0,1]"); //find out the root cause RootCauseAnalyzer analyzer = new RootCauseAnalyzer(src, beta); - RootCause dst = analyzer.Analyze(); + RootCause dst = analyzer.Analyze(); return dst; } private static void CheckRootCauseInput(IHostEnvironment host, RootCauseLocalizationInput src) { - if (src.Slices.Count < 1) - { - host.CheckUserArg(src.Slices.Count > 1 , nameof(src.Slices), "Must has more than one item"); - } + host.CheckUserArg(src.Slices.Count >= 1, nameof(src.Slices), "Must has more than one item"); bool containsAnomalyTimestamp = false; foreach (MetricSlice slice in src.Slices) diff --git a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs index 6672bd4b55..6395d4284d 100644 --- a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs @@ -115,7 +115,6 @@ protected Tuple> GetPointsInfo(R protected Dictionary GetSubDim(Dictionary dimension, List keyList) { return new Dictionary(keyList.Select(dim => new KeyValuePair(dim, dimension[dim])).ToDictionary(kvp => kvp.Key, kvp => kvp.Value)); - } protected List LocalizeRootCauseByDimension(PointTree anomalyTree, PointTree pointTree, Dictionary anomalyDimension, List aggDims) @@ -668,12 +667,7 @@ private static bool ContainsAll(Dictionary bigDictionary, Dictio { foreach (var item in smallDictionary) { - if (!bigDictionary.ContainsKey(item.Key)) - { - return false; - } - - if (bigDictionary.ContainsKey(item.Key) && !bigDictionary[item.Key].Equals(smallDictionary[item.Key])) + if 
(!bigDictionary.ContainsKey(item.Key) || !bigDictionary[item.Key].Equals(smallDictionary[item.Key])) { return false; } From 9cd8739cb6313b3fc8965d265a73710a16e7d4ef Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Sat, 9 May 2020 14:35:59 +0800 Subject: [PATCH 47/49] update the constructor --- .../RootCauseAnalyzer.cs | 46 +++++++++---------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs index 6395d4284d..64ae351f8f 100644 --- a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs @@ -54,15 +54,15 @@ private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src) return dst; } - protected List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src) + private List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src) { MetricSlice slice = src.Slices.Single(slice => slice.TimeStamp.Equals(src.AnomalyTimestamp)); return slice.Points; } - protected DimensionInfo SeperateDimension(Dictionary dimensions, Object aggSymbol) + private DimensionInfo SeperateDimension(Dictionary dimensions, Object aggSymbol) { - DimensionInfo info = DimensionInfo.CreateDefaultInstance(); + DimensionInfo info = new DimensionInfo(); foreach (KeyValuePair entry in dimensions) { string key = entry.Key; @@ -79,7 +79,7 @@ protected DimensionInfo SeperateDimension(Dictionary dimensions, return info; } - protected Tuple> GetPointsInfo(RootCauseLocalizationInput src, DimensionInfo dimensionInfo) + private Tuple> GetPointsInfo(RootCauseLocalizationInput src, DimensionInfo dimensionInfo) { PointTree pointTree = new PointTree(); PointTree anomalyTree = new PointTree(); @@ -117,7 +117,7 @@ protected Dictionary GetSubDim(Dictionary dimens return new Dictionary(keyList.Select(dim => new KeyValuePair(dim, dimension[dim])).ToDictionary(kvp => kvp.Key, kvp => kvp.Value)); } - protected List 
LocalizeRootCauseByDimension(PointTree anomalyTree, PointTree pointTree, Dictionary anomalyDimension, List aggDims) + private List LocalizeRootCauseByDimension(PointTree anomalyTree, PointTree pointTree, Dictionary anomalyDimension, List aggDims) { BestDimension best = null; if (anomalyTree.ChildrenNodes.Count == 0) @@ -682,25 +682,23 @@ private bool IsAggregationDimension(Object val, Object aggSymbol) } } - public class DimensionInfo + internal class DimensionInfo { - public List DetailDims { get; set; } - public List AggDims { get; set; } + internal List DetailDims { get; set; } + internal List AggDims { get; set; } - public static DimensionInfo CreateDefaultInstance() + public DimensionInfo() { - DimensionInfo instance = new DimensionInfo(); - instance.DetailDims = new List(); - instance.AggDims = new List(); - return instance; + DetailDims = new List(); + AggDims = new List(); } } - public class PointTree + internal class PointTree { - public Point ParentNode; - public Dictionary> ChildrenNodes; - public List Leaves; + internal Point ParentNode; + internal Dictionary> ChildrenNodes; + internal List Leaves; public PointTree() { @@ -709,11 +707,11 @@ public PointTree() } } - public sealed class BestDimension : IComparable + public class BestDimension : IComparable { - public string DimensionKey; - public Dictionary AnomalyDis; - public Dictionary PointDis; + internal string DimensionKey; + internal Dictionary AnomalyDis; + internal Dictionary PointDis; public BestDimension() { @@ -733,10 +731,10 @@ public int CompareTo(object obj) } } - public class RootCauseScore + internal class RootCauseScore { - public double Surprise; - public double ExplainaryScore; + internal double Surprise; + internal double ExplainaryScore; public RootCauseScore(double surprise, double explainaryScore) { From f80c200798401cbe8e87f40e6f458d0248564506 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Sat, 9 May 2020 15:12:11 +0800 Subject: [PATCH 48/49] update comments --- 
.../RootCauseAnalyzer.cs | 14 +- .../RootCauseLocalizationType.cs | 184 +++++------------- 2 files changed, 60 insertions(+), 138 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs index 64ae351f8f..906f899b75 100644 --- a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs @@ -30,7 +30,9 @@ public RootCause Analyze() return AnalyzeOneLayer(_src); } - //This is a function for analyze one layer for root cause, we select one dimension with values who contributes the most to the anomaly. + /// + /// This is a function for analyze one layer for root cause, we select one dimension with values who contributes the most to the anomaly. + /// private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src) { RootCause dst = new RootCause(); @@ -54,7 +56,7 @@ private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src) return dst; } - private List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src) + protected List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInput src) { MetricSlice slice = src.Slices.Single(slice => slice.TimeStamp.Equals(src.AnomalyTimestamp)); return slice.Points; @@ -220,7 +222,9 @@ protected List GetTopAnomaly(List anomalyPoints, Point root, List< return null; } - //Use leaves point information to select best dimension + /// + /// Use leaves point information to select best dimension + /// protected BestDimension SelectBestDimension(List totalPoints, List anomalyPoints, List aggDim) { double totalEntropy = GetEntropy(totalPoints.Count, anomalyPoints.Count); @@ -260,7 +264,9 @@ protected BestDimension SelectBestDimension(List totalPoints, List return best; } - //Use children point information to select best dimension + /// + /// Use children point information to select best dimension + /// private BestDimension SelectBestDimension(Dictionary> pointChildren, Dictionary> anomalyChildren, List aggDim) 
{ SortedDictionary entropyMap = new SortedDictionary(); diff --git a/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs b/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs index 8e7de7d1aa..97fb63a8bf 100644 --- a/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseLocalizationType.cs @@ -8,82 +8,11 @@ namespace Microsoft.ML.TimeSeries { - /// - /// Allows a member to be marked as a , primarily allowing one to set - /// root cause localization input. - /// - public sealed class RootCauseLocalizationInputTypeAttribute : DataViewTypeAttribute - { - /// - /// Create a root cause localizing input type. - /// - public RootCauseLocalizationInputTypeAttribute() - { - } - - /// - /// Equal function. - /// - public override bool Equals(DataViewTypeAttribute other) - { - if (!(other is RootCauseLocalizationInputTypeAttribute otherAttribute)) - return false; - return true; - } - - /// - /// Produce the same hash code for all RootCauseLocalizationInputTypeAttribute. - /// - public override int GetHashCode() - { - return 0; - } - - public override void Register() - { - DataViewTypeManager.Register(new RootCauseLocalizationInputDataViewType(), typeof(RootCauseLocalizationInput), this); - } - } - - /// - /// Allows a member to be marked as a , primarily allowing one to set - /// root cause result. - /// - public sealed class RootCauseTypeAttribute : DataViewTypeAttribute + public sealed class RootCause { /// - /// Create an root cause type. - /// - public RootCauseTypeAttribute() - { - } - - /// - /// RootCauseTypeAttribute with the same type should equal. + /// A List for root cause item. Instance of the item should be /// - public override bool Equals(DataViewTypeAttribute other) - { - if (other is RootCauseTypeAttribute otherAttribute) - return true; - return false; - } - - /// - /// Produce the same hash code for all RootCauseTypeAttribute. 
- /// - public override int GetHashCode() - { - return 0; - } - - public override void Register() - { - DataViewTypeManager.Register(new RootCauseDataViewType(), typeof(RootCause), this); - } - } - - public sealed class RootCause - { public List Items { get; set; } public RootCause() { @@ -93,18 +22,29 @@ public RootCause() public sealed class RootCauseLocalizationInput { - //When the anomaly incident occurs + /// + /// When the anomaly incident occurs + /// public DateTime AnomalyTimestamp { get; set; } - //Point with the anomaly dimension must exist in the slice list at the anomaly timestamp, or the libary will not calculate the root cause + /// + /// Point with the anomaly dimension must exist in the slice list at the anomaly timestamp, or the libary will not calculate the root cause + /// public Dictionary AnomalyDimension { get; set; } - //A list of points at different timestamp. If the slices don't contain point data corresponding to the anomaly timestamp, the root cause localization alogorithm will not calculate the root cause as no information at the anomaly timestamp is provided. + /// + /// A list of points at different timestamp. If the slices don't contain point data corresponding to the anomaly timestamp, the root cause localization alogorithm will not calculate the root cause as no information at the anomaly timestamp is provided. + /// public List Slices { get; set; } - //The aggregated symbol in the AnomalyDimension and point dimension should be consistent + /// + /// The aggregated type, the type should be + /// public AggregateType AggType { get; set; } + /// + /// The string you defined as a aggregated symbol in the AnomalyDimension and point dimension. 
+ /// public Object AggSymbol { get; set; } public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary anomalyDimension, List slices, AggregateType aggregateType, Object aggregateSymbol) @@ -126,58 +66,6 @@ public RootCauseLocalizationInput(DateTime anomalyTimestamp, Dictionary @@ -220,13 +108,21 @@ public enum AnomalyDirection public sealed class RootCauseItem : IEquatable { - //The score is a value to evaluate the contribution to the anomaly incident. The range is between [0,1]. The larger the score, the root cause contributes the most to the anomaly. The parameter beta has an influence on this score. For how the score is calculated, you can refer to the source code. + /// + ///The score is a value to evaluate the contribution to the anomaly incident. The range is between [0,1]. The larger the score, the root cause contributes the most to the anomaly. The parameter beta has an influence on this score. For how the score is calculated, you can refer to the source code. + /// public double Score; - //Path is a list of the dimension key that the libary selected for you. In this root cause localization library, for one time call for the library, the path will be obtained and the length of path list will always be 1. Different RootCauseItem obtained from one library call will have the same path as it is the best dimension selected for the input. + /// + /// Path is a list of the dimension key that the libary selected for you. In this root cause localization library, for one time call for the library, the path will be obtained and the length of path list will always be 1. Different RootCauseItem obtained from one library call will have the same path as it is the best dimension selected for the input. 
+ /// public List Path; - //The dimension for the detected root cause point + /// + /// The dimension for the detected root cause point + /// public Dictionary Dimension; - //The direction for the detected root cause point + /// + /// The direction for the detected root cause point, should be + /// public AnomalyDirection Direction; public RootCauseItem(Dictionary rootCause) @@ -259,7 +155,13 @@ public bool Equals(RootCauseItem other) public sealed class MetricSlice { + /// + /// Timestamp for the point list + /// public DateTime TimeStamp { get; set; } + /// + /// A list of points + /// public List Points { get; set; } public MetricSlice(DateTime timeStamp, List points) @@ -271,11 +173,25 @@ public MetricSlice(DateTime timeStamp, List points) public sealed class Point : IEquatable { + /// + /// Value of a time series point + /// public double Value { get; set; } + /// + /// Forecasted value for the time series point + /// public double ExpectedValue { get; set; } + /// + /// Whether the point is an anomaly point + /// public bool IsAnomaly { get; set; } - //The value for this dictionary is an object, when the Dimension is used, the equals function for the Object will be used. If you have a customized class, you need to define the Equals function. + /// + /// Dimension information for the point. For example, City = New York City, Dataceter = DC1. The value for this dictionary is an object, when the Dimension is used, the equals function for the Object will be used. If you have a customized class, you need to define the Equals function. 
+ /// public Dictionary Dimension { get; set; } + /// + /// Difference between value and expected value + /// public double Delta { get; set; } public Point(Dictionary dimension) From 7c1c348cd51b9f7c980994909dc1df31232b2280 Mon Sep 17 00:00:00 2001 From: suxi-ms Date: Mon, 11 May 2020 20:45:53 +0800 Subject: [PATCH 49/49] fix typo --- .../RootCauseAnalyzer.cs | 38 +++++++++---------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs index 906f899b75..9eaaeb630d 100644 --- a/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs +++ b/src/Microsoft.ML.TimeSeries/RootCauseAnalyzer.cs @@ -38,7 +38,7 @@ private RootCause AnalyzeOneLayer(RootCauseLocalizationInput src) RootCause dst = new RootCause(); dst.Items = new List(); - DimensionInfo dimensionInfo = SeperateDimension(src.AnomalyDimension, src.AggSymbol); + DimensionInfo dimensionInfo = SeparateDimension(src.AnomalyDimension, src.AggSymbol); Tuple> pointInfo = GetPointsInfo(src, dimensionInfo); PointTree pointTree = pointInfo.Item1; PointTree anomalyTree = pointInfo.Item2; @@ -62,7 +62,7 @@ protected List GetTotalPointsForAnomalyTimestamp(RootCauseLocalizationInp return slice.Points; } - private DimensionInfo SeperateDimension(Dictionary dimensions, Object aggSymbol) + private DimensionInfo SeparateDimension(Dictionary dimensions, Object aggSymbol) { DimensionInfo info = new DimensionInfo(); foreach (KeyValuePair entry in dimensions) @@ -214,7 +214,7 @@ protected List GetTopAnomaly(List anomalyPoints, Point root, List< } int pointSize = isLeaveslevel ? 
pointDistribution.Count : GetTotalNumber(pointDistribution); - if (ShouldSeperateAnomaly(delta, root.Delta, pointSize, causeList.Count)) + if (ShouldSeparateAnomaly(delta, root.Delta, pointSize, causeList.Count)) { return causeList; } @@ -248,7 +248,7 @@ protected BestDimension SelectBestDimension(List totalPoints, List } entroyGainMap.Add(dimension, gain); - double gainRatio = gain / GetDimensionInstrinsicValue(dimension.PointDis); + double gainRatio = gain / GetDimensionIntrinsicValue(dimension.PointDis); if (Double.IsInfinity(gainRatio)) { gainRatio = 0; @@ -294,7 +294,7 @@ private BestDimension SelectBestDimension(Dictionary> pointC } entropyMap.Add(dimension, entropy); - double gainRatio = entropy / GetDimensionInstrinsicValue(dimension.PointDis); + double gainRatio = entropy / GetDimensionIntrinsicValue(dimension.PointDis); if (Double.IsInfinity(gainRatio)) { @@ -332,7 +332,7 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap Point anomalyPoint = GetPointByDimension(dimPointMapping, anomalyRoot, pointTree, aggType, aggSymbol); if (dst.Items.Count > 1) { - //get surprise value and explanary power value + //get surprise value and explanatory power value List scoreList = new List(); foreach (RootCauseItem item in dst.Items) @@ -340,7 +340,7 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap Point rootCausePoint = GetPointByDimension(dimPointMapping, item.Dimension, pointTree, aggType, aggSymbol); if (anomalyPoint != null && rootCausePoint != null) { - Tuple scores = GetSupriseAndExplanatoryScore(rootCausePoint, anomalyPoint); + Tuple scores = GetSurpriseAndExplanatoryScore(rootCausePoint, anomalyPoint); scoreList.Add(new RootCauseScore(scores.Item1, scores.Item2)); item.Direction = GetRootCauseDirection(rootCausePoint); } @@ -355,7 +355,7 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap } else { - dst.Items[i].Score = GetFinalScore(scoreList[i].Surprise, Math.Abs(scoreList[i].ExplainaryScore), beta); + 
dst.Items[i].Score = GetFinalScore(scoreList[i].Surprise, Math.Abs(scoreList[i].ExplanatoryScore), beta); } } } @@ -364,7 +364,7 @@ private void GetRootCauseDirectionAndScore(Dictionary dimPointMap Point rootCausePoint = GetPointByDimension(dimPointMapping, dst.Items[0].Dimension, pointTree, aggType, aggSymbol); if (anomalyPoint != null && rootCausePoint != null) { - Tuple scores = GetSupriseAndExplanatoryScore(rootCausePoint, anomalyPoint); + Tuple scores = GetSurpriseAndExplanatoryScore(rootCausePoint, anomalyPoint); if (aggType.Equals(AggregateType.Max) || aggType.Equals(AggregateType.Min)) { dst.Items[0].Score = 1; @@ -387,7 +387,7 @@ private Point GetPointByDimension(Dictionary dimPointMapping, Dic int count = 0; Point p = new Point(dimension); - DimensionInfo dimensionInfo = SeperateDimension(dimension, aggSymbol); + DimensionInfo dimensionInfo = SeparateDimension(dimension, aggSymbol); Dictionary subDim = GetSubDim(dimension, dimensionInfo.DetailDims); foreach (Point leave in pointTree.Leaves) @@ -575,7 +575,7 @@ private double GetFinalScore(double surprise, double ep, double beta) return beta * a + (1 - beta) * b; } - private Tuple GetSupriseAndExplanatoryScore(Point rootCausePoint, Point anomalyPoint) + private Tuple GetSurpriseAndExplanatoryScore(Point rootCausePoint, Point anomalyPoint) { double surprise = GetSurpriseScore(rootCausePoint, anomalyPoint); @@ -601,7 +601,7 @@ private bool StopAnomalyComparison(double preTotal, double parent, double curren return Math.Abs(pre) / Math.Abs(current) > _anomalyPreDeltaThreshold; } - private bool ShouldSeperateAnomaly(double total, double parent, int totalSize, int size) + private bool ShouldSeparateAnomaly(double total, double parent, int totalSize, int size) { if (Math.Abs(total) < Math.Abs(parent) * _anomalyDeltaThreshold) { @@ -630,17 +630,17 @@ private double GetDimensionEntropy(Dictionary pointDis, Dictionary< return entropy; } - private double GetDimensionInstrinsicValue(Dictionary pointDis) + private 
double GetDimensionIntrinsicValue(Dictionary pointDis) { int total = GetTotalNumber(pointDis); - double instrinsicValue = 0; + double intrinsicValue = 0; foreach (string key in pointDis.Keys) { - instrinsicValue -= Log2((double)pointDis[key] / total) * (double)pointDis[key] / total; + intrinsicValue -= Log2((double)pointDis[key] / total) * (double)pointDis[key] / total; } - return instrinsicValue; + return intrinsicValue; } private int GetTotalNumber(Dictionary distribution) @@ -740,12 +740,12 @@ public int CompareTo(object obj) internal class RootCauseScore { internal double Surprise; - internal double ExplainaryScore; + internal double ExplanatoryScore; - public RootCauseScore(double surprise, double explainaryScore) + public RootCauseScore(double surprise, double explanatoryScore) { Surprise = surprise; - ExplainaryScore = explainaryScore; + ExplanatoryScore = explanatoryScore; } } }