From d8cd40169d6dcc1109eb17e0afcdec00560e70ef Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Fri, 19 Apr 2019 15:13:04 -0700 Subject: [PATCH 1/5] Draft --- .../ExtensionsCatalog.cs | 24 ++++----- .../IidAnomalyDetectionBase.cs | 2 +- .../IidChangePointDetector.cs | 52 +++++++++++++++++-- .../IidSpikeDetector.cs | 46 ++++++++++++++-- .../SsaAnomalyDetectionBase.cs | 4 +- .../SsaChangePointDetector.cs | 52 ++++++++++++++++++- .../SsaSpikeDetector.cs | 44 +++++++++++++++- 7 files changed, 200 insertions(+), 24 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index 681d53fe40..a47b61d4e8 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -10,14 +10,14 @@ namespace Microsoft.ML public static class TimeSeriesCatalog { /// - /// Create a new instance of that detects a change of in an - /// independent identically distributed (i.i.d.) time series. - /// Detection is based on adaptive kernel density estimations and martingale scores. + /// Create , which predicts change points in an + /// independent identically distributed (i.i.d.) + /// time series based on adaptive kernel density estimations and martingale scores. /// /// The transform's catalog. /// Name of the column resulting from the transformation of . /// Column is a vector of type double and size 4. The vector contains Alert, Raw Score, P-Value and Martingale score as first four values. - /// Name of column to transform. If set to , the value of the will be used as source. + /// Name of column to transform. The column data must be . If set to , the value of the will be used as source. /// The confidence for change point detection in the range [0, 100]. /// The length of the sliding window on p-values for computing the martingale score. /// The martingale used for scoring. 
@@ -34,13 +34,13 @@ public static IidChangePointEstimator DetectIidChangePoint(this TransformsCatalo => new IidChangePointEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, confidence, changeHistoryLength, inputColumnName, martingale, eps); /// - /// Create a new instance of that detects a spike in an - /// independent identically distributed (i.i.d.) time series. - /// Detection is based on adaptive kernel density estimations and martingale scores. + /// Create , which predicts spikes in + /// independent identically distributed (i.i.d.) + /// time series based on adaptive kernel density estimations and martingale scores. /// /// The transform's catalog. /// Name of the column resulting from the transformation of . - /// Name of column to transform. If set to , the value of the will be used as source. + /// Name of column to transform. The column data must be . If set to , the value of the will be used as source. /// The confidence for spike detection in the range [0, 100]. /// The size of the sliding window for computing the p-value. /// The argument that determines whether to detect positive or negative anomalies, or both. @@ -56,13 +56,13 @@ public static IidSpikeEstimator DetectIidSpike(this TransformsCatalog catalog, s => new IidSpikeEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, confidence, pvalueHistoryLength, inputColumnName, side); /// - /// Create a new instance of for detecting a change in a time series signal + /// Create , which predicts change points in time series /// using Singular Spectrum Analysis (SSA). /// /// The transform's catalog. /// Name of the column resulting from the transformation of . /// Column is a vector of type double and size 4. The vector contains Alert, Raw Score, P-Value and Martingale score as first four values. - /// Name of column to transform. If set to , the value of the will be used as source. + /// Name of column to transform. The column data must be . 
If set to , the value of the will be used as source. /// The confidence for change point detection in the range [0, 100]. /// The number of points from the beginning of the sequence used for training. /// The size of the sliding window for computing the p-value. @@ -94,12 +94,12 @@ public static SsaChangePointEstimator DetectChangePointBySsa(this TransformsCata }); /// - /// Create a new instance of for detecting a spike in a time series signal + /// Create , which predicts spikes in time series /// using Singular Spectrum Analysis (SSA). /// /// The transform's catalog. /// Name of the column resulting from the transformation of . - /// Name of column to transform. If set to , the value of the will be used as source. + /// Name of column to transform. The column data must be . If set to , the value of the will be used as source. /// The confidence for spike detection in the range [0, 100]. /// The size of the sliding window for computing the p-value. /// The number of points from the beginning of the sequence used for training. diff --git a/src/Microsoft.ML.TimeSeries/IidAnomalyDetectionBase.cs b/src/Microsoft.ML.TimeSeries/IidAnomalyDetectionBase.cs index b42320dd76..7cc669a635 100644 --- a/src/Microsoft.ML.TimeSeries/IidAnomalyDetectionBase.cs +++ b/src/Microsoft.ML.TimeSeries/IidAnomalyDetectionBase.cs @@ -23,7 +23,7 @@ public class IidAnomalyDetectionBaseWrapper : IStatefulTransformer, ICanSaveMode bool ITransformer.IsRowToRowMapper => ((ITransformer)InternalTransform).IsRowToRowMapper; /// - /// Creates a clone of the transfomer. Used for taking the snapshot of the state. + /// Creates a clone of the transformer. Used for taking the snapshot of the state. 
/// /// IStatefulTransformer IStatefulTransformer.Clone() => InternalTransform.Clone(); diff --git a/src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs b/src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs index 1fa4b7ce6e..7799bf72f9 100644 --- a/src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs +++ b/src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs @@ -191,10 +191,56 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat } /// - /// The for detecting a signal change on an - /// independent identically distributed (i.i.d.) time series. - /// Detection is based on adaptive kernel density estimation and martingales. + /// The to detect a signal change on an + /// independent identically distributed (i.i.d.) + /// time series based on adaptive kernel density estimation and martingales. /// + /// + /// . + /// + /// | Output Column Name | Column Type | Description| + /// | -- | -- | -- | + /// | All input columns | Any | All input columns would pass by without being modified. + /// | `Prediction` | Known-sized vector of | It contains alert level (non-zero value means a change point), score, p-value, and martingale value. + /// + /// ### Estimator Characteristics + /// | | | + /// | -- | -- | + /// | Machine learning task | Time series analysis | + /// | Is normalization required? | No | + /// | Is caching required? | No | + /// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.TimeSeries | + /// + /// ### Training Algorithm Details + /// This trainer assumes that data points collected in the considered time series are independently sampled from the same distribution (independent identically distributed). + /// Thus, the value at the current timestamp can be viewed as the predicted value, raw score, at the next timestamp in expectation. + /// + /// ### Anomaly Scorer + /// Once the raw score at a timestamp is computed, it is fed to the anomaly scorer component to calculate the final anomaly score at that timestamp. 
+ /// There are two statistics involved in this scorer, p-value and martingale score. + /// + /// The p-value score indicates the p-value of the current computed raw score according to a distribution of raw scores. + /// Here, the distribution is estimated based on the most recent raw score values up to certain depth back in the history. + /// More specifically, this distribution is estimated using [kernel density estimation](https://en.wikipedia.org/wiki/Kernel_density_estimation) with the Gaussian [kernels](https://en.wikipedia.org/wiki/Kernel_(statistics)#In_non-parametric_statistics) of adaptive bandwidth. + /// The p-value score is always in $[0, 1]$, and the lower its value, the more likely the current point is an outlier. + /// + /// The martingale score is an extra level of scoring that is built upon the p-value scores. + /// The idea is based on the [Exchangeability Martingales](https://arxiv.org/pdf/1204.3251.pdf) that detect a change of distribution over a stream of i.i.d. values. + /// In short, the value of the martingale score starts increasing significantly when a sequence of small p-values detected in a row; this indicates the change of the distribution of the underlying data generation process. + /// For this very reason, the martingale score is used for change point detection. + /// Given a sequence of most recently observed p-values, $p1, \dots, p_n$, the martingale score is computed as:​ $s(p1, \dots, p_n) = \prod_{i=1}^n \beta(p_i)$. + /// There are two choices of $\beta$: $\beta(p) = e p^{\epsilon - 1}$ for $0 < \epsilon < 1$ or $\beta(p) = \int_{0}^1 \epsilon p^{\epsilon - 1} d\epsilon$. + /// + /// If the martingle score exceeds $s(q_1, \dots, q_n)$ where $q_i=1 - \frac{\text{confidence}}{100}$, the associated timestamp may get a non-zero alert value. 
+ /// Note that $\text{confidence}$ is defined in the signature of [DetectChangePointBySsa](xref:Microsoft.ML.TimeSeriesCatalog.DetectChangePointBySsa(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int32,System.Int32,System.Int32,System.Int32,Microsoft.ML.Transforms.TimeSeries.ErrorFunction,Microsoft.ML.Transforms.TimeSeries.MartingaleType,System.Double)). + /// ]]> + /// + /// + /// public sealed class IidChangePointEstimator : TrivialEstimator { /// diff --git a/src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs b/src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs index 8805731c35..d62c20aad9 100644 --- a/src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs +++ b/src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs @@ -171,10 +171,50 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat } /// - /// The for detecting a signal spike on an - /// independent identically distributed (i.i.d.) time series. - /// Detection is based on adaptive kernel density estimation. + /// The to detect a signal spike on an + /// independent identically distributed (i.i.d.) + /// time series based on adaptive kernel density estimation. /// + /// + /// . + /// + /// | Output Column Name | Column Type | Description| + /// | -- | -- | -- | + /// | All input columns | Any | All input columns would pass by without being modified. + /// | `Prediction` | Known-sized vector of | It contains alert level (non-zero value means a spike), score, and p-value. + /// + /// ### Estimator Characteristics + /// | | | + /// | -- | -- | + /// | Machine learning task | Time series analysis | + /// | Is normalization required? | No | + /// | Is caching required? | No | + /// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.TimeSeries | + /// + /// ### Training Algorithm Details + /// This trainer assumes that data points collected in the considered time series are independently sampled from the same distribution (independent identically distributed). 
+ /// Thus, the value at the current timestamp can be viewed as the predicted value, raw score, at the next timestamp in expectation. + /// + /// ### Anomaly Scorer + /// Once the raw score at a timestamp is computed, it is fed to the anomaly scorer component to calculate the final anomaly score at that timestamp. + /// There are two statistics involved in this scorer, p-value and martingale score. + /// + /// The p-value score indicates the p-value of the current computed raw score according to a distribution of raw scores. + /// Here, the distribution is estimated based on the most recent raw score values up to certain depth back in the history. + /// More specifically, this distribution is estimated using [kernel density estimation](https://en.wikipedia.org/wiki/Kernel_density_estimation) with the Gaussian [kernels](https://en.wikipedia.org/wiki/Kernel_(statistics)#In_non-parametric_statistics) of adaptive bandwidth. + /// The p-value score is always in $[0, 1]$, and the lower its value, the more likely the current point is an outlier. + /// This means that the p-value score is a good measure of spikiness and therefore it is used for spike detection + /// + /// If the p-value score exceeds $1 - \frac{\text{confidence}}{100}$, the associated timestamp may get a non-zero alert value, which means a spike point is detected. + /// Note that $\text{confidence}$ is defined in the signature of [DetectChangePointBySsa](xref:Microsoft.ML.TimeSeriesCatalog.DetectChangePointBySsa(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int32,System.Int32,System.Int32,System.Int32,Microsoft.ML.Transforms.TimeSeries.ErrorFunction,Microsoft.ML.Transforms.TimeSeries.MartingaleType,System.Double)). 
+ /// ]]> + /// + /// + /// public sealed class IidSpikeEstimator : TrivialEstimator { /// diff --git a/src/Microsoft.ML.TimeSeries/SsaAnomalyDetectionBase.cs b/src/Microsoft.ML.TimeSeries/SsaAnomalyDetectionBase.cs index 86b0589a71..42dabcda12 100644 --- a/src/Microsoft.ML.TimeSeries/SsaAnomalyDetectionBase.cs +++ b/src/Microsoft.ML.TimeSeries/SsaAnomalyDetectionBase.cs @@ -92,7 +92,7 @@ public class SsaAnomalyDetectionBaseWrapper : IStatefulTransformer, ICanSaveMode bool ITransformer.IsRowToRowMapper => ((ITransformer)InternalTransform).IsRowToRowMapper; /// - /// Creates a clone of the transfomer. Used for taking the snapshot of the state. + /// Creates a clone of the transformer. Used for taking the snapshot of the state. /// /// IStatefulTransformer IStatefulTransformer.Clone() => InternalTransform.Clone(); @@ -340,7 +340,7 @@ private protected override void InitializeAnomalyDetector() private protected override double ComputeRawAnomalyScore(ref Single input, FixedSizeQueue windowedBuffer, long iteration) { - // Get the prediction for the next point opn the series + // Get the prediction for the next point in the series Single expectedValue = 0; _model.PredictNext(ref expectedValue); diff --git a/src/Microsoft.ML.TimeSeries/SsaChangePointDetector.cs b/src/Microsoft.ML.TimeSeries/SsaChangePointDetector.cs index ff5cb4a423..48a08d1baf 100644 --- a/src/Microsoft.ML.TimeSeries/SsaChangePointDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SsaChangePointDetector.cs @@ -26,7 +26,7 @@ namespace Microsoft.ML.Transforms.TimeSeries { /// - /// produced by fitting the to an . + /// produced by fitting the to an . /// public sealed class SsaChangePointDetector : SsaAnomalyDetectionBaseWrapper, IStatefulTransformer { @@ -200,8 +200,56 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat } /// - /// The for detecting a signal change through Singular Spectrum Analysis (SSA) of time series. 
+ /// The to predict change points in time series using Singular Spectrum Analysis. /// + /// + /// . + /// + /// | Output Column Name | Column Type | Description| + /// | -- | -- | -- | + /// | All input columns | Any | All input columns would pass by without being modified. + /// | `Prediction` | Known-sized vector of | It contains alert level (non-zero value means a change point), score, p-value, and martingale value. + /// + /// ### Estimator Characteristics + /// | | | + /// | -- | -- | + /// | Machine learning task | Time series analysis | + /// | Is normalization required? | No | + /// | Is caching required? | No | + /// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.TimeSeries | + /// + /// ### Training Algorithm Details + /// This class implements the general anomaly detection transform based on [Singular Spectrum Analysis (SSA)](https://en.wikipedia.org/wiki/Singular_spectrum_analysis). + /// SSA is a powerful framework for decomposing the time-series into trend, seasonality and noise components as well as forecasting the future values of the time-series. + /// In principle, SSA performs spectral analysis on the input time-series where each component in the spectrum corresponds to a trend, seasonal or noise component in the time-series. + /// For details of the Singular Spectrum Analysis (SSA), refer to [this document](http://arxiv.org/pdf/1206.6910.pdf). + /// + /// ### Anomaly Scorer + /// Once the raw score at a timestamp is computed by SSA, it is fed to the anomaly scorer component to calculate the final anomaly score at that timestamp. + /// There are two statistics involved in this scorer, p-value and martingale score. + /// + /// The p-value score indicates the p-value of the current computed raw score according to a distribution of raw scores. + /// Here, the distribution is estimated based on the most recent raw score values up to certain depth back in the history. 
+ /// More specifically, this distribution is estimated using [kernel density estimation](https://en.wikipedia.org/wiki/Kernel_density_estimation) with the Gaussian [kernels](https://en.wikipedia.org/wiki/Kernel_(statistics)#In_non-parametric_statistics) of adaptive bandwidth. + /// The p-value score is always in $[0, 1]$, and the lower its value, the more likely the current point is an outlier. + /// + /// The martingale score is an extra level of scoring that is built upon the p-value scores. + /// The idea is based on the [Exchangeability Martingales](https://arxiv.org/pdf/1204.3251.pdf) that detect a change of distribution over a stream of i.i.d. values. + /// In short, the value of the martingale score starts increasing significantly when a sequence of small p-values detected in a row; this indicates the change of the distribution of the underlying data generation process. + /// For this very reason, the martingale score is used for change point detection. + /// Given a sequence of most recently observed p-values, $p1, \dots, p_n$, the martingale score is computed as:​ $s(p1, \dots, p_n) = \prod_{i=1}^n \beta(p_i)$. + /// There are two choices of $\beta$: $\beta(p) = e p^{\epsilon - 1}$ for $0 < \epsilon < 1$ or $\beta(p) = \int_{0}^1 \epsilon p^{\epsilon - 1} d\epsilon$. + /// + /// If the martingle score exceeds $s(q_1, \dots, q_n)$ where $q_i=1 - \frac{\text{confidence}}{100}$, the associated timestamp may get a non-zero alert value. + /// Note that $\text{confidence}$ is defined in the signature of [DetectChangePointBySsa](xref:Microsoft.ML.TimeSeriesCatalog.DetectChangePointBySsa(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int32,System.Int32,System.Int32,System.Int32,Microsoft.ML.Transforms.TimeSeries.ErrorFunction,Microsoft.ML.Transforms.TimeSeries.MartingaleType,System.Double)). 
+ /// ]]> + /// + /// + /// public sealed class SsaChangePointEstimator : IEstimator { private readonly IHost _host; diff --git a/src/Microsoft.ML.TimeSeries/SsaSpikeDetector.cs b/src/Microsoft.ML.TimeSeries/SsaSpikeDetector.cs index f8af5d99dc..a8d180c4ae 100644 --- a/src/Microsoft.ML.TimeSeries/SsaSpikeDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SsaSpikeDetector.cs @@ -181,8 +181,50 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat } /// - /// The for detecting a signal spike through Singular Spectrum Analysis (SSA) of time series. + /// The to predict spikes in time series using Singular Spectrum Analysis. /// + /// + /// . + /// + /// | Output Column Name | Column Type | Description| + /// | -- | -- | -- | + /// | All input columns | Any | All input columns would pass by without being modified. + /// | `Prediction` | Known-sized vector of | It contains alert level (non-zero value means a spike), score, and p-value. + /// + /// ### Estimator Characteristics + /// | | | + /// | -- | -- | + /// | Machine learning task | Time series analysis | + /// | Is normalization required? | No | + /// | Is caching required? | No | + /// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.TimeSeries | + /// + /// ### Training Algorithm Details + /// This class implements the general anomaly detection transform based on [Singular Spectrum Analysis (SSA)](https://en.wikipedia.org/wiki/Singular_spectrum_analysis). + /// SSA is a powerful framework for decomposing the time-series into trend, seasonality and noise components as well as forecasting the future values of the time-series. + /// In principle, SSA performs spectral analysis on the input time-series where each component in the spectrum corresponds to a trend, seasonal or noise component in the time-series. + /// For details of the Singular Spectrum Analysis (SSA), refer to [this document](http://arxiv.org/pdf/1206.6910.pdf). 
+ /// + /// ### Anomaly Scorer + /// Once the raw score at a timestamp is computed by SSA, it is fed to the anomaly scorer component to calculate the final anomaly score at that timestamp. + /// There are two statistics involved in this scorer, p-value and martingale score. + /// + /// The p-value score indicates the p-value of the current computed raw score according to a distribution of raw scores. + /// Here, the distribution is estimated based on the most recent raw score values up to certain depth back in the history. + /// More specifically, this distribution is estimated using [kernel density estimation](https://en.wikipedia.org/wiki/Kernel_density_estimation) with the Gaussian [kernels](https://en.wikipedia.org/wiki/Kernel_(statistics)#In_non-parametric_statistics) of adaptive bandwidth. + /// The p-value score is always in $[0, 1]$, and the lower its value, the more likely the current point is an outlier. + /// This means that the p-value score is a good measure of spikiness and therefore it is used for spike detection + /// + /// If the p-value score exceeds $1 - \frac{\text{confidence}}{100}$, the associated timestamp may get a non-zero alert value, which means a spike point is detected. + /// Note that $\text{confidence}$ is defined in the signature of [DetectChangePointBySsa](xref:Microsoft.ML.TimeSeriesCatalog.DetectChangePointBySsa(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int32,System.Int32,System.Int32,System.Int32,Microsoft.ML.Transforms.TimeSeries.ErrorFunction,Microsoft.ML.Transforms.TimeSeries.MartingaleType,System.Double)). 
+ /// ]]> + /// + /// + /// public sealed class SsaSpikeEstimator : IEstimator { private readonly IHost _host; From cc0edfa54a8a562362bb36e2c6ec00fbcb283176 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Fri, 19 Apr 2019 15:31:58 -0700 Subject: [PATCH 2/5] Polishment --- docs/api-reference/time-series-scorer.md | 26 +++++++++++++++++++ .../IidChangePointDetector.cs | 19 +------------- .../IidSpikeDetector.cs | 13 +--------- .../SsaChangePointDetector.cs | 19 +------------- .../SsaSpikeDetector.cs | 13 +--------- 5 files changed, 30 insertions(+), 60 deletions(-) create mode 100644 docs/api-reference/time-series-scorer.md diff --git a/docs/api-reference/time-series-scorer.md b/docs/api-reference/time-series-scorer.md new file mode 100644 index 0000000000..48cc8813bc --- /dev/null +++ b/docs/api-reference/time-series-scorer.md @@ -0,0 +1,26 @@ +### Anomaly Scorer +Once the raw score at a timestamp is computed, it is fed to the anomaly scorer component to calculate the final anomaly score at that timestamp. +There are two statistics involved in this scorer, p-value and martingale score. + +#### Spike detection based on p-value +The p-value score indicates the p-value of the current computed raw score according to a distribution of raw scores. +Here, the distribution is estimated based on the most recent raw score values up to certain depth back in the history. +More specifically, this distribution is estimated using [kernel density estimation](https://en.wikipedia.org/wiki/Kernel_density_estimation) +with the Gaussian [kernels](https://en.wikipedia.org/wiki/Kernel_(statistics)#In_non-parametric_statistics) of adaptive bandwidth. +The p-value score is always in $[0, 1]$, and the lower its value, the more likely the current point is an outlier (also known as a spike). +If the p-value score exceeds $1 - \frac{\text{confidence}}{100}$, the associated timestamp may get a non-zero alert value in spike detection, which means a spike point is detected. 
+Note that $\text{confidence}$ is defined in the signatures of [DetectChangePointBySsa](xref:Microsoft.ML.TimeSeriesCatalog.DetectChangePointBySsa(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int32,System.Int32,System.Int32,System.Int32,Microsoft.ML.Transforms.TimeSeries.ErrorFunction,Microsoft.ML.Transforms.TimeSeries.MartingaleType,System.Double)) +and [DetectIidChangePoint](xref:Microsoft.ML.TimeSeriesCatalog.DetectIidChangePoint(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int32,System.Int32,Microsoft.ML.Transforms.TimeSeries.MartingaleType,System.Double)). + + +#### Change point detection based on martingale score +The martingale score is an extra level of scoring that is built upon the p-value scores. +The idea is based on the [Exchangeability Martingales](https://arxiv.org/pdf/1204.3251.pdf) that detect a change of distribution over a stream of i.i.d. values. +In short, the value of the martingale score starts increasing significantly when a sequence of small p-values is detected in a row; this indicates the change of the distribution of the underlying data generation process. +Thus, the martingale score is used for change point detection. +Given a sequence of most recently observed p-values, $p_1, \dots, p_n$, the martingale score is computed as: $s(p_1, \dots, p_n) = \prod_{i=1}^n \beta(p_i)$. +There are two choices of $\beta$: $\beta(p) = \epsilon p^{\epsilon - 1}$ for $0 < \epsilon < 1$ or $\beta(p) = \int_{0}^1 \epsilon p^{\epsilon - 1} d\epsilon$. + +If the martingale score exceeds $s(q_1, \dots, q_n)$ where $q_i=1 - \frac{\text{confidence}}{100}$, the associated timestamp may get a non-zero alert value for change point detection. 
+Note that $\text{confidence}$ is defined in the signatures of [DetectChangePointBySsa](xref:Microsoft.ML.TimeSeriesCatalog.DetectChangePointBySsa(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int32,System.Int32,System.Int32,System.Int32,Microsoft.ML.Transforms.TimeSeries.ErrorFunction,Microsoft.ML.Transforms.TimeSeries.MartingaleType,System.Double)) or +[DetectIidChangePoint](xref:Microsoft.ML.TimeSeriesCatalog.DetectIidChangePoint(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int32,System.Int32,Microsoft.ML.Transforms.TimeSeries.MartingaleType,System.Double)). diff --git a/src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs b/src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs index 7799bf72f9..591c3d1f4c 100644 --- a/src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs +++ b/src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs @@ -219,24 +219,7 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat /// This trainer assumes that data points collected in the considered time series are independently sampled from the same distribution (independent identically distributed). /// Thus, the value at the current timestamp can be viewed as the predicted value, raw score, at the next timestamp in expectation. /// - /// ### Anomaly Scorer - /// Once the raw score at a timestamp is computed, it is fed to the anomaly scorer component to calculate the final anomaly score at that timestamp. - /// There are two statistics involved in this scorer, p-value and martingale score. - /// - /// The p-value score indicates the p-value of the current computed raw score according to a distribution of raw scores. - /// Here, the distribution is estimated based on the most recent raw score values up to certain depth back in the history. 
- /// More specifically, this distribution is estimated using [kernel density estimation](https://en.wikipedia.org/wiki/Kernel_density_estimation) with the Gaussian [kernels](https://en.wikipedia.org/wiki/Kernel_(statistics)#In_non-parametric_statistics) of adaptive bandwidth. - /// The p-value score is always in $[0, 1]$, and the lower its value, the more likely the current point is an outlier. - /// - /// The martingale score is an extra level of scoring that is built upon the p-value scores. - /// The idea is based on the [Exchangeability Martingales](https://arxiv.org/pdf/1204.3251.pdf) that detect a change of distribution over a stream of i.i.d. values. - /// In short, the value of the martingale score starts increasing significantly when a sequence of small p-values detected in a row; this indicates the change of the distribution of the underlying data generation process. - /// For this very reason, the martingale score is used for change point detection. - /// Given a sequence of most recently observed p-values, $p1, \dots, p_n$, the martingale score is computed as:​ $s(p1, \dots, p_n) = \prod_{i=1}^n \beta(p_i)$. - /// There are two choices of $\beta$: $\beta(p) = e p^{\epsilon - 1}$ for $0 < \epsilon < 1$ or $\beta(p) = \int_{0}^1 \epsilon p^{\epsilon - 1} d\epsilon$. - /// - /// If the martingle score exceeds $s(q_1, \dots, q_n)$ where $q_i=1 - \frac{\text{confidence}}{100}$, the associated timestamp may get a non-zero alert value. - /// Note that $\text{confidence}$ is defined in the signature of [DetectChangePointBySsa](xref:Microsoft.ML.TimeSeriesCatalog.DetectChangePointBySsa(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int32,System.Int32,System.Int32,System.Int32,Microsoft.ML.Transforms.TimeSeries.ErrorFunction,Microsoft.ML.Transforms.TimeSeries.MartingaleType,System.Double)). 
+ /// [!include[io](~/../docs/samples/docs/api-reference/time-series-scorer.md)] /// ]]> /// /// diff --git a/src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs b/src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs index d62c20aad9..43219b9830 100644 --- a/src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs +++ b/src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs @@ -199,18 +199,7 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat /// This trainer assumes that data points collected in the considered time series are independently sampled from the same distribution (independent identically distributed). /// Thus, the value at the current timestamp can be viewed as the predicted value, raw score, at the next timestamp in expectation. /// - /// ### Anomaly Scorer - /// Once the raw score at a timestamp is computed, it is fed to the anomaly scorer component to calculate the final anomaly score at that timestamp. - /// There are two statistics involved in this scorer, p-value and martingale score. - /// - /// The p-value score indicates the p-value of the current computed raw score according to a distribution of raw scores. - /// Here, the distribution is estimated based on the most recent raw score values up to certain depth back in the history. - /// More specifically, this distribution is estimated using [kernel density estimation](https://en.wikipedia.org/wiki/Kernel_density_estimation) with the Gaussian [kernels](https://en.wikipedia.org/wiki/Kernel_(statistics)#In_non-parametric_statistics) of adaptive bandwidth. - /// The p-value score is always in $[0, 1]$, and the lower its value, the more likely the current point is an outlier. - /// This means that the p-value score is a good measure of spikiness and therefore it is used for spike detection - /// - /// If the p-value score exceeds $1 - \frac{\text{confidence}}{100}$, the associated timestamp may get a non-zero alert value, which means a spike point is detected. 
- /// Note that $\text{confidence}$ is defined in the signature of [DetectChangePointBySsa](xref:Microsoft.ML.TimeSeriesCatalog.DetectChangePointBySsa(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int32,System.Int32,System.Int32,System.Int32,Microsoft.ML.Transforms.TimeSeries.ErrorFunction,Microsoft.ML.Transforms.TimeSeries.MartingaleType,System.Double)). + /// [!include[io](~/../docs/samples/docs/api-reference/time-series-scorer.md)] /// ]]> /// /// diff --git a/src/Microsoft.ML.TimeSeries/SsaChangePointDetector.cs b/src/Microsoft.ML.TimeSeries/SsaChangePointDetector.cs index 48a08d1baf..bdcf106070 100644 --- a/src/Microsoft.ML.TimeSeries/SsaChangePointDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SsaChangePointDetector.cs @@ -228,24 +228,7 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat /// In principle, SSA performs spectral analysis on the input time-series where each component in the spectrum corresponds to a trend, seasonal or noise component in the time-series. /// For details of the Singular Spectrum Analysis (SSA), refer to [this document](http://arxiv.org/pdf/1206.6910.pdf). /// - /// ### Anomaly Scorer - /// Once the raw score at a timestamp is computed by SSA, it is fed to the anomaly scorer component to calculate the final anomaly score at that timestamp. - /// There are two statistics involved in this scorer, p-value and martingale score. - /// - /// The p-value score indicates the p-value of the current computed raw score according to a distribution of raw scores. - /// Here, the distribution is estimated based on the most recent raw score values up to certain depth back in the history. - /// More specifically, this distribution is estimated using [kernel density estimation](https://en.wikipedia.org/wiki/Kernel_density_estimation) with the Gaussian [kernels](https://en.wikipedia.org/wiki/Kernel_(statistics)#In_non-parametric_statistics) of adaptive bandwidth. 
- /// The p-value score is always in $[0, 1]$, and the lower its value, the more likely the current point is an outlier. - /// - /// The martingale score is an extra level of scoring that is built upon the p-value scores. - /// The idea is based on the [Exchangeability Martingales](https://arxiv.org/pdf/1204.3251.pdf) that detect a change of distribution over a stream of i.i.d. values. - /// In short, the value of the martingale score starts increasing significantly when a sequence of small p-values detected in a row; this indicates the change of the distribution of the underlying data generation process. - /// For this very reason, the martingale score is used for change point detection. - /// Given a sequence of most recently observed p-values, $p1, \dots, p_n$, the martingale score is computed as:​ $s(p1, \dots, p_n) = \prod_{i=1}^n \beta(p_i)$. - /// There are two choices of $\beta$: $\beta(p) = e p^{\epsilon - 1}$ for $0 < \epsilon < 1$ or $\beta(p) = \int_{0}^1 \epsilon p^{\epsilon - 1} d\epsilon$. - /// - /// If the martingle score exceeds $s(q_1, \dots, q_n)$ where $q_i=1 - \frac{\text{confidence}}{100}$, the associated timestamp may get a non-zero alert value. - /// Note that $\text{confidence}$ is defined in the signature of [DetectChangePointBySsa](xref:Microsoft.ML.TimeSeriesCatalog.DetectChangePointBySsa(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int32,System.Int32,System.Int32,System.Int32,Microsoft.ML.Transforms.TimeSeries.ErrorFunction,Microsoft.ML.Transforms.TimeSeries.MartingaleType,System.Double)). 
+ /// [!include[io](~/../docs/samples/docs/api-reference/time-series-scorer.md)] /// ]]> /// /// diff --git a/src/Microsoft.ML.TimeSeries/SsaSpikeDetector.cs b/src/Microsoft.ML.TimeSeries/SsaSpikeDetector.cs index a8d180c4ae..36c3ecdbe6 100644 --- a/src/Microsoft.ML.TimeSeries/SsaSpikeDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SsaSpikeDetector.cs @@ -209,18 +209,7 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat /// In principle, SSA performs spectral analysis on the input time-series where each component in the spectrum corresponds to a trend, seasonal or noise component in the time-series. /// For details of the Singular Spectrum Analysis (SSA), refer to [this document](http://arxiv.org/pdf/1206.6910.pdf). /// - /// ### Anomaly Scorer - /// Once the raw score at a timestamp is computed by SSA, it is fed to the anomaly scorer component to calculate the final anomaly score at that timestamp. - /// There are two statistics involved in this scorer, p-value and martingale score. - /// - /// The p-value score indicates the p-value of the current computed raw score according to a distribution of raw scores. - /// Here, the distribution is estimated based on the most recent raw score values up to certain depth back in the history. - /// More specifically, this distribution is estimated using [kernel density estimation](https://en.wikipedia.org/wiki/Kernel_density_estimation) with the Gaussian [kernels](https://en.wikipedia.org/wiki/Kernel_(statistics)#In_non-parametric_statistics) of adaptive bandwidth. - /// The p-value score is always in $[0, 1]$, and the lower its value, the more likely the current point is an outlier. - /// This means that the p-value score is a good measure of spikiness and therefore it is used for spike detection - /// - /// If the p-value score exceeds $1 - \frac{\text{confidence}}{100}$, the associated timestamp may get a non-zero alert value, which means a spike point is detected. 
- /// Note that $\text{confidence}$ is defined in the signature of [DetectChangePointBySsa](xref:Microsoft.ML.TimeSeriesCatalog.DetectChangePointBySsa(Microsoft.ML.TransformsCatalog,System.String,System.String,System.Int32,System.Int32,System.Int32,System.Int32,Microsoft.ML.Transforms.TimeSeries.ErrorFunction,Microsoft.ML.Transforms.TimeSeries.MartingaleType,System.Double)). + /// [!include[io](~/../docs/samples/docs/api-reference/time-series-scorer.md)] /// ]]> /// /// From d73a0fb7e6bff9fe69d5ff55fdb7e902a889e59b Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Sat, 20 Apr 2019 14:41:37 -0700 Subject: [PATCH 3/5] Address some comments --- .../ExtensionsCatalog.cs | 20 +++++++++++-------- .../IidAnomalyDetectionBase.cs | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index a47b61d4e8..e4a3da6761 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -16,7 +16,7 @@ public static class TimeSeriesCatalog /// /// The transform's catalog. /// Name of the column resulting from the transformation of . - /// Column is a vector of type double and size 4. The vector contains Alert, Raw Score, P-Value and Martingale score as first four values. + /// The column data is a vector of . The vector contains 4 elements: alert (non-zero value means a change point), raw score, p-Value and martingale score. /// Name of column to transform. The column data must be . If set to , the value of the will be used as source. /// The confidence for change point detection in the range [0, 100]. /// The length of the sliding window on p-values for computing the martingale score. @@ -39,8 +39,10 @@ public static IidChangePointEstimator DetectIidChangePoint(this TransformsCatalo /// time series based on adaptive kernel density estimations and martingale scores. /// /// The transform's catalog. 
- /// Name of the column resulting from the transformation of . - /// Name of column to transform. The column data must be . The column data must be . If set to , the value of the will be used as source. + /// Name of the column resulting from the transformation of . + /// The column data is a vector of . The vector contains 3 elements: alert (non-zero value means a spike), raw score, and p-value. + /// Name of column to transform. The column data must be . + /// If set to , the value of the will be used as source. /// The confidence for spike detection in the range [0, 100]. /// The size of the sliding window for computing the p-value. /// The argument that determines whether to detect positive or negative anomalies, or both. @@ -61,8 +63,9 @@ public static IidSpikeEstimator DetectIidSpike(this TransformsCatalog catalog, s /// /// The transform's catalog. /// Name of the column resulting from the transformation of . - /// Column is a vector of type double and size 4. The vector contains Alert, Raw Score, P-Value and Martingale score as first four values. - /// Name of column to transform. The column data must be . If set to , the value of the will be used as source. + /// The column data is a vector of . The vector contains 4 elements: alert (non-zero value means a change point), raw score, p-Value and martingale score. + /// Name of column to transform. The column data must be . + /// If set to , the value of the will be used as source. /// The confidence for change point detection in the range [0, 100]. /// The number of points from the beginning of the sequence used for training. /// The size of the sliding window for computing the p-value. @@ -98,13 +101,14 @@ public static SsaChangePointEstimator DetectChangePointBySsa(this TransformsCata /// using Singular Spectrum Analysis (SSA). /// /// The transform's catalog. - /// Name of the column resulting from the transformation of . - /// Name of column to transform. The column data must be . 
If set to , the value of the will be used as source. + /// Name of the column resulting from the transformation of . + /// The column data is a vector of . The vector contains 3 elements: alert (non-zero value means a spike), raw score, and p-value. + /// Name of column to transform. The column data must be . + /// If set to , the value of the will be used as source. /// The confidence for spike detection in the range [0, 100]. /// The size of the sliding window for computing the p-value. /// The number of points from the beginning of the sequence used for training. /// An upper bound on the largest relevant seasonality in the input time-series. - /// The vector contains Alert, Raw Score, P-Value as first three values. /// The argument that determines whether to detect positive or negative anomalies, or both. /// The function used to compute the error between the expected and the observed value. /// diff --git a/src/Microsoft.ML.TimeSeries/IidAnomalyDetectionBase.cs b/src/Microsoft.ML.TimeSeries/IidAnomalyDetectionBase.cs index 7cc669a635..36f569b991 100644 --- a/src/Microsoft.ML.TimeSeries/IidAnomalyDetectionBase.cs +++ b/src/Microsoft.ML.TimeSeries/IidAnomalyDetectionBase.cs @@ -23,7 +23,7 @@ public class IidAnomalyDetectionBaseWrapper : IStatefulTransformer, ICanSaveMode bool ITransformer.IsRowToRowMapper => ((ITransformer)InternalTransform).IsRowToRowMapper; /// - /// Creates a clone of the transformer. Used for taking the snapshot of the state. + /// Create a clone of the transformer. Used for taking the snapshot of the state. 
/// /// IStatefulTransformer IStatefulTransformer.Clone() => InternalTransform.Clone(); From 93a97f0ea9dc2101ed09745d89f63a803e08ef03 Mon Sep 17 00:00:00 2001 From: Wei-Sheng Chin Date: Sat, 20 Apr 2019 15:16:07 -0700 Subject: [PATCH 4/5] Address comments --- .../io-time-series-change-point.md | 7 ++++++ docs/api-reference/io-time-series-spike.md | 7 ++++++ docs/api-reference/time-series-iid.md | 4 ++++ docs/api-reference/time-series-props.md | 7 ++++++ docs/api-reference/time-series-ssa.md | 5 +++++ .../IidChangePointDetector.cs | 20 +++-------------- .../IidSpikeDetector.cs | 20 +++-------------- .../SsaChangePointDetector.cs | 22 +++---------------- .../SsaSpikeDetector.cs | 22 +++---------------- 9 files changed, 42 insertions(+), 72 deletions(-) create mode 100644 docs/api-reference/io-time-series-change-point.md create mode 100644 docs/api-reference/io-time-series-spike.md create mode 100644 docs/api-reference/time-series-iid.md create mode 100644 docs/api-reference/time-series-props.md create mode 100644 docs/api-reference/time-series-ssa.md diff --git a/docs/api-reference/io-time-series-change-point.md b/docs/api-reference/io-time-series-change-point.md new file mode 100644 index 0000000000..8f74950119 --- /dev/null +++ b/docs/api-reference/io-time-series-change-point.md @@ -0,0 +1,7 @@ +### Input and Output Columns +There is only one input column and its type is . +This estimator adds the following outputs columns: + +| Output Column Name | Column Type | Description| +| -- | -- | -- | +| `Prediction` | 4-element vector of | It sequentially contains alert level (non-zero value means a change point), score, p-value, and martingale value. | diff --git a/docs/api-reference/io-time-series-spike.md b/docs/api-reference/io-time-series-spike.md new file mode 100644 index 0000000000..35d6ea2918 --- /dev/null +++ b/docs/api-reference/io-time-series-spike.md @@ -0,0 +1,7 @@ +### Input and Output Columns +There is only one input column and its type is . 
+This estimator adds the following outputs columns: + +| Output Column Name | Column Type | Description| +| -- | -- | -- | +| `Prediction` | 3-element vector of | It sequentially contains alert level (non-zero value means a spike), score, and p-value. | diff --git a/docs/api-reference/time-series-iid.md b/docs/api-reference/time-series-iid.md new file mode 100644 index 0000000000..b366c5e2d2 --- /dev/null +++ b/docs/api-reference/time-series-iid.md @@ -0,0 +1,4 @@ +### Training Algorithm Details +This trainer assumes that data points collected in the time series are independently sampled from the same distribution (independent identically distributed). +Thus, the value at the current timestamp can be viewed as the value at the next timestamp in expectation. +If the observed value at timestamp $t-1$ is $p$, the predicted value at timestamp $t$ would be $p$ as well. diff --git a/docs/api-reference/time-series-props.md b/docs/api-reference/time-series-props.md new file mode 100644 index 0000000000..6bab332edf --- /dev/null +++ b/docs/api-reference/time-series-props.md @@ -0,0 +1,7 @@ +### Estimator Characteristics +| | | +| -- | -- | +| Machine learning task | Anomaly detection | +| Is normalization required? | No | +| Is caching required? | No | +| Required NuGet in addition to Microsoft.ML | Microsoft.ML.TimeSeries | diff --git a/docs/api-reference/time-series-ssa.md b/docs/api-reference/time-series-ssa.md new file mode 100644 index 0000000000..c5df2ef6eb --- /dev/null +++ b/docs/api-reference/time-series-ssa.md @@ -0,0 +1,5 @@ +### Training Algorithm Details +This class implements the general anomaly detection transform based on [Singular Spectrum Analysis (SSA)](https://en.wikipedia.org/wiki/Singular_spectrum_analysis). +SSA is a powerful framework for decomposing the time-series into trend, seasonality and noise components as well as forecasting the future values of the time-series.
+In principle, SSA performs spectral analysis on the input time-series where each component in the spectrum corresponds to a trend, seasonal or noise component in the time-series. +For details of the Singular Spectrum Analysis (SSA), refer to [this document](http://arxiv.org/pdf/1206.6910.pdf). diff --git a/src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs b/src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs index 591c3d1f4c..b9aac25f90 100644 --- a/src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs +++ b/src/Microsoft.ML.TimeSeries/IidChangePointDetector.cs @@ -199,25 +199,11 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat /// . + /// [!include[io](~/../docs/samples/docs/api-reference/io-time-series-change-point.md)] /// - /// | Output Column Name | Column Type | Description| - /// | -- | -- | -- | - /// | All input columns | Any | All input columns would pass by without being modified. - /// | `Prediction` | Known-sized vector of | It contains alert level (non-zero value means a change point), score, p-value, and martingale value. + /// [!include[io](~/../docs/samples/docs/api-reference/time-series-props.md)] /// - /// ### Estimator Characteristics - /// | | | - /// | -- | -- | - /// | Machine learning task | Time series analysis | - /// | Is normalization required? | No | - /// | Is caching required? | No | - /// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.TimeSeries | - /// - /// ### Training Algorithm Details - /// This trainer assumes that data points collected in the considered time series are independently sampled from the same distribution (independent identically distributed). - /// Thus, the value at the current timestamp can be viewed as the predicted value, raw score, at the next timestamp in expectation. 
+ /// [!include[io](~/../docs/samples/docs/api-reference/time-series-iid.md)] /// /// [!include[io](~/../docs/samples/docs/api-reference/time-series-scorer.md)] /// ]]> diff --git a/src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs b/src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs index 43219b9830..90f57b6311 100644 --- a/src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs +++ b/src/Microsoft.ML.TimeSeries/IidSpikeDetector.cs @@ -179,25 +179,11 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat /// . + /// [!include[io](~/../docs/samples/docs/api-reference/io-time-series-spike.md)] /// - /// | Output Column Name | Column Type | Description| - /// | -- | -- | -- | - /// | All input columns | Any | All input columns would pass by without being modified. - /// | `Prediction` | Known-sized vector of | It contains alert level (non-zero value means a change point), score, and p-value. + /// [!include[io](~/../docs/samples/docs/api-reference/time-series-props.md)] /// - /// ### Estimator Characteristics - /// | | | - /// | -- | -- | - /// | Machine learning task | Time series analysis | - /// | Is normalization required? | No | - /// | Is caching required? | No | - /// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.TimeSeries | - /// - /// ### Training Algorithm Details - /// This trainer assumes that data points collected in the considered time series are independently sampled from the same distribution (independent identically distributed). - /// Thus, the value at the current timestamp can be viewed as the predicted value, raw score, at the next timestamp in expectation. 
+ /// [!include[io](~/../docs/samples/docs/api-reference/time-series-iid.md)] /// /// [!include[io](~/../docs/samples/docs/api-reference/time-series-scorer.md)] /// ]]> diff --git a/src/Microsoft.ML.TimeSeries/SsaChangePointDetector.cs b/src/Microsoft.ML.TimeSeries/SsaChangePointDetector.cs index bdcf106070..e6c72d4399 100644 --- a/src/Microsoft.ML.TimeSeries/SsaChangePointDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SsaChangePointDetector.cs @@ -206,27 +206,11 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat /// . + /// [!include[io](~/../docs/samples/docs/api-reference/io-time-series-change-point.md)] /// - /// | Output Column Name | Column Type | Description| - /// | -- | -- | -- | - /// | All input columns | Any | All input columns would pass by without being modified. - /// | `Prediction` | Known-sized vector of | It contains alert level (non-zero value means a change point), score, p-value, and martingale value. + /// [!include[io](~/../docs/samples/docs/api-reference/time-series-props.md)] /// - /// ### Estimator Characteristics - /// | | | - /// | -- | -- | - /// | Machine learning task | Time series analysis | - /// | Is normalization required? | No | - /// | Is caching required? | No | - /// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.TimeSeries | - /// - /// ### Training Algorithm Details - /// This class implements the general anomaly detection transform based on [Singular Spectrum Analysis (SSA)](https://en.wikipedia.org/wiki/Singular_spectrum_analysis). - /// SSA is a powerful framework for decomposing the time-series into trend, seasonality and noise components as well as forecasting the future values of the time-series. - /// In principle, SSA performs spectral analysis on the input time-series where each component in the spectrum corresponds to a trend, seasonal or noise component in the time-series. 
- /// For details of the Singular Spectrum Analysis (SSA), refer to [this document](http://arxiv.org/pdf/1206.6910.pdf). + /// [!include[io](~/../docs/samples/docs/api-reference/time-series-ssa.md)] /// /// [!include[io](~/../docs/samples/docs/api-reference/time-series-scorer.md)] /// ]]> diff --git a/src/Microsoft.ML.TimeSeries/SsaSpikeDetector.cs b/src/Microsoft.ML.TimeSeries/SsaSpikeDetector.cs index 36c3ecdbe6..43f40570b1 100644 --- a/src/Microsoft.ML.TimeSeries/SsaSpikeDetector.cs +++ b/src/Microsoft.ML.TimeSeries/SsaSpikeDetector.cs @@ -187,27 +187,11 @@ private static IRowMapper Create(IHostEnvironment env, ModelLoadContext ctx, Dat /// . + /// [!include[io](~/../docs/samples/docs/api-reference/io-time-series-spike.md)] /// - /// | Output Column Name | Column Type | Description| - /// | -- | -- | -- | - /// | All input columns | Any | All input columns would pass by without being modified. - /// | `Prediction` | Known-sized vector of | It contains alert level (non-zero value means a change point), score, and p-value. + /// [!include[io](~/../docs/samples/docs/api-reference/time-series-props.md)] /// - /// ### Estimator Characteristics - /// | | | - /// | -- | -- | - /// | Machine learning task | Time series analysis | - /// | Is normalization required? | No | - /// | Is caching required? | No | - /// | Required NuGet in addition to Microsoft.ML | Microsoft.ML.TimeSeries | - /// - /// ### Training Algorithm Details - /// This class implements the general anomaly detection transform based on [Singular Spectrum Analysis (SSA)](https://en.wikipedia.org/wiki/Singular_spectrum_analysis). - /// SSA is a powerful framework for decomposing the time-series into trend, seasonality and noise components as well as forecasting the future values of the time-series. - /// In principle, SSA performs spectral analysis on the input time-series where each component in the spectrum corresponds to a trend, seasonal or noise component in the time-series. 
- /// For details of the Singular Spectrum Analysis (SSA), refer to [this document](http://arxiv.org/pdf/1206.6910.pdf). + /// [!include[io](~/../docs/samples/docs/api-reference/time-series-ssa.md)] /// /// [!include[io](~/../docs/samples/docs/api-reference/time-series-scorer.md)] /// ]]> From 8c41fa88a7fecf0c14545dbbcd1bb7d769229068 Mon Sep 17 00:00:00 2001 From: Shauheen Date: Sat, 20 Apr 2019 15:43:33 -0700 Subject: [PATCH 5/5] Fixing typos --- docs/api-reference/io-time-series-change-point.md | 2 +- docs/api-reference/io-time-series-spike.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/api-reference/io-time-series-change-point.md b/docs/api-reference/io-time-series-change-point.md index 8f74950119..b33bde3345 100644 --- a/docs/api-reference/io-time-series-change-point.md +++ b/docs/api-reference/io-time-series-change-point.md @@ -1,6 +1,6 @@ ### Input and Output Columns There is only one input column and its type is . -This estimator adds the following outputs columns: +This estimator adds the following output columns: | Output Column Name | Column Type | Description| | -- | -- | -- | diff --git a/docs/api-reference/io-time-series-spike.md b/docs/api-reference/io-time-series-spike.md index 35d6ea2918..877672f7c3 100644 --- a/docs/api-reference/io-time-series-spike.md +++ b/docs/api-reference/io-time-series-spike.md @@ -1,6 +1,6 @@ ### Input and Output Columns There is only one input column and its type is . -This estimator adds the following outputs columns: +This estimator adds the following output columns: | Output Column Name | Column Type | Description| | -- | -- | -- |