diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs new file mode 100644 index 0000000000..8ea98c8d99 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/Forecasting.cs @@ -0,0 +1,94 @@ +using System; +using System.Collections.Generic; +using Microsoft.ML; +using Microsoft.ML.Transforms.TimeSeries; +using Microsoft.ML.TimeSeries; + +namespace Samples.Dynamic +{ + public static class Forecasting + { + // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot) and then + // does forecasting. + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var ml = new MLContext(); + + // Generate sample series data with a recurring pattern + const int SeasonalitySize = 5; + var data = new List() + { + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + }; + + // Convert data to IDataView. + var dataView = ml.Data.LoadFromEnumerable(data); + + // Setup arguments. + var inputColumnName = nameof(TimeSeriesData.Value); + + // Instantiate the forecasting model. + var model = ml.Forecasting.AdaptiveSingularSpectrumSequenceModeler(inputColumnName, data.Count, SeasonalitySize + 1, SeasonalitySize, + 1, AdaptiveSingularSpectrumSequenceModeler.RankSelectionMethod.Exact, null, SeasonalitySize / 2, false, false); + + // Train. + model.Train(dataView); + + // Forecast next five values. + var forecast = model.Forecast(5); + Console.WriteLine($"Forecasted values:"); + Console.WriteLine("[{0}]", string.Join(", ", forecast)); + // Forecasted values: + // [2.452744, 2.589339, 2.729183, 2.873005, 3.028931] + + // Update with new observations. + dataView = ml.Data.LoadFromEnumerable(new List() { new TimeSeriesData(0), new TimeSeriesData(0), new TimeSeriesData(0), new TimeSeriesData(0) }); + model.Update(dataView); + + // Checkpoint. + ml.Model.SaveForecastingModel(model, "model.zip"); + + // Load the checkpointed model from disk. + var modelCopy = ml.Model.LoadForecastingModel("model.zip"); + + // Forecast with the checkpointed model loaded from disk. + forecast = modelCopy.Forecast(5); + Console.WriteLine("[{0}]", string.Join(", ", forecast)); + // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081] + + // Forecast with the original model(that was checkpointed to disk). + forecast = model.Forecast(5); + Console.WriteLine("[{0}]", string.Join(", ", forecast)); + // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081] + + } + + class TimeSeriesData + { + public float Value; + + public TimeSeriesData(float value) + { + Value = value; + } + } + } +} diff --git a/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs new file mode 100644 index 0000000000..b5c3c5c9c2 --- /dev/null +++ b/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/ForecastingWithConfidenceInterval.cs @@ -0,0 +1,113 @@ +using System; +using System.Collections.Generic; +using Microsoft.ML; +using Microsoft.ML.Transforms.TimeSeries; +using Microsoft.ML.TimeSeries; + +namespace Samples.Dynamic +{ + public static class ForecastingWithConfidenceInternal + { + // This example creates a time series (list of Data with the i-th element corresponding to the i-th time slot) and then + // does forecasting. + public static void Example() + { + // Create a new ML context, for ML.NET operations. It can be used for exception tracking and logging, + // as well as the source of randomness. + var ml = new MLContext(); + + // Generate sample series data with a recurring pattern + const int SeasonalitySize = 5; + var data = new List() + { + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + + new TimeSeriesData(0), + new TimeSeriesData(1), + new TimeSeriesData(2), + new TimeSeriesData(3), + new TimeSeriesData(4), + }; + + // Convert data to IDataView. + var dataView = ml.Data.LoadFromEnumerable(data); + + // Setup arguments. + var inputColumnName = nameof(TimeSeriesData.Value); + + // Instantiate forecasting model. + var model = ml.Forecasting.AdaptiveSingularSpectrumSequenceModeler(inputColumnName, data.Count, SeasonalitySize + 1, SeasonalitySize, + 1, AdaptiveSingularSpectrumSequenceModeler.RankSelectionMethod.Exact, null, SeasonalitySize / 2, shouldComputeForecastIntervals: true, false); + + // Train. + model.Train(dataView); + + // Forecast next five values with confidence internal. + float[] forecast; + float[] confidenceIntervalLowerBounds; + float[] confidenceIntervalUpperBounds; + model.ForecastWithConfidenceIntervals(5, out forecast, out confidenceIntervalLowerBounds, out confidenceIntervalUpperBounds); + PrintForecastValuesAndIntervals(forecast, confidenceIntervalLowerBounds, confidenceIntervalUpperBounds); + // Forecasted values: + // [2.452744, 2.589339, 2.729183, 2.873005, 3.028931] + // Confidence intervals: + // [-0.2235315 - 5.12902] [-0.08777174 - 5.266451] [0.05076938 - 5.407597] [0.1925406 - 5.553469] [0.3469928 - 5.71087] + + // Update with new observations. + dataView = ml.Data.LoadFromEnumerable(new List() { new TimeSeriesData(0), new TimeSeriesData(0), new TimeSeriesData(0), new TimeSeriesData(0) }); + model.Update(dataView); + + // Checkpoint. + ml.Model.SaveForecastingModel(model, "model.zip"); + + // Load the checkpointed model from disk. + var modelCopy = ml.Model.LoadForecastingModel("model.zip"); + + // Forecast with the checkpointed model loaded from disk. + modelCopy.ForecastWithConfidenceIntervals(5, out forecast, out confidenceIntervalLowerBounds, out confidenceIntervalUpperBounds); + PrintForecastValuesAndIntervals(forecast, confidenceIntervalLowerBounds, confidenceIntervalUpperBounds); + // Forecasted values: + // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081] + // Confidence intervals: + // [-1.808158 - 3.544394] [-1.8586 - 3.495622] [-1.871486 - 3.485341] [-1.836414 - 3.524514] [-1.736431 - 3.627447] + + // Forecast with the original model(that was checkpointed to disk). + model.ForecastWithConfidenceIntervals(5, out forecast, out confidenceIntervalLowerBounds, out confidenceIntervalUpperBounds); + PrintForecastValuesAndIntervals(forecast, confidenceIntervalLowerBounds, confidenceIntervalUpperBounds); + // Forecasted values: + // [0.8681176, 0.8185108, 0.8069275, 0.84405, 0.9455081] + // Confidence intervals: + // [-1.808158 - 3.544394] [-1.8586 - 3.495622] [-1.871486 - 3.485341] [-1.836414 - 3.524514] [-1.736431 - 3.627447] + } + + static void PrintForecastValuesAndIntervals(float[] forecast, float[] confidenceIntervalLowerBounds, float[] confidenceIntervalUpperBounds) + { + Console.WriteLine($"Forecasted values:"); + Console.WriteLine("[{0}]", string.Join(", ", forecast)); + Console.WriteLine($"Confidence intervals:"); + for (int index = 0; index < forecast.Length; index++) + Console.Write($"[{confidenceIntervalLowerBounds[index]} - {confidenceIntervalUpperBounds[index]}] "); + Console.WriteLine(); + } + + class TimeSeriesData + { + public float Value; + + public TimeSeriesData(float value) + { + Value = value; + } + } + } +} diff --git a/src/Microsoft.ML.Data/MLContext.cs b/src/Microsoft.ML.Data/MLContext.cs index 5dbf747927..77838e8829 100644 --- a/src/Microsoft.ML.Data/MLContext.cs +++ b/src/Microsoft.ML.Data/MLContext.cs @@ -46,6 +46,11 @@ public sealed class MLContext : IHostEnvironment /// public AnomalyDetectionCatalog AnomalyDetection { get; } + /// + /// Trainers and tasks specific to forecasting problems. + /// + public ForecastingCatalog Forecasting { get; } + /// /// Data processing operations. /// @@ -89,6 +94,7 @@ public MLContext(int? seed = null) Clustering = new ClusteringCatalog(_env); Ranking = new RankingCatalog(_env); AnomalyDetection = new AnomalyDetectionCatalog(_env); + Forecasting = new ForecastingCatalog(_env); Transforms = new TransformsCatalog(_env); Model = new ModelOperationsCatalog(_env); Data = new DataOperationsCatalog(_env); diff --git a/src/Microsoft.ML.Data/TrainCatalog.cs b/src/Microsoft.ML.Data/TrainCatalog.cs index a1280ce090..2331b51792 100644 --- a/src/Microsoft.ML.Data/TrainCatalog.cs +++ b/src/Microsoft.ML.Data/TrainCatalog.cs @@ -704,4 +704,31 @@ public AnomalyDetectionMetrics Evaluate(IDataView data, string labelColumnName = return eval.Evaluate(data, labelColumnName, scoreColumnName, predictedLabelColumnName); } } + + /// + /// Class used by to create instances of forecasting components. + /// + public sealed class ForecastingCatalog : TrainCatalogBase + { + /// + /// The list of trainers for performing forecasting. + /// + public Forecasters Trainers { get; } + + internal ForecastingCatalog(IHostEnvironment env) : base(env, nameof(ForecastingCatalog)) + { + Trainers = new Forecasters(this); + } + + /// + /// Class used by to create instances of forecasting trainers. + /// + public sealed class Forecasters : CatalogInstantiatorBase + { + internal Forecasters(ForecastingCatalog catalog) + : base(catalog) + { + } + } + } } diff --git a/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs b/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs index f6b230dce2..da14de2bcb 100644 --- a/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs +++ b/src/Microsoft.ML.TimeSeries/AdaptiveSingularSpectrumSequenceModeler.cs @@ -10,10 +10,17 @@ using Microsoft.ML.Internal.CpuMath; using Microsoft.ML.Internal.Utilities; using Microsoft.ML.Runtime; +using Microsoft.ML.TimeSeries; using Microsoft.ML.Transforms.TimeSeries; -[assembly: LoadableClass(typeof(AdaptiveSingularSpectrumSequenceModeler), typeof(AdaptiveSingularSpectrumSequenceModeler), null, typeof(SignatureLoadModel), +[assembly: LoadableClass(typeof(AdaptiveSingularSpectrumSequenceModeler.AdaptiveSingularSpectrumSequenceModelerInternal), + typeof(AdaptiveSingularSpectrumSequenceModeler.AdaptiveSingularSpectrumSequenceModelerInternal), null, typeof(SignatureLoadModel), "SSA Sequence Modeler", + AdaptiveSingularSpectrumSequenceModeler.AdaptiveSingularSpectrumSequenceModelerInternal.LoaderSignature)] + +[assembly: LoadableClass(typeof(AdaptiveSingularSpectrumSequenceModeler), + typeof(AdaptiveSingularSpectrumSequenceModeler), null, typeof(SignatureLoadModel), + "SSA Sequence Modeler Wrapper", AdaptiveSingularSpectrumSequenceModeler.LoaderSignature)] namespace Microsoft.ML.Transforms.TimeSeries @@ -22,10 +29,11 @@ namespace Microsoft.ML.Transforms.TimeSeries /// This class implements basic Singular Spectrum Analysis (SSA) model for modeling univariate time-series. /// For the details of the model, refer to http://arxiv.org/pdf/1206.6910.pdf. /// - internal sealed class AdaptiveSingularSpectrumSequenceModeler : SequenceModelerBase + public sealed class AdaptiveSingularSpectrumSequenceModeler : ICanForecast { - internal const string LoaderSignature = "SSAModel"; - + /// + /// Ranking selection method for the signal. + /// public enum RankSelectionMethod { Fixed, @@ -33,24 +41,17 @@ public enum RankSelectionMethod Fast } - internal sealed class SsaForecastResult : ForecastResultBase - { - public VBuffer ForecastStandardDeviation; - public VBuffer UpperBound; - public VBuffer LowerBound; - public Single ConfidenceLevel; - - internal bool CanComputeForecastIntervals; - internal Single BoundOffset; - - public bool IsVarianceValid { get { return CanComputeForecastIntervals; } } - } - + /// + /// Growth ratio. Defined as Growth^(1/TimeSpan). + /// public struct GrowthRatio { private int _timeSpan; private Double _growth; + /// + /// Time span of growth ratio. Must be strictly positive. + /// public int TimeSpan { get @@ -64,6 +65,9 @@ public int TimeSpan } } + /// + /// Growth. Must be non-negative. + /// public Double Growth { get @@ -89,1462 +93,1612 @@ public GrowthRatio(int timeSpan = 1, double growth = Double.PositiveInfinity) public Double Ratio { get { return Math.Pow(_growth, 1d / _timeSpan); } } } - public sealed class ModelInfo + private AdaptiveSingularSpectrumSequenceModelerInternal _modeler; + + private readonly string _inputColumnName; + + internal const string LoaderSignature = "ForecastModel"; + + private readonly IHost _host; + + private static VersionInfo GetVersionInfo() { - /// - /// The singular values of the SSA of the input time-series - /// - public Single[] Spectrum; + return new VersionInfo( + modelSignature: "SSAMODLW", + verWrittenCur: 0x00010001, // Initial + verReadableCur: 0x00010001, + verWeCanReadBack: 0x00010001, + loaderSignature: LoaderSignature, + loaderAssemblyName: typeof(AdaptiveSingularSpectrumSequenceModeler).Assembly.FullName); + } - /// - /// The roots of the characteristic polynomial after stabilization (meaningful only if the model is stabilized.) - /// - public Complex[] RootsAfterStabilization; + internal AdaptiveSingularSpectrumSequenceModeler(IHostEnvironment env, string inputColumnName, int trainSize, int seriesLength, int windowSize, Single discountFactor = 1, + RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, int? rank = null, int? maxRank = null, + bool shouldComputeForecastIntervals = true, bool shouldstablize = true, bool shouldMaintainInfo = false, GrowthRatio? maxGrowth = null) + { + Contracts.CheckValue(env, nameof(env)); + _host = env.Register(LoaderSignature); + _host.CheckParam(!string.IsNullOrEmpty(inputColumnName), nameof(inputColumnName)); - /// - /// The roots of the characteristic polynomial before stabilization (meaningful only if the model is stabilized.) - /// - public Complex[] RootsBeforeStabilization; + _inputColumnName = inputColumnName; + _modeler = new AdaptiveSingularSpectrumSequenceModelerInternal(env, trainSize, seriesLength, windowSize, discountFactor, + rankSelectionMethod, rank, maxRank, shouldComputeForecastIntervals, shouldstablize, shouldMaintainInfo, maxGrowth); + } - /// - /// The rank of the model - /// - public int Rank; + internal AdaptiveSingularSpectrumSequenceModeler(IHostEnvironment env, ModelLoadContext ctx) + { + Contracts.CheckValue(env, nameof(env)); + _host = env.Register(LoaderSignature); + _inputColumnName = ctx.Reader.ReadString(); + ctx.LoadModel(_host, out _modeler, "ForecastWrapper"); + } - /// - /// The window size used to compute the SSA model - /// - public int WindowSize; + /// + /// This class implements basic Singular Spectrum Analysis (SSA) model for modeling univariate time-series. + /// For the details of the model, refer to http://arxiv.org/pdf/1206.6910.pdf. + /// + internal sealed class AdaptiveSingularSpectrumSequenceModelerInternal : SequenceModelerBase + { + internal const string LoaderSignature = "SSAModel"; - /// - /// The auto-regressive coefficients learned by the model - /// - public Single[] AutoRegressiveCoefficients; + internal sealed class SsaForecastResult : ForecastResultBase + { + public VBuffer ForecastStandardDeviation; + public VBuffer UpperBound; + public VBuffer LowerBound; + public Single ConfidenceLevel; - /// - /// The flag indicating whether the model has been trained - /// - public bool IsTrained; + internal bool CanComputeForecastIntervals; + internal Single BoundOffset; - /// - /// The flag indicating a naive model is trained instead of SSA - /// - public bool IsNaiveModelTrained; + public bool IsVarianceValid { get { return CanComputeForecastIntervals; } } + } + + public sealed class ModelInfo + { + /// + /// The singular values of the SSA of the input time-series + /// + public Single[] Spectrum; + + /// + /// The roots of the characteristic polynomial after stabilization (meaningful only if the model is stabilized.) + /// + public Complex[] RootsAfterStabilization; + + /// + /// The roots of the characteristic polynomial before stabilization (meaningful only if the model is stabilized.) + /// + public Complex[] RootsBeforeStabilization; + + /// + /// The rank of the model + /// + public int Rank; + + /// + /// The window size used to compute the SSA model + /// + public int WindowSize; + + /// + /// The auto-regressive coefficients learned by the model + /// + public Single[] AutoRegressiveCoefficients; + + /// + /// The flag indicating whether the model has been trained + /// + public bool IsTrained; + + /// + /// The flag indicating a naive model is trained instead of SSA + /// + public bool IsNaiveModelTrained; + + /// + /// The flag indicating whether the learned model has an exponential trend (meaningful only if the model is stabilized.) + /// + public bool IsExponentialTrendPresent; + + /// + /// The flag indicating whether the learned model has a polynomial trend (meaningful only if the model is stabilized.) + /// + public bool IsPolynomialTrendPresent; + + /// + /// The flag indicating whether the learned model has been stabilized + /// + public bool IsStabilized; + + /// + /// The flag indicating whether any artificial seasonality (a seasonality with period greater than the window size) is removed + /// (meaningful only if the model is stabilized.) + /// + public bool IsArtificialSeasonalityRemoved; + + /// + /// The exponential trend magnitude (meaningful only if the model is stabilized.) + /// + public Double ExponentialTrendFactor; + } + + private ModelInfo _info; /// - /// The flag indicating whether the learned model has an exponential trend (meaningful only if the model is stabilized.) + /// Returns the meta information about the learned model. /// - public bool IsExponentialTrendPresent; + public ModelInfo Info + { + get { return _info; } + } + + private Single[] _alpha; + private Single[] _state; + private readonly FixedSizeQueue _buffer; + private CpuAlignedVector _x; + private CpuAlignedVector _xSmooth; + private int _windowSize; + private readonly int _seriesLength; + private readonly RankSelectionMethod _rankSelectionMethod; + private readonly Single _discountFactor; + private readonly int _trainSize; + private int _maxRank; + private readonly Double _maxTrendRatio; + private readonly bool _shouldStablize; + private readonly bool _shouldMaintainInfo; + + private readonly IHost _host; + + private CpuAlignedMatrixRow _wTrans; + private Single _observationNoiseVariance; + private Single _observationNoiseMean; + private Single _autoregressionNoiseVariance; + private Single _autoregressionNoiseMean; + + private int _rank; + private CpuAlignedVector _y; + private Single _nextPrediction; /// - /// The flag indicating whether the learned model has a polynomial trend (meaningful only if the model is stabilized.) + /// Determines whether the confidence interval required for forecasting. /// - public bool IsPolynomialTrendPresent; + public bool ShouldComputeForecastIntervals { get; set; } /// - /// The flag indicating whether the learned model has been stabilized + /// Returns the rank of the subspace used for SSA projection (parameter r). /// - public bool IsStabilized; + public int Rank { get { return _rank; } } /// - /// The flag indicating whether any artificial seasonality (a seasonality with period greater than the window size) is removed - /// (meaningful only if the model is stabilized.) + /// Returns the smoothed (via SSA projection) version of the last series observation fed to the model. /// - public bool IsArtificialSeasonalityRemoved; + public Single LastSmoothedValue { get { return _state[_windowSize - 2]; } } /// - /// The exponential trend magnitude (meaningful only if the model is stabilized.) + /// Returns the last series observation fed to the model. /// - public Double ExponentialTrendFactor; - } - - private ModelInfo _info; - - /// - /// Returns the meta information about the learned model. - /// - public ModelInfo Info - { - get { return _info; } - } + public Single LastValue { get { return _buffer.Count > 0 ? _buffer[_buffer.Count - 1] : Single.NaN; } } - private Single[] _alpha; - private Single[] _state; - private readonly FixedSizeQueue _buffer; - private CpuAlignedVector _x; - private CpuAlignedVector _xSmooth; - private int _windowSize; - private readonly int _seriesLength; - private readonly RankSelectionMethod _rankSelectionMethod; - private readonly Single _discountFactor; - private readonly int _trainSize; - private int _maxRank; - private readonly Double _maxTrendRatio; - private readonly bool _shouldStablize; - private readonly bool _shouldMaintainInfo; + private static VersionInfo GetVersionInfo() + { + return new VersionInfo( + modelSignature: "SSAMODLR", + //verWrittenCur: 0x00010001, // Initial + verWrittenCur: 0x00010002, // Added saving _state and _nextPrediction + verReadableCur: 0x00010002, + verWeCanReadBack: 0x00010001, + loaderSignature: LoaderSignature, + loaderAssemblyName: typeof(AdaptiveSingularSpectrumSequenceModelerInternal).Assembly.FullName); + } - private readonly IHost _host; + private const int VersionSavingStateAndPrediction = 0x00010002; - private CpuAlignedMatrixRow _wTrans; - private Single _observationNoiseVariance; - private Single _observationNoiseMean; - private Single _autoregressionNoiseVariance; - private Single _autoregressionNoiseMean; + /// + /// The constructor for Adaptive SSA model. + /// + /// The exception context. + /// The length of series from the begining used for training. + /// The length of series that is kept in buffer for modeling (parameter N). + /// The length of the window on the series for building the trajectory matrix (parameter L). + /// The discount factor in [0,1] used for online updates (default = 1). + /// The rank selection method (default = Exact). + /// The desired rank of the subspace used for SSA projection (parameter r). This parameter should be in the range in [1, windowSize]. + /// If set to null, the rank is automatically determined based on prediction error minimization. (default = null) + /// The maximum rank considered during the rank selection process. If not provided (i.e. set to null), it is set to windowSize - 1. + /// The flag determining whether the confidence bounds for the point forecasts should be computed. (default = true) + /// The flag determining whether the model should be stabilized. + /// The flag determining whether the meta information for the model needs to be maintained. + /// The maximum growth on the exponential trend + public AdaptiveSingularSpectrumSequenceModelerInternal(IHostEnvironment env, int trainSize, int seriesLength, int windowSize, Single discountFactor = 1, + RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, int? rank = null, int? maxRank = null, + bool shouldComputeForecastIntervals = true, bool shouldstablize = true, bool shouldMaintainInfo = false, GrowthRatio? maxGrowth = null) + : base() + { + Contracts.CheckValue(env, nameof(env)); + _host = env.Register(LoaderSignature); + _host.CheckParam(windowSize >= 2, nameof(windowSize), "The window size should be at least 2."); // ...because otherwise we have nothing to autoregress on + _host.CheckParam(seriesLength > windowSize, nameof(seriesLength), "The series length should be greater than the window size."); + _host.Check(trainSize > 2 * windowSize, "The input series length for training should be greater than twice the window size."); + _host.CheckParam(0 <= discountFactor && discountFactor <= 1, nameof(discountFactor), "The discount factor should be in [0,1]."); + + if (maxRank != null) + { + _maxRank = (int)maxRank; + _host.CheckParam(1 <= _maxRank && _maxRank < windowSize, nameof(maxRank), + "The max rank should be in [1, windowSize)."); + } + else + _maxRank = windowSize - 1; - private int _rank; - private CpuAlignedVector _y; - private Single _nextPrediction; + _rankSelectionMethod = rankSelectionMethod; + if (_rankSelectionMethod == RankSelectionMethod.Fixed) + { + if (rank != null) + { + _rank = (int)rank; + _host.CheckParam(1 <= _rank && _rank < windowSize, nameof(rank), "The rank should be in [1, windowSize)."); + } + else + _rank = _maxRank; + } - /// - /// Determines whether the confidence interval required for forecasting. - /// - public bool ShouldComputeForecastIntervals { get; set; } + _seriesLength = seriesLength; + _windowSize = windowSize; + _trainSize = trainSize; + _discountFactor = discountFactor; - /// - /// Returns the rank of the subspace used for SSA projection (parameter r). - /// - public int Rank { get { return _rank; } } + _buffer = new FixedSizeQueue(seriesLength); - /// - /// Returns the smoothed (via SSA projection) version of the last series observation fed to the model. - /// - public Single LastSmoothedValue { get { return _state[_windowSize - 2]; } } + _alpha = new Single[windowSize - 1]; + _state = new Single[windowSize - 1]; + _x = new CpuAlignedVector(windowSize, CpuMathUtils.GetVectorAlignment()); + _xSmooth = new CpuAlignedVector(windowSize, CpuMathUtils.GetVectorAlignment()); + ShouldComputeForecastIntervals = shouldComputeForecastIntervals; - /// - /// Returns the last series observation fed to the model. - /// - public Single LastValue { get { return _buffer.Count > 0 ? _buffer[_buffer.Count - 1] : Single.NaN; } } + _observationNoiseVariance = 0; + _autoregressionNoiseVariance = 0; + _observationNoiseMean = 0; + _autoregressionNoiseMean = 0; + _shouldStablize = shouldstablize; + _maxTrendRatio = maxGrowth == null ? Double.PositiveInfinity : ((GrowthRatio)maxGrowth).Ratio; - private static VersionInfo GetVersionInfo() - { - return new VersionInfo( - modelSignature: "SSAMODLR", - //verWrittenCur: 0x00010001, // Initial - verWrittenCur: 0x00010002, // Added saving _state and _nextPrediction - verReadableCur: 0x00010002, - verWeCanReadBack: 0x00010001, - loaderSignature: LoaderSignature, - loaderAssemblyName: typeof(AdaptiveSingularSpectrumSequenceModeler).Assembly.FullName); - } + _shouldMaintainInfo = shouldMaintainInfo; + if (_shouldMaintainInfo) + { + _info = new ModelInfo(); + _info.WindowSize = _windowSize; + } + } - private const int VersionSavingStateAndPrediction = 0x00010002; + /// + /// The copy constructor. + /// + /// An object whose contents are copied to the current object. + private AdaptiveSingularSpectrumSequenceModelerInternal(AdaptiveSingularSpectrumSequenceModelerInternal model) + { + _host = model._host.Register(LoaderSignature); + _host.Assert(model._windowSize >= 2); + _host.Assert(model._seriesLength > model._windowSize); + _host.Assert(model._trainSize > 2 * model._windowSize); + _host.Assert(0 <= model._discountFactor && model._discountFactor <= 1); + _host.Assert(1 <= model._rank && model._rank < model._windowSize); + + _rank = model._rank; + _maxRank = model._maxRank; + _rankSelectionMethod = model._rankSelectionMethod; + _seriesLength = model._seriesLength; + _windowSize = model._windowSize; + _trainSize = model._trainSize; + _discountFactor = model._discountFactor; + ShouldComputeForecastIntervals = model.ShouldComputeForecastIntervals; + _observationNoiseVariance = model._observationNoiseVariance; + _autoregressionNoiseVariance = model._autoregressionNoiseVariance; + _observationNoiseMean = model._observationNoiseMean; + _autoregressionNoiseMean = model._autoregressionNoiseMean; + _nextPrediction = model._nextPrediction; + _maxTrendRatio = model._maxTrendRatio; + _shouldStablize = model._shouldStablize; + _shouldMaintainInfo = model._shouldMaintainInfo; + _info = model._info; + _buffer = model._buffer.Clone(); + _alpha = new Single[_windowSize - 1]; + Array.Copy(model._alpha, _alpha, _windowSize - 1); + _state = new Single[_windowSize - 1]; + Array.Copy(model._state, _state, _windowSize - 1); - /// - /// The constructor for Adaptive SSA model. - /// - /// The exception context. - /// The length of series from the begining used for training. - /// The length of series that is kept in buffer for modeling (parameter N). - /// The length of the window on the series for building the trajectory matrix (parameter L). - /// The discount factor in [0,1] used for online updates (default = 1). - /// The rank selection method (default = Exact). - /// The desired rank of the subspace used for SSA projection (parameter r). This parameter should be in the range in [1, windowSize]. - /// If set to null, the rank is automatically determined based on prediction error minimization. (default = null) - /// The maximum rank considered during the rank selection process. If not provided (i.e. set to null), it is set to windowSize - 1. - /// The flag determining whether the confidence bounds for the point forecasts should be computed. (default = true) - /// The flag determining whether the model should be stabilized. - /// The flag determining whether the meta information for the model needs to be maintained. - /// The maximum growth on the exponential trend - public AdaptiveSingularSpectrumSequenceModeler(IHostEnvironment env, int trainSize, int seriesLength, int windowSize, Single discountFactor = 1, - RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, int? rank = null, int? maxRank = null, - bool shouldComputeForecastIntervals = true, bool shouldstablize = true, bool shouldMaintainInfo = false, GrowthRatio? maxGrowth = null) - : base() - { - Contracts.CheckValue(env, nameof(env)); - _host = env.Register(LoaderSignature); - _host.CheckParam(windowSize >= 2, nameof(windowSize), "The window size should be at least 2."); // ...because otherwise we have nothing to autoregress on - _host.CheckParam(seriesLength > windowSize, nameof(seriesLength), "The series length should be greater than the window size."); - _host.Check(trainSize > 2 * windowSize, "The input series length for training should be greater than twice the window size."); - _host.CheckParam(0 <= discountFactor && discountFactor <= 1, nameof(discountFactor), "The discount factor should be in [0,1]."); + _x = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + _xSmooth = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); - if (maxRank != null) - { - _maxRank = (int)maxRank; - _host.CheckParam(1 <= _maxRank && _maxRank < windowSize, nameof(maxRank), - "The max rank should be in [1, windowSize)."); + if (model._wTrans != null) + { + _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); + _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); + _wTrans.CopyFrom(model._wTrans); + } } - else - _maxRank = windowSize - 1; - _rankSelectionMethod = rankSelectionMethod; - if (_rankSelectionMethod == RankSelectionMethod.Fixed) + public AdaptiveSingularSpectrumSequenceModelerInternal(IHostEnvironment env, ModelLoadContext ctx) { - if (rank != null) + Contracts.CheckValue(env, nameof(env)); + _host = env.Register(LoaderSignature); + + // *** Binary format *** + // int: _seriesLength + // int: _windowSize + // int: _trainSize + // int: _rank + // float: _discountFactor + // RankSelectionMethod: _rankSelectionMethod + // bool: isWeightSet + // float[]: _alpha + // float[]: _state + // bool: ShouldComputeForecastIntervals + // float: _observationNoiseVariance + // float: _autoregressionNoiseVariance + // float: _observationNoiseMean + // float: _autoregressionNoiseMean + // float: _nextPrediction + // int: _maxRank + // bool: _shouldStablize + // bool: _shouldMaintainInfo + // double: _maxTrendRatio + // float[]: _wTrans (only if _isWeightSet == true) + + _seriesLength = ctx.Reader.ReadInt32(); + // Do an early check. We'll have the stricter check later. + _host.CheckDecode(_seriesLength > 2); + + _windowSize = ctx.Reader.ReadInt32(); + _host.CheckDecode(_windowSize >= 2); + _host.CheckDecode(_seriesLength > _windowSize); + + _trainSize = ctx.Reader.ReadInt32(); + _host.CheckDecode(_trainSize > 2 * _windowSize); + + _rank = ctx.Reader.ReadInt32(); + _host.CheckDecode(1 <= _rank && _rank < _windowSize); + + _discountFactor = ctx.Reader.ReadSingle(); + _host.CheckDecode(0 <= _discountFactor && _discountFactor <= 1); + + byte temp; + temp = ctx.Reader.ReadByte(); + _rankSelectionMethod = (RankSelectionMethod)temp; + bool isWeightSet = ctx.Reader.ReadBoolByte(); + + _alpha = ctx.Reader.ReadFloatArray(); + _host.CheckDecode(Utils.Size(_alpha) == _windowSize - 1); + + if (ctx.Header.ModelVerReadable >= VersionSavingStateAndPrediction) { - _rank = (int)rank; - _host.CheckParam(1 <= _rank && _rank < windowSize, nameof(rank), "The rank should be in [1, windowSize)."); + _state = ctx.Reader.ReadFloatArray(); + _host.CheckDecode(Utils.Size(_state) == _windowSize - 1); } else - _rank = _maxRank; - } + _state = new Single[_windowSize - 1]; - _seriesLength = seriesLength; - _windowSize = windowSize; - _trainSize = trainSize; - _discountFactor = discountFactor; + ShouldComputeForecastIntervals = ctx.Reader.ReadBoolByte(); - _buffer = new FixedSizeQueue(seriesLength); + _observationNoiseVariance = ctx.Reader.ReadSingle(); + _host.CheckDecode(_observationNoiseVariance >= 0); - _alpha = new Single[windowSize - 1]; - _state = new Single[windowSize - 1]; - _x = new CpuAlignedVector(windowSize, CpuMathUtils.GetVectorAlignment()); - _xSmooth = new CpuAlignedVector(windowSize, CpuMathUtils.GetVectorAlignment()); - ShouldComputeForecastIntervals = shouldComputeForecastIntervals; + _autoregressionNoiseVariance = ctx.Reader.ReadSingle(); + _host.CheckDecode(_autoregressionNoiseVariance >= 0); - _observationNoiseVariance = 0; - _autoregressionNoiseVariance = 0; - _observationNoiseMean = 0; - _autoregressionNoiseMean = 0; - _shouldStablize = shouldstablize; - _maxTrendRatio = maxGrowth == null ? Double.PositiveInfinity : ((GrowthRatio)maxGrowth).Ratio; + _observationNoiseMean = ctx.Reader.ReadSingle(); + _autoregressionNoiseMean = ctx.Reader.ReadSingle(); + if (ctx.Header.ModelVerReadable >= VersionSavingStateAndPrediction) + _nextPrediction = ctx.Reader.ReadSingle(); - _shouldMaintainInfo = shouldMaintainInfo; - if (_shouldMaintainInfo) - { - _info = new ModelInfo(); - _info.WindowSize = _windowSize; - } - } + _maxRank = ctx.Reader.ReadInt32(); + _host.CheckDecode(1 <= _maxRank && _maxRank <= _windowSize - 1); - /// - /// The copy constructor. - /// - /// An object whose contents are copied to the current object. - private AdaptiveSingularSpectrumSequenceModeler(AdaptiveSingularSpectrumSequenceModeler model) - { - _host = model._host.Register(LoaderSignature); - _host.Assert(model._windowSize >= 2); - _host.Assert(model._seriesLength > model._windowSize); - _host.Assert(model._trainSize > 2 * model._windowSize); - _host.Assert(0 <= model._discountFactor && model._discountFactor <= 1); - _host.Assert(1 <= model._rank && model._rank < model._windowSize); - - _rank = model._rank; - _maxRank = model._maxRank; - _rankSelectionMethod = model._rankSelectionMethod; - _seriesLength = model._seriesLength; - _windowSize = model._windowSize; - _trainSize = model._trainSize; - _discountFactor = model._discountFactor; - ShouldComputeForecastIntervals = model.ShouldComputeForecastIntervals; - _observationNoiseVariance = model._observationNoiseVariance; - _autoregressionNoiseVariance = model._autoregressionNoiseVariance; - _observationNoiseMean = model._observationNoiseMean; - _autoregressionNoiseMean = model._autoregressionNoiseMean; - _nextPrediction = model._nextPrediction; - _maxTrendRatio = model._maxTrendRatio; - _shouldStablize = model._shouldStablize; - _shouldMaintainInfo = model._shouldMaintainInfo; - _info = model._info; - _buffer = model._buffer.Clone(); - _alpha = new Single[_windowSize - 1]; - Array.Copy(model._alpha, _alpha, _windowSize - 1); - _state = new Single[_windowSize - 1]; - Array.Copy(model._state, _state, _windowSize - 1); - - _x = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); - _xSmooth = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); - - if (model._wTrans != null) - { - _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); - _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); - _wTrans.CopyFrom(model._wTrans); - } - } + _shouldStablize = ctx.Reader.ReadBoolByte(); + _shouldMaintainInfo = ctx.Reader.ReadBoolByte(); + if (_shouldMaintainInfo) + { + _info = new ModelInfo(); + _info.AutoRegressiveCoefficients = new Single[_windowSize - 1]; + Array.Copy(_alpha, _info.AutoRegressiveCoefficients, _windowSize - 1); - public AdaptiveSingularSpectrumSequenceModeler(IHostEnvironment env, ModelLoadContext ctx) - { - Contracts.CheckValue(env, nameof(env)); - _host = env.Register(LoaderSignature); + _info.IsStabilized = _shouldStablize; + _info.Rank = _rank; + _info.WindowSize = _windowSize; + } - // *** Binary format *** - // int: _seriesLength - // int: _windowSize - // int: _trainSize - // int: _rank - // float: _discountFactor - // RankSelectionMethod: _rankSelectionMethod - // bool: isWeightSet - // float[]: _alpha - // float[]: _state - // bool: ShouldComputeForecastIntervals - // float: _observationNoiseVariance - // float: _autoregressionNoiseVariance - // float: _observationNoiseMean - // float: _autoregressionNoiseMean - // float: _nextPrediction - // int: _maxRank - // bool: _shouldStablize - // bool: _shouldMaintainInfo - // double: _maxTrendRatio - // float[]: _wTrans (only if _isWeightSet == true) - - _seriesLength = ctx.Reader.ReadInt32(); - // Do an early check. We'll have the stricter check later. - _host.CheckDecode(_seriesLength > 2); - - _windowSize = ctx.Reader.ReadInt32(); - _host.CheckDecode(_windowSize >= 2); - _host.CheckDecode(_seriesLength > _windowSize); - - _trainSize = ctx.Reader.ReadInt32(); - _host.CheckDecode(_trainSize > 2 * _windowSize); - - _rank = ctx.Reader.ReadInt32(); - _host.CheckDecode(1 <= _rank && _rank < _windowSize); - - _discountFactor = ctx.Reader.ReadSingle(); - _host.CheckDecode(0 <= _discountFactor && _discountFactor <= 1); - - byte temp; - temp = ctx.Reader.ReadByte(); - _rankSelectionMethod = (RankSelectionMethod)temp; - bool isWeightSet = ctx.Reader.ReadBoolByte(); - - _alpha = ctx.Reader.ReadFloatArray(); - _host.CheckDecode(Utils.Size(_alpha) == _windowSize - 1); - - if (ctx.Header.ModelVerReadable >= VersionSavingStateAndPrediction) - { - _state = ctx.Reader.ReadFloatArray(); - _host.CheckDecode(Utils.Size(_state) == _windowSize - 1); + _maxTrendRatio = ctx.Reader.ReadDouble(); + _host.CheckDecode(_maxTrendRatio >= 0); + + if (isWeightSet) + { + var tempArray = ctx.Reader.ReadFloatArray(); + _host.CheckDecode(Utils.Size(tempArray) == _rank * _windowSize); + _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); + int i = 0; + _wTrans.CopyFrom(tempArray, ref i); + tempArray = ctx.Reader.ReadFloatArray(); + i = 0; + _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); + _y.CopyFrom(tempArray, ref i); + } + + _buffer = TimeSeriesUtils.DeserializeFixedSizeQueueSingle(ctx.Reader, _host); + _x = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + _xSmooth = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); } - else - _state = new Single[_windowSize - 1]; - ShouldComputeForecastIntervals = ctx.Reader.ReadBoolByte(); + private protected override void SaveModel(ModelSaveContext ctx) + { + _host.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(); + ctx.SetVersionInfo(GetVersionInfo()); + + _host.Assert(_windowSize >= 2); + _host.Assert(_seriesLength > _windowSize); + _host.Assert(_trainSize > 2 * _windowSize); + _host.Assert(0 <= _discountFactor && _discountFactor <= 1); + _host.Assert(1 <= _rank && _rank < _windowSize); + _host.Assert(Utils.Size(_alpha) == _windowSize - 1); + _host.Assert(_observationNoiseVariance >= 0); + _host.Assert(_autoregressionNoiseVariance >= 0); + _host.Assert(1 <= _maxRank && _maxRank <= _windowSize - 1); + _host.Assert(_maxTrendRatio >= 0); + + // *** Binary format *** + // int: _seriesLength + // int: _windowSize + // int: _trainSize + // int: _rank + // float: _discountFactor + // RankSelectionMethod: _rankSelectionMethod + // bool: _isWeightSet + // float[]: _alpha + // float[]: _state + // bool: ShouldComputeForecastIntervals + // float: _observationNoiseVariance + // float: _autoregressionNoiseVariance + // float: _observationNoiseMean + // float: _autoregressionNoiseMean + // float: _nextPrediction + // int: _maxRank + // bool: _shouldStablize + // bool: _shouldMaintainInfo + // double: _maxTrendRatio + // float[]: _wTrans (only if _isWeightSet == true) + + ctx.Writer.Write(_seriesLength); + ctx.Writer.Write(_windowSize); + ctx.Writer.Write(_trainSize); + ctx.Writer.Write(_rank); + ctx.Writer.Write(_discountFactor); + ctx.Writer.Write((byte)_rankSelectionMethod); + ctx.Writer.WriteBoolByte(_wTrans != null); + ctx.Writer.WriteSingleArray(_alpha); + ctx.Writer.WriteSingleArray(_state); + ctx.Writer.WriteBoolByte(ShouldComputeForecastIntervals); + ctx.Writer.Write(_observationNoiseVariance); + ctx.Writer.Write(_autoregressionNoiseVariance); + ctx.Writer.Write(_observationNoiseMean); + ctx.Writer.Write(_autoregressionNoiseMean); + ctx.Writer.Write(_nextPrediction); + ctx.Writer.Write(_maxRank); + ctx.Writer.WriteBoolByte(_shouldStablize); + ctx.Writer.WriteBoolByte(_shouldMaintainInfo); + ctx.Writer.Write(_maxTrendRatio); + + if (_wTrans != null) + { + // REVIEW: this may not be the most efficient way for serializing an aligned matrix. + var tempArray = new Single[_rank * _windowSize]; + int iv = 0; + _wTrans.CopyTo(tempArray, ref iv); + ctx.Writer.WriteSingleArray(tempArray); + tempArray = new float[_rank]; + iv = 0; + _y.CopyTo(tempArray, ref iv); + ctx.Writer.WriteSingleArray(tempArray); + } - _observationNoiseVariance = ctx.Reader.ReadSingle(); - _host.CheckDecode(_observationNoiseVariance >= 0); + TimeSeriesUtils.SerializeFixedSizeQueue(_buffer, ctx.Writer); + } - _autoregressionNoiseVariance = ctx.Reader.ReadSingle(); - _host.CheckDecode(_autoregressionNoiseVariance >= 0); + private static void ReconstructSignal(TrajectoryMatrix tMat, Single[] singularVectors, int rank, Single[] output) + { + Contracts.Assert(tMat != null); + Contracts.Assert(1 <= rank && rank <= tMat.WindowSize); + Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * rank); + Contracts.Assert(Utils.Size(output) >= tMat.SeriesLength); - _observationNoiseMean = ctx.Reader.ReadSingle(); - _autoregressionNoiseMean = ctx.Reader.ReadSingle(); - if (ctx.Header.ModelVerReadable >= VersionSavingStateAndPrediction) - _nextPrediction = ctx.Reader.ReadSingle(); + var k = tMat.SeriesLength - tMat.WindowSize + 1; + Contracts.Assert(k > 0); - _maxRank = ctx.Reader.ReadInt32(); - _host.CheckDecode(1 <= _maxRank && _maxRank <= _windowSize - 1); + var v = new Single[k]; + int i; - _shouldStablize = ctx.Reader.ReadBoolByte(); - _shouldMaintainInfo = ctx.Reader.ReadBoolByte(); - if (_shouldMaintainInfo) - { - _info = new ModelInfo(); - _info.AutoRegressiveCoefficients = new Single[_windowSize - 1]; - Array.Copy(_alpha, _info.AutoRegressiveCoefficients, _windowSize - 1); + for (i = 0; i < tMat.SeriesLength; ++i) + output[i] = 0; - _info.IsStabilized = _shouldStablize; - _info.Rank = _rank; - _info.WindowSize = _windowSize; + for (i = 0; i < rank; ++i) + { + // Reconstructing the i-th eigen triple component and adding it to output + tMat.MultiplyTranspose(singularVectors, v, false, tMat.WindowSize * i, 0); + tMat.RankOneHankelization(singularVectors, v, 1, output, true, tMat.WindowSize * i, 0, 0); + } } - _maxTrendRatio = ctx.Reader.ReadDouble(); - _host.CheckDecode(_maxTrendRatio >= 0); - - if (isWeightSet) + private static void ReconstructSignalTailFast(Single[] series, TrajectoryMatrix tMat, Single[] singularVectors, int rank, Single[] output) { - var tempArray = ctx.Reader.ReadFloatArray(); - _host.CheckDecode(Utils.Size(tempArray) == _rank * _windowSize); - _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); - int i = 0; - _wTrans.CopyFrom(tempArray, ref i); - tempArray = ctx.Reader.ReadFloatArray(); - i = 0; - _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); - _y.CopyFrom(tempArray, ref i); - } + Contracts.Assert(tMat != null); + Contracts.Assert(1 <= rank && rank <= tMat.WindowSize); + Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * rank); - _buffer = TimeSeriesUtils.DeserializeFixedSizeQueueSingle(ctx.Reader, _host); - _x = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); - _xSmooth = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); - } + int len = 2 * tMat.WindowSize - 1; + Contracts.Assert(Utils.Size(output) >= len); - private protected override void SaveModel(ModelSaveContext ctx) - { - _host.CheckValue(ctx, nameof(ctx)); - ctx.CheckAtModel(); - ctx.SetVersionInfo(GetVersionInfo()); + Single v; + /*var k = tMat.SeriesLength - tMat.WindowSize + 1; + Contracts.Assert(k > 0); + var v = new Single[k];*/ - _host.Assert(_windowSize >= 2); - _host.Assert(_seriesLength > _windowSize); - _host.Assert(_trainSize > 2 * _windowSize); - _host.Assert(0 <= _discountFactor && _discountFactor <= 1); - _host.Assert(1 <= _rank && _rank < _windowSize); - _host.Assert(Utils.Size(_alpha) == _windowSize - 1); - _host.Assert(_observationNoiseVariance >= 0); - _host.Assert(_autoregressionNoiseVariance >= 0); - _host.Assert(1 <= _maxRank && _maxRank <= _windowSize - 1); - _host.Assert(_maxTrendRatio >= 0); - - // *** Binary format *** - // int: _seriesLength - // int: _windowSize - // int: _trainSize - // int: _rank - // float: _discountFactor - // RankSelectionMethod: _rankSelectionMethod - // bool: _isWeightSet - // float[]: _alpha - // float[]: _state - // bool: ShouldComputeForecastIntervals - // float: _observationNoiseVariance - // float: _autoregressionNoiseVariance - // float: _observationNoiseMean - // float: _autoregressionNoiseMean - // float: _nextPrediction - // int: _maxRank - // bool: _shouldStablize - // bool: _shouldMaintainInfo - // double: _maxTrendRatio - // float[]: _wTrans (only if _isWeightSet == true) - - ctx.Writer.Write(_seriesLength); - ctx.Writer.Write(_windowSize); - ctx.Writer.Write(_trainSize); - ctx.Writer.Write(_rank); - ctx.Writer.Write(_discountFactor); - ctx.Writer.Write((byte)_rankSelectionMethod); - ctx.Writer.WriteBoolByte(_wTrans != null); - ctx.Writer.WriteSingleArray(_alpha); - ctx.Writer.WriteSingleArray(_state); - ctx.Writer.WriteBoolByte(ShouldComputeForecastIntervals); - ctx.Writer.Write(_observationNoiseVariance); - ctx.Writer.Write(_autoregressionNoiseVariance); - ctx.Writer.Write(_observationNoiseMean); - ctx.Writer.Write(_autoregressionNoiseMean); - ctx.Writer.Write(_nextPrediction); - ctx.Writer.Write(_maxRank); - ctx.Writer.WriteBoolByte(_shouldStablize); - ctx.Writer.WriteBoolByte(_shouldMaintainInfo); - ctx.Writer.Write(_maxTrendRatio); - - if (_wTrans != null) - { - // REVIEW: this may not be the most efficient way for serializing an aligned matrix. - var tempArray = new Single[_rank * _windowSize]; - int iv = 0; - _wTrans.CopyTo(tempArray, ref iv); - ctx.Writer.WriteSingleArray(tempArray); - tempArray = new float[_rank]; - iv = 0; - _y.CopyTo(tempArray, ref iv); - ctx.Writer.WriteSingleArray(tempArray); - } + Single temp; + int i; + int j; + int offset1 = tMat.SeriesLength - 2 * tMat.WindowSize + 1; + int offset2 = tMat.SeriesLength - tMat.WindowSize; - TimeSeriesUtils.SerializeFixedSizeQueue(_buffer, ctx.Writer); - } + for (i = 0; i < len; ++i) + output[i] = 0; - private static void ReconstructSignal(TrajectoryMatrix tMat, Single[] singularVectors, int rank, Single[] output) - { - Contracts.Assert(tMat != null); - Contracts.Assert(1 <= rank && rank <= tMat.WindowSize); - Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * rank); - Contracts.Assert(Utils.Size(output) >= tMat.SeriesLength); + for (i = 0; i < rank; ++i) + { + // Reconstructing the i-th eigen triple component and adding it to output + v = 0; + for (j = offset1; j < offset1 + tMat.WindowSize; ++j) + v += series[j] * singularVectors[tMat.WindowSize * i - offset1 + j]; - var k = tMat.SeriesLength - tMat.WindowSize + 1; - Contracts.Assert(k > 0); + for (j = 0; j < tMat.WindowSize - 1; ++j) + output[j] += v * singularVectors[tMat.WindowSize * i + j]; - var v = new Single[k]; - int i; + temp = v * singularVectors[tMat.WindowSize * (i + 1) - 1]; - for (i = 0; i < tMat.SeriesLength; ++i) - output[i] = 0; + v = 0; + for (j = offset2; j < offset2 + tMat.WindowSize; ++j) + v += series[j] * singularVectors[tMat.WindowSize * i - offset2 + j]; - for (i = 0; i < rank; ++i) - { - // Reconstructing the i-th eigen triple component and adding it to output - tMat.MultiplyTranspose(singularVectors, v, false, tMat.WindowSize * i, 0); - tMat.RankOneHankelization(singularVectors, v, 1, output, true, tMat.WindowSize * i, 0, 0); - } - } + for (j = tMat.WindowSize; j < 2 * tMat.WindowSize - 1; ++j) + output[j] += v * singularVectors[tMat.WindowSize * (i - 1) + j + 1]; - private static void ReconstructSignalTailFast(Single[] series, TrajectoryMatrix tMat, Single[] singularVectors, int rank, Single[] output) - { - Contracts.Assert(tMat != null); - Contracts.Assert(1 <= rank && rank <= tMat.WindowSize); - Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * rank); + temp += v * singularVectors[tMat.WindowSize * i]; + output[tMat.WindowSize - 1] += (temp / 2); + } + } - int len = 2 * tMat.WindowSize - 1; - Contracts.Assert(Utils.Size(output) >= len); + private static void ComputeNoiseMoments(Single[] series, Single[] signal, Single[] alpha, out Single observationNoiseVariance, out Single autoregressionNoiseVariance, + out Single observationNoiseMean, out Single autoregressionNoiseMean, int startIndex = 0) + { + Contracts.Assert(Utils.Size(alpha) > 0); + Contracts.Assert(Utils.Size(signal) > 2 * Utils.Size(alpha)); // To assure that the autoregression noise variance is unbiased. + Contracts.Assert(Utils.Size(series) >= Utils.Size(signal) + startIndex); - Single v; - /*var k = tMat.SeriesLength - tMat.WindowSize + 1; - Contracts.Assert(k > 0); - var v = new Single[k];*/ + var signalLength = Utils.Size(signal); + var windowSize = Utils.Size(alpha) + 1; + var k = signalLength - windowSize + 1; + Contracts.Assert(k > 0); - Single temp; - int i; - int j; - int offset1 = tMat.SeriesLength - 2 * tMat.WindowSize + 1; - int offset2 = tMat.SeriesLength - tMat.WindowSize; + var y = new Single[k]; + int i; - for (i = 0; i < len; ++i) - output[i] = 0; + observationNoiseMean = 0; + observationNoiseVariance = 0; + autoregressionNoiseMean = 0; + autoregressionNoiseVariance = 0; - for (i = 0; i < rank; ++i) - { - // Reconstructing the i-th eigen triple component and adding it to output - v = 0; - for (j = offset1; j < offset1 + tMat.WindowSize; ++j) - v += series[j] * singularVectors[tMat.WindowSize * i - offset1 + j]; + // Computing the observation noise moments + for (i = 0; i < signalLength; ++i) + observationNoiseMean += (series[i + startIndex] - signal[i]); + observationNoiseMean /= signalLength; - for (j = 0; j < tMat.WindowSize - 1; ++j) - output[j] += v * singularVectors[tMat.WindowSize * i + j]; + for (i = 0; i < signalLength; ++i) + observationNoiseVariance += (series[i + startIndex] - signal[i]) * (series[i + startIndex] - signal[i]); + observationNoiseVariance /= signalLength; + observationNoiseVariance -= (observationNoiseMean * observationNoiseMean); - temp = v * singularVectors[tMat.WindowSize * (i + 1) - 1]; + // Computing the auto-regression noise moments + TrajectoryMatrix xTM = new TrajectoryMatrix(null, signal, windowSize - 1, signalLength - 1); + xTM.MultiplyTranspose(alpha, y); - v = 0; - for (j = offset2; j < offset2 + tMat.WindowSize; ++j) - v += series[j] * singularVectors[tMat.WindowSize * i - offset2 + j]; + for (i = 0; i < k; ++i) + autoregressionNoiseMean += (signal[windowSize - 1 + i] - y[i]); + autoregressionNoiseMean /= k; - for (j = tMat.WindowSize; j < 2 * tMat.WindowSize - 1; ++j) - output[j] += v * singularVectors[tMat.WindowSize * (i - 1) + j + 1]; + for (i = 0; i < k; ++i) + { + autoregressionNoiseVariance += (signal[windowSize - 1 + i] - y[i] - autoregressionNoiseMean) * + (signal[windowSize - 1 + i] - y[i] - autoregressionNoiseMean); + } - temp += v * singularVectors[tMat.WindowSize * i]; - output[tMat.WindowSize - 1] += (temp / 2); + autoregressionNoiseVariance /= (k - windowSize + 1); + Contracts.Assert(autoregressionNoiseVariance >= 0); } - } - private static void ComputeNoiseMoments(Single[] series, Single[] signal, Single[] alpha, out Single observationNoiseVariance, out Single autoregressionNoiseVariance, - out Single observationNoiseMean, out Single autoregressionNoiseMean, int startIndex = 0) - { - Contracts.Assert(Utils.Size(alpha) > 0); - Contracts.Assert(Utils.Size(signal) > 2 * Utils.Size(alpha)); // To assure that the autoregression noise variance is unbiased. - Contracts.Assert(Utils.Size(series) >= Utils.Size(signal) + startIndex); - - var signalLength = Utils.Size(signal); - var windowSize = Utils.Size(alpha) + 1; - var k = signalLength - windowSize + 1; - Contracts.Assert(k > 0); - - var y = new Single[k]; - int i; - - observationNoiseMean = 0; - observationNoiseVariance = 0; - autoregressionNoiseMean = 0; - autoregressionNoiseVariance = 0; - - // Computing the observation noise moments - for (i = 0; i < signalLength; ++i) - observationNoiseMean += (series[i + startIndex] - signal[i]); - observationNoiseMean /= signalLength; - - for (i = 0; i < signalLength; ++i) - observationNoiseVariance += (series[i + startIndex] - signal[i]) * (series[i + startIndex] - signal[i]); - observationNoiseVariance /= signalLength; - observationNoiseVariance -= (observationNoiseMean * observationNoiseMean); - - // Computing the auto-regression noise moments - TrajectoryMatrix xTM = new TrajectoryMatrix(null, signal, windowSize - 1, signalLength - 1); - xTM.MultiplyTranspose(alpha, y); - - for (i = 0; i < k; ++i) - autoregressionNoiseMean += (signal[windowSize - 1 + i] - y[i]); - autoregressionNoiseMean /= k; - - for (i = 0; i < k; ++i) + private static int DetermineSignalRank(Single[] series, TrajectoryMatrix tMat, Single[] singularVectors, Single[] singularValues, + Single[] outputSignal, int maxRank) { - autoregressionNoiseVariance += (signal[windowSize - 1 + i] - y[i] - autoregressionNoiseMean) * - (signal[windowSize - 1 + i] - y[i] - autoregressionNoiseMean); - } - - autoregressionNoiseVariance /= (k - windowSize + 1); - Contracts.Assert(autoregressionNoiseVariance >= 0); - } + Contracts.Assert(tMat != null); + Contracts.Assert(Utils.Size(series) >= tMat.SeriesLength); + Contracts.Assert(Utils.Size(outputSignal) >= tMat.SeriesLength); + Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * tMat.WindowSize); + Contracts.Assert(Utils.Size(singularValues) >= tMat.WindowSize); + Contracts.Assert(1 <= maxRank && maxRank <= tMat.WindowSize - 1); + + var inputSeriesLength = tMat.SeriesLength; + var k = inputSeriesLength - tMat.WindowSize + 1; + Contracts.Assert(k > 0); + + var x = new Single[inputSeriesLength]; + var y = new Single[k]; + var alpha = new Single[tMat.WindowSize - 1]; + var v = new Single[k]; + + Single nu = 0; + Double minErr = Double.MaxValue; + int minIndex = maxRank - 1; + int evaluationLength = Math.Min(Math.Max(tMat.WindowSize, 200), k); + + TrajectoryMatrix xTM = new TrajectoryMatrix(null, x, tMat.WindowSize - 1, inputSeriesLength - 1); + + int i; + int j; + int n; + Single temp; + Double error; + Double sumSingularVals = 0; + Single lambda; + Single observationNoiseMean; + + FixedSizeQueue window = new FixedSizeQueue(tMat.WindowSize - 1); + + for (i = 0; i < tMat.WindowSize; ++i) + sumSingularVals += singularValues[i]; + + for (i = 0; i < maxRank; ++i) + { + // Updating the auto-regressive coefficients + lambda = singularVectors[tMat.WindowSize * i + tMat.WindowSize - 1]; + for (j = 0; j < tMat.WindowSize - 1; ++j) + alpha[j] += lambda * singularVectors[tMat.WindowSize * i + j]; - private static int DetermineSignalRank(Single[] series, TrajectoryMatrix tMat, Single[] singularVectors, Single[] singularValues, - Single[] outputSignal, int maxRank) - { - Contracts.Assert(tMat != null); - Contracts.Assert(Utils.Size(series) >= tMat.SeriesLength); - Contracts.Assert(Utils.Size(outputSignal) >= tMat.SeriesLength); - Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * tMat.WindowSize); - Contracts.Assert(Utils.Size(singularValues) >= tMat.WindowSize); - Contracts.Assert(1 <= maxRank && maxRank <= tMat.WindowSize - 1); - - var inputSeriesLength = tMat.SeriesLength; - var k = inputSeriesLength - tMat.WindowSize + 1; - Contracts.Assert(k > 0); - - var x = new Single[inputSeriesLength]; - var y = new Single[k]; - var alpha = new Single[tMat.WindowSize - 1]; - var v = new Single[k]; - - Single nu = 0; - Double minErr = Double.MaxValue; - int minIndex = maxRank - 1; - int evaluationLength = Math.Min(Math.Max(tMat.WindowSize, 200), k); - - TrajectoryMatrix xTM = new TrajectoryMatrix(null, x, tMat.WindowSize - 1, inputSeriesLength - 1); - - int i; - int j; - int n; - Single temp; - Double error; - Double sumSingularVals = 0; - Single lambda; - Single observationNoiseMean; - - FixedSizeQueue window = new FixedSizeQueue(tMat.WindowSize - 1); - - for (i = 0; i < tMat.WindowSize; ++i) - sumSingularVals += singularValues[i]; - - for (i = 0; i < maxRank; ++i) - { - // Updating the auto-regressive coefficients - lambda = singularVectors[tMat.WindowSize * i + tMat.WindowSize - 1]; - for (j = 0; j < tMat.WindowSize - 1; ++j) - alpha[j] += lambda * singularVectors[tMat.WindowSize * i + j]; + // Updating nu + nu += lambda * lambda; - // Updating nu - nu += lambda * lambda; + // Reconstructing the i-th eigen triple component and adding it to x + tMat.MultiplyTranspose(singularVectors, v, false, tMat.WindowSize * i, 0); + tMat.RankOneHankelization(singularVectors, v, 1, x, true, tMat.WindowSize * i, 0, 0); - // Reconstructing the i-th eigen triple component and adding it to x - tMat.MultiplyTranspose(singularVectors, v, false, tMat.WindowSize * i, 0); - tMat.RankOneHankelization(singularVectors, v, 1, x, true, tMat.WindowSize * i, 0, 0); + observationNoiseMean = 0; + for (j = 0; j < inputSeriesLength; ++j) + observationNoiseMean += (series[j] - x[j]); + observationNoiseMean /= inputSeriesLength; - observationNoiseMean = 0; - for (j = 0; j < inputSeriesLength; ++j) - observationNoiseMean += (series[j] - x[j]); - observationNoiseMean /= inputSeriesLength; + for (j = inputSeriesLength - evaluationLength - tMat.WindowSize + 1; j < inputSeriesLength - evaluationLength; ++j) + window.AddLast(x[j]); - for (j = inputSeriesLength - evaluationLength - tMat.WindowSize + 1; j < inputSeriesLength - evaluationLength; ++j) - window.AddLast(x[j]); + error = 0; + for (j = inputSeriesLength - evaluationLength; j < inputSeriesLength; ++j) + { + temp = 0; + for (n = 0; n < tMat.WindowSize - 1; ++n) + temp += alpha[n] * window[n]; + + temp /= (1 - nu); + temp += observationNoiseMean; + window.AddLast(temp); + error += Math.Abs(series[j] - temp); + if (error > minErr) + break; + } - error = 0; - for (j = inputSeriesLength - evaluationLength; j < inputSeriesLength; ++j) - { - temp = 0; - for (n = 0; n < tMat.WindowSize - 1; ++n) - temp += alpha[n] * window[n]; - - temp /= (1 - nu); - temp += observationNoiseMean; - window.AddLast(temp); - error += Math.Abs(series[j] - temp); - if (error > minErr) - break; + if (error < minErr) + { + minErr = error; + minIndex = i; + Array.Copy(x, outputSignal, inputSeriesLength); + } } - if (error < minErr) - { - minErr = error; - minIndex = i; - Array.Copy(x, outputSignal, inputSeriesLength); - } + return minIndex + 1; } - return minIndex + 1; - } - - internal override void InitState() - { - for (int i = 0; i < _windowSize - 2; ++i) - _state[i] = 0; + internal override void InitState() + { + for (int i = 0; i < _windowSize - 2; ++i) + _state[i] = 0; - _buffer.Clear(); - } + _buffer.Clear(); + } - private static int DetermineSignalRankFast(Single[] series, TrajectoryMatrix tMat, Single[] singularVectors, Single[] singularValues, int maxRank) - { - Contracts.Assert(tMat != null); - Contracts.Assert(Utils.Size(series) >= tMat.SeriesLength); - Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * tMat.WindowSize); - Contracts.Assert(Utils.Size(singularValues) >= tMat.WindowSize); - Contracts.Assert(1 <= maxRank && maxRank <= tMat.WindowSize - 1); - - var inputSeriesLength = tMat.SeriesLength; - var k = inputSeriesLength - tMat.WindowSize + 1; - Contracts.Assert(k > 0); - - var x = new Single[tMat.WindowSize - 1]; - var alpha = new Single[tMat.WindowSize - 1]; - Single v; - - Single nu = 0; - Double minErr = Double.MaxValue; - int minIndex = maxRank - 1; - int evaluationLength = Math.Min(Math.Max(tMat.WindowSize, 200), k); - - int i; - int j; - int n; - int offset; - Single temp; - Double error; - Single lambda; - Single observationNoiseMean; - - FixedSizeQueue window = new FixedSizeQueue(tMat.WindowSize - 1); - - for (i = 0; i < maxRank; ++i) + private static int DetermineSignalRankFast(Single[] series, TrajectoryMatrix tMat, Single[] singularVectors, Single[] singularValues, int maxRank) { - // Updating the auto-regressive coefficients - lambda = singularVectors[tMat.WindowSize * i + tMat.WindowSize - 1]; - for (j = 0; j < tMat.WindowSize - 1; ++j) - alpha[j] += lambda * singularVectors[tMat.WindowSize * i + j]; + Contracts.Assert(tMat != null); + Contracts.Assert(Utils.Size(series) >= tMat.SeriesLength); + Contracts.Assert(Utils.Size(singularVectors) >= tMat.WindowSize * tMat.WindowSize); + Contracts.Assert(Utils.Size(singularValues) >= tMat.WindowSize); + Contracts.Assert(1 <= maxRank && maxRank <= tMat.WindowSize - 1); + + var inputSeriesLength = tMat.SeriesLength; + var k = inputSeriesLength - tMat.WindowSize + 1; + Contracts.Assert(k > 0); + + var x = new Single[tMat.WindowSize - 1]; + var alpha = new Single[tMat.WindowSize - 1]; + Single v; + + Single nu = 0; + Double minErr = Double.MaxValue; + int minIndex = maxRank - 1; + int evaluationLength = Math.Min(Math.Max(tMat.WindowSize, 200), k); + + int i; + int j; + int n; + int offset; + Single temp; + Double error; + Single lambda; + Single observationNoiseMean; + + FixedSizeQueue window = new FixedSizeQueue(tMat.WindowSize - 1); + + for (i = 0; i < maxRank; ++i) + { + // Updating the auto-regressive coefficients + lambda = singularVectors[tMat.WindowSize * i + tMat.WindowSize - 1]; + for (j = 0; j < tMat.WindowSize - 1; ++j) + alpha[j] += lambda * singularVectors[tMat.WindowSize * i + j]; - // Updating nu - nu += lambda * lambda; + // Updating nu + nu += lambda * lambda; - // Reconstructing the i-th eigen triple component and adding it to x - v = 0; - offset = inputSeriesLength - evaluationLength - tMat.WindowSize + 1; + // Reconstructing the i-th eigen triple component and adding it to x + v = 0; + offset = inputSeriesLength - evaluationLength - tMat.WindowSize + 1; - for (j = offset; j <= inputSeriesLength - evaluationLength; ++j) - v += series[j] * singularVectors[tMat.WindowSize * i - offset + j]; + for (j = offset; j <= inputSeriesLength - evaluationLength; ++j) + v += series[j] * singularVectors[tMat.WindowSize * i - offset + j]; - for (j = 0; j < tMat.WindowSize - 1; ++j) - x[j] += v * singularVectors[tMat.WindowSize * i + j]; + for (j = 0; j < tMat.WindowSize - 1; ++j) + x[j] += v * singularVectors[tMat.WindowSize * i + j]; - // Computing the empirical observation noise mean - observationNoiseMean = 0; - for (j = offset; j < inputSeriesLength - evaluationLength; ++j) - observationNoiseMean += (series[j] - x[j - offset]); - observationNoiseMean /= (tMat.WindowSize - 1); + // Computing the empirical observation noise mean + observationNoiseMean = 0; + for (j = offset; j < inputSeriesLength - evaluationLength; ++j) + observationNoiseMean += (series[j] - x[j - offset]); + observationNoiseMean /= (tMat.WindowSize - 1); - for (j = 0; j < tMat.WindowSize - 1; ++j) - window.AddLast(x[j]); + for (j = 0; j < tMat.WindowSize - 1; ++j) + window.AddLast(x[j]); - error = 0; - for (j = inputSeriesLength - evaluationLength; j < inputSeriesLength; ++j) - { - temp = 0; - for (n = 0; n < tMat.WindowSize - 1; ++n) - temp += alpha[n] * window[n]; - - temp /= (1 - nu); - temp += observationNoiseMean; - window.AddLast(temp); - error += Math.Abs(series[j] - temp); - if (error > minErr) - break; + error = 0; + for (j = inputSeriesLength - evaluationLength; j < inputSeriesLength; ++j) + { + temp = 0; + for (n = 0; n < tMat.WindowSize - 1; ++n) + temp += alpha[n] * window[n]; + + temp /= (1 - nu); + temp += observationNoiseMean; + window.AddLast(temp); + error += Math.Abs(series[j] - temp); + if (error > minErr) + break; + } + + if (error < minErr) + { + minErr = error; + minIndex = i; + } } - if (error < minErr) + return minIndex + 1; + } + + private class SignalComponent + { + public Double Phase; + public int Index; + public int Cluster; + + public SignalComponent(Double phase, int index) { - minErr = error; - minIndex = i; + Phase = phase; + Index = index; } } - return minIndex + 1; - } + private bool Stabilize() + { + if (Utils.Size(_alpha) == 1) + { + if (_shouldMaintainInfo) + _info.RootsBeforeStabilization = new[] { new Complex(_alpha[0], 0) }; - private class SignalComponent - { - public Double Phase; - public int Index; - public int Cluster; + if (_alpha[0] > 1) + _alpha[0] = 1; + else if (_alpha[0] < -1) + _alpha[0] = -1; - public SignalComponent(Double phase, int index) - { - Phase = phase; - Index = index; - } - } + if (_shouldMaintainInfo) + { + _info.IsStabilized = true; + _info.RootsAfterStabilization = new[] { new Complex(_alpha[0], 0) }; + _info.IsExponentialTrendPresent = false; + _info.IsPolynomialTrendPresent = false; + _info.ExponentialTrendFactor = Math.Abs(_alpha[0]); + } + return true; + } - private bool Stabilize() - { - if (Utils.Size(_alpha) == 1) - { - if (_shouldMaintainInfo) - _info.RootsBeforeStabilization = new[] { new Complex(_alpha[0], 0) }; + var coeff = new Double[_windowSize - 1]; + Complex[] roots = null; + bool trendFound = false; + bool polynomialTrendFound = false; + Double maxTrendMagnitude = Double.NegativeInfinity; + Double maxNonTrendMagnitude = Double.NegativeInfinity; + Double eps = 1e-9; + Double highFrequenceyBoundry = Math.PI / 2; + var sortedComponents = new List(); + var trendComponents = new List(); + int i; + + // Computing the roots of the characteristic polynomial + for (i = 0; i < _windowSize - 1; ++i) + coeff[i] = -_alpha[i]; - if (_alpha[0] > 1) - _alpha[0] = 1; - else if (_alpha[0] < -1) - _alpha[0] = -1; + if (!PolynomialUtils.FindPolynomialRoots(coeff, ref roots)) + return false; if (_shouldMaintainInfo) { - _info.IsStabilized = true; - _info.RootsAfterStabilization = new[] { new Complex(_alpha[0], 0) }; - _info.IsExponentialTrendPresent = false; - _info.IsPolynomialTrendPresent = false; - _info.ExponentialTrendFactor = Math.Abs(_alpha[0]); + _info.RootsBeforeStabilization = new Complex[_windowSize - 1]; + Array.Copy(roots, _info.RootsBeforeStabilization, _windowSize - 1); } - return true; - } - - var coeff = new Double[_windowSize - 1]; - Complex[] roots = null; - bool trendFound = false; - bool polynomialTrendFound = false; - Double maxTrendMagnitude = Double.NegativeInfinity; - Double maxNonTrendMagnitude = Double.NegativeInfinity; - Double eps = 1e-9; - Double highFrequenceyBoundry = Math.PI / 2; - var sortedComponents = new List(); - var trendComponents = new List(); - int i; - - // Computing the roots of the characteristic polynomial - for (i = 0; i < _windowSize - 1; ++i) - coeff[i] = -_alpha[i]; - - if (!PolynomialUtils.FindPolynomialRoots(coeff, ref roots)) - return false; - - if (_shouldMaintainInfo) - { - _info.RootsBeforeStabilization = new Complex[_windowSize - 1]; - Array.Copy(roots, _info.RootsBeforeStabilization, _windowSize - 1); - } - // Detecting trend components - for (i = 0; i < _windowSize - 1; ++i) - { - if (roots[i].Magnitude > 1 && (Math.Abs(roots[i].Phase) <= eps || Math.Abs(Math.Abs(roots[i].Phase) - Math.PI) <= eps)) - trendComponents.Add(i); - } + // Detecting trend components + for (i = 0; i < _windowSize - 1; ++i) + { + if (roots[i].Magnitude > 1 && (Math.Abs(roots[i].Phase) <= eps || Math.Abs(Math.Abs(roots[i].Phase) - Math.PI) <= eps)) + trendComponents.Add(i); + } - // Removing unobserved seasonalities and consequently introducing polynomial trend - for (i = 0; i < _windowSize - 1; ++i) - { - if (roots[i].Phase != 0) + // Removing unobserved seasonalities and consequently introducing polynomial trend + for (i = 0; i < _windowSize - 1; ++i) { - if (roots[i].Magnitude >= 1 && 2 * Math.PI / Math.Abs(roots[i].Phase) > _windowSize) + if (roots[i].Phase != 0) { - /*if (roots[i].Real > 1) + if (roots[i].Magnitude >= 1 && 2 * Math.PI / Math.Abs(roots[i].Phase) > _windowSize) { + /*if (roots[i].Real > 1) + { + polynomialTrendFound = true; + roots[i] = new Complex(Math.Max(1, roots[i].Magnitude), 0); + maxPolynomialTrendMagnitude = Math.Max(maxPolynomialTrendMagnitude, roots[i].Magnitude); + } + else + roots[i] = Complex.FromPolarCoordinates(1, 0); + //roots[i] = Complex.FromPolarCoordinates(0.99, roots[i].Phase);*/ + + /* if (_maxTrendRatio > 1) + { + roots[i] = new Complex(roots[i].Real, 0); + polynomialTrendFound = true; + } + else + roots[i] = roots[i].Imaginary > 0 ? new Complex(roots[i].Real, 0) : new Complex(1, 0);*/ + + roots[i] = new Complex(roots[i].Real, 0); polynomialTrendFound = true; - roots[i] = new Complex(Math.Max(1, roots[i].Magnitude), 0); - maxPolynomialTrendMagnitude = Math.Max(maxPolynomialTrendMagnitude, roots[i].Magnitude); + + if (_shouldMaintainInfo) + _info.IsArtificialSeasonalityRemoved = true; } - else - roots[i] = Complex.FromPolarCoordinates(1, 0); - //roots[i] = Complex.FromPolarCoordinates(0.99, roots[i].Phase);*/ - - /* if (_maxTrendRatio > 1) - { - roots[i] = new Complex(roots[i].Real, 0); - polynomialTrendFound = true; - } - else - roots[i] = roots[i].Imaginary > 0 ? new Complex(roots[i].Real, 0) : new Complex(1, 0);*/ - - roots[i] = new Complex(roots[i].Real, 0); - polynomialTrendFound = true; - - if (_shouldMaintainInfo) - _info.IsArtificialSeasonalityRemoved = true; + else if (roots[i].Magnitude > 1) + sortedComponents.Add(new SignalComponent(roots[i].Phase, i)); } - else if (roots[i].Magnitude > 1) - sortedComponents.Add(new SignalComponent(roots[i].Phase, i)); } - } - if (_maxTrendRatio > 1) - { - // Combining the close exponential-seasonal components - if (sortedComponents.Count > 1 && polynomialTrendFound) + if (_maxTrendRatio > 1) { - sortedComponents.Sort((a, b) => a.Phase.CompareTo(b.Phase)); - var clusterNum = 0; - - for (i = 0; i < sortedComponents.Count - 1; ++i) + // Combining the close exponential-seasonal components + if (sortedComponents.Count > 1 && polynomialTrendFound) { - if ((sortedComponents[i].Phase < 0 && sortedComponents[i + 1].Phase < 0) || - (sortedComponents[i].Phase > 0 && sortedComponents[i + 1].Phase > 0)) + sortedComponents.Sort((a, b) => a.Phase.CompareTo(b.Phase)); + var clusterNum = 0; + + for (i = 0; i < sortedComponents.Count - 1; ++i) { - sortedComponents[i].Cluster = clusterNum; - if (Math.Abs(sortedComponents[i + 1].Phase - sortedComponents[i].Phase) > 0.05) + if ((sortedComponents[i].Phase < 0 && sortedComponents[i + 1].Phase < 0) || + (sortedComponents[i].Phase > 0 && sortedComponents[i + 1].Phase > 0)) + { + sortedComponents[i].Cluster = clusterNum; + if (Math.Abs(sortedComponents[i + 1].Phase - sortedComponents[i].Phase) > 0.05) + clusterNum++; + sortedComponents[i + 1].Cluster = clusterNum; + } + else clusterNum++; - sortedComponents[i + 1].Cluster = clusterNum; } - else - clusterNum++; - } - int start = 0; - bool polynomialSeasonalityFound = false; - Double largestSeasonalityPhase = 0; - for (i = 1; i < sortedComponents.Count; ++i) - { - if (sortedComponents[i].Cluster != sortedComponents[i - 1].Cluster) + int start = 0; + bool polynomialSeasonalityFound = false; + Double largestSeasonalityPhase = 0; + for (i = 1; i < sortedComponents.Count; ++i) { - if (i - start > 1) // There are more than one point in the cluster + if (sortedComponents[i].Cluster != sortedComponents[i - 1].Cluster) { - Double avgPhase = 0; - Double avgMagnitude = 0; - - for (var j = start; j < i; ++j) + if (i - start > 1) // There are more than one point in the cluster { - avgPhase += sortedComponents[j].Phase; - avgMagnitude += roots[sortedComponents[j].Index].Magnitude; + Double avgPhase = 0; + Double avgMagnitude = 0; + + for (var j = start; j < i; ++j) + { + avgPhase += sortedComponents[j].Phase; + avgMagnitude += roots[sortedComponents[j].Index].Magnitude; + } + avgPhase /= (i - start); + avgMagnitude /= (i - start); + + for (var j = start; j < i; ++j) + roots[sortedComponents[j].Index] = Complex.FromPolarCoordinates(avgMagnitude, + avgPhase); + + if (!polynomialSeasonalityFound && avgPhase > 0) + { + largestSeasonalityPhase = avgPhase; + polynomialSeasonalityFound = true; + } } - avgPhase /= (i - start); - avgMagnitude /= (i - start); - - for (var j = start; j < i; ++j) - roots[sortedComponents[j].Index] = Complex.FromPolarCoordinates(avgMagnitude, - avgPhase); - if (!polynomialSeasonalityFound && avgPhase > 0) - { - largestSeasonalityPhase = avgPhase; - polynomialSeasonalityFound = true; - } + start = i; } - - start = i; } } - } - - // Combining multiple exponential trends into polynomial ones - if (!polynomialTrendFound) - { - var ind1 = -1; - var ind2 = -1; - foreach (var ind in trendComponents) + // Combining multiple exponential trends into polynomial ones + if (!polynomialTrendFound) { - if (Math.Abs(roots[ind].Phase) <= eps) + var ind1 = -1; + var ind2 = -1; + + foreach (var ind in trendComponents) { - ind1 = ind; - break; + if (Math.Abs(roots[ind].Phase) <= eps) + { + ind1 = ind; + break; + } + } + + for (i = 0; i < _windowSize - 1; ++i) + { + if (Math.Abs(roots[i].Phase) <= eps && 0.9 <= roots[i].Magnitude && i != ind1) + { + ind2 = i; + break; + } + } + + if (ind1 >= 0 && ind2 >= 0 && ind1 != ind2) + { + roots[ind1] = Complex.FromPolarCoordinates(1, 0); + roots[ind2] = Complex.FromPolarCoordinates(1, 0); + polynomialTrendFound = true; } } + } + if (polynomialTrendFound) // Suppress the exponential trend + { + maxTrendMagnitude = Math.Min(1, _maxTrendRatio); + foreach (var ind in trendComponents) + roots[ind] = Complex.FromPolarCoordinates(0.99, roots[ind].Phase); + } + else + { + // Spotting the exponential trend components and finding the max trend magnitude for (i = 0; i < _windowSize - 1; ++i) { - if (Math.Abs(roots[i].Phase) <= eps && 0.9 <= roots[i].Magnitude && i != ind1) + if (roots[i].Magnitude > 1 && Math.Abs(roots[i].Phase) <= eps) { - ind2 = i; - break; + trendFound = true; + maxTrendMagnitude = Math.Max(maxTrendMagnitude, roots[i].Magnitude); } + else + maxNonTrendMagnitude = Math.Max(maxNonTrendMagnitude, roots[i].Magnitude); } - if (ind1 >= 0 && ind2 >= 0 && ind1 != ind2) - { - roots[ind1] = Complex.FromPolarCoordinates(1, 0); - roots[ind2] = Complex.FromPolarCoordinates(1, 0); - polynomialTrendFound = true; - } + if (!trendFound) + maxTrendMagnitude = 1; + + maxTrendMagnitude = Math.Min(maxTrendMagnitude, _maxTrendRatio); } - } - if (polynomialTrendFound) // Suppress the exponential trend - { - maxTrendMagnitude = Math.Min(1, _maxTrendRatio); - foreach (var ind in trendComponents) - roots[ind] = Complex.FromPolarCoordinates(0.99, roots[ind].Phase); - } - else - { - // Spotting the exponential trend components and finding the max trend magnitude + // Squeezing all components below the maximum trend magnitude + var smallTrendMagnitude = Math.Min(maxTrendMagnitude, (maxTrendMagnitude + 1) / 2); for (i = 0; i < _windowSize - 1; ++i) { - if (roots[i].Magnitude > 1 && Math.Abs(roots[i].Phase) <= eps) + if (roots[i].Magnitude >= maxTrendMagnitude) { - trendFound = true; - maxTrendMagnitude = Math.Max(maxTrendMagnitude, roots[i].Magnitude); + if ((highFrequenceyBoundry < roots[i].Phase && roots[i].Phase < Math.PI - eps) || + (-Math.PI + eps < roots[i].Phase && roots[i].Phase < -highFrequenceyBoundry)) + roots[i] = Complex.FromPolarCoordinates(smallTrendMagnitude, roots[i].Phase); + else + roots[i] = Complex.FromPolarCoordinates(maxTrendMagnitude, roots[i].Phase); } - else - maxNonTrendMagnitude = Math.Max(maxNonTrendMagnitude, roots[i].Magnitude); } - if (!trendFound) - maxTrendMagnitude = 1; - - maxTrendMagnitude = Math.Min(maxTrendMagnitude, _maxTrendRatio); - } + // Correcting all the other trend components + for (i = 0; i < _windowSize - 1; ++i) + { + var phase = roots[i].Phase; + if (Math.Abs(phase) <= eps) + roots[i] = new Complex(roots[i].Magnitude, 0); + else if (Math.Abs(phase - Math.PI) <= eps) + roots[i] = new Complex(-roots[i].Magnitude, 0); + else if (Math.Abs(phase + Math.PI) <= eps) + roots[i] = new Complex(-roots[i].Magnitude, 0); + } - // Squeezing all components below the maximum trend magnitude - var smallTrendMagnitude = Math.Min(maxTrendMagnitude, (maxTrendMagnitude + 1) / 2); - for (i = 0; i < _windowSize - 1; ++i) - { - if (roots[i].Magnitude >= maxTrendMagnitude) + // Computing the characteristic polynomial from the modified roots + try { - if ((highFrequenceyBoundry < roots[i].Phase && roots[i].Phase < Math.PI - eps) || - (-Math.PI + eps < roots[i].Phase && roots[i].Phase < -highFrequenceyBoundry)) - roots[i] = Complex.FromPolarCoordinates(smallTrendMagnitude, roots[i].Phase); - else - roots[i] = Complex.FromPolarCoordinates(maxTrendMagnitude, roots[i].Phase); + if (!PolynomialUtils.FindPolynomialCoefficients(roots, ref coeff)) + return false; + } + catch (OverflowException) + { + return false; } - } - // Correcting all the other trend components - for (i = 0; i < _windowSize - 1; ++i) - { - var phase = roots[i].Phase; - if (Math.Abs(phase) <= eps) - roots[i] = new Complex(roots[i].Magnitude, 0); - else if (Math.Abs(phase - Math.PI) <= eps) - roots[i] = new Complex(-roots[i].Magnitude, 0); - else if (Math.Abs(phase + Math.PI) <= eps) - roots[i] = new Complex(-roots[i].Magnitude, 0); - } + // Updating alpha + for (i = 0; i < _windowSize - 1; ++i) + _alpha[i] = (Single)(-coeff[i]); - // Computing the characteristic polynomial from the modified roots - try - { - if (!PolynomialUtils.FindPolynomialCoefficients(roots, ref coeff)) - return false; - } - catch (OverflowException) - { - return false; - } + if (_shouldMaintainInfo) + { + _info.RootsAfterStabilization = roots; + _info.IsStabilized = true; + _info.IsPolynomialTrendPresent = polynomialTrendFound; + _info.IsExponentialTrendPresent = maxTrendMagnitude > 1; + _info.ExponentialTrendFactor = maxTrendMagnitude; + } - // Updating alpha - for (i = 0; i < _windowSize - 1; ++i) - _alpha[i] = (Single)(-coeff[i]); + return true; + } - if (_shouldMaintainInfo) + /// + /// Feeds the next observation on the series to the model and as a result changes the state of the model. + /// + /// The next observation on the series. + /// Determines whether the model parameters also need to be updated upon consuming the new observation (default = false). + internal override void Consume(ref Single input, bool updateModel = false) { - _info.RootsAfterStabilization = roots; - _info.IsStabilized = true; - _info.IsPolynomialTrendPresent = polynomialTrendFound; - _info.IsExponentialTrendPresent = maxTrendMagnitude > 1; - _info.ExponentialTrendFactor = maxTrendMagnitude; - } + if (Single.IsNaN(input)) + return; - return true; - } + int i; - /// - /// Feeds the next observation on the series to the model and as a result changes the state of the model. - /// - /// The next observation on the series. - /// Determines whether the model parameters also need to be updated upon consuming the new observation (default = false). - internal override void Consume(ref Single input, bool updateModel = false) - { - if (Single.IsNaN(input)) - return; + if (_wTrans == null) + { + _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); + _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); + Single[] vecs = new Single[_rank * _windowSize]; - int i; + for (i = 0; i < _rank; ++i) + vecs[(_windowSize + 1) * i] = 1; - if (_wTrans == null) - { - _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); - _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); - Single[] vecs = new Single[_rank * _windowSize]; + i = 0; + _wTrans.CopyFrom(vecs, ref i); + } - for (i = 0; i < _rank; ++i) - vecs[(_windowSize + 1) * i] = 1; + // Forming vector x - i = 0; - _wTrans.CopyFrom(vecs, ref i); - } + if (_buffer.Count == 0) + { + for (i = 0; i < _windowSize - 1; ++i) + _buffer.AddLast(_state[i]); + } - // Forming vector x + int len = _buffer.Count; + for (i = 0; i < _windowSize - len - 1; ++i) + _x[i] = 0; + for (i = Math.Max(0, len - _windowSize + 1); i < len; ++i) + _x[i - len + _windowSize - 1] = _buffer[i]; + _x[_windowSize - 1] = input; - if (_buffer.Count == 0) - { - for (i = 0; i < _windowSize - 1; ++i) - _buffer.AddLast(_state[i]); - } + // Computing y: Eq. (11) in https://hal-institut-mines-telecom.archives-ouvertes.fr/hal-00479772/file/twocolumns.pdf + CpuAligenedMathUtils.MatTimesSrc(_wTrans, _x, _y); - int len = _buffer.Count; - for (i = 0; i < _windowSize - len - 1; ++i) - _x[i] = 0; - for (i = Math.Max(0, len - _windowSize + 1); i < len; ++i) - _x[i - len + _windowSize - 1] = _buffer[i]; - _x[_windowSize - 1] = input; + // Updating the state vector + CpuAligenedMathUtils.MatTranTimesSrc(_wTrans, _y, _xSmooth); - // Computing y: Eq. (11) in https://hal-institut-mines-telecom.archives-ouvertes.fr/hal-00479772/file/twocolumns.pdf - CpuAligenedMathUtils.MatTimesSrc(_wTrans, _x, _y); + _nextPrediction = _autoregressionNoiseMean + _observationNoiseMean; + for (i = 0; i < _windowSize - 2; ++i) + { + _state[i] = ((_windowSize - 2 - i) * _state[i + 1] + _xSmooth[i + 1]) / (_windowSize - 1 - i); + _nextPrediction += _state[i] * _alpha[i]; + } + _state[_windowSize - 2] = _xSmooth[_windowSize - 1]; + _nextPrediction += _state[_windowSize - 2] * _alpha[_windowSize - 2]; - // Updating the state vector - CpuAligenedMathUtils.MatTranTimesSrc(_wTrans, _y, _xSmooth); + if (updateModel) + { + // REVIEW: to be implemented in the next version based on the FAPI algorithm + // in https://hal-institut-mines-telecom.archives-ouvertes.fr/hal-00479772/file/twocolumns.pdf. + } - _nextPrediction = _autoregressionNoiseMean + _observationNoiseMean; - for (i = 0; i < _windowSize - 2; ++i) - { - _state[i] = ((_windowSize - 2 - i) * _state[i + 1] + _xSmooth[i + 1]) / (_windowSize - 1 - i); - _nextPrediction += _state[i] * _alpha[i]; + _buffer.AddLast(input); } - _state[_windowSize - 2] = _xSmooth[_windowSize - 1]; - _nextPrediction += _state[_windowSize - 2] * _alpha[_windowSize - 2]; - if (updateModel) + /// + /// Train the model parameters based on a training series. + /// + /// The training time-series. + internal override void Train(FixedSizeQueue data) { - // REVIEW: to be implemented in the next version based on the FAPI algorithm - // in https://hal-institut-mines-telecom.archives-ouvertes.fr/hal-00479772/file/twocolumns.pdf. - } + _host.CheckParam(data != null, nameof(data), "The input series for training cannot be null."); + _host.CheckParam(data.Count >= _trainSize, nameof(data), "The input series for training does not have enough points for training."); - _buffer.AddLast(input); - } + Single[] dataArray = new Single[_trainSize]; - /// - /// Train the model parameters based on a training series. - /// - /// The training time-series. - internal override void Train(FixedSizeQueue data) - { - _host.CheckParam(data != null, nameof(data), "The input series for training cannot be null."); - _host.CheckParam(data.Count >= _trainSize, nameof(data), "The input series for training does not have enough points for training."); - - Single[] dataArray = new Single[_trainSize]; + int i; + int count; + for (i = 0, count = 0; count < _trainSize && i < data.Count; ++i) + if (!Single.IsNaN(data[i])) + dataArray[count++] = data[i]; - int i; - int count; - for (i = 0, count = 0; count < _trainSize && i < data.Count; ++i) - if (!Single.IsNaN(data[i])) - dataArray[count++] = data[i]; - - if (_shouldMaintainInfo) - { - _info = new ModelInfo(); - _info.WindowSize = _windowSize; - } + if (_shouldMaintainInfo) + { + _info = new ModelInfo(); + _info.WindowSize = _windowSize; + } - if (count <= 2 * _windowSize) - { + if (count <= 2 * _windowSize) + { #if !TLCSSA - using (var ch = _host.Start("Train")) - ch.Warning( - "Training cannot be completed because the input series for training does not have enough points."); + using (var ch = _host.Start("Train")) + ch.Warning( + "Training cannot be completed because the input series for training does not have enough points."); #endif - } - else - { - if (count != _trainSize) - Array.Resize(ref dataArray, count); + } + else + { + if (count != _trainSize) + Array.Resize(ref dataArray, count); - TrainCore(dataArray, count); + TrainCore(dataArray, count); + } } - } #if !TLCSSA - /// - /// Train the model parameters based on a training series. - /// - /// The training time-series. - internal override void Train(RoleMappedData data) - { - _host.CheckValue(data, nameof(data)); - _host.CheckParam(data.Schema.Feature.HasValue, nameof(data), "Must have features column."); - var featureCol = data.Schema.Feature.Value; - if (featureCol.Type != NumberDataViewType.Single) - throw _host.ExceptSchemaMismatch(nameof(data), "feature", featureCol.Name, "Single", featureCol.Type.ToString()); + /// + /// Train the model parameters based on a training series. + /// + /// The training time-series. + internal override void Train(RoleMappedData data) + { + _host.CheckValue(data, nameof(data)); + _host.CheckParam(data.Schema.Feature.HasValue, nameof(data), "Must have features column."); + var featureCol = data.Schema.Feature.Value; + if (featureCol.Type != NumberDataViewType.Single) + throw _host.ExceptSchemaMismatch(nameof(data), "feature", featureCol.Name, "Single", featureCol.Type.ToString()); - Single[] dataArray = new Single[_trainSize]; + Single[] dataArray = new Single[_trainSize]; - int count = 0; - using (var cursor = data.Data.GetRowCursor(featureCol)) - { - var getVal = cursor.GetGetter(featureCol); - Single val = default; - while (cursor.MoveNext() && count < _trainSize) + int count = 0; + using (var cursor = data.Data.GetRowCursor(featureCol)) { - getVal(ref val); - if (!Single.IsNaN(val)) - dataArray[count++] = val; + var getVal = cursor.GetGetter(featureCol); + Single val = default; + while (cursor.MoveNext() && count < _trainSize) + { + getVal(ref val); + if (!Single.IsNaN(val)) + dataArray[count++] = val; + } } - } - if (_shouldMaintainInfo) - { - _info = new ModelInfo(); - _info.WindowSize = _windowSize; - } + if (_shouldMaintainInfo) + { + _info = new ModelInfo(); + _info.WindowSize = _windowSize; + } - if (count <= 2 * _windowSize) - { - using (var ch = _host.Start("Train")) - ch.Warning("Training cannot be completed because the input series for training does not have enough points."); - } - else - { - if (count != _trainSize) - Array.Resize(ref dataArray, count); + if (count <= 2 * _windowSize) + { + using (var ch = _host.Start("Train")) + ch.Warning("Training cannot be completed because the input series for training does not have enough points."); + } + else + { + if (count != _trainSize) + Array.Resize(ref dataArray, count); - TrainCore(dataArray, count); + TrainCore(dataArray, count); + } } - } #endif - private void TrainCore(Single[] dataArray, int originalSeriesLength) - { - _host.Assert(Utils.Size(dataArray) > 0); - Single[] singularVals; - Single[] leftSingularVecs; - var learnNaiveModel = false; - - var signalLength = _rankSelectionMethod == RankSelectionMethod.Exact ? originalSeriesLength : 2 * _windowSize - 1;//originalSeriesLength; - var signal = new Single[signalLength]; - - int i; - // Creating the trajectory matrix for the series - TrajectoryMatrix tMat = new TrajectoryMatrix(_host, dataArray, _windowSize, originalSeriesLength); - - // Computing the SVD of the trajectory matrix - if (!tMat.ComputeSvd(out singularVals, out leftSingularVecs)) - learnNaiveModel = true; - else + private void TrainCore(Single[] dataArray, int originalSeriesLength) { - for (i = 0; i < _windowSize * _maxRank; ++i) + _host.Assert(Utils.Size(dataArray) > 0); + Single[] singularVals; + Single[] leftSingularVecs; + var learnNaiveModel = false; + + var signalLength = _rankSelectionMethod == RankSelectionMethod.Exact ? originalSeriesLength : 2 * _windowSize - 1;//originalSeriesLength; + var signal = new Single[signalLength]; + + int i; + // Creating the trajectory matrix for the series + TrajectoryMatrix tMat = new TrajectoryMatrix(_host, dataArray, _windowSize, originalSeriesLength); + + // Computing the SVD of the trajectory matrix + if (!tMat.ComputeSvd(out singularVals, out leftSingularVecs)) + learnNaiveModel = true; + else { - if (Single.IsNaN(leftSingularVecs[i])) + for (i = 0; i < _windowSize * _maxRank; ++i) { - learnNaiveModel = true; - break; + if (Single.IsNaN(leftSingularVecs[i])) + { + learnNaiveModel = true; + break; + } } } - } - // Checking for standard eigenvectors, if found reduce the window size and reset training. - if (!learnNaiveModel) - { - for (i = 0; i < _windowSize; ++i) + // Checking for standard eigenvectors, if found reduce the window size and reset training. + if (!learnNaiveModel) { - var v = leftSingularVecs[(i + 1) * _windowSize - 1]; - if (v * v == 1) + for (i = 0; i < _windowSize; ++i) { - if (_windowSize > 2) + var v = leftSingularVecs[(i + 1) * _windowSize - 1]; + if (v * v == 1) { - _windowSize--; - _maxRank = _windowSize / 2; - _alpha = new Single[_windowSize - 1]; - _state = new Single[_windowSize - 1]; - _x = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); - _xSmooth = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); - - TrainCore(dataArray, originalSeriesLength); - return; - } - else - { - learnNaiveModel = true; - break; + if (_windowSize > 2) + { + _windowSize--; + _maxRank = _windowSize / 2; + _alpha = new Single[_windowSize - 1]; + _state = new Single[_windowSize - 1]; + _x = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + _xSmooth = new CpuAlignedVector(_windowSize, CpuMathUtils.GetVectorAlignment()); + + TrainCore(dataArray, originalSeriesLength); + return; + } + else + { + learnNaiveModel = true; + break; + } } } } - } - // Learn the naive (averaging) model in case the eigen decomposition is not possible - if (learnNaiveModel) - { + // Learn the naive (averaging) model in case the eigen decomposition is not possible + if (learnNaiveModel) + { #if !TLCSSA - using (var ch = _host.Start("Train")) - ch.Warning("The precise SSA model cannot be trained."); + using (var ch = _host.Start("Train")) + ch.Warning("The precise SSA model cannot be trained."); #endif - _rank = 1; - var temp = (Single)(1f / Math.Sqrt(_windowSize)); - for (i = 0; i < _windowSize; ++i) - leftSingularVecs[i] = temp; - } - else - { - // Computing the signal rank - if (_rankSelectionMethod == RankSelectionMethod.Exact) - _rank = DetermineSignalRank(dataArray, tMat, leftSingularVecs, singularVals, signal, _maxRank); - else if (_rankSelectionMethod == RankSelectionMethod.Fast) - _rank = DetermineSignalRankFast(dataArray, tMat, leftSingularVecs, singularVals, _maxRank); - } + _rank = 1; + var temp = (Single)(1f / Math.Sqrt(_windowSize)); + for (i = 0; i < _windowSize; ++i) + leftSingularVecs[i] = temp; + } + else + { + // Computing the signal rank + if (_rankSelectionMethod == RankSelectionMethod.Exact) + _rank = DetermineSignalRank(dataArray, tMat, leftSingularVecs, singularVals, signal, _maxRank); + else if (_rankSelectionMethod == RankSelectionMethod.Fast) + _rank = DetermineSignalRankFast(dataArray, tMat, leftSingularVecs, singularVals, _maxRank); + } - // Setting the the y vector - _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); + // Setting the the y vector + _y = new CpuAlignedVector(_rank, CpuMathUtils.GetVectorAlignment()); - // Setting the weight matrix - _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); - i = 0; - _wTrans.CopyFrom(leftSingularVecs, ref i); + // Setting the weight matrix + _wTrans = new CpuAlignedMatrixRow(_rank, _windowSize, CpuMathUtils.GetVectorAlignment()); + i = 0; + _wTrans.CopyFrom(leftSingularVecs, ref i); - // Setting alpha - Single nu = 0; - for (i = 0; i < _rank; ++i) - { - _y[i] = leftSingularVecs[_windowSize * (i + 1) - 1]; - nu += _y[i] * _y[i]; - } + // Setting alpha + Single nu = 0; + for (i = 0; i < _rank; ++i) + { + _y[i] = leftSingularVecs[_windowSize * (i + 1) - 1]; + nu += _y[i] * _y[i]; + } - CpuAligenedMathUtils.MatTranTimesSrc(_wTrans, _y, _xSmooth); - for (i = 0; i < _windowSize - 1; ++i) - _alpha[i] = _xSmooth[i] / (1 - nu); + CpuAligenedMathUtils.MatTranTimesSrc(_wTrans, _y, _xSmooth); + for (i = 0; i < _windowSize - 1; ++i) + _alpha[i] = _xSmooth[i] / (1 - nu); - // Stabilizing the model - if (_shouldStablize && !learnNaiveModel) - { - if (!Stabilize()) + // Stabilizing the model + if (_shouldStablize && !learnNaiveModel) { + if (!Stabilize()) + { #if !TLCSSA - using (var ch = _host.Start("Train")) - ch.Warning("The trained model cannot be stablized."); + using (var ch = _host.Start("Train")) + ch.Warning("The trained model cannot be stablized."); #endif + } + } + + // Computing the noise moments + if (ShouldComputeForecastIntervals) + { + if (_rankSelectionMethod != RankSelectionMethod.Exact) + ReconstructSignalTailFast(dataArray, tMat, leftSingularVecs, _rank, signal); + + ComputeNoiseMoments(dataArray, signal, _alpha, out _observationNoiseVariance, out _autoregressionNoiseVariance, + out _observationNoiseMean, out _autoregressionNoiseMean, originalSeriesLength - signalLength); + _observationNoiseMean = 0; + _autoregressionNoiseMean = 0; + } + + // Setting the state + _nextPrediction = _autoregressionNoiseMean + _observationNoiseMean; + + if (_buffer.Count > 0) // Use the buffer to set the state when there are data points pushed into the buffer using the Consume() method + { + int len = _buffer.Count; + for (i = 0; i < _windowSize - len; ++i) + _x[i] = 0; + for (i = Math.Max(0, len - _windowSize); i < len; ++i) + _x[i - len + _windowSize] = _buffer[i]; + } + else // use the training data points otherwise + { + for (i = originalSeriesLength - _windowSize; i < originalSeriesLength; ++i) + _x[i - originalSeriesLength + _windowSize] = dataArray[i]; + } + + CpuAligenedMathUtils.MatTimesSrc(_wTrans, _x, _y); + CpuAligenedMathUtils.MatTranTimesSrc(_wTrans, _y, _xSmooth); + + for (i = 1; i < _windowSize; ++i) + { + _state[i - 1] = _xSmooth[i]; + _nextPrediction += _state[i - 1] * _alpha[i - 1]; + } + + if (_shouldMaintainInfo) + { + _info.IsTrained = true; + _info.WindowSize = _windowSize; + _info.AutoRegressiveCoefficients = new Single[_windowSize - 1]; + Array.Copy(_alpha, _info.AutoRegressiveCoefficients, _windowSize - 1); + _info.Rank = _rank; + _info.IsNaiveModelTrained = learnNaiveModel; + _info.Spectrum = singularVals; } } - // Computing the noise moments - if (ShouldComputeForecastIntervals) + /// + /// Forecasts the future values of the series up to the given horizon. + /// + /// The forecast result. + /// The forecast horizon. + internal override void Forecast(ref ForecastResultBase result, int horizon = 1) { - if (_rankSelectionMethod != RankSelectionMethod.Exact) - ReconstructSignalTailFast(dataArray, tMat, leftSingularVecs, _rank, signal); + _host.CheckParam(horizon >= 1, nameof(horizon), "The horizon parameter should be greater than 0."); + if (result == null) + result = new SsaForecastResult(); - ComputeNoiseMoments(dataArray, signal, _alpha, out _observationNoiseVariance, out _autoregressionNoiseVariance, - out _observationNoiseMean, out _autoregressionNoiseMean, originalSeriesLength - signalLength); - _observationNoiseMean = 0; - _autoregressionNoiseMean = 0; - } + var str = "The result argument must be of type " + typeof(SsaForecastResult).ToString(); + _host.CheckParam(result is SsaForecastResult, nameof(result), str); - // Setting the state - _nextPrediction = _autoregressionNoiseMean + _observationNoiseMean; + var output = result as SsaForecastResult; - if (_buffer.Count > 0) // Use the buffer to set the state when there are data points pushed into the buffer using the Consume() method - { - int len = _buffer.Count; - for (i = 0; i < _windowSize - len; ++i) - _x[i] = 0; - for (i = Math.Max(0, len - _windowSize); i < len; ++i) - _x[i - len + _windowSize] = _buffer[i]; + var resEditor = VBufferEditor.Create(ref result.PointForecast, horizon); + + int i; + int j; + int k; + + // Computing the point forecasts + resEditor.Values[0] = _nextPrediction; + for (i = 1; i < horizon; ++i) + { + k = 0; + resEditor.Values[i] = _autoregressionNoiseMean + _observationNoiseMean; + for (j = i; j < _windowSize - 1; ++j, ++k) + resEditor.Values[i] += _state[j] * _alpha[k]; + + for (j = Math.Max(0, i - _windowSize + 1); j < i; ++j, ++k) + resEditor.Values[i] += resEditor.Values[j] * _alpha[k]; + } + + // Computing the forecast variances + if (ShouldComputeForecastIntervals) + { + var sdEditor = VBufferEditor.Create(ref output.ForecastStandardDeviation, horizon); + var lastCol = new FixedSizeQueue(_windowSize - 1); + + for (i = 0; i < _windowSize - 3; ++i) + lastCol.AddLast(0); + lastCol.AddLast(1); + lastCol.AddLast(_alpha[_windowSize - 2]); + sdEditor.Values[0] = _autoregressionNoiseVariance + _observationNoiseVariance; + + for (i = 1; i < horizon; ++i) + { + Single temp = 0; + for (j = 0; j < _windowSize - 1; ++j) + temp += _alpha[j] * lastCol[j]; + lastCol.AddLast(temp); + + sdEditor.Values[i] = sdEditor.Values[i - 1] + _autoregressionNoiseVariance * temp * temp; + } + + for (i = 0; i < horizon; ++i) + sdEditor.Values[i] = (float)Math.Sqrt(sdEditor.Values[i]); + + output.ForecastStandardDeviation = sdEditor.Commit(); + } + + result.PointForecast = resEditor.Commit(); + output.CanComputeForecastIntervals = ShouldComputeForecastIntervals; + output.BoundOffset = 0; } - else // use the training data points otherwise + + /// + /// Predicts the next value on the series. + /// + /// The prediction result. + internal override void PredictNext(ref Single output) { - for (i = originalSeriesLength - _windowSize; i < originalSeriesLength; ++i) - _x[i - originalSeriesLength + _windowSize] = dataArray[i]; + output = _nextPrediction; } - CpuAligenedMathUtils.MatTimesSrc(_wTrans, _x, _y); - CpuAligenedMathUtils.MatTranTimesSrc(_wTrans, _y, _xSmooth); - - for (i = 1; i < _windowSize; ++i) + internal override SequenceModelerBase Clone() { - _state[i - 1] = _xSmooth[i]; - _nextPrediction += _state[i - 1] * _alpha[i - 1]; + return new AdaptiveSingularSpectrumSequenceModelerInternal(this); } - if (_shouldMaintainInfo) + /// + /// Computes the forecast intervals for the input forecast object at the given confidence level. The results are stored in the forecast object. + /// + /// The input forecast object + /// The confidence level in [0, 1) + internal static void ComputeForecastIntervals(ref SsaForecastResult forecast, Single confidenceLevel = 0.95f) { - _info.IsTrained = true; - _info.WindowSize = _windowSize; - _info.AutoRegressiveCoefficients = new Single[_windowSize - 1]; - Array.Copy(_alpha, _info.AutoRegressiveCoefficients, _windowSize - 1); - _info.Rank = _rank; - _info.IsNaiveModelTrained = learnNaiveModel; - _info.Spectrum = singularVals; - } - } + Contracts.CheckParam(0 <= confidenceLevel && confidenceLevel < 1, nameof(confidenceLevel), "The confidence level must be in [0, 1)."); + Contracts.CheckValue(forecast, nameof(forecast)); + Contracts.Check(forecast.CanComputeForecastIntervals, "The forecast intervals cannot be computed for this forecast object."); - /// - /// Forecasts the future values of the series up to the given horizon. - /// - /// The forecast result. - /// The forecast horizon. - internal override void Forecast(ref ForecastResultBase result, int horizon = 1) - { - _host.CheckParam(horizon >= 1, nameof(horizon), "The horizon parameter should be greater than 0."); - if (result == null) - result = new SsaForecastResult(); + var meanForecast = forecast.PointForecast.GetValues(); + var horizon = meanForecast.Length; + var sdForecast = forecast.ForecastStandardDeviation.GetValues(); + Contracts.Check(sdForecast.Length >= horizon, "The forecast standard deviation values are not available."); - var str = "The result argument must be of type " + typeof(SsaForecastResult).ToString(); - _host.CheckParam(result is SsaForecastResult, nameof(result), str); + forecast.ConfidenceLevel = confidenceLevel; + if (horizon == 0) + return; - var output = result as SsaForecastResult; + var upper = VBufferEditor.Create(ref forecast.UpperBound, horizon); + var lower = VBufferEditor.Create(ref forecast.LowerBound, horizon); - var resEditor = VBufferEditor.Create(ref result.PointForecast, horizon); + var z = ProbabilityFunctions.Probit(0.5 + confidenceLevel / 2.0); + double temp; - int i; - int j; - int k; + for (int i = 0; i < horizon; ++i) + { + temp = z * sdForecast[i]; + upper.Values[i] = (Single)(meanForecast[i] + forecast.BoundOffset + temp); + lower.Values[i] = (Single)(meanForecast[i] + forecast.BoundOffset - temp); + } - // Computing the point forecasts - resEditor.Values[0] = _nextPrediction; - for (i = 1; i < horizon; ++i) + forecast.UpperBound = upper.Commit(); + forecast.LowerBound = lower.Commit(); + } + + public void Train(IDataView dataView, string inputColumnName) => Train(new RoleMappedData(dataView, null, inputColumnName)); + + public float[] Forecast(int horizon) { - k = 0; - resEditor.Values[i] = _autoregressionNoiseMean + _observationNoiseMean; - for (j = i; j < _windowSize - 1; ++j, ++k) - resEditor.Values[i] += _state[j] * _alpha[k]; + ForecastResultBase result = null; + Forecast(ref result, horizon); + return result.PointForecast.GetValues().ToArray(); + } - for (j = Math.Max(0, i - _windowSize + 1); j < i; ++j, ++k) - resEditor.Values[i] += resEditor.Values[j] * _alpha[k]; + public void ForecastWithConfidenceIntervals(int horizon, out float[] forecast, out float[] confidenceIntervalLowerBounds, out float[] confidenceIntervalUpperBounds, float confidenceLevel = 0.95f) + { + ForecastResultBase result = null; + Forecast(ref result, horizon); + SsaForecastResult ssaResult = (SsaForecastResult)result; + ComputeForecastIntervals(ref ssaResult, confidenceLevel); + forecast = result.PointForecast.GetValues().ToArray(); + confidenceIntervalLowerBounds = ssaResult.LowerBound.GetValues().ToArray(); + confidenceIntervalUpperBounds = ssaResult.UpperBound.GetValues().ToArray(); } - // Computing the forecast variances - if (ShouldComputeForecastIntervals) + public void Update(IDataView dataView, string inputColumnName) { - var sdEditor = VBufferEditor.Create(ref output.ForecastStandardDeviation, horizon); - var lastCol = new FixedSizeQueue(_windowSize - 1); + _host.CheckParam(dataView != null, nameof(dataView), "The input series for updating cannot be null."); - for (i = 0; i < _windowSize - 3; ++i) - lastCol.AddLast(0); - lastCol.AddLast(1); - lastCol.AddLast(_alpha[_windowSize - 2]); - sdEditor.Values[0] = _autoregressionNoiseVariance + _observationNoiseVariance; + var data = new RoleMappedData(dataView, null, inputColumnName); + if (data.Schema.Feature.Value.Type != NumberDataViewType.Single) + throw _host.ExceptUserArg(nameof(data.Schema.Feature.Value.Name), "The time series input column has " + + "type '{0}', but must be a float.", data.Schema.Feature.Value.Type); - for (i = 1; i < horizon; ++i) + var col = data.Schema.Feature.Value; + using (var cursor = data.Data.GetRowCursor(data.Data.Schema)) { - Single temp = 0; - for (j = 0; j < _windowSize - 1; ++j) - temp += _alpha[j] * lastCol[j]; - lastCol.AddLast(temp); - - sdEditor.Values[i] = sdEditor.Values[i - 1] + _autoregressionNoiseVariance * temp * temp; + var getVal = cursor.GetGetter(col); + Single val = default(Single); + while (cursor.MoveNext()) + { + getVal(ref val); + if (!Single.IsNaN(val)) + Consume(ref val); + } } - - for (i = 0; i < horizon; ++i) - sdEditor.Values[i] = (float)Math.Sqrt(sdEditor.Values[i]); - - output.ForecastStandardDeviation = sdEditor.Commit(); } - - result.PointForecast = resEditor.Commit(); - output.CanComputeForecastIntervals = ShouldComputeForecastIntervals; - output.BoundOffset = 0; } /// - /// Predicts the next value on the series. + /// Train a forecasting model from an . /// - /// The prediction result. - internal override void PredictNext(ref Single output) - { - output = _nextPrediction; - } - - internal override SequenceModelerBase Clone() - { - return new AdaptiveSingularSpectrumSequenceModeler(this); - } + /// Reference to the + public void Train(IDataView dataView) => _modeler.Train(dataView, _inputColumnName); /// - /// Computes the forecast intervals for the input forecast object at the given confidence level. The results are stored in the forecast object. + /// Update a forecasting model with the new observations in the form of an . /// - /// The input forecast object - /// The confidence level in [0, 1) - internal static void ComputeForecastIntervals(ref SsaForecastResult forecast, Single confidenceLevel = 0.95f) - { - Contracts.CheckParam(0 <= confidenceLevel && confidenceLevel < 1, nameof(confidenceLevel), "The confidence level must be in [0, 1)."); - Contracts.CheckValue(forecast, nameof(forecast)); - Contracts.Check(forecast.CanComputeForecastIntervals, "The forecast intervals cannot be computed for this forecast object."); + /// Reference to the observations as an + /// Name of the input column to update from. If null then input column name specified at model initiation is taken as default. + public void Update(IDataView dataView, string inputColumnName = null) => _modeler.Update(dataView, inputColumnName ?? _inputColumnName); - var meanForecast = forecast.PointForecast.GetValues(); - var horizon = meanForecast.Length; - var sdForecast = forecast.ForecastStandardDeviation.GetValues(); - Contracts.Check(sdForecast.Length >= horizon, "The forecast standard deviation values are not available."); - - forecast.ConfidenceLevel = confidenceLevel; - if (horizon == 0) - return; - - var upper = VBufferEditor.Create(ref forecast.UpperBound, horizon); - var lower = VBufferEditor.Create(ref forecast.LowerBound, horizon); - - var z = ProbabilityFunctions.Probit(0.5 + confidenceLevel / 2.0); - double temp; - - for (int i = 0; i < horizon; ++i) - { - temp = z * sdForecast[i]; - upper.Values[i] = (Single)(meanForecast[i] + forecast.BoundOffset + temp); - lower.Values[i] = (Single)(meanForecast[i] + forecast.BoundOffset - temp); - } + /// + /// Perform forecasting until a particular . + /// + /// Number of values to forecast. + /// Forecasted values. + public float[] Forecast(int horizon) => _modeler.Forecast(horizon); - forecast.UpperBound = upper.Commit(); - forecast.LowerBound = lower.Commit(); + /// + /// For saving a model into a repository. + /// + public void Save(ModelSaveContext ctx) + { + _host.CheckValue(ctx, nameof(ctx)); + ctx.CheckAtModel(); + ctx.SetVersionInfo(GetVersionInfo()); + ctx.Writer.Write(_inputColumnName); + ctx.SaveModel(_modeler, "ForecastWrapper"); } + + /// + /// Perform forecasting until a particular and also computes confidence intervals. + /// For confidence intervals to be computed the model must be trained with + /// set to true. + /// + /// Number of values to forecast. + /// Forecasted values + /// Lower bound confidence intervals of forecasted values. + /// Upper bound confidence intervals of forecasted values. + /// Forecast confidence level. + public void ForecastWithConfidenceIntervals(int horizon, out float[] forecast, out float[] confidenceIntervalLowerBounds, out float[] confidenceIntervalUpperBounds, float confidenceLevel = 0.95f) => + _modeler.ForecastWithConfidenceIntervals(horizon, out forecast, out confidenceIntervalLowerBounds, out confidenceIntervalUpperBounds, confidenceLevel); } } diff --git a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs index f5261db8cc..e36633d7b5 100644 --- a/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs +++ b/src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs @@ -2,8 +2,10 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System; using Microsoft.ML.Data; using Microsoft.ML.Transforms.TimeSeries; +using static Microsoft.ML.Transforms.TimeSeries.AdaptiveSingularSpectrumSequenceModeler; namespace Microsoft.ML { @@ -35,7 +37,7 @@ public static IidChangePointEstimator DetectIidChangePoint(this TransformsCatalo /// /// Create , which predicts spikes in - /// independent identically distributed (i.i.d.) + /// independent identically distributed (i.i.d.) /// time series based on adaptive kernel density estimations and martingale scores. /// /// The transform's catalog. @@ -130,10 +132,10 @@ public static SsaSpikeEstimator DetectSpikeBySsa(this TransformsCatalog catalog, /// The column data is a vector of . The vector contains 3 elements: alert (1 means anomaly while 0 means normal), raw score, and magnitude of spectual residual. /// Name of column to transform. The column data must be . /// The size of the sliding window for computing spectral residual. - /// The number of points to add back of training window. No more than windowSize, usually keep default value. - /// The number of pervious points used in prediction. No more than windowSize, usually keep default value. - /// The size of sliding window to generate a saliency map for the series. No more than windowSize, usually keep default value. - /// The size of sliding window to calculate the anomaly score for each data point. No more than windowSize. + /// The number of points to add back of training window. No more than , usually keep default value. + /// The number of pervious points used in prediction. No more than , usually keep default value. + /// The size of sliding window to generate a saliency map for the series. No more than , usually keep default value. + /// The size of sliding window to calculate the anomaly score for each data point. No more than . /// The threshold to determine anomaly, score larger than the threshold is considered as anomaly. Should be in (0,1) /// /// @@ -145,5 +147,37 @@ public static SsaSpikeEstimator DetectSpikeBySsa(this TransformsCatalog catalog, public static SrCnnAnomalyEstimator DetectAnomalyBySrCnn(this TransformsCatalog catalog, string outputColumnName, string inputColumnName, int windowSize=64, int backAddWindowSize=5, int lookaheadWindowSize=5, int averageingWindowSize=3, int judgementWindowSize=21, double threshold=0.3) => new SrCnnAnomalyEstimator(CatalogUtils.GetEnvironment(catalog), outputColumnName, windowSize, backAddWindowSize, lookaheadWindowSize, averageingWindowSize, judgementWindowSize, threshold, inputColumnName); + + /// + /// Singular Spectrum Analysis (SSA) model for modeling univariate time-series. + /// For the details of the model, refer to http://arxiv.org/pdf/1206.6910.pdf. + /// + /// Catalog. + /// The name of the column on which forecasting needs to be performed. + /// The length of series from the begining used for training. + /// The length of series that is kept in buffer for modeling (parameter N from reference papar). + /// The length of the window on the series for building the trajectory matrix (parameter L from reference papar). + /// The discount factor in [0,1] used for online updates (default = 1). + /// The rank selection method (default = Exact). + /// The desired rank of the subspace used for SSA projection (parameter r from reference papar). This parameter should be in the range in [1, ]. + /// If set to null, the rank is automatically determined based on prediction error minimization. (default = null) + /// The maximum rank considered during the rank selection process. If not provided (i.e. set to null), it is set to - 1. + /// The flag determining whether the confidence bounds for the point forecasts should be computed. (default = ) + /// The flag determining whether the model should be stabilized. + /// The flag determining whether the meta information for the model needs to be maintained. + /// The maximum growth on the exponential trend + /// + /// + /// + /// + /// + public static AdaptiveSingularSpectrumSequenceModeler AdaptiveSingularSpectrumSequenceModeler(this ForecastingCatalog catalog, + string inputColumnName, int trainSize, int seriesLength, int windowSize, Single discountFactor = 1, RankSelectionMethod rankSelectionMethod = RankSelectionMethod.Exact, + int? rank = null, int? maxRank = null, bool shouldComputeForecastIntervals = true, bool shouldstablize = true, bool shouldMaintainInfo = false, GrowthRatio? maxGrowth = null) => + new AdaptiveSingularSpectrumSequenceModeler(CatalogUtils.GetEnvironment(catalog), inputColumnName, trainSize, seriesLength, windowSize, discountFactor, + rankSelectionMethod, rank, maxRank, shouldComputeForecastIntervals, shouldstablize, shouldMaintainInfo, maxGrowth); } } diff --git a/src/Microsoft.ML.TimeSeries/Forecast.cs b/src/Microsoft.ML.TimeSeries/Forecast.cs new file mode 100644 index 0000000000..9321d115f0 --- /dev/null +++ b/src/Microsoft.ML.TimeSeries/Forecast.cs @@ -0,0 +1,93 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System.IO; +using Microsoft.ML.Data; +using static Microsoft.ML.Transforms.TimeSeries.AdaptiveSingularSpectrumSequenceModeler; + +namespace Microsoft.ML.TimeSeries +{ + /// + /// Interface for forecasting models. + /// + /// The type of values that are forecasted. + public interface ICanForecast : ICanSaveModel + { + /// + /// Train a forecasting model from an . + /// + /// Training data. + void Train(IDataView dataView); + + /// + /// Update a forecasting model with the new observations in the form of an . + /// + /// Reference to the observations as an + /// Name of the input column to update from. + void Update(IDataView dataView, string inputColumnName = null); + + /// + /// Perform forecasting until a particular . + /// + /// Number of values to forecast. + /// Forecasted values. + T[] Forecast(int horizon); + + /// + /// Perform forecasting until a particular and also computes confidence intervals. + /// + /// Number of values to forecast. + /// Forecasted values + /// Lower bound confidence intervals of forecasted values. + /// Upper bound confidence intervals of forecasted values. + /// Forecast confidence level. + void ForecastWithConfidenceIntervals(int horizon, out T[] forecast, out float[] confidenceIntervalLowerBounds, out float[] confidenceIntervalUpperBounds, float confidenceLevel = 0.95f); + } + + public static class ForecastExtensions + { + /// + /// Load a model. + /// + /// The type of , usually float. + /// + /// File path to load the model from. + /// model. + public static ICanForecast LoadForecastingModel(this ModelOperationsCatalog catalog, string filePath) + { + var env = CatalogUtils.GetEnvironment(catalog); + using (var file = File.OpenRead(filePath)) + { + using (var rep = RepositoryReader.Open(file, env)) + { + ModelLoadContext.LoadModel, SignatureLoadModel>(env, out var model, rep, LoaderSignature); + return model; + } + } + } + + /// + /// Save a model to a file specified by + /// + /// + /// + /// model to save. + /// File path to save the model to. + public static void SaveForecastingModel(this ModelOperationsCatalog catalog, ICanForecast model, string filePath) + { + var env = CatalogUtils.GetEnvironment(catalog); + using (var file = File.Create(filePath)) + { + using (var ch = env.Start("Saving forecasting model.")) + { + using (var rep = RepositoryWriter.CreateNew(file, ch)) + { + ModelSaveContext.SaveModel(rep, model, LoaderSignature); + rep.Commit(); + } + } + } + } + } +} diff --git a/src/Microsoft.ML.TimeSeries/SsaAnomalyDetectionBase.cs b/src/Microsoft.ML.TimeSeries/SsaAnomalyDetectionBase.cs index 42dabcda12..0c823f2de1 100644 --- a/src/Microsoft.ML.TimeSeries/SsaAnomalyDetectionBase.cs +++ b/src/Microsoft.ML.TimeSeries/SsaAnomalyDetectionBase.cs @@ -201,7 +201,7 @@ public SsaAnomalyDetectionBase(SsaOptions options, string name, IHostEnvironment ErrorFunc = ErrorFunctionUtils.GetErrorFunction(ErrorFunction); IsAdaptive = options.IsAdaptive; // Creating the master SSA model - Model = new AdaptiveSingularSpectrumSequenceModeler(Host, options.InitialWindowSize, SeasonalWindowSize + 1, SeasonalWindowSize, + Model = new AdaptiveSingularSpectrumSequenceModeler.AdaptiveSingularSpectrumSequenceModelerInternal(Host, options.InitialWindowSize, SeasonalWindowSize + 1, SeasonalWindowSize, DiscountFactor, AdaptiveSingularSpectrumSequenceModeler.RankSelectionMethod.Exact, null, SeasonalWindowSize / 2, false, false); StateRef = new State(); diff --git a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs index 281fbe96c2..ff8bfd1848 100644 --- a/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs +++ b/test/Microsoft.ML.TimeSeries.Tests/TimeSeriesDirectApi.cs @@ -7,6 +7,7 @@ using System.IO; using Microsoft.ML.Data; using Microsoft.ML.TestFramework.Attributes; +using Microsoft.ML.TimeSeries; using Microsoft.ML.Transforms.TimeSeries; using Xunit; @@ -334,5 +335,103 @@ public void AnomalyDetectionWithSrCnn() k += 1; } } + + [Fact] + public void Forecasting() + { + const int SeasonalitySize = 10; + const int NumberOfSeasonsInTraining = 5; + + List data = new List(); + + var ml = new MLContext(seed: 1); + var dataView = ml.Data.LoadFromEnumerable(data); + + for (int j = 0; j < NumberOfSeasonsInTraining; j++) + for (int i = 0; i < SeasonalitySize; i++) + data.Add(new Data(i)); + + // Create forecasting model. + var model = ml.Forecasting.AdaptiveSingularSpectrumSequenceModeler("Value", data.Count, SeasonalitySize + 1, SeasonalitySize, + 1, AdaptiveSingularSpectrumSequenceModeler.RankSelectionMethod.Exact, null, SeasonalitySize / 2, false, false); + + // Train. + model.Train(dataView); + + // Forecast. + var forecast = model.Forecast(5); + + // Update with new observations. + model.Update(dataView); + + // Checkpoint. + ml.Model.SaveForecastingModel(model, "model.zip"); + + // Load the checkpointed model from disk. + var modelCopy = ml.Model.LoadForecastingModel("model.zip"); + + // Forecast with the checkpointed model loaded from disk. + var forecastCopy = modelCopy.Forecast(5); + + // Forecast with the original model(that was checkpointed to disk). + forecast = model.Forecast(5); + + // Both the forecasted values from model loaded from disk and + // already in memory model should be the same. + Assert.Equal(forecast, forecastCopy); + } + + [Fact] + public void ForecastingWithConfidenceInterval() + { + const int SeasonalitySize = 10; + const int NumberOfSeasonsInTraining = 5; + + List data = new List(); + + var ml = new MLContext(seed: 1); + var dataView = ml.Data.LoadFromEnumerable(data); + + for (int j = 0; j < NumberOfSeasonsInTraining; j++) + for (int i = 0; i < SeasonalitySize; i++) + data.Add(new Data(i)); + + // Create forecasting model. + var model = ml.Forecasting.AdaptiveSingularSpectrumSequenceModeler("Value", data.Count, SeasonalitySize + 1, SeasonalitySize, + 1, AdaptiveSingularSpectrumSequenceModeler.RankSelectionMethod.Exact, null, SeasonalitySize / 2, shouldComputeForecastIntervals: true, false); + + // Train. + model.Train(dataView); + + // Forecast. + float[] forecast; + float[] lowConfInterval; + float[] upperConfInterval; + model.ForecastWithConfidenceIntervals(5, out forecast, out lowConfInterval, out upperConfInterval); + + // Update with new observations. + model.Update(dataView); + + // Checkpoint. + ml.Model.SaveForecastingModel(model, "model.zip"); + + // Load the checkpointed model from disk. + var modelCopy = ml.Model.LoadForecastingModel("model.zip"); + + // Forecast with the checkpointed model loaded from disk. + float[] forecastCopy; + float[] lowConfIntervalCopy; + float[] upperConfIntervalCopy; + modelCopy.ForecastWithConfidenceIntervals(5, out forecastCopy, out lowConfIntervalCopy, out upperConfIntervalCopy); + + // Forecast with the original model(that was checkpointed to disk). + model.ForecastWithConfidenceIntervals(5, out forecast, out lowConfInterval, out upperConfInterval); + + // Both the forecasted values from model loaded from disk and + // already in memory model should be the same. + Assert.Equal(forecast, forecastCopy); + Assert.Equal(lowConfInterval, lowConfIntervalCopy); + Assert.Equal(upperConfInterval, upperConfIntervalCopy); + } } }