diff --git a/src/Microsoft.ML.TimeSeries/MathUtils.cs b/src/Microsoft.ML.TimeSeries/MathUtils.cs
new file mode 100644
index 0000000000..312d73aa9b
--- /dev/null
+++ b/src/Microsoft.ML.TimeSeries/MathUtils.cs
@@ -0,0 +1,121 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Collections.Generic;
+using System.Text;
+
+namespace Microsoft.ML.TimeSeries
+{
+ internal class MathUtils
+ {
+ ///
+ /// use quick-sort like method to obtain the median value.
+ /// the complexity in expectation is O(n), which is faster than using quickSort.
+ ///
+ /// the input list of values. note that this list will be modified after calling this method
+ /// returns the median value
+ public static double QuickMedian(List values)
+ {
+ if (values == null || values.Count == 0)
+ return double.NaN;
+
+ // here the third parameter is start from 1. so we need to plus 1 to compliant.
+ return QuickSelect(values, values.Count / 2 + 1);
+ }
+
+ public static double QuickSelect(IReadOnlyList values, int k)
+ {
+ var nums = values;
+ double[] left = new double[values.Count];
+ double[] right = new double[values.Count];
+ int numsCount = nums.Count;
+
+ while (true)
+ {
+ if (numsCount == 1)
+ return nums[0];
+
+ int idx = FindMedianIndex(nums, 0, numsCount - 1);
+ double key = nums[idx];
+
+ int leftIdx = 0;
+ int rightIdx = 0;
+ for (int i = 0; i < numsCount; i++)
+ {
+ if (i == idx)
+ continue;
+
+ if (nums[i] < key)
+ left[leftIdx++] = nums[i];
+ else
+ right[rightIdx++] = nums[i];
+ }
+
+ if (leftIdx == k - 1)
+ return key;
+
+ if (leftIdx >= k)
+ {
+ nums = left;
+ numsCount = leftIdx;
+ }
+ else
+ {
+ nums = right;
+ k = k - leftIdx - 1;
+ numsCount = rightIdx;
+ }
+ }
+ }
+
+ public static int FindMedianIndex(IReadOnlyList values, int start, int end)
+ {
+ // use the middle value among first/middle/end as the guard value, to make sure the average performance good.
+ // according to unit test, this fix will improve the average performance 10%. and works normally when input list is ordered.
+ double first = values[start];
+ double last = values[end];
+ int midIndex = (start + end) / 2;
+ int medianIndex = -1;
+ double middleValue = values[midIndex];
+ if (first < last)
+ {
+ if (middleValue > last)
+ {
+ // last is the middle value
+ medianIndex = end;
+ }
+ else if (middleValue > first)
+ {
+ // middleValue is the middle value
+ medianIndex = midIndex;
+ }
+ else
+ {
+ // first is the middle value
+ medianIndex = start;
+ }
+ }
+ else
+ {
+ if (middleValue > first)
+ {
+ // first is the middle value
+ medianIndex = start;
+ }
+ else if (middleValue < last)
+ {
+ // last is the middle value
+ medianIndex = end;
+ }
+ else
+ {
+ // middleValue is the middle value
+ medianIndex = midIndex;
+ }
+ }
+ return medianIndex;
+ }
+ }
+}
diff --git a/src/Microsoft.ML.TimeSeries/PeriodDetectUtils.cs b/src/Microsoft.ML.TimeSeries/PeriodDetectUtils.cs
new file mode 100644
index 0000000000..8e384224e3
--- /dev/null
+++ b/src/Microsoft.ML.TimeSeries/PeriodDetectUtils.cs
@@ -0,0 +1,289 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Numerics;
+using Microsoft.ML.Transforms.TimeSeries;
+
+namespace Microsoft.ML.TimeSeries
+{
+ ///
+ /// this class is used to detect the periodicity automatically
+ ///
+ public class PeriodDetectUtils
+ {
+ ///
+ /// the minimum value that is considered as a valid period.
+ ///
+ private const int MinPeriod = 4;
+
+ ///
+ /// in practice, the max lag very rarely exceed 365, which lacks of strong interpretation, and which also brings performance overhead.
+ ///
+ private const int MaxLag = 400;
+
+ ///
+ /// suppose the length of time series is 651, now we found an period is 128, then 651/128 = 5, which means there are at most 5 recurrent period. this is too small, the significance build upon this is not trustable.
+ ///
+ private const int MinRecurrentCount = 8;
+
+ ///
+ /// when input time series is with very close values (i.e., different is smaller than E-20), the accuracy of double could distort the
+ /// final trend signal. any seasonal signal under such circumstance becomes unreliable.
+ /// so use this threshold to eliminate such kind of time series. here set to 1e-10 is for conservative consideration.
+ ///
+ private const double MinEnergyThreshold = 1e-10;
+
+ public static int DetectSeasonality(double[] y)
+ {
+ int length = y.Length;
+ int newLength = Get2Power(y.Length);
+ double[] fftRe = new double[newLength];
+ double[] fftIm = new double[newLength];
+ double[] inputRe = new double[newLength];
+
+ double mean = 0;
+ double std = 0;
+
+ foreach (double value in y)
+ mean += value;
+ mean /= length;
+
+ for (int i = 0; i < length; ++i)
+ {
+ inputRe[i] = y[i] - mean;
+ std += inputRe[i] * inputRe[i];
+ }
+ if (std / length < 1e-8)
+ {
+ return -1;
+ }
+
+ for (int i = length; i < newLength; ++i)
+ {
+ inputRe[i] = 0;
+ }
+
+ FftUtils.ComputeForwardFft(inputRe, Enumerable.Repeat(0.0, newLength).ToArray(), fftRe, fftIm, newLength);
+
+ var z = fftRe.Select((m, i) => new Complex(m, fftIm[i])).ToArray();
+ var w = z.Select((t, i) => t * Complex.Conjugate(t)).ToArray();
+ FindBestTwoFrequencies(w, length, out var bestFreq, out var secondFreq);
+
+ double[] ifftRe = new double[newLength];
+ double[] ifftIm = new double[newLength];
+ FftUtils.ComputeBackwardFft(
+ w.Select(t => (double)t.Real).ToArray(),
+ w.Select(t => (double)t.Imaginary).ToArray(), ifftRe, ifftIm, newLength);
+ var r = ifftRe.Select((t, i) => new Complex(t, ifftIm[i])).ToArray();
+ int period = FindTruePeriod(r, bestFreq, secondFreq, newLength);
+
+ if (period < MinPeriod)
+ {
+ period = -1;
+ }
+
+ return period;
+ }
+
+ private static int FindTruePeriod(Complex[] r, int bestFreq, int secondFreq, int timeSeriesLength)
+ {
+ int firstPeriod = -1;
+ int secondPeriod = -1;
+ double firstTimeDomainEnergy = -1;
+ double secondTimeDomainEnergy = -1;
+ firstPeriod = FindBestPeriod(r, bestFreq, timeSeriesLength, out firstTimeDomainEnergy);
+ if (secondFreq != -1)
+ {
+ secondPeriod = FindBestPeriod(r, secondFreq, timeSeriesLength, out secondTimeDomainEnergy);
+ }
+ if (firstPeriod == -1 && secondPeriod == -1)
+ return -1;
+ int truePeriod;
+ double trueTimeDomainEnergy;
+ if (firstPeriod == -1)
+ {
+ truePeriod = secondPeriod;
+ trueTimeDomainEnergy = secondTimeDomainEnergy;
+ }
+ else if (secondPeriod == -1)
+ {
+ truePeriod = firstPeriod;
+ trueTimeDomainEnergy = firstTimeDomainEnergy;
+ }
+ else
+ {
+ if (firstPeriod == secondPeriod)
+ {
+ truePeriod = firstPeriod;
+ trueTimeDomainEnergy = firstTimeDomainEnergy;
+ }
+ else
+ {
+ // hueristic: if the second frequency is with somewhat higher energy in time domain, we think it is a better candidate
+ if (secondTimeDomainEnergy > firstTimeDomainEnergy * 1.05)
+ {
+ truePeriod = secondPeriod;
+ trueTimeDomainEnergy = secondTimeDomainEnergy;
+ }
+ else
+ {
+ truePeriod = firstPeriod;
+ trueTimeDomainEnergy = firstTimeDomainEnergy;
+ }
+ }
+ }
+ trueTimeDomainEnergy /= r[0].Real;
+
+ // this is a key equation, which is named the "testing for randomness with the correlogram". /ref: http://www.ltrr.arizona.edu/~dmeko/notes_3.pdf
+ // actually, 1.96 is for the 2-sigma, which has 95% statistical confidence. 2.58 is for 99% confidence, 2.85 for 99.5% confidence
+ /* increasing the threshold aims to mitigate the fake seasonal component caused by outliers. in practice, if there exist true seasonal component,
+ * such as BirdStrike/Appdownloads, the energy is far larger than threshold, hence change threshold from 2.85 to 4.0 have no impact (tested);
+ */
+
+ double threshold = 4 / Math.Sqrt(timeSeriesLength);
+
+ if (trueTimeDomainEnergy < threshold || r[truePeriod].Real < MinEnergyThreshold)
+ return -1;
+
+ return truePeriod;
+ }
+
+ ///
+ /// in order to pick up a proper frequency robustly (this is useful especially for large frequency, or small period, e.g., period = 2),
+ /// this method aims to pick up the top two frequencies for further evaluation.
+ /// of course, the energy of the second frequency (in frequency domain) must be at similar magnitude compared with the energy of the first
+ /// frequency.
+ ///
+ /// the energy list in the frequency domain, the index is the frequency.
+ /// the original time series length
+ /// the frequency with highest energy
+ /// the frequency with second highest energy
+ private static void FindBestTwoFrequencies(Complex[] w, int timeSeriesLength, out int bestFreq, out int secondFreq)
+ {
+ bestFreq = -1;
+ double bestEnergy = -1.0;
+ secondFreq = -1;
+ double secondEnergy = -1.0;
+
+ if (w.Length < 2)
+ return;
+
+ List energies = new List();
+
+ /* length of time series divided by frequency is period. it is obvious that the period should be larger than 1 and smaller than the total length, and is an integer */
+ for (int i = w.Length / timeSeriesLength; i < w.Length / 2 + 1; i++)
+ {
+ double nextWeight = w[i].Magnitude;
+ energies.Add(nextWeight);
+
+ if (nextWeight > bestEnergy)
+ {
+ bestEnergy = nextWeight;
+ bestFreq = i;
+ }
+ }
+
+ // once we found a best frequency, the region formed by lower bound to upper bound corresponding to this frequency will not be inspected anymore. because they all share the same period.
+ int period = w.Length / bestFreq;
+ double lowerBound = w.Length * 1.0 / (period + 1);
+ double upperBound = w.Length * 1.0 / (period - 1);
+
+ for (int i = w.Length / timeSeriesLength; i < w.Length / 2 + 1; i++)
+ {
+ if (i > lowerBound && i < upperBound)
+ continue;
+ double weight = w[i].Magnitude;
+ if (weight > secondEnergy)
+ {
+ double prevWeight = 0;
+ if (i > 0)
+ prevWeight = w[i - 1].Magnitude;
+ double nextWeight = 0;
+ if (i < w.Length - 1)
+ nextWeight = w[i + 1].Magnitude;
+
+ // should be a local maximum
+ if (weight >= prevWeight && weight >= nextWeight)
+ {
+ secondEnergy = nextWeight;
+ secondFreq = i;
+ }
+ }
+ }
+ double typycalEnergy = MathUtils.QuickMedian(energies);
+
+ // the second energy must be at least significantly large enough than typical energies, and also similar to best energy at magnitude level.
+ if (typycalEnergy * 6.0 < secondEnergy && secondEnergy * 10.0 > bestEnergy)
+ return;
+
+ // set the second frequency to -1, since it is obviously not strong enought to compete with the best energy.
+ secondFreq = -1;
+ }
+
+ ///
+ /// given a frequency F represented by an integer, we aim to find the best period by inspecting the auto-correlation function in time domain.
+ /// since either frequency or the period is an integer, so the possible period located within
+ /// [N/(F+1), N/(F-1)], we need to check this domain, and pick the best one. where N is the length of the augmented time series
+ ///
+ /// the auto-correlation function of the augmented time series
+ /// the input frequency candidate
+ /// the length of the original time series, this is used for post processing to reduce false positive
+ /// output the energy on the auto-correlation function
+ /// return the best period estimated
+ private static int FindBestPeriod(Complex[] r, int frequency, int timeSeriesLength, out double energy)
+ {
+ energy = -1;
+
+ // this will never make sense of a seasonal signal
+ if (frequency <= 1)
+ return -1;
+
+ int lowerBound = r.Length / (frequency + 1);
+ int upperBound = r.Length / (frequency - 1);
+ int bestPeriod = -1;
+ for (int i = lowerBound; i <= upperBound && i < r.Length; i++)
+ {
+ var currentEnergy = r[i].Real;
+ if (currentEnergy > energy)
+ {
+ energy = currentEnergy;
+ bestPeriod = i;
+ }
+ }
+
+ /* condition1: period does not make sense, since the corresponding zone in the auto-correlation energy list are all negative.
+ * condition2: for real dataset, we do not think there will exist such long period. this is used to reduce false-positive
+ * condition3: the number of repeats under this period is too few. this is used to reduce false-positive
+ */
+ if (bestPeriod <= 1 || bestPeriod > MaxLag || timeSeriesLength < MinRecurrentCount * bestPeriod)
+ {
+ energy = -1;
+ return -1;
+ }
+ return bestPeriod;
+ }
+
+ ///
+ /// get the smallest 2^k which is equal or greater than n
+ ///
+ private static int Get2Power(int n)
+ {
+ int result = 1;
+ bool meet1 = false; // check is n is just equals to 2^k for some k
+ while (n > 1)
+ {
+ if ((n & 1) != 0)
+ meet1 = true;
+ result <<= 1;
+ n >>= 1;
+ }
+ if (meet1)
+ result <<= 1;
+ return result;
+ }
+ }
+}