Skip to content

Commit bb13d62

Browse files
lisahuayuyi@microsoft.comlisahuamsft
authored
Add DetectSeasonality as a Helper function in TimeSeries ExtensionDialog (#5231)
* create class PeriodDetectUtils * Test period detect * math utils * restore file * update license * 1. Add DetectSeasonality as a helper method in ExtensionsCatalog, 2. Remove the MathUtils and use MedianDblAggregator (make it BestFriend) 3. Add Unit Tests * Change SeasonalityDetector to be internal class * 1. Introduce randomnessThreshold as an optional parameter 2. Update comments and polish SeasonalityDetector for readability. * minor float to double type change * fix unit tests * address Harish's comments: 1. Change Randomness threshold to [0, 1] range as confidence internal and map to inverse normal cumulative distribution 2. Update unit tests to use sin(2pi + x) 3. Other formatting issues * minor format update * update comments * minor follow up comment update * update threshold to p value Co-authored-by: [email protected] <[email protected]> Co-authored-by: Lisa Hua <[email protected]>
1 parent 1c2469f commit bb13d62

File tree

5 files changed

+463
-1
lines changed

5 files changed

+463
-1
lines changed
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using System.Linq;
4+
using Microsoft.ML;
5+
using Microsoft.ML.TimeSeries;
6+
7+
namespace Samples.Dynamic
8+
{
9+
public static class DetectSeasonality
10+
{
11+
public static void Example()
12+
{
13+
/* Create a new ML context, for ML.NET operations. It can be used for
14+
exception tracking and logging, as well as the source of randomness.*/
15+
var mlContext = new MLContext();
16+
17+
// Create seasonal data as input: y = sin(2 * Pi + x)
18+
var seasonalData = Enumerable.Range(0, 100).Select(x => new TimeSeriesData(Math.Sin(2 * Math.PI + x)));
19+
20+
// Load the input data as a DataView.
21+
var dataView = mlContext.Data.LoadFromEnumerable(seasonalData);
22+
23+
/* Two optional parameters:
24+
* seasonalityWindowSize: Default value is -1. When set to -1, use the whole input to fit model;
25+
* when set to a positive integer, only the first windowSize number of values will be considered.
26+
* randomnessThreshold: Randomness threshold that specifies how confidently the input values follow
27+
* a predictable pattern recurring as seasonal data. By default, it is set to 0.95.
28+
* The higher the threshold is set, the more strict recurring pattern the
29+
* input values should follow to be determined as seasonal data.
30+
*/
31+
int period = mlContext.AnomalyDetection.DetectSeasonality(
32+
dataView,
33+
nameof(TimeSeriesData.Value),
34+
seasonalityWindowSize: 40);
35+
36+
// Print the Seasonality Period result.
37+
Console.WriteLine($"Seasonality Period: #{period}");
38+
}
39+
40+
private class TimeSeriesData
41+
{
42+
public double Value;
43+
44+
public TimeSeriesData(double value)
45+
{
46+
Value = value;
47+
}
48+
}
49+
50+
}
51+
}

src/Microsoft.ML.Data/Transforms/NormalizeColumnDbl.cs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -597,6 +597,7 @@ internal static void GetMedianSoFar(in double num, ref double median, ref MaxHea
597597
/// It tracks median values of non-sparse values (vCount).
598598
/// NaNs are ignored when updating min and max.
599599
/// </summary>
600+
[BestFriend]
600601
internal sealed class MedianDblAggregator : IColumnAggregator<double>
601602
{
602603
private MedianAggregatorUtils.MaxHeap<double> _belowMedianHeap;
@@ -1213,7 +1214,7 @@ private void GetResult(ref TFloat input, ref TFloat value)
12131214
}
12141215

12151216
public override NormalizingTransformer.NormalizerModelParametersBase GetNormalizerModelParams()
1216-
=> new NormalizingTransformer.BinNormalizerModelParameters<TFloat>(ImmutableArray.Create(_binUpperBounds), _den,_offset);
1217+
=> new NormalizingTransformer.BinNormalizerModelParameters<TFloat>(ImmutableArray.Create(_binUpperBounds), _den, _offset);
12171218
}
12181219

12191220
public sealed class ImplVec : BinColumnFunction

src/Microsoft.ML.TimeSeries/ExtensionsCatalog.cs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,52 @@ public static RootCause LocalizeRootCause(this AnomalyDetectionCatalog catalog,
213213
return dst;
214214
}
215215

216+
/// <summary>
217+
/// <para>
218+
/// In time series data, seasonality (or periodicity) is the presence of variations that occur at specific regular intervals,
219+
/// such as weekly, monthly, or quarterly.
220+
/// </para>
221+
/// <para>
222+
/// This method detects this predictable interval (or period) by adopting techniques of Fourier analysis.
223+
/// Assuming the input values have the same time interval (e.g., sensor data collected at every second ordered by timestamps),
224+
/// this method takes a list of time-series data, and returns the regular period for the input seasonal data,
225+
/// if a predictable fluctuation or pattern can be found that recurs or repeats over this period throughout the input values.
226+
/// </para>
227+
/// <para>
228+
/// Returns -1 if no such pattern is found, that is, the input values do not follow a seasonal fluctuation.
229+
/// </para>
230+
/// </summary>
231+
/// <param name="catalog">The anomaly detection catalog.</param>
232+
/// <param name="input">Input DataView. The data is an instance of <see cref="Microsoft.ML.IDataView"/>.</param>
233+
/// <param name="inputColumnName">Name of column to process. The column data must be <see cref="System.Double"/>.</param>
234+
/// <param name="seasonalityWindowSize">An upper bound on the number of values to be considered in the input values.
235+
/// When set to -1, use the whole input to fit model; when set to a positive integer, only the first windowSize number
236+
/// of values will be considered. Default value is -1.</param>
237+
/// <param name="randomnessThreshold"><a href ="https://en.wikipedia.org/wiki/Correlogram">Randomness threshold</a>
238+
/// that specifies how confidently the input values follow a predictable pattern recurring as seasonal data.
239+
/// The range is between [0, 1]. By default, it is set as 0.95.
240+
/// </param>
241+
/// <returns>The regular interval for the input as seasonal data, otherwise return -1.</returns>
242+
/// <example>
243+
/// <format type="text/markdown">
244+
/// <![CDATA[
245+
/// [!code-csharp[DetectSeasonality](~/../docs/samples/docs/samples/Microsoft.ML.Samples/Dynamic/Transforms/TimeSeries/DetectSeasonality.cs)]
246+
/// ]]>
247+
/// </format>
248+
/// </example>
249+
public static int DetectSeasonality(
250+
this AnomalyDetectionCatalog catalog,
251+
IDataView input,
252+
string inputColumnName,
253+
int seasonalityWindowSize = -1,
254+
double randomnessThreshold = 0.95)
255+
=> new SeasonalityDetector().DetectSeasonality(
256+
CatalogUtils.GetEnvironment(catalog),
257+
input,
258+
inputColumnName,
259+
seasonalityWindowSize,
260+
randomnessThreshold);
261+
216262
private static void CheckRootCauseInput(IHostEnvironment host, RootCauseLocalizationInput src)
217263
{
218264
host.CheckUserArg(src.Slices.Count >= 1, nameof(src.Slices), "Must has more than one item");

0 commit comments

Comments
 (0)