diff --git a/Microsoft.ML.sln b/Microsoft.ML.sln
index db998668dd..1421025ec9 100644
--- a/Microsoft.ML.sln
+++ b/Microsoft.ML.sln
@@ -93,7 +93,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Microsoft.ML.CpuMath", "Mic
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools-local", "tools-local", "{7F13E156-3EBA-4021-84A5-CD56BA72F99E}"
EndProject
-Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CodeAnalyzer", "tools-local\Microsoft.ML.CodeAnalyzer\Microsoft.ML.CodeAnalyzer.csproj", "{B4E55B2D-2A92-46E7-B72F-E76D6FD83440}"
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.InternalCodeAnalyzer", "tools-local\Microsoft.ML.InternalCodeAnalyzer\Microsoft.ML.InternalCodeAnalyzer.csproj", "{B4E55B2D-2A92-46E7-B72F-E76D6FD83440}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CodeAnalyzer.Tests", "test\Microsoft.ML.CodeAnalyzer.Tests\Microsoft.ML.CodeAnalyzer.Tests.csproj", "{3E4ABF07-7970-4BE6-B45B-A13D3C397545}"
EndProject
@@ -111,6 +111,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.HalLearners",
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.TensorFlow", "src\Microsoft.ML.TensorFlow\Microsoft.ML.TensorFlow.csproj", "{570A0B8A-5463-44D2-8521-54C0CA4CACA9}"
EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Analyzer", "src\Microsoft.ML.Analyzer\Microsoft.ML.Analyzer.csproj", "{6DEF0F40-3853-47B3-8165-5F24BA5E14DF}"
+EndProject
+Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.StaticPipelineTesting", "test\Microsoft.ML.StaticPipelineTesting\Microsoft.ML.StaticPipelineTesting.csproj", "{8B38BF24-35F4-4787-A9C5-22D35987106E}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@@ -399,6 +403,22 @@ Global
{570A0B8A-5463-44D2-8521-54C0CA4CACA9}.Release|Any CPU.Build.0 = Release|Any CPU
{570A0B8A-5463-44D2-8521-54C0CA4CACA9}.Release-Intrinsics|Any CPU.ActiveCfg = Release|Any CPU
{570A0B8A-5463-44D2-8521-54C0CA4CACA9}.Release-Intrinsics|Any CPU.Build.0 = Release|Any CPU
+ {6DEF0F40-3853-47B3-8165-5F24BA5E14DF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {6DEF0F40-3853-47B3-8165-5F24BA5E14DF}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {6DEF0F40-3853-47B3-8165-5F24BA5E14DF}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug|Any CPU
+ {6DEF0F40-3853-47B3-8165-5F24BA5E14DF}.Debug-Intrinsics|Any CPU.Build.0 = Debug|Any CPU
+ {6DEF0F40-3853-47B3-8165-5F24BA5E14DF}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {6DEF0F40-3853-47B3-8165-5F24BA5E14DF}.Release|Any CPU.Build.0 = Release|Any CPU
+ {6DEF0F40-3853-47B3-8165-5F24BA5E14DF}.Release-Intrinsics|Any CPU.ActiveCfg = Release|Any CPU
+ {6DEF0F40-3853-47B3-8165-5F24BA5E14DF}.Release-Intrinsics|Any CPU.Build.0 = Release|Any CPU
+ {8B38BF24-35F4-4787-A9C5-22D35987106E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
+ {8B38BF24-35F4-4787-A9C5-22D35987106E}.Debug|Any CPU.Build.0 = Debug|Any CPU
+ {8B38BF24-35F4-4787-A9C5-22D35987106E}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug|Any CPU
+ {8B38BF24-35F4-4787-A9C5-22D35987106E}.Debug-Intrinsics|Any CPU.Build.0 = Debug|Any CPU
+ {8B38BF24-35F4-4787-A9C5-22D35987106E}.Release|Any CPU.ActiveCfg = Release|Any CPU
+ {8B38BF24-35F4-4787-A9C5-22D35987106E}.Release|Any CPU.Build.0 = Release|Any CPU
+ {8B38BF24-35F4-4787-A9C5-22D35987106E}.Release-Intrinsics|Any CPU.ActiveCfg = Release|Any CPU
+ {8B38BF24-35F4-4787-A9C5-22D35987106E}.Release-Intrinsics|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -444,6 +464,8 @@ Global
{00E38F77-1E61-4CDF-8F97-1417D4E85053} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{A7222F41-1CF0-47D9-B80C-B4D77B027A61} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{570A0B8A-5463-44D2-8521-54C0CA4CACA9} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
+ {6DEF0F40-3853-47B3-8165-5F24BA5E14DF} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
+ {8B38BF24-35F4-4787-A9C5-22D35987106E} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D}
diff --git a/build/Dependencies.props b/build/Dependencies.props
index e880e8c66b..07f59e50e9 100644
--- a/build/Dependencies.props
+++ b/build/Dependencies.props
@@ -12,5 +12,9 @@
4.5.0
0.11.1
1.10.0
+
+ 2.9.0
+ 4.5.0
+ 1.2.0
diff --git a/src/Directory.Build.props b/src/Directory.Build.props
index ee32523d8e..6c6117e7dd 100644
--- a/src/Directory.Build.props
+++ b/src/Directory.Build.props
@@ -26,7 +26,7 @@
+ Include="$(MSBuildThisFileDirectory)\..\tools-local\Microsoft.ML.InternalCodeAnalyzer\Microsoft.ML.InternalCodeAnalyzer.csproj">
false
Analyzer
diff --git a/src/Microsoft.ML.Analyzer/Microsoft.ML.Analyzer.csproj b/src/Microsoft.ML.Analyzer/Microsoft.ML.Analyzer.csproj
new file mode 100644
index 0000000000..afe45e57f5
--- /dev/null
+++ b/src/Microsoft.ML.Analyzer/Microsoft.ML.Analyzer.csproj
@@ -0,0 +1,13 @@
+
+
+
+ netstandard1.3
+
+
+
+
+
+
+
+
+
diff --git a/src/Microsoft.ML.Analyzer/TypeIsSchemaShapeAnalyzer.cs b/src/Microsoft.ML.Analyzer/TypeIsSchemaShapeAnalyzer.cs
new file mode 100644
index 0000000000..f5b9b240ee
--- /dev/null
+++ b/src/Microsoft.ML.Analyzer/TypeIsSchemaShapeAnalyzer.cs
@@ -0,0 +1,180 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System.Collections.Immutable;
+using System.Linq;
+using Microsoft.CodeAnalysis;
+using Microsoft.CodeAnalysis.CSharp;
+using Microsoft.CodeAnalysis.CSharp.Syntax;
+using Microsoft.CodeAnalysis.Diagnostics;
+
+namespace Microsoft.ML.Analyzer
+{
+ [DiagnosticAnalyzer(LanguageNames.CSharp)]
+ public sealed class TypeIsSchemaShapeAnalyzer : DiagnosticAnalyzer
+ {
+ internal static class ShapeDiagnostic
+ {
+ private const string Category = "Type Check";
+ public const string Id = "MSML_TypeShouldBeSchemaShape";
+ private const string Title = "The type is not a schema shape";
+ private const string Format = "Type{0} is neither a PipelineColumn nor a ValueTuple.";
+ internal const string Description =
+ "Within statically typed pipeline elements of ML.NET, the shape of the schema is determined by a type. " +
+ "A valid type is either an instance of one of the PipelineColumn subclasses (e.g., Scalar " +
+ "or something like that), or a ValueTuple containing only valid types. (So, ValueTuples containing " +
+ "other value tuples are fine, so long as they terminate in a PipelineColumn subclass.)";
+
+ internal static DiagnosticDescriptor Rule =
+ new DiagnosticDescriptor(Id, Title, Format, Category,
+ DiagnosticSeverity.Error, isEnabledByDefault: true, description: Description);
+ }
+
+ internal static class ShapeParameterDiagnostic
+ {
+ private const string Category = "Type Check";
+ public const string Id = "MSML_TypeParameterShouldBeSchemaShape";
+ private const string Title = "The type is not a schema shape";
+ private const string Format = "Type parameter {0} is not marked with [IsShape] or appropriate type constraints.";
+ internal const string Description = ShapeDiagnostic.Description + " " +
+ "If using type parameters when interacting with the statically typed pipelines, the type parameter ought to be " +
+ "constrained in such a way that it, either by applying the [IsShape] attribute or by having type constraints to " +
+ "indicate that it is valid, e.g., constraining the type to descend from PipelineColumn.";
+
+ internal static DiagnosticDescriptor Rule =
+ new DiagnosticDescriptor(Id, Title, Format, Category,
+ DiagnosticSeverity.Error, isEnabledByDefault: true, description: Description);
+ }
+
+ private const string AttributeName = "Microsoft.ML.Data.StaticPipe.IsShapeAttribute";
+ private const string LeafTypeName = "Microsoft.ML.Data.StaticPipe.Runtime.PipelineColumn";
+
+ public override ImmutableArray SupportedDiagnostics =>
+ ImmutableArray.Create(ShapeDiagnostic.Rule, ShapeParameterDiagnostic.Rule);
+
+ public override void Initialize(AnalysisContext context)
+ {
+ context.RegisterSemanticModelAction(Analyze);
+ }
+
+ private void Analyze(SemanticModelAnalysisContext context)
+ {
+ // We start with the model, then do the method invocations.
+ // We could have phrased it as RegisterSyntaxNodeAction(Analyze, SyntaxKind.InvocationExpression),
+ // but this seemed more inefficient since getting the model and fetching the type symbols every
+ // single time seems to incur significant cost. The following invocation is somewhat more awkward
+ // since we must iterate over the invocation syntaxes ourselves, but this seems to be worthwhile.
+ var model = context.SemanticModel;
+ var comp = model.Compilation;
+
+ // Get the symbols of the key types we are analyzing. If we can't find any of them there is
+ // no point in going further.
+ var attrType = comp.GetTypeByMetadataName(AttributeName);
+ if (attrType == null)
+ return;
+ var leafType = comp.GetTypeByMetadataName(LeafTypeName);
+ if (leafType == null)
+ return;
+
+ // This internal helper method recursively determines whether an attributed type parameter
+ // has a valid type. It is called externally from the loop over invocations.
+ bool CheckType(ITypeSymbol type, out string path, out ITypeSymbol problematicType)
+ {
+ if (type.TypeKind == TypeKind.TypeParameter)
+ {
+ var typeParam = (ITypeParameterSymbol)type;
+ path = null;
+ problematicType = null;
+ // Does the type parameter have the attribute that triggers a check?
+ if (type.GetAttributes().Any(attr => attr.AttributeClass == attrType))
+ return true;
+ // Are any of the declared constraint types OK?
+ if (typeParam.ConstraintTypes.Any(ct => CheckType(ct, out string ctPath, out var ctProb)))
+ return true;
+ // Well, probably not good then. Let's call it a day.
+ problematicType = typeParam;
+ return false;
+ }
+ else if (type.IsTupleType)
+ {
+ INamedTypeSymbol nameType = (INamedTypeSymbol)type;
+ var tupleElems = nameType.TupleElements;
+
+ for (int i = 0; i < tupleElems.Length; ++i)
+ {
+ var e = tupleElems[i];
+ if (!CheckType(e.Type, out string innerPath, out problematicType))
+ {
+ path = e.Name ?? $"Item{i + 1}";
+ if (innerPath != null)
+ path += "." + innerPath;
+ return false;
+ }
+ }
+ path = null;
+ problematicType = null;
+ return true;
+ }
+ else
+ {
+ for (var rt = type; rt != null; rt = rt.BaseType)
+ {
+ if (rt == leafType)
+ {
+ path = null;
+ problematicType = null;
+ return true;
+ }
+ }
+ path = null;
+ problematicType = type;
+ return false;
+ }
+ }
+
+ foreach (var invocation in model.SyntaxTree.GetRoot().DescendantNodes().OfType())
+ {
+ var symbolInfo = model.GetSymbolInfo(invocation);
+ if (!(symbolInfo.Symbol is IMethodSymbol methodSymbol))
+ {
+ // Should we perhaps skip when there is a method resolution failure? This is often but not always a sign of another problem.
+ if (symbolInfo.CandidateReason != CandidateReason.OverloadResolutionFailure || symbolInfo.CandidateSymbols.Length == 0)
+ continue;
+ methodSymbol = symbolInfo.CandidateSymbols[0] as IMethodSymbol;
+ if (methodSymbol == null)
+ continue;
+ }
+ // Analysis only applies to generic methods.
+ if (!methodSymbol.IsGenericMethod)
+ continue;
+ // Scan the type parameters for one that has our target attribute.
+ for (int i = 0; i < methodSymbol.TypeParameters.Length; ++i)
+ {
+ var par = methodSymbol.TypeParameters[i];
+ var attr = par.GetAttributes();
+ if (attr.Length == 0)
+ continue;
+ if (!attr.Any(a => a.AttributeClass == attrType))
+ continue;
+ // We've found it. Check the type argument to ensure it is of the appropriate type.
+ var p = methodSymbol.TypeArguments[i];
+ if (CheckType(p, out string path, out ITypeSymbol problematicType))
+ continue;
+
+ if (problematicType.Kind == SymbolKind.TypeParameter)
+ {
+ var diagnostic = Diagnostic.Create(ShapeParameterDiagnostic.Rule, invocation.GetLocation(), problematicType.Name);
+ context.ReportDiagnostic(diagnostic);
+ }
+ else
+ {
+ path = path == null ? "" : " of item " + path;
+ var diagnostic = Diagnostic.Create(ShapeDiagnostic.Rule, invocation.GetLocation(), path);
+ context.ReportDiagnostic(diagnostic);
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/Microsoft.ML.Core/Data/DataKind.cs b/src/Microsoft.ML.Core/Data/DataKind.cs
index 32325f44a1..0249745691 100644
--- a/src/Microsoft.ML.Core/Data/DataKind.cs
+++ b/src/Microsoft.ML.Core/Data/DataKind.cs
@@ -30,7 +30,7 @@ public enum DataKind : byte
Num = R4,
TX = 11,
-#pragma warning disable MSML_GeneralName // The data kind enum has its own logic, independnet of C# naming conventions.
+#pragma warning disable MSML_GeneralName // The data kind enum has its own logic, independent of C# naming conventions.
TXT = TX,
Text = TX,
diff --git a/src/Microsoft.ML.Core/Data/IEstimator.cs b/src/Microsoft.ML.Core/Data/IEstimator.cs
index 509a67bb4f..54b0c64cd9 100644
--- a/src/Microsoft.ML.Core/Data/IEstimator.cs
+++ b/src/Microsoft.ML.Core/Data/IEstimator.cs
@@ -192,11 +192,10 @@ public interface IDataReader
public interface IDataReaderEstimator
where TReader : IDataReader
{
+ // REVIEW: you could consider the transformer to take a different , but we don't have such components
+ // yet, so why complicate matters?
///
/// Train and return a data reader.
- ///
- /// REVIEW: you could consider the transformer to take a different , but we don't have such components
- /// yet, so why complicate matters?
///
TReader Fit(TSource input);
diff --git a/src/Microsoft.ML.Core/Utilities/Utils.cs b/src/Microsoft.ML.Core/Utilities/Utils.cs
index 96c23a0fe3..ac100b6dc9 100644
--- a/src/Microsoft.ML.Core/Utilities/Utils.cs
+++ b/src/Microsoft.ML.Core/Utilities/Utils.cs
@@ -1049,6 +1049,24 @@ public static void MarshalActionInvoke(Action act, Type genArg, TA
meth.Invoke(act.Target, new object[] { arg1 });
}
+ ///
+ /// A two-argument version of .
+ ///
+ public static void MarshalActionInvoke(Action act, Type genArg, TArg1 arg1, TArg2 arg2)
+ {
+ var meth = MarshalActionInvokeCheckAndCreate(genArg, act);
+ meth.Invoke(act.Target, new object[] { arg1, arg2 });
+ }
+
+ ///
+ /// A three-argument version of .
+ ///
+ public static void MarshalActionInvoke(Action act, Type genArg, TArg1 arg1, TArg2 arg2, TArg3 arg3)
+ {
+ var meth = MarshalActionInvokeCheckAndCreate(genArg, act);
+ meth.Invoke(act.Target, new object[] { arg1, arg2, arg3 });
+ }
+
public static string GetDescription(this Enum value)
{
Type type = value.GetType();
diff --git a/src/Microsoft.ML.Data/Data/IColumn.cs b/src/Microsoft.ML.Data/Data/IColumn.cs
index 2f2f496f99..04000c2d38 100644
--- a/src/Microsoft.ML.Data/Data/IColumn.cs
+++ b/src/Microsoft.ML.Data/Data/IColumn.cs
@@ -97,6 +97,30 @@ private static IColumn GetColumnCore(IRow row, int col)
return new RowWrap(row, col);
}
+ ///
+ /// Exposes a single column in a schema. The column is considered inactive.
+ ///
+ /// The schema to get the data for
+ /// The column to get
+ /// A column with false
+ public static IColumn GetColumn(ISchema schema, int col)
+ {
+ Contracts.CheckValue(schema, nameof(schema));
+ Contracts.CheckParam(0 <= col && col < schema.ColumnCount, nameof(col));
+
+ Func func = GetColumnCore;
+ return Utils.MarshalInvoke(func, schema.GetColumnType(col).RawType, schema, col);
+ }
+
+ private static IColumn GetColumnCore(ISchema schema, int col)
+ {
+ Contracts.AssertValue(schema);
+ Contracts.Assert(0 <= col && col < schema.ColumnCount);
+ Contracts.Assert(schema.GetColumnType(col).RawType == typeof(T));
+
+ return new SchemaWrap(schema, col);
+ }
+
///
/// Wraps the metadata of a column as a row.
///
@@ -183,7 +207,7 @@ public static IColumn GetColumn(string name, ColumnType type, ValueGetter
/// , or if null, the output row will yield default values for those implementations,
/// that is, a totally static row
/// A set of row columns
- /// A row with items derived from
+ /// A row with items derived from
public static IRow GetRow(ICounted counted, params IColumn[] columns)
{
Contracts.CheckValueOrNull(counted);
@@ -229,9 +253,9 @@ private sealed class RowWrap : IColumn
private readonly int _col;
private MetadataRow _meta;
- public string Name { get { return _row.Schema.GetColumnName(_col); } }
- public ColumnType Type { get { return _row.Schema.GetColumnType(_col); } }
- public bool IsActive { get { return _row.IsColumnActive(_col); } }
+ public string Name => _row.Schema.GetColumnName(_col);
+ public ColumnType Type => _row.Schema.GetColumnType(_col);
+ public bool IsActive => _row.IsColumnActive(_col);
public IRow Metadata
{
@@ -254,14 +278,10 @@ public RowWrap(IRow row, int col)
}
Delegate IColumn.GetGetter()
- {
- return GetGetter();
- }
+ => GetGetter();
public ValueGetter GetGetter()
- {
- return _row.GetGetter(_col);
- }
+ => _row.GetGetter(_col);
}
///
@@ -269,17 +289,53 @@ public ValueGetter GetGetter()
///
private abstract class DefaultCounted : ICounted
{
- public long Position { get { return 0; } }
- public long Batch { get { return 0; } }
+ public long Position => 0;
+ public long Batch => 0;
public ValueGetter GetIdGetter()
+ => IdGetter;
+
+ private static void IdGetter(ref UInt128 id)
+ => id = default;
+ }
+
+ ///
+ /// Simple wrapper for a schema column, considered inactive with no getter.
+ ///
+ /// The type of the getter
+ private sealed class SchemaWrap : IColumn
+ {
+ private readonly ISchema _schema;
+ private readonly int _col;
+ private MetadataRow _meta;
+
+ public string Name => _schema.GetColumnName(_col);
+ public ColumnType Type => _schema.GetColumnType(_col);
+ public bool IsActive => false;
+
+ public IRow Metadata
{
- return IdGetter;
+ get {
+ if (_meta == null)
+ Interlocked.CompareExchange(ref _meta, new MetadataRow(_schema, _col), null);
+ return _meta;
+ }
}
- private static void IdGetter(ref UInt128 id)
+ public SchemaWrap(ISchema schema, int col)
{
- id = default(UInt128);
+ Contracts.AssertValue(schema);
+ Contracts.Assert(0 <= col && col < schema.ColumnCount);
+ Contracts.Assert(schema.GetColumnType(col).RawType == typeof(T));
+
+ _schema = schema;
+ _col = col;
}
+
+ Delegate IColumn.GetGetter()
+ => GetGetter();
+
+ public ValueGetter GetGetter()
+ => throw Contracts.Except("Column not active");
}
///
@@ -289,7 +345,7 @@ private static void IdGetter(ref UInt128 id)
///
private sealed class MetadataRow : DefaultCounted, IRow
{
- public ISchema Schema { get { return _schema; } }
+ public ISchema Schema => _schema;
private readonly ISchema _metaSchema;
private readonly int _col;
@@ -379,13 +435,9 @@ public ValueGetter GetGetter(int col)
///
private abstract class SimpleColumnBase : IColumn
{
- private readonly IRow _meta;
- private readonly string _name;
- private readonly ColumnType _type;
-
- public string Name { get { return _name; } }
- public IRow Metadata { get { return _meta; } }
- public ColumnType Type { get { return _type; } }
+ public string Name { get; }
+ public IRow Metadata { get; }
+ public ColumnType Type { get; }
public abstract bool IsActive { get; }
public SimpleColumnBase(string name, IRow meta, ColumnType type)
@@ -395,9 +447,9 @@ public SimpleColumnBase(string name, IRow meta, ColumnType type)
Contracts.CheckValue(type, nameof(type));
Contracts.CheckParam(type.RawType == typeof(T), nameof(type), "Mismatch between CLR type and column type");
- _name = name;
- _meta = meta;
- _type = type;
+ Name = name;
+ Metadata = meta;
+ Type = type;
}
Delegate IColumn.GetGetter()
@@ -427,7 +479,7 @@ private sealed class ConstOneImpl : SimpleColumnBase
{
private readonly T _value;
- public override bool IsActive { get { return true; } }
+ public override bool IsActive => true;
public ConstOneImpl(string name, IRow meta, ColumnType type, T value)
: base(name, meta, type)
@@ -474,7 +526,7 @@ private sealed class GetterImpl : SimpleColumnBase
{
private readonly ValueGetter _getter;
- public override bool IsActive { get { return _getter != null; } }
+ public override bool IsActive => _getter != null;
public GetterImpl(string name, IRow meta, ColumnType type, ValueGetter getter)
: base(name, meta, type)
@@ -500,9 +552,9 @@ private sealed class RowColumnRow : IRow
private readonly IColumn[] _columns;
private readonly SchemaImpl _schema;
- public ISchema Schema { get { return _schema; } }
- public long Position { get { return _counted.Position; } }
- public long Batch { get { return _counted.Batch; } }
+ public ISchema Schema => _schema;
+ public long Position => _counted.Position;
+ public long Batch => _counted.Batch;
public RowColumnRow(ICounted counted, IColumn[] columns)
{
@@ -538,7 +590,7 @@ private sealed class SchemaImpl : ISchema
private readonly RowColumnRow _parent;
private readonly Dictionary _nameToIndex;
- public int ColumnCount { get { return _parent._columns.Length; } }
+ public int ColumnCount => _parent._columns.Length;
public SchemaImpl(RowColumnRow parent)
{
diff --git a/src/Microsoft.ML.Data/DataLoadSave/CompositeReaderEstimator.cs b/src/Microsoft.ML.Data/DataLoadSave/CompositeReaderEstimator.cs
index 49f7f8b99d..e30aa97a42 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/CompositeReaderEstimator.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/CompositeReaderEstimator.cs
@@ -55,5 +55,4 @@ public CompositeReaderEstimator Append(IEstimator
return new CompositeReaderEstimator(_start, _estimatorChain.Append(estimator));
}
}
-
}
diff --git a/src/Microsoft.ML.Data/DataLoadSave/EstimatorExtensions.cs b/src/Microsoft.ML.Data/DataLoadSave/EstimatorExtensions.cs
index 9cb7ec4dab..ecbc28ebdf 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/EstimatorExtensions.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/EstimatorExtensions.cs
@@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+using System;
using Microsoft.ML.Core.Data;
namespace Microsoft.ML.Runtime.Data
@@ -74,5 +75,53 @@ public static TransformerChain Append(this ITransformer start, T
return new TransformerChain(start, transformer);
}
+
+ private sealed class DelegateEstimator : IEstimator
+ where TTransformer : class, ITransformer
+ {
+ private readonly IEstimator _est;
+ private readonly Action _onFit;
+
+ public DelegateEstimator(IEstimator estimator, Action onFit)
+ {
+ Contracts.AssertValue(estimator);
+ Contracts.AssertValue(onFit);
+ _est = estimator;
+ _onFit = onFit;
+ }
+
+ public TTransformer Fit(IDataView input)
+ {
+ var trans = _est.Fit(input);
+ _onFit(trans);
+ return trans;
+ }
+
+ public SchemaShape GetOutputSchema(SchemaShape inputSchema)
+ => _est.GetOutputSchema(inputSchema);
+ }
+
+ ///
+ /// Given an estimator, return a wrapping object that will call a delegate once
+ /// is called. It is often important for an estimator to return information about what was fit, which is why the
+ /// method returns a specifically typed object, rather than just a general
+ /// . However, at the same time, are often formed into pipelines
+ /// with many objects, so we may need to build a chain of estimators via where the
+ /// estimator for which we want to get the transformer is buried somewhere in this chain. For that scenario, we can through this
+ /// method attach a delegate that will be called once fit is called.
+ ///
+ /// The type of returned by
+ /// The estimator to wrap
+ /// The delegate that is called with the resulting instances once
+ /// is called. Because
+ /// may be called multiple times, this delegate may also be called multiple times.
+ /// A wrapping estimator that calls the indicated delegate whenever fit is called
+ public static IEstimator WithOnFitDelegate(this IEstimator estimator, Action onFit)
+ where TTransformer : class, ITransformer
+ {
+ Contracts.CheckValue(estimator, nameof(estimator));
+ Contracts.CheckValue(onFit, nameof(onFit));
+ return new DelegateEstimator(estimator, onFit);
+ }
}
}
diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs
index dc7cb43830..5ccbb8d7e9 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoader.cs
@@ -200,16 +200,35 @@ public sealed class Range
{
public Range() { }
+ ///
+ /// A range representing a single value. Will result in a scalar column.
+ ///
+ /// The index of the field of the text file to read.
public Range(int index)
- : this(index, index) { }
+ {
+ Contracts.CheckParam(index >= 0, nameof(index), "Must be non-negative");
+ Min = index;
+ Max = index;
+ }
- public Range(int min, int max)
+ ///
+ /// A range representing a set of values. Will result in a vector column.
+ ///
+ /// The minimum inclusive index of the column.
+ /// The maximum-inclusive index of the column. If null
+ /// indicates that the should auto-detect the length
+ /// of the lines, and read till the end.
+ public Range(int min, int? max)
{
- Contracts.CheckParam(min >= 0, nameof(min), "min must be non-negative.");
- Contracts.CheckParam(max >= min, nameof(max), "max must be greater than or equal to min.");
+ Contracts.CheckParam(min >= 0, nameof(min), "Must be non-negative");
+ Contracts.CheckParam(!(max < min), nameof(max), "If specified, must be greater than or equal to " + nameof(min));
Min = min;
Max = max;
+ // Note that without the following being set, in the case where there is a single range
+ // where Min == Max, the result will not be a vector valued but a scalar column.
+ ForceVector = true;
+ AutoEnd = max == null;
}
[Argument(ArgumentType.Required, HelpText = "First index in the range")]
diff --git a/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderStatic.cs b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderStatic.cs
new file mode 100644
index 0000000000..095617deb1
--- /dev/null
+++ b/src/Microsoft.ML.Data/DataLoadSave/Text/TextLoaderStatic.cs
@@ -0,0 +1,259 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Collections.Generic;
+using Microsoft.ML.Core.Data;
+using Microsoft.ML.Data.StaticPipe;
+using Microsoft.ML.Data.StaticPipe.Runtime;
+
+namespace Microsoft.ML.Runtime.Data
+{
+ public sealed partial class TextLoader
+ {
+ ///
+ /// Configures a reader for text files.
+ ///
+ /// The type shape parameter, which must be
+ ///
+ /// The delegate that describes what fields to read from the text file, as well as
+ /// describing their input type. The way in which it works is that the delegate is fed a ,
+ /// and the user composes a value-tuple with instances out of that .
+ /// The resulting data will have columns with the names corresponding to their names in the value-tuple.
+ /// Input files. If null then no files are read, but this means that options or
+ /// configurations that require input data for initialization (e.g., or
+ /// ) with a null second argument.
+ /// Data file has header with feature names.
+ /// Text field separator.
+ /// Whether the input may include quoted values, which can contain separator
+ /// characters, colons, and distinguish empty values from missing values. When true, consecutive separators
+ /// denote a missing value and an empty value is denoted by "". When false, consecutive separators
+ /// denote an empty value.
+ /// Whether the input may include sparse representations.
+ /// Remove trailing whitespace from lines.
+ /// A configured statically-typed reader for text files.
+ public static DataReader CreateReader<[IsShape] TTupleShape>(
+ IHostEnvironment env, Func func, IMultiStreamSource files = null,
+ bool hasHeader = false, char separator = '\t', bool allowQuoting = true, bool allowSparse = true,
+ bool trimWhitspace = false)
+ {
+ Contracts.CheckValue(env, nameof(env));
+ env.CheckValue(func, nameof(func));
+ env.CheckValueOrNull(files);
+
+ // Populate all args except the columns.
+ var args = new Arguments();
+ args.AllowQuoting = allowQuoting;
+ args.AllowSparse = allowSparse;
+ args.HasHeader = hasHeader;
+ args.SeparatorChars = new[] { separator };
+ args.TrimWhitespace = trimWhitspace;
+
+ var rec = new TextReconciler(args, files);
+ var ctx = new Context(rec);
+
+ using (var ch = env.Start("Initializing " + nameof(TextLoader)))
+ {
+ var readerEst = StaticPipeUtils.ReaderEstimatorAnalyzerHelper(env, ch, ctx, rec, func);
+ Contracts.AssertValue(readerEst);
+ var reader = readerEst.Fit(files);
+ ch.Done();
+ return reader;
+ }
+ }
+
+ private sealed class TextReconciler : ReaderReconciler
+ {
+ private readonly Arguments _args;
+ private readonly IMultiStreamSource _files;
+
+ public TextReconciler(Arguments args, IMultiStreamSource files)
+ {
+ Contracts.AssertValue(args);
+ Contracts.AssertValueOrNull(files);
+
+ _args = args;
+ _files = files;
+ }
+
+ public override IDataReaderEstimator> Reconcile(
+ IHostEnvironment env, PipelineColumn[] toOutput, IReadOnlyDictionary outputNames)
+ {
+ Contracts.AssertValue(env);
+ Contracts.AssertValue(toOutput);
+ Contracts.AssertValue(outputNames);
+ Contracts.Assert(_args.Column == null);
+
+ Column Create(PipelineColumn pipelineCol)
+ {
+ var pipelineArgCol = (IPipelineArgColumn)pipelineCol;
+ var argCol = pipelineArgCol.Create();
+ argCol.Name = outputNames[pipelineCol];
+ return argCol;
+ }
+
+ var cols = _args.Column = new Column[toOutput.Length];
+ for (int i = 0; i < toOutput.Length; ++i)
+ cols[i] = Create(toOutput[i]);
+
+ var orig = new TextLoader(env, _args, _files);
+ return new TrivialReaderEstimator(orig);
+ }
+ }
+
+ private interface IPipelineArgColumn
+ {
+ ///
+ /// Creates a object corresponding to the , with everything
+ /// filled in except .
+ ///
+ Column Create();
+ }
+
+ ///
+ /// Context object by which a user can indicate what fields they want to read from a text file, and what data type they ought to have.
+ /// Instances of this class are never made by the user, but rather are fed into the delegate in
+ /// .
+ ///
+ public sealed class Context
+ {
+ private readonly Reconciler _rec;
+
+ internal Context(Reconciler rec)
+ {
+ Contracts.AssertValue(rec);
+ _rec = rec;
+ }
+
+ ///
+ /// Reads a scalar Boolean column from a single field in the text file.
+ ///
+ /// The zero-based index of the field to read from.
+ /// The column representation.
+ public Scalar LoadBool(int ordinal) => Load(DataKind.BL, ordinal);
+
+ ///
+ /// Reads a vector Boolean column from a range of fields in the text file.
+ ///
+ /// The zero-based inclusive lower index of the field to read from.
+ /// The zero-based inclusive upper index of the field to read from.
+ /// Note that if this is null, it will read to the end of the line. The file(s)
+ /// will be inspected to get the length of the type.
+ /// The column representation.
+ public Vector LoadBool(int minOrdinal, int? maxOrdinal) => Load(DataKind.BL, minOrdinal, maxOrdinal);
+
+ ///
+ /// Reads a scalar single-precision floating point column from a single field in the text file.
+ ///
+ /// The zero-based index of the field to read from.
+ /// The column representation.
+ public Scalar LoadFloat(int ordinal) => Load(DataKind.R4, ordinal);
+
+ ///
+ /// Reads a vector single-precision column from a range of fields in the text file.
+ ///
+ /// The zero-based inclusive lower index of the field to read from.
+ /// The zero-based inclusive upper index of the field to read from.
+ /// Note that if this is null, it will read to the end of the line. The file(s)
+ /// will be inspected to get the length of the type.
+ /// The column representation.
+ public Vector LoadFloat(int minOrdinal, int? maxOrdinal) => Load(DataKind.R4, minOrdinal, maxOrdinal);
+
+ ///
+ /// Reads a scalar double-precision floating point column from a single field in the text file.
+ ///
+ /// The zero-based index of the field to read from.
+ /// The column representation.
+ public Scalar LoadDouble(int ordinal) => Load(DataKind.R8, ordinal);
+
+ ///
+ /// Reads a vector double-precision column from a range of fields in the text file.
+ ///
+ /// The zero-based inclusive lower index of the field to read from.
+ /// The zero-based inclusive upper index of the field to read from.
+ /// Note that if this is null, it will read to the end of the line. The file(s)
+ /// will be inspected to get the length of the type.
+ /// The column representation.
+ public Vector LoadDouble(int minOrdinal, int? maxOrdinal) => Load(DataKind.R8, minOrdinal, maxOrdinal);
+
+ ///
+ /// Reads a scalar textual column from a single field in the text file.
+ ///
+ /// The zero-based index of the field to read from.
+ /// The column representation.
+ public Scalar LoadText(int ordinal) => Load(DataKind.TX, ordinal);
+
+ ///
+ /// Reads a vector textual column from a range of fields in the text file.
+ ///
+ /// The zero-based inclusive lower index of the field to read from.
+ /// The zero-based inclusive upper index of the field to read from.
+ /// Note that if this is null, it will read to the end of the line. The file(s)
+ /// will be inspected to get the length of the type.
+ /// The column representation.
+ public Vector LoadText(int minOrdinal, int? maxOrdinal) => Load(DataKind.TX, minOrdinal, maxOrdinal);
+
+ private Scalar Load(DataKind kind, int ordinal)
+ {
+ Contracts.CheckParam(ordinal >= 0, nameof(ordinal), "Should be non-negative");
+ return new MyScalar(_rec, kind, ordinal);
+ }
+
+ private Vector Load(DataKind kind, int minOrdinal, int? maxOrdinal)
+ {
+ Contracts.CheckParam(minOrdinal >= 0, nameof(minOrdinal), "Should be non-negative");
+ // Builds a vector column over [minOrdinal, maxOrdinal]. A null maxOrdinal (read to end of line) makes the lifted '<' below false, so it passes the check.
+ Contracts.CheckParam(!(maxOrdinal < minOrdinal), nameof(maxOrdinal), "If specified, cannot be less than " + nameof(minOrdinal));
+ return new MyVector(_rec, kind, minOrdinal, maxOrdinal);
+ }
+
+ private class MyScalar : Scalar, IPipelineArgColumn
+ {
+ private readonly DataKind _kind;
+ private readonly int _ordinal;
+
+ public MyScalar(Reconciler rec, DataKind kind, int ordinal)
+ : base(rec, null)
+ {
+ _kind = kind;
+ _ordinal = ordinal;
+ }
+
+ public Column Create()
+ {
+ return new Column()
+ {
+ Type = _kind,
+ Source = new[] { new Range(_ordinal) },
+ };
+ }
+ }
+
+ private class MyVector : Vector, IPipelineArgColumn
+ {
+ private readonly DataKind _kind;
+ private readonly int _min;
+ private readonly int? _max;
+
+ public MyVector(Reconciler rec, DataKind kind, int min, int? max)
+ : base(rec, null)
+ {
+ _kind = kind;
+ _min = min;
+ _max = max;
+ }
+
+ public Column Create()
+ {
+ return new Column()
+ {
+ Type = _kind,
+ Source = new[] { new Range(_min, _max) },
+ };
+ }
+ }
+ }
+ }
+}
+
diff --git a/src/Microsoft.ML.Data/DataLoadSave/TrivialReaderEstimator.cs b/src/Microsoft.ML.Data/DataLoadSave/TrivialReaderEstimator.cs
index 3bb589191f..506ea8cf73 100644
--- a/src/Microsoft.ML.Data/DataLoadSave/TrivialReaderEstimator.cs
+++ b/src/Microsoft.ML.Data/DataLoadSave/TrivialReaderEstimator.cs
@@ -12,15 +12,15 @@ namespace Microsoft.ML.Runtime.Data
public sealed class TrivialReaderEstimator: IDataReaderEstimator
where TReader: IDataReader
{
- private readonly TReader _reader;
+ public TReader Reader { get; }
public TrivialReaderEstimator(TReader reader)
{
- _reader = reader;
+ Reader = reader;
}
- public TReader Fit(TSource input) => _reader;
+ public TReader Fit(TSource input) => Reader;
- public SchemaShape GetOutputSchema() => SchemaShape.Create(_reader.GetOutputSchema());
+ public SchemaShape GetOutputSchema() => SchemaShape.Create(Reader.GetOutputSchema());
}
}
diff --git a/src/Microsoft.ML.Data/Microsoft.ML.Data.csproj b/src/Microsoft.ML.Data/Microsoft.ML.Data.csproj
index 8d5b0fd2d0..2a23a7322c 100644
--- a/src/Microsoft.ML.Data/Microsoft.ML.Data.csproj
+++ b/src/Microsoft.ML.Data/Microsoft.ML.Data.csproj
@@ -1,4 +1,4 @@
-
+
netstandard2.0
diff --git a/src/Microsoft.ML.Data/StaticPipe/Attributes.cs b/src/Microsoft.ML.Data/StaticPipe/Attributes.cs
new file mode 100644
index 0000000000..6a02b98d1a
--- /dev/null
+++ b/src/Microsoft.ML.Data/StaticPipe/Attributes.cs
@@ -0,0 +1,27 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+
+namespace Microsoft.ML.Data.StaticPipe
+{
+ ///
+ /// An indicator to the analyzer that this type parameter ought to be a valid schema-shape object (e.g., a leaf-tuple, or
+ /// value-tuples of such) as the return type. Note that this attribute is typically only used in situations where a user
+ /// might be essentially declaring that type, as opposed to using an already established shape type. So: a method that merely
+ /// takes an already existing typed instance would tend on the other hand to not use this attribute. To give an example:
+ /// the mapper-taking Append method of the statically typed Estimator
+ /// has the attribute on the type parameter for the new output tuple shape.
+ ///
+ /// The cost to not specifying this on such an entry point is that the compile time type-checks on the shape parameters will
+ /// no longer be enforced, which is suboptimal given that the purpose of the statically typed interfaces is to have compile-time
+ /// checks. However, it is not disastrous since the runtime checks will still be in effect.
+ ///
+ /// User code may use this attribute on their types if they have generic type parameters that interface with this library.
+ ///
+ [AttributeUsage(AttributeTargets.GenericParameter)]
+ public sealed class IsShapeAttribute : Attribute
+ {
+ }
+}
diff --git a/src/Microsoft.ML.Data/StaticPipe/DataReader.cs b/src/Microsoft.ML.Data/StaticPipe/DataReader.cs
new file mode 100644
index 0000000000..3cf9866509
--- /dev/null
+++ b/src/Microsoft.ML.Data/StaticPipe/DataReader.cs
@@ -0,0 +1,56 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Core.Data;
+using Microsoft.ML.Data.StaticPipe.Runtime;
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Data;
+
+namespace Microsoft.ML.Data.StaticPipe
+{
+ public sealed class DataReader : SchemaBearing // Statically typed wrapper around a dynamic IDataReader, carrying its schema shape.
+ {
+ public IDataReader AsDynamic { get; } // The wrapped dynamic reader.
+
+ internal DataReader(IHostEnvironment env, IDataReader reader, StaticSchemaShape shape)
+ : base(env, shape)
+ {
+ Env.AssertValue(reader);
+
+ AsDynamic = reader;
+ Shape.Check(Env, AsDynamic.GetOutputSchema()); // Check the declared shape against the reader's actual output schema.
+ }
+
+ public DataReaderEstimator> Append(Estimator estimator) // Chains an estimator after this reader, yielding a reader estimator.
+ where TTrans : class, ITransformer
+ {
+ Env.CheckValue(estimator, nameof(estimator)); // Reject null up front instead of failing on estimator.AsDynamic below.
+ Env.Assert(nameof(Append) == nameof(CompositeReaderEstimator.Append));
+ var readerEst = AsDynamic.Append(estimator.AsDynamic);
+ return new DataReaderEstimator>(Env, readerEst, estimator.Shape);
+ }
+
+ public DataReader Append(Transformer transformer) // Chains a transformer after this reader, yielding a new reader.
+ where TTransformer : class, ITransformer
+ {
+ Env.CheckValue(transformer, nameof(transformer));
+ Env.Assert(nameof(Append) == nameof(CompositeReaderEstimator.Append));
+
+ var reader = AsDynamic.Append(transformer.AsDynamic);
+ return new DataReader(Env, reader, transformer.Shape);
+ }
+
+ public DataView Read(TIn input) // Reads through the wrapped reader and wraps the result with this reader's shape.
+ {
+ // We cannot check the value of input since it may not be a reference type, and it is not clear
+ // that there is an absolute case for insisting that the input type be a reference type, and much
+ // less further that null inputs will never be correct. So we rely on the wrapping object to make
+ // that determination.
+ Env.Assert(nameof(Read) == nameof(IDataReader.Read));
+
+ var data = AsDynamic.Read(input);
+ return new DataView(Env, data, Shape);
+ }
+ }
+}
diff --git a/src/Microsoft.ML.Data/StaticPipe/DataReaderEstimator.cs b/src/Microsoft.ML.Data/StaticPipe/DataReaderEstimator.cs
new file mode 100644
index 0000000000..d922c2a677
--- /dev/null
+++ b/src/Microsoft.ML.Data/StaticPipe/DataReaderEstimator.cs
@@ -0,0 +1,43 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Core.Data;
+using Microsoft.ML.Data.StaticPipe.Runtime;
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Data;
+
+namespace Microsoft.ML.Data.StaticPipe
+{
+ public sealed class DataReaderEstimator : SchemaBearing // Statically typed wrapper around a dynamic IDataReaderEstimator.
+ where TDataReader : class, IDataReader
+ {
+ public IDataReaderEstimator AsDynamic { get; } // The wrapped dynamic reader estimator.
+
+ internal DataReaderEstimator(IHostEnvironment env, IDataReaderEstimator estimator, StaticSchemaShape shape)
+ : base(env, shape)
+ {
+ Env.AssertValue(estimator);
+
+ AsDynamic = estimator;
+ Shape.Check(Env, AsDynamic.GetOutputSchema()); // Check the declared shape against the estimator's output schema.
+ }
+
+ public DataReader Fit(TIn input) // Fits the wrapped estimator and wraps the resulting reader with the same shape.
+ {
+ Contracts.Assert(nameof(Fit) == nameof(IDataReaderEstimator.Fit));
+
+ var reader = AsDynamic.Fit(input);
+ return new DataReader(Env, reader, Shape);
+ }
+
+ public DataReaderEstimator> Append(Estimator est) // Chains an estimator after this reader estimator.
+ where TTrans : class, ITransformer
+ {
+ Env.CheckValue(est, nameof(est)); // Reject null up front instead of failing on est.AsDynamic below.
+ Contracts.Assert(nameof(Append) == nameof(CompositeReaderEstimator.Append));
+ var readerEst = AsDynamic.Append(est.AsDynamic);
+ return new DataReaderEstimator>(Env, readerEst, est.Shape);
+ }
+ }
+}
diff --git a/src/Microsoft.ML.Data/StaticPipe/DataView.cs b/src/Microsoft.ML.Data/StaticPipe/DataView.cs
new file mode 100644
index 0000000000..9c2f3f22c5
--- /dev/null
+++ b/src/Microsoft.ML.Data/StaticPipe/DataView.cs
@@ -0,0 +1,24 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Data;
+using Microsoft.ML.Data.StaticPipe.Runtime;
+
+namespace Microsoft.ML.Data.StaticPipe
+{
+ public class DataView : SchemaBearing // Statically typed wrapper around a dynamic IDataView, carrying its schema shape.
+ {
+ public IDataView AsDynamic { get; } // The wrapped dynamic view.
+
+ internal DataView(IHostEnvironment env, IDataView view, StaticSchemaShape shape)
+ : base(env, shape)
+ {
+ Env.AssertValue(view);
+
+ AsDynamic = view;
+ Shape.Check(Env, AsDynamic.Schema); // Check the declared shape against the view's actual schema.
+ }
+ }
+}
diff --git a/src/Microsoft.ML.Data/StaticPipe/Estimator.cs b/src/Microsoft.ML.Data/StaticPipe/Estimator.cs
new file mode 100644
index 0000000000..28e79712b5
--- /dev/null
+++ b/src/Microsoft.ML.Data/StaticPipe/Estimator.cs
@@ -0,0 +1,98 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Collections.Generic;
+using Microsoft.ML.Core.Data;
+using Microsoft.ML.Data.StaticPipe.Runtime;
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Data;
+
+namespace Microsoft.ML.Data.StaticPipe
+{
+ public sealed class Estimator : SchemaBearing // Statically typed wrapper around a dynamic IEstimator, tracking input and output shapes.
+ where TTransformer : class, ITransformer
+ {
+ public IEstimator AsDynamic { get; } // The wrapped dynamic estimator.
+ private readonly StaticSchemaShape _inShape; // Shape that input data is checked against in Fit.
+
+ internal Estimator(IHostEnvironment env, IEstimator estimator, StaticSchemaShape inShape, StaticSchemaShape outShape)
+ : base(env, outShape)
+ {
+ Env.CheckValue(estimator, nameof(estimator));
+ AsDynamic = estimator;
+ _inShape = inShape;
+ // Our ability to check estimators at constructor time is somewhat limited. During fit though we could.
+ // Fortunately, estimators are one of the least likely things that users will frequently declare the
+ // types of on their own.
+ }
+
+ public Transformer Fit(DataView view) // Checks the view against the input shape, fits, and wraps the fitted transformer.
+ {
+ Env.CheckValue(view, nameof(view)); // Reject null up front instead of failing on view.AsDynamic below.
+ Contracts.Assert(nameof(Fit) == nameof(IEstimator.Fit));
+ _inShape.Check(Env, view.AsDynamic.Schema);
+ var trans = AsDynamic.Fit(view.AsDynamic);
+ return new Transformer(Env, trans, _inShape, Shape);
+ }
+
+ public Estimator Append(Estimator estimator) // Chains another statically typed estimator onto this one.
+ {
+ Env.CheckValue(estimator, nameof(estimator));
+
+ var est = AsDynamic.Append(estimator.AsDynamic);
+ return new Estimator(Env, est, _inShape, estimator.Shape);
+ }
+
+ public Estimator Append<[IsShape] TTupleNewOutShape>(Func mapper) // Appends the estimator derived by analyzing a column-mapping delegate.
+ {
+ Contracts.CheckValue(mapper, nameof(mapper));
+
+ using (var ch = Env.Start(nameof(Append)))
+ {
+ var method = mapper.Method;
+
+ // Construct the dummy column structure, then apply the mapping.
+ var input = StaticPipeInternalUtils.MakeAnalysisInstance(out var fakeReconciler);
+ KeyValuePair[] inPairs = StaticPipeInternalUtils.GetNamesValues(input, method.GetParameters()[0]);
+
+ // Initially we suppose we've only assigned names to the inputs.
+ var inputColToName = new Dictionary();
+ foreach (var p in inPairs)
+ inputColToName[p.Value] = p.Key;
+ string NameMap(PipelineColumn col) // Yields null for any column that is not one of the inputs.
+ {
+ inputColToName.TryGetValue(col, out var val);
+ return val;
+ }
+
+ var readerEst = StaticPipeUtils.GeneralFunctionAnalyzer(Env, ch, input, fakeReconciler, mapper, out var estTail, NameMap);
+ ch.Assert(readerEst == null);
+ ch.AssertValue(estTail);
+
+ var est = AsDynamic.Append(estTail);
+ var newOut = StaticSchemaShape.Make(method.ReturnParameter);
+ var toReturn = new Estimator(Env, est, _inShape, newOut);
+ ch.Done();
+ return toReturn;
+ }
+ }
+ }
+
+ public static class Estimator
+ {
+ ///
+ /// Create an object that can be used as the start of a new pipeline, that assumes it uses
+ /// something with the shape of the given schema-bearing object as its input schema shape.
+ /// The returned object is an empty estimator.
+ ///
+ /// The schema-bearing object from which the new empty head of a pipeline is created
+ /// The empty estimator, to which new items may be appended to create a pipeline
+ public static Estimator MakeNew(SchemaBearing fromSchema)
+ {
+ Contracts.CheckValue(fromSchema, nameof(fromSchema));
+ return fromSchema.MakeNewEstimator();
+ }
+ }
+}
diff --git a/src/Microsoft.ML.Data/StaticPipe/PipelineColumn.cs b/src/Microsoft.ML.Data/StaticPipe/PipelineColumn.cs
new file mode 100644
index 0000000000..dd8ace66b3
--- /dev/null
+++ b/src/Microsoft.ML.Data/StaticPipe/PipelineColumn.cs
@@ -0,0 +1,144 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Core.Data;
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Data;
+
+namespace Microsoft.ML.Data.StaticPipe.Runtime
+{
+ ///
+ /// This class is used as a type marker for producing structures for use in the statically
+ /// typed columnate pipeline building helper API. Users will not create these structures directly. Rather components
+ /// will implement (hidden) subclasses of one of this class's subclasses (e.g., Scalar,
+ /// Vector), which will contain information that the builder API can use to construct an actual
+ /// sequence of objects.
+ ///
+ public abstract class PipelineColumn
+ {
+ internal readonly Reconciler ReconcilerObj; // Reconciler this column belongs to; checked non-null in the constructor.
+ internal readonly PipelineColumn[] Dependencies; // Columns this one depends on; may be null.
+
+ private protected PipelineColumn(Reconciler reconciler, PipelineColumn[] dependencies)
+ {
+ Contracts.CheckValue(reconciler, nameof(reconciler));
+ Contracts.CheckValueOrNull(dependencies);
+
+ ReconcilerObj = reconciler;
+ Dependencies = dependencies;
+ }
+ }
+
+ ///
+ /// For representing a non-key, non-vector column type.
+ ///
+ ///
+ public abstract class Scalar : PipelineColumn
+ {
+ protected Scalar(Reconciler reconciler, params PipelineColumn[] dependencies)
+ : base(reconciler, dependencies)
+ {
+ }
+
+ public override string ToString() => $"{nameof(Scalar)}<{typeof(T).Name}>";
+ }
+
+ ///
+ /// For representing a vector column type of known length.
+ ///
+ /// The vector item type.
+ public abstract class Vector : PipelineColumn
+ {
+ protected Vector(Reconciler reconciler, params PipelineColumn[] dependencies)
+ : base(reconciler, dependencies)
+ {
+ }
+
+ public override string ToString() => $"{nameof(Vector)}<{typeof(T).Name}>";
+ }
+
+ ///
+ /// For representing a known-length vector column type that is normalized, that is, its
+ /// normalization metadata value is set with the value true.
+ ///
+ /// The vector item type.
+ public abstract class NormVector : Vector
+ {
+ protected NormVector(Reconciler reconciler, params PipelineColumn[] dependencies)
+ : base(reconciler, dependencies)
+ {
+ }
+
+ public override string ToString() => $"{nameof(NormVector)}<{typeof(T).Name}>";
+ }
+
+ ///
+ /// For representing a vector column type of unknown length.
+ ///
+ /// The vector item type.
+ public abstract class VarVector : PipelineColumn
+ {
+ protected VarVector(Reconciler reconciler, params PipelineColumn[] dependencies)
+ : base(reconciler, dependencies)
+ {
+ }
+
+ public override string ToString() => $"{nameof(VarVector)}<{typeof(T).Name}>";
+ }
+
+ ///
+ /// For representing a key column type of known cardinality, where the type of the key values is not specified.
+ ///
+ /// The physical type representing the key, which should always be one of the
+ /// 1-, 2-, 4-, or 8-byte unsigned integer types
+ /// Note that a vector of keys type we would represent as a Vector with a Key
+ /// type parameter. Note also, if the type of the key values is known then that should be represented
+ /// by the two-type-parameter Key.
+ public abstract class Key : PipelineColumn
+ {
+ protected Key(Reconciler reconciler, params PipelineColumn[] dependencies)
+ : base(reconciler, dependencies)
+ {
+ }
+
+ public override string ToString() => $"{nameof(Key)}<{typeof(T).Name}>";
+ }
+
+ ///
+ /// For representing a key-type of known cardinality that has key values over a particular type. This is used to
+ /// represent a key type where it is known that it will have key-value metadata of a particular type TVal.
+ ///
+ /// The physical type representing the key, which should always be one of the
+ /// 1-, 2-, 4-, or 8-byte unsigned integer types
+ /// The type of values the key-type is enumerating. Commonly this is a text type but
+ /// this is not necessary
+ public abstract class Key : Key
+ {
+ protected Key(Reconciler reconciler, params PipelineColumn[] dependencies)
+ : base(reconciler, dependencies)
+ {
+ }
+
+ public override string ToString() => $"{nameof(Key)}<{typeof(T).Name}, {typeof(TVal).Name}>";
+ }
+
+ ///
+ /// For representing a key column type of unknown cardinality.
+ ///
+ /// The physical type representing the key, which should always be one of the
+ /// 1-, 2-, 4-, or 8-byte unsigned integer types
+ /// Note that unlike the one- and two-type-parameter Key duality, there is no
+ /// type corresponding to this type but with key-values, since key-values are necessarily a vector of known
+ /// size so any enumeration into that set would itself be a key-value of unknown cardinality.
+ public abstract class VarKey : PipelineColumn
+ {
+ protected VarKey(Reconciler reconciler, params PipelineColumn[] dependencies)
+ : base(reconciler, dependencies)
+ {
+ }
+
+ public override string ToString() => $"{nameof(VarKey)}<{typeof(T).Name}>";
+ }
+}
diff --git a/src/Microsoft.ML.Data/StaticPipe/Reconciler.cs b/src/Microsoft.ML.Data/StaticPipe/Reconciler.cs
new file mode 100644
index 0000000000..dac94f7a01
--- /dev/null
+++ b/src/Microsoft.ML.Data/StaticPipe/Reconciler.cs
@@ -0,0 +1,78 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Collections.Generic;
+using Microsoft.ML.Core.Data;
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Data;
+
+namespace Microsoft.ML.Data.StaticPipe.Runtime
+{
+ ///
+ /// An object for PipelineColumn instances to indicate to the analysis code for static pipelines that
+ /// they should be considered a single group of columns (through equality on the reconcilers), as well as how to
+ /// actually create the underlying dynamic structures, whether a data-reader estimator
+ /// (for the ReaderReconciler) or an estimator
+ /// (for the EstimatorReconciler).
+ ///
+ public abstract class Reconciler
+ {
+ private protected Reconciler() { }
+ }
+
+ ///
+ /// Reconciler for column groups intended to resolve to a new data-reader estimator
+ /// or data reader.
+ ///
+ /// The input type of the data-reader estimator
+ /// object.
+ public abstract class ReaderReconciler : Reconciler
+ {
+ public ReaderReconciler() : base() { }
+
+ ///
+ /// Returns a data-reader estimator. Note that there are no input names because the columns from a data-reader
+ /// estimator should have no dependencies.
+ ///
+ /// The host environment to use to create the data-reader estimator
+ /// The columns that the object created by the reconciler should output
+ /// A map containing the names to give to each of the columns to output
+ ///
+ public abstract IDataReaderEstimator> Reconcile(
+ IHostEnvironment env, PipelineColumn[] toOutput, IReadOnlyDictionary outputNames);
+ }
+
+ ///
+ /// Reconciler for column groups intended to resolve to an estimator. This type of
+ /// reconciler will work with, e.g., the mapper-taking Append method of the statically typed Estimator,
+ /// or other methods that involve the creation of estimator chains.
+ ///
+ public abstract class EstimatorReconciler : Reconciler
+ {
+ public EstimatorReconciler() : base() { }
+
+ ///
+ /// Returns an estimator.
+ ///
+ /// The host environment to use to create the estimator
+ /// The columns that the object created by the reconciler should output
+ /// The name mapping that maps dependencies of the output columns to their names
+ /// The name mapping that maps the output column to their names
+ /// While most estimators allow full control over the names of their outputs, a limited
+ /// subset of estimator transforms do not allow this: they produce columns whose names are unconfigurable. For
+ /// these, there is this collection which provides the names used by the analysis tool. If the estimator under
+ /// construction must use one of the names here, then they are responsible for "saving" the column they will
+ /// overwrite using applications of a column-copying estimator. Note that if the estimator under
+ /// construction has complete control over what columns it produces, there is no need for it to pay this argument
+ /// any attention.
+ /// Returns an estimator.
+ public abstract IEstimator Reconcile(
+ IHostEnvironment env,
+ PipelineColumn[] toOutput,
+ IReadOnlyDictionary inputNames,
+ IReadOnlyDictionary outputNames,
+ IReadOnlyCollection usedNames);
+ }
+}
diff --git a/src/Microsoft.ML.Data/StaticPipe/SchemaAssertionContext.cs b/src/Microsoft.ML.Data/StaticPipe/SchemaAssertionContext.cs
new file mode 100644
index 0000000000..5fb6babbec
--- /dev/null
+++ b/src/Microsoft.ML.Data/StaticPipe/SchemaAssertionContext.cs
@@ -0,0 +1,215 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Runtime.Data;
+
+namespace Microsoft.ML.Data.StaticPipe.Runtime
+{
+ ///
+ /// An object for declaring a schema-shape. This is most commonly used in situations where a user is
+ /// asserting that a dynamic object bears a certain specific static schema. For example: when phrasing
+ /// the dynamically typed IDataView as being a specific statically typed DataView.
+ /// It is never created by the user directly, but instead an instance is typically fed in as an argument
+ /// to a delegate, and the user will call methods on this context to indicate a certain type is so.
+ ///
+ ///
+ /// All PipelineColumn objects are, deliberately, imperatively useless as they are
+ /// intended to be used only in a declarative fashion. The methods and properties of this class go one step
+ /// further and return null for everything with a return type of PipelineColumn.
+ ///
+ /// Because the data view type system is extensible, assemblies that declare their own types
+ /// should allow users to assert typedness in their types by defining extension methods over this class.
+ /// However, even failing the provision of such a helper, a user can still provide a workaround by just
+ /// declaring the type as something like default(Scalar<TheCustomType>), without using the
+ /// instance of this context.
+ ///
+ public sealed class SchemaAssertionContext
+ {
+ // Hiding all these behind empty-structures is a bit of a cheap trick, but probably works
+ // pretty well considering that the alternative is a bunch of tiny objects allocated on the
+ // stack. Plus, the default value winds up working for them. We can also exploit the `ref struct`
+ // property of these things to make sure people don't make the mistake of assigning them as the
+ // values.
+
+ /// Assertions over a column of kind I1.
+ public PrimitiveTypeAssertions I1 => default;
+
+ /// Assertions over a column of kind I2.
+ public PrimitiveTypeAssertions I2 => default;
+
+ /// Assertions over a column of kind I4.
+ public PrimitiveTypeAssertions I4 => default;
+
+ /// Assertions over a column of kind I8.
+ public PrimitiveTypeAssertions I8 => default;
+
+ /// Assertions over a column of kind U1.
+ public PrimitiveTypeAssertions U1 => default;
+
+ /// Assertions over a column of kind U2.
+ public PrimitiveTypeAssertions U2 => default;
+
+ /// Assertions over a column of kind U4.
+ public PrimitiveTypeAssertions U4 => default;
+
+ /// Assertions over a column of kind U8.
+ public PrimitiveTypeAssertions U8 => default;
+
+ /// Assertions over a column of kind R4.
+ public NormalizableTypeAssertions R4 => default;
+
+ /// Assertions over a column of kind R8.
+ public NormalizableTypeAssertions R8 => default;
+
+ /// Assertions over a column of kind Text.
+ public PrimitiveTypeAssertions Text => default;
+
+ /// Assertions over a column of kind Bool.
+ public PrimitiveTypeAssertions Bool => default;
+
+ /// Assertions over a column of key type with the U1 physical kind.
+ public KeyTypeSelectorAssertions KeyU1 => default;
+ /// Assertions over a column of key type with the U2 physical kind.
+ public KeyTypeSelectorAssertions KeyU2 => default;
+ /// Assertions over a column of key type with the U4 physical kind.
+ public KeyTypeSelectorAssertions KeyU4 => default;
+ /// Assertions over a column of key type with the U8 physical kind.
+ public KeyTypeSelectorAssertions KeyU8 => default;
+
+ internal static readonly SchemaAssertionContext Inst = new SchemaAssertionContext(); // Sole instance; the constructor is private.
+
+ private SchemaAssertionContext() { }
+
+ // Until we have some transforms that use them, we might not expect to see too much interest in asserting
+ // the time relevant datatypes.
+
+ ///
+ /// Holds assertions relating to the basic primitive types.
+ ///
+ public ref struct PrimitiveTypeAssertions
+ {
+ private PrimitiveTypeAssertions(int i) { }
+
+ ///
+ /// Asserts a type that is directly this primitive type.
+ ///
+ public Scalar Scalar => null;
+
+ ///
+ /// Asserts a type corresponding to a vector of this primitive type,
+ /// where the vector length is known.
+ ///
+ public Vector Vector => null;
+
+ ///
+ /// Asserts a type corresponding to a vector of this primitive type,
+ /// where the vector length is not known.
+ ///
+ public VarVector VarVector => null;
+ }
+
+ public ref struct NormalizableTypeAssertions
+ {
+ private NormalizableTypeAssertions(int i) { }
+
+ ///
+ /// Asserts a type that is directly this primitive type.
+ ///
+ public Scalar Scalar => null;
+
+ ///
+ /// Asserts a type corresponding to a vector of this primitive type,
+ /// where the vector length is known.
+ ///
+ public Vector Vector => null;
+
+ ///
+ /// Asserts a type corresponding to a vector of this primitive type,
+ /// where the vector length is not known.
+ ///
+ public VarVector VarVector => null;
+ ///
+ /// Asserts a type corresponding to a vector of this primitive type,
+ /// where the vector length is known, and the normalization
+ /// metadata is defined with a Boolean true value.
+ ///
+ public NormVector NormVector => null;
+ }
+
+ ///
+ /// Once a single general key type has been selected, we can select its vector-ness.
+ ///
+ /// The static type corresponding to a key type.
+ public ref struct KeyTypeVectorAssertions
+ where T : class
+ {
+ private KeyTypeVectorAssertions(int i) { }
+
+ ///
+ /// Asserts a type that is directly this key type.
+ ///
+ public T Scalar => null;
+
+ ///
+ /// Asserts a type corresponding to a vector of this key type,
+ /// where the vector length is known.
+ ///
+ public Vector Vector => null;
+
+ ///
+ /// Asserts a type corresponding to a vector of this key type,
+ /// where the vector length is not known.
+ ///
+ public VarVector VarVector => null;
+ }
+
+ ///
+ /// Assertions for key types of various forms. Used to select a particular form of key type.
+ ///
+ ///
+ public ref struct KeyTypeSelectorAssertions
+ {
+ private KeyTypeSelectorAssertions(int i) { }
+
+ ///
+ /// Asserts a type corresponding to a key type where the count is positive, that is, is of known cardinality,
+ /// but that we are not asserting has any particular type of key-value metadata.
+ ///
+ public KeyTypeVectorAssertions> NoValue => default;
+
+ ///
+ /// Asserts a type corresponding to a key type where the count is zero, that is, is of unknown cardinality.
+ ///
+ public KeyTypeVectorAssertions> UnknownCardinality => default;
+
+ /// Asserts a key type of known cardinality with a vector of I1 key-value metadata.
+ public KeyTypeVectorAssertions> I1Values => default;
+ /// Asserts a key type of known cardinality with a vector of I2 key-value metadata.
+ public KeyTypeVectorAssertions> I2Values => default;
+ /// Asserts a key type of known cardinality with a vector of I4 key-value metadata.
+ public KeyTypeVectorAssertions> I4Values => default;
+ /// Asserts a key type of known cardinality with a vector of I8 key-value metadata.
+ public KeyTypeVectorAssertions> I8Values => default;
+
+ /// Asserts a key type of known cardinality with a vector of U1 key-value metadata.
+ public KeyTypeVectorAssertions> U1Values => default;
+ /// Asserts a key type of known cardinality with a vector of U2 key-value metadata.
+ public KeyTypeVectorAssertions> U2Values => default;
+ /// Asserts a key type of known cardinality with a vector of U4 key-value metadata.
+ public KeyTypeVectorAssertions> U4Values => default;
+ /// Asserts a key type of known cardinality with a vector of U8 key-value metadata.
+ public KeyTypeVectorAssertions> U8Values => default;
+
+ /// Asserts a key type of known cardinality with a vector of R4 key-value metadata.
+ public KeyTypeVectorAssertions> R4Values => default;
+ /// Asserts a key type of known cardinality with a vector of R8 key-value metadata.
+ public KeyTypeVectorAssertions> R8Values => default;
+
+ /// Asserts a key type of known cardinality with a vector of Text key-value metadata.
+ public KeyTypeVectorAssertions> TextValues => default;
+ /// Asserts a key type of known cardinality with a vector of Bool key-value metadata.
+ public KeyTypeVectorAssertions> BoolValues => default;
+ }
+ }
+}
\ No newline at end of file
diff --git a/src/Microsoft.ML.Data/StaticPipe/SchemaBearing.cs b/src/Microsoft.ML.Data/StaticPipe/SchemaBearing.cs
new file mode 100644
index 0000000000..7413ab4764
--- /dev/null
+++ b/src/Microsoft.ML.Data/StaticPipe/SchemaBearing.cs
@@ -0,0 +1,50 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Collections.Generic;
+using Microsoft.ML.Core.Data;
+using Microsoft.ML.Data.StaticPipe.Runtime;
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Data;
+
+namespace Microsoft.ML.Data.StaticPipe
+{
+ ///
+ /// A base class for the statically-typed pipeline components, that are marked as producing
+ /// data whose schema has a certain shape.
+ ///
+ ///
+ public abstract class SchemaBearing
+ {
+ private protected readonly IHostEnvironment Env; // Host environment supplied at construction.
+ internal readonly StaticSchemaShape Shape; // The schema shape this component is declared to produce.
+
+ ///
+ /// Constructor for a block maker.
+ ///
+ /// The host environment, stored with this object
+ /// The item holding the names and types as enumerated within
+ /// the shape type parameter
+ private protected SchemaBearing(IHostEnvironment env, StaticSchemaShape shape)
+ {
+ Contracts.AssertValue(env);
+ env.AssertValue(shape);
+
+ Env = env;
+ Shape = shape;
+ }
+
+ ///
+ /// Create an object that can be used as the start of a new pipeline, that assumes it uses
+ /// something with the shape of this object as its input schema shape.
+ /// The returned object is an empty estimator.
+ ///
+ internal Estimator MakeNewEstimator()
+ {
+ var est = new EstimatorChain();
+ return new Estimator(Env, est, Shape, Shape); // This object's shape serves as both the input and the output shape.
+ }
+ }
+}
diff --git a/src/Microsoft.ML.Data/StaticPipe/StaticPipeExtensions.cs b/src/Microsoft.ML.Data/StaticPipe/StaticPipeExtensions.cs
new file mode 100644
index 0000000000..d14e0f857a
--- /dev/null
+++ b/src/Microsoft.ML.Data/StaticPipe/StaticPipeExtensions.cs
@@ -0,0 +1,98 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using Microsoft.ML.Runtime.Data;
+using System;
+using Microsoft.ML.Data.StaticPipe.Runtime;
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Core.Data;
+
+namespace Microsoft.ML.Data.StaticPipe
+{
+ public static class StaticPipeExtensions
+ {
+ ///
+ /// Asserts that a given data view has the indicated schema. If this method returns without
+ /// throwing then the view has been validated to have columns with the indicated names and types.
+ ///
+ /// The type representing the view's schema shape
+ /// The view to assert the static schema on
+ /// The host environment to keep in the statically typed variant
+ /// The delegate through which we declare the schema, which ought to
+ /// use the input SchemaAssertionContext to declare a value-tuple
+ /// of the PipelineColumn indices, properly named
+ /// A statically typed wrapping of the input view
+ public static DataView AssertStatic<[IsShape] T>(this IDataView view, IHostEnvironment env,
+ Func outputDecl)
+ {
+ Contracts.CheckValue(env, nameof(env));
+ env.CheckValue(view, nameof(view));
+ env.CheckValue(outputDecl, nameof(outputDecl));
+
+ // We don't actually need to call the method, it's just there to give the declaration.
+#if DEBUG
+ outputDecl(SchemaAssertionContext.Inst);
+#endif
+
+ var schema = StaticSchemaShape.Make(outputDecl.Method.ReturnParameter); // The shape comes from the delegate's declared return type.
+ return new DataView(env, view, schema);
+ }
+
+ public static DataReader AssertStatic(this IDataReader reader, IHostEnvironment env, // Wraps a data reader with the shape declared by outputDecl.
+ Func outputDecl)
+ {
+ Contracts.CheckValue(env, nameof(env));
+ env.CheckValue(reader, nameof(reader));
+ env.CheckValue(outputDecl, nameof(outputDecl));
+
+ var schema = StaticSchemaShape.Make(outputDecl.Method.ReturnParameter);
+ return new DataReader(env, reader, schema);
+ }
+
+ public static DataReaderEstimator AssertStatic( // Wraps a data-reader estimator with the shape declared by outputDecl.
+ this IDataReaderEstimator readerEstimator, IHostEnvironment env,
+ Func outputDecl)
+ where TReader : class, IDataReader
+ {
+ Contracts.CheckValue(env, nameof(env));
+ env.CheckValue(readerEstimator, nameof(readerEstimator));
+ env.CheckValue(outputDecl, nameof(outputDecl));
+
+ var schema = StaticSchemaShape.Make(outputDecl.Method.ReturnParameter);
+ return new DataReaderEstimator(env, readerEstimator, schema);
+ }
+
+ public static Transformer AssertStatic<[IsShape] TIn, [IsShape] TOut, TTrans>( // Wraps a transformer with declared input and output shapes.
+ this TTrans transformer, IHostEnvironment env,
+ Func inputDecl,
+ Func outputDecl)
+ where TTrans : class, ITransformer
+ {
+ Contracts.CheckValue(env, nameof(env));
+ env.CheckValue(transformer, nameof(transformer));
+ env.CheckValue(inputDecl, nameof(inputDecl));
+ env.CheckValue(outputDecl, nameof(outputDecl));
+
+ var inSchema = StaticSchemaShape.Make(inputDecl.Method.ReturnParameter);
+ var outSchema = StaticSchemaShape.Make(outputDecl.Method.ReturnParameter);
+ return new Transformer(env, transformer, inSchema, outSchema);
+ }
+
+ public static Estimator AssertStatic<[IsShape] TIn, [IsShape] TOut, TTrans>( // Wraps an estimator with declared input and output shapes.
+ this IEstimator estimator, IHostEnvironment env,
+ Func inputDecl,
+ Func outputDecl)
+ where TTrans : class, ITransformer
+ {
+ Contracts.CheckValue(env, nameof(env));
+ env.CheckValue(estimator, nameof(estimator));
+ env.CheckValue(inputDecl, nameof(inputDecl));
+ env.CheckValue(outputDecl, nameof(outputDecl));
+
+ var inSchema = StaticSchemaShape.Make(inputDecl.Method.ReturnParameter);
+ var outSchema = StaticSchemaShape.Make(outputDecl.Method.ReturnParameter);
+ return new Estimator(env, estimator, inSchema, outSchema);
+ }
+ }
+}
diff --git a/src/Microsoft.ML.Data/StaticPipe/StaticPipeInternalUtils.cs b/src/Microsoft.ML.Data/StaticPipe/StaticPipeInternalUtils.cs
new file mode 100644
index 0000000000..54119f0d0a
--- /dev/null
+++ b/src/Microsoft.ML.Data/StaticPipe/StaticPipeInternalUtils.cs
@@ -0,0 +1,486 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using Microsoft.ML.Core.Data;
+using Microsoft.ML.Runtime;
+using Microsoft.ML.Runtime.Internal.Utilities;
+
+namespace Microsoft.ML.Data.StaticPipe.Runtime
+{
+ ///
+ /// Utility functions useful for the internal implementations of the key pipeline utilities.
+ ///
+ internal static class StaticPipeInternalUtils
+ {
+ ///
+ /// Given a type which is a value-tuple tree with PipelineColumn leaves, return an instance of that
+ /// type which has appropriate instances of PipelineColumn that use the returned reconciler.
+ ///
+ /// This is a data-reconciler that always reconciles to a null object
+ /// A type of either a value-tuple or one of the major PipelineColumn subclasses
+ /// (e.g., Scalar, Vector, etc.)
+ /// An instance of T where all fields have the provided reconciler
+ public static T MakeAnalysisInstance(out ReaderReconciler fakeReconciler)
+ {
+ var rec = new AnalyzeUtil.Rec();
+ fakeReconciler = rec; // The same reconciler instance is handed back to the caller and to the core builder.
+ return (T)AnalyzeUtil.MakeAnalysisInstanceCore(rec);
+ }
+
+ private static class AnalyzeUtil
+ {
+ public sealed class Rec : ReaderReconciler
+ {
+ public Rec() : base() { }
+
+ public override IDataReaderEstimator