Skip to content

Static typed Estimator/Transformer/Data #778

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Sep 4, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion Microsoft.ML.sln
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Microsoft.ML.CpuMath", "Mic
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "tools-local", "tools-local", "{7F13E156-3EBA-4021-84A5-CD56BA72F99E}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CodeAnalyzer", "tools-local\Microsoft.ML.CodeAnalyzer\Microsoft.ML.CodeAnalyzer.csproj", "{B4E55B2D-2A92-46E7-B72F-E76D6FD83440}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.InternalCodeAnalyzer", "tools-local\Microsoft.ML.InternalCodeAnalyzer\Microsoft.ML.InternalCodeAnalyzer.csproj", "{B4E55B2D-2A92-46E7-B72F-E76D6FD83440}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.CodeAnalyzer.Tests", "test\Microsoft.ML.CodeAnalyzer.Tests\Microsoft.ML.CodeAnalyzer.Tests.csproj", "{3E4ABF07-7970-4BE6-B45B-A13D3C397545}"
EndProject
Expand All @@ -111,6 +111,10 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.HalLearners",
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.TensorFlow", "src\Microsoft.ML.TensorFlow\Microsoft.ML.TensorFlow.csproj", "{570A0B8A-5463-44D2-8521-54C0CA4CACA9}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.Analyzer", "src\Microsoft.ML.Analyzer\Microsoft.ML.Analyzer.csproj", "{6DEF0F40-3853-47B3-8165-5F24BA5E14DF}"
EndProject
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Microsoft.ML.StaticPipelineTesting", "test\Microsoft.ML.StaticPipelineTesting\Microsoft.ML.StaticPipelineTesting.csproj", "{8B38BF24-35F4-4787-A9C5-22D35987106E}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand Down Expand Up @@ -399,6 +403,22 @@ Global
{570A0B8A-5463-44D2-8521-54C0CA4CACA9}.Release|Any CPU.Build.0 = Release|Any CPU
{570A0B8A-5463-44D2-8521-54C0CA4CACA9}.Release-Intrinsics|Any CPU.ActiveCfg = Release|Any CPU
{570A0B8A-5463-44D2-8521-54C0CA4CACA9}.Release-Intrinsics|Any CPU.Build.0 = Release|Any CPU
{6DEF0F40-3853-47B3-8165-5F24BA5E14DF}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{6DEF0F40-3853-47B3-8165-5F24BA5E14DF}.Debug|Any CPU.Build.0 = Debug|Any CPU
{6DEF0F40-3853-47B3-8165-5F24BA5E14DF}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug|Any CPU
{6DEF0F40-3853-47B3-8165-5F24BA5E14DF}.Debug-Intrinsics|Any CPU.Build.0 = Debug|Any CPU
{6DEF0F40-3853-47B3-8165-5F24BA5E14DF}.Release|Any CPU.ActiveCfg = Release|Any CPU
{6DEF0F40-3853-47B3-8165-5F24BA5E14DF}.Release|Any CPU.Build.0 = Release|Any CPU
{6DEF0F40-3853-47B3-8165-5F24BA5E14DF}.Release-Intrinsics|Any CPU.ActiveCfg = Release|Any CPU
{6DEF0F40-3853-47B3-8165-5F24BA5E14DF}.Release-Intrinsics|Any CPU.Build.0 = Release|Any CPU
{8B38BF24-35F4-4787-A9C5-22D35987106E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{8B38BF24-35F4-4787-A9C5-22D35987106E}.Debug|Any CPU.Build.0 = Debug|Any CPU
{8B38BF24-35F4-4787-A9C5-22D35987106E}.Debug-Intrinsics|Any CPU.ActiveCfg = Debug|Any CPU
{8B38BF24-35F4-4787-A9C5-22D35987106E}.Debug-Intrinsics|Any CPU.Build.0 = Debug|Any CPU
{8B38BF24-35F4-4787-A9C5-22D35987106E}.Release|Any CPU.ActiveCfg = Release|Any CPU
{8B38BF24-35F4-4787-A9C5-22D35987106E}.Release|Any CPU.Build.0 = Release|Any CPU
{8B38BF24-35F4-4787-A9C5-22D35987106E}.Release-Intrinsics|Any CPU.ActiveCfg = Release|Any CPU
{8B38BF24-35F4-4787-A9C5-22D35987106E}.Release-Intrinsics|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down Expand Up @@ -444,6 +464,8 @@ Global
{00E38F77-1E61-4CDF-8F97-1417D4E85053} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{A7222F41-1CF0-47D9-B80C-B4D77B027A61} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{570A0B8A-5463-44D2-8521-54C0CA4CACA9} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{6DEF0F40-3853-47B3-8165-5F24BA5E14DF} = {09EADF06-BE25-4228-AB53-95AE3E15B530}
{8B38BF24-35F4-4787-A9C5-22D35987106E} = {AED9C836-31E3-4F3F-8ABC-929555D3F3C4}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {41165AF1-35BB-4832-A189-73060F82B01D}
Expand Down
4 changes: 4 additions & 0 deletions build/Dependencies.props
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,9 @@
<SystemDrawingCommonPackageVersion>4.5.0</SystemDrawingCommonPackageVersion>
<BenchmarkDotNetVersion>0.11.1</BenchmarkDotNetVersion>
<TensorFlowVersion>1.10.0</TensorFlowVersion>

<MicrosoftCodeAnalysisCSharpVersion>2.9.0</MicrosoftCodeAnalysisCSharpVersion>
<MicrosoftCSharpVersion>4.5.0</MicrosoftCSharpVersion>
<SystemCompositionVersion>1.2.0</SystemCompositionVersion>
</PropertyGroup>
</Project>
2 changes: 1 addition & 1 deletion src/Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
<ItemGroup>
<ProjectReference
Condition="'$(UseMLCodeAnalyzer)' != 'false' and '$(MSBuildProjectExtension)' == '.csproj'"
Include="$(MSBuildThisFileDirectory)\..\tools-local\Microsoft.ML.CodeAnalyzer\Microsoft.ML.CodeAnalyzer.csproj">
Include="$(MSBuildThisFileDirectory)\..\tools-local\Microsoft.ML.InternalCodeAnalyzer\Microsoft.ML.InternalCodeAnalyzer.csproj">
<ReferenceOutputAssembly>false</ReferenceOutputAssembly>
<OutputItemType>Analyzer</OutputItemType>
</ProjectReference>
Expand Down
13 changes: 13 additions & 0 deletions src/Microsoft.ML.Analyzer/Microsoft.ML.Analyzer.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In order to get this analyzer in our Microsoft.ML NuGet package, we will have to do some build authoring. Today our infrastructure only supports putting things in the build, lib and runtimes folders. It shouldn't be much work, but someone will have to do it.

Let me know if you want me to pick the NuGet authoring up, or if you want to handle it yourself (I can give you some tips if necessary).

<TargetFramework>netstandard1.3</TargetFramework>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="Microsoft.CodeAnalysis.CSharp.Workspaces" Version="$(MicrosoftCodeAnalysisCSharpVersion)" />
<PackageReference Include="Microsoft.CSharp" Version="$(MicrosoftCSharpVersion)" />
<PackageReference Include="System.Composition" Version="$(SystemCompositionVersion)" />
</ItemGroup>

</Project>
180 changes: 180 additions & 0 deletions src/Microsoft.ML.Analyzer/TypeIsSchemaShapeAnalyzer.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections.Immutable;
using System.Linq;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using Microsoft.CodeAnalysis.Diagnostics;

namespace Microsoft.ML.Analyzer
{
    /// <summary>
    /// Analyzer for invocations of generic methods whose type parameters carry the
    /// <c>Microsoft.ML.Data.StaticPipe.IsShapeAttribute</c> attribute. For each such type parameter,
    /// the supplied type argument must be a type deriving from
    /// <c>Microsoft.ML.Data.StaticPipe.Runtime.PipelineColumn</c>, a tuple composed (recursively) of
    /// such types, or a type parameter that is itself attributed or suitably constrained. Otherwise
    /// one of the two diagnostics declared below is reported at the invocation.
    /// </summary>
    [DiagnosticAnalyzer(LanguageNames.CSharp)]
    public sealed class TypeIsSchemaShapeAnalyzer : DiagnosticAnalyzer
    {
        /// <summary>
        /// Diagnostic reported when a concrete (non type-parameter) type is not a valid schema shape.
        /// </summary>
        internal static class ShapeDiagnostic
        {
            private const string Category = "Type Check";
            public const string Id = "MSML_TypeShouldBeSchemaShape";
            private const string Title = "The type is not a schema shape";
            // The {0} placeholder is either empty, or " of item <path>" naming the offending tuple item.
            private const string Format = "Type{0} is neither a PipelineColumn nor a ValueTuple.";
            internal const string Description =
                "Within statically typed pipeline elements of ML.NET, the shape of the schema is determined by a type. " +
                "A valid type is either an instance of one of the PipelineColumn subclasses (e.g., Scalar<bool> " +
                "or something like that), or a ValueTuple containing only valid types. (So, ValueTuples containing " +
                "other value tuples are fine, so long as they terminate in a PipelineColumn subclass.)";

            // Descriptors are immutable once built, so the field holding one should not be reassignable.
            internal static readonly DiagnosticDescriptor Rule =
                new DiagnosticDescriptor(Id, Title, Format, Category,
                    DiagnosticSeverity.Error, isEnabledByDefault: true, description: Description);
        }

        /// <summary>
        /// Diagnostic reported when the offending type is a type parameter that is neither marked with
        /// the shape attribute nor constrained to a valid type.
        /// </summary>
        internal static class ShapeParameterDiagnostic
        {
            private const string Category = "Type Check";
            public const string Id = "MSML_TypeParameterShouldBeSchemaShape";
            private const string Title = "The type is not a schema shape";
            // The {0} placeholder is the name of the offending type parameter.
            private const string Format = "Type parameter {0} is not marked with [IsShape] or appropriate type constraints.";
            internal const string Description = ShapeDiagnostic.Description + " " +
                "If using type parameters when interacting with the statically typed pipelines, the type parameter ought to be " +
                "constrained in such a way that it is known to be a valid schema shape, either by applying the [IsShape] " +
                "attribute or by having type constraints to indicate that it is valid, e.g., constraining the type to " +
                "descend from PipelineColumn.";

            // Descriptors are immutable once built, so the field holding one should not be reassignable.
            internal static readonly DiagnosticDescriptor Rule =
                new DiagnosticDescriptor(Id, Title, Format, Category,
                    DiagnosticSeverity.Error, isEnabledByDefault: true, description: Description);
        }

        // Metadata names of the two types this analyzer keys off of.
        private const string AttributeName = "Microsoft.ML.Data.StaticPipe.IsShapeAttribute";
        private const string LeafTypeName = "Microsoft.ML.Data.StaticPipe.Runtime.PipelineColumn";

        /// <summary>
        /// The two diagnostics this analyzer can produce.
        /// </summary>
        public override ImmutableArray<DiagnosticDescriptor> SupportedDiagnostics =>
            ImmutableArray.Create(ShapeDiagnostic.Rule, ShapeParameterDiagnostic.Rule);

        /// <summary>
        /// Registers <see cref="Analyze"/> to run once per semantic model.
        /// </summary>
        public override void Initialize(AnalysisContext context)
        {
            context.RegisterSemanticModelAction(Analyze);
        }

        /// <summary>
        /// Walks every invocation expression in the model's syntax tree and reports a diagnostic for each
        /// attributed type parameter whose type argument fails the shape check.
        /// </summary>
        /// <param name="context">The semantic model context supplied by the analyzer driver.</param>
        private static void Analyze(SemanticModelAnalysisContext context)
        {
            // We start with the model, then do the method invocations.
            // We could have phrased it as RegisterSyntaxNodeAction(Analyze, SyntaxKind.InvocationExpression),
            // but this seemed more inefficient since getting the model and fetching the type symbols every
            // single time seems to incur significant cost. The following invocation is somewhat more awkward
            // since we must iterate over the invocation syntaxes ourselves, but this seems to be worthwhile.
            var model = context.SemanticModel;
            var comp = model.Compilation;
            // Flow the driver's token into the Roslyn queries so that a cancelled analysis stops promptly.
            var cancel = context.CancellationToken;

            // Get the symbols of the key types we are analyzing. If we can't find any of them there is
            // no point in going further.
            var attrType = comp.GetTypeByMetadataName(AttributeName);
            if (attrType == null)
                return;
            var leafType = comp.GetTypeByMetadataName(LeafTypeName);
            if (leafType == null)
                return;

            // This internal helper method recursively determines whether a type is a valid shape.
            // On failure, problematicType is the offending type and path is the dotted chain of tuple
            // item names leading to it (null when the offending type is not nested inside a tuple item).
            bool CheckType(ITypeSymbol type, out string path, out ITypeSymbol problematicType)
            {
                if (type.TypeKind == TypeKind.TypeParameter)
                {
                    var typeParam = (ITypeParameterSymbol)type;
                    path = null;
                    problematicType = null;
                    // Does the type parameter have the attribute that triggers a check?
                    if (type.GetAttributes().Any(attr => attr.AttributeClass == attrType))
                        return true;
                    // Are any of the declared constraint types OK? The failure details of the
                    // individual constraints are of no interest here, hence the discards.
                    if (typeParam.ConstraintTypes.Any(ct => CheckType(ct, out _, out _)))
                        return true;
                    // Well, probably not good then. Let's call it a day.
                    problematicType = typeParam;
                    return false;
                }
                else if (type.IsTupleType)
                {
                    INamedTypeSymbol nameType = (INamedTypeSymbol)type;
                    var tupleElems = nameType.TupleElements;

                    for (int i = 0; i < tupleElems.Length; ++i)
                    {
                        var e = tupleElems[i];
                        if (!CheckType(e.Type, out string innerPath, out problematicType))
                        {
                            // Prefix the failing item's name to whatever path the recursion produced.
                            path = e.Name ?? $"Item{i + 1}";
                            if (innerPath != null)
                                path += "." + innerPath;
                            return false;
                        }
                    }
                    path = null;
                    problematicType = null;
                    return true;
                }
                else
                {
                    // Any other type is valid only if the leaf type appears in its base type chain.
                    for (var rt = type; rt != null; rt = rt.BaseType)
                    {
                        if (rt == leafType)
                        {
                            path = null;
                            problematicType = null;
                            return true;
                        }
                    }
                    path = null;
                    problematicType = type;
                    return false;
                }
            }

            foreach (var invocation in model.SyntaxTree.GetRoot(cancel).DescendantNodes().OfType<InvocationExpressionSyntax>())
            {
                var symbolInfo = model.GetSymbolInfo(invocation, cancel);
                if (!(symbolInfo.Symbol is IMethodSymbol methodSymbol))
                {
                    // Should we perhaps skip when there is a method resolution failure? This is often but not always a sign of another problem.
                    if (symbolInfo.CandidateReason != CandidateReason.OverloadResolutionFailure || symbolInfo.CandidateSymbols.Length == 0)
                        continue;
                    // Fall back to the first candidate of the failed overload resolution.
                    methodSymbol = symbolInfo.CandidateSymbols[0] as IMethodSymbol;
                    if (methodSymbol == null)
                        continue;
                }
                // Analysis only applies to generic methods.
                if (!methodSymbol.IsGenericMethod)
                    continue;
                // Scan the type parameters for one that has our target attribute.
                for (int i = 0; i < methodSymbol.TypeParameters.Length; ++i)
                {
                    var par = methodSymbol.TypeParameters[i];
                    var attr = par.GetAttributes();
                    if (attr.Length == 0)
                        continue;
                    if (!attr.Any(a => a.AttributeClass == attrType))
                        continue;
                    // We've found it. Check the type argument to ensure it is of the appropriate type.
                    var p = methodSymbol.TypeArguments[i];
                    if (CheckType(p, out string path, out ITypeSymbol problematicType))
                        continue;

                    if (problematicType.Kind == SymbolKind.TypeParameter)
                    {
                        var diagnostic = Diagnostic.Create(ShapeParameterDiagnostic.Rule, invocation.GetLocation(), problematicType.Name);
                        context.ReportDiagnostic(diagnostic);
                    }
                    else
                    {
                        path = path == null ? "" : " of item " + path;
                        var diagnostic = Diagnostic.Create(ShapeDiagnostic.Rule, invocation.GetLocation(), path);
                        context.ReportDiagnostic(diagnostic);
                    }
                }
            }
        }
    }
}
2 changes: 1 addition & 1 deletion src/Microsoft.ML.Core/Data/DataKind.cs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ public enum DataKind : byte
Num = R4,

TX = 11,
#pragma warning disable MSML_GeneralName // The data kind enum has its own logic, independnet of C# naming conventions.
#pragma warning disable MSML_GeneralName // The data kind enum has its own logic, independent of C# naming conventions.
TXT = TX,
Text = TX,

Expand Down
5 changes: 2 additions & 3 deletions src/Microsoft.ML.Core/Data/IEstimator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -192,11 +192,10 @@ public interface IDataReader<in TSource>
public interface IDataReaderEstimator<in TSource, out TReader>
where TReader : IDataReader<TSource>
{
// REVIEW: you could consider the transformer to take a different <typeparamref name="TSource"/>, but we don't have such components
// yet, so why complicate matters?
/// <summary>
/// Train and return a data reader.
///
/// REVIEW: you could consider the transformer to take a different <typeparamref name="TSource"/>, but we don't have such components
/// yet, so why complicate matters?
/// </summary>
TReader Fit(TSource input);

Expand Down
18 changes: 18 additions & 0 deletions src/Microsoft.ML.Core/Utilities/Utils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1049,6 +1049,24 @@ public static void MarshalActionInvoke<TArg1>(Action<TArg1> act, Type genArg, TA
meth.Invoke(act.Target, new object[] { arg1 });
}

/// <summary>
/// Two-argument counterpart of <see cref="MarshalActionInvoke(Action, Type)"/>: obtains the method to call
/// from <paramref name="genArg"/> and <paramref name="act"/>, then invokes it on the target of
/// <paramref name="act"/>, passing <paramref name="arg1"/> and <paramref name="arg2"/>.
/// </summary>
public static void MarshalActionInvoke<TArg1, TArg2>(Action<TArg1, TArg2> act, Type genArg, TArg1 arg1, TArg2 arg2)
{
    var method = MarshalActionInvokeCheckAndCreate(genArg, act);
    // Reflection-based invocation takes its arguments as an object array.
    var invokeArgs = new object[] { arg1, arg2 };
    method.Invoke(act.Target, invokeArgs);
}

/// <summary>
/// Three-argument counterpart of <see cref="MarshalActionInvoke(Action, Type)"/>: obtains the method to call
/// from <paramref name="genArg"/> and <paramref name="act"/>, then invokes it on the target of
/// <paramref name="act"/>, passing <paramref name="arg1"/>, <paramref name="arg2"/> and <paramref name="arg3"/>.
/// </summary>
public static void MarshalActionInvoke<TArg1, TArg2, TArg3>(Action<TArg1, TArg2, TArg3> act, Type genArg, TArg1 arg1, TArg2 arg2, TArg3 arg3)
{
    var method = MarshalActionInvokeCheckAndCreate(genArg, act);
    // Reflection-based invocation takes its arguments as an object array.
    var invokeArgs = new object[] { arg1, arg2, arg3 };
    method.Invoke(act.Target, invokeArgs);
}

public static string GetDescription(this Enum value)
{
Type type = value.GetType();
Expand Down
Loading