-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Added convenience constructor for set of transforms (#380). #405
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
bfac765
20d9194
44e8e8d
d3c627e
5a34a16
1566ed0
e9b1023
05637d0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -527,6 +527,28 @@ private static VersionInfo GetVersionInfo() | |
|
||
public override ISchema Schema => _bindings; | ||
|
||
public ConcatTransform(IHostEnvironment env, IDataView input, string outputColumn, params string[] inputColumns) | ||
: base(env, RegistrationName, input) | ||
{ | ||
var cols = new Column[1]; | ||
cols[0] = new Column() | ||
{ | ||
Name = outputColumn, | ||
Source = inputColumns | ||
}; | ||
|
||
var args = new Arguments() | ||
{ | ||
Column = cols | ||
}; | ||
Host.CheckValue(args, nameof(args)); | ||
Host.CheckUserArg(Utils.Size(args.Column) > 0, nameof(args.Column)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I understand why we do it other constructor, because we get them as parameters, and you need to validate them, but here, you create args and it content specifically. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. make sense! #Resolved |
||
for (int i = 0; i < args.Column.Length; i++) | ||
Host.CheckUserArg(Utils.Size(args.Column[i].Source) > 0, nameof(args.Column)); | ||
|
||
_bindings = new Bindings(args.Column, null, Source.Schema); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
what's the point to create args, if you only use it property? #Resolved |
||
} | ||
|
||
/// <summary> | ||
/// Public constructor corresponding to SignatureDataTransform. | ||
/// </summary> | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -64,6 +64,32 @@ private static VersionInfo GetVersionInfo() | |
|
||
private const string RegistrationName = "CopyColumns"; | ||
|
||
public static CopyColumnsTransform Create(IHostEnvironment env, IDataView input, params string[] inputColumns) | ||
{ | ||
var inputOutputColumns = new(string inputColumn, string outputColumn)[inputColumns.Length]; | ||
for (int i = 0; i < inputColumns.Length; i++) | ||
{ | ||
inputOutputColumns[i].inputColumn = inputOutputColumns[i].outputColumn = inputColumns[i]; | ||
} | ||
return Create(env, input, inputOutputColumns); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The only practical use of the copy transform is to, effectively, rename columns. Therefore, handling the case where name of the output is identical to that of the input seems to serve no purpose. #Closed |
||
} | ||
|
||
public static CopyColumnsTransform Create(IHostEnvironment env, IDataView input, params (string inputColumn, string outputColumn)[] inputOutputColumns) | ||
{ | ||
Column[] cols = new Column[inputOutputColumns.Length]; | ||
for (int i = 0; i < inputOutputColumns.Length; i++) | ||
{ | ||
cols[i] = new Column(); | ||
cols[i].Source = inputOutputColumns[i].inputColumn; | ||
cols[i].Name = inputOutputColumns[i].outputColumn; | ||
} | ||
var args = new Arguments() | ||
{ | ||
Column = cols | ||
}; | ||
return new CopyColumnsTransform(env,args,input); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Blank lines with trailing whitespace on them is evil. #Resolved |
||
|
||
public CopyColumnsTransform(IHostEnvironment env, Arguments args, IDataView input) | ||
: base(env, RegistrationName, env.CheckRef(args, nameof(args)).Column, input, null) | ||
{ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -237,6 +237,16 @@ private static VersionInfo GetVersionInfo() | |
private const string DropRegistrationName = "DropColumns"; | ||
private const string KeepRegistrationName = "KeepColumns"; | ||
|
||
public DropColumnsTransform CreateColumnDroper(IHostEnvironment env, IDataView input, params string[] columnsToDrop) | ||
{ | ||
return new DropColumnsTransform(env, new Arguments() { Column = columnsToDrop }, input); | ||
} | ||
|
||
public static DropColumnsTransform CreateColumnSelector(IHostEnvironment env, IDataView input, params string[] columnsToKeep) | ||
{ | ||
return new DropColumnsTransform(env, new KeepArguments() { Column = columnsToKeep }, input); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. In order to keep columns, we do The fact that we do so in the underlying C# code is just because previously this little bit of confusion didn't matter, since we weren't advocating that people use this directly. However since we now are, we ought to adjust accordingly. For that reason, I wonder if we can make the relationship explicit... public static class KeepColumnsTransform
{
public IDataTransform Create(IHostEnvironment env, IDataView input, params string[] columnsToKeep)
=> new DropColumnsTransform(env, new KeepArguments() { Column = columnsToKeep }, input);
} or something along these lines. Once that ambiguity is resolved, we can change "drop" creator back to a plain old constructor. #Closed |
||
|
||
/// <summary> | ||
/// Public constructor corresponding to SignatureDataTransform. | ||
/// </summary> | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -76,6 +76,12 @@ public BootstrapSampleTransform(IHostEnvironment env, Arguments args, IDataView | |
_poolSize = args.PoolSize; | ||
} | ||
|
||
public BootstrapSampleTransform(IHostEnvironment env, IDataView input, bool complement = false, uint? seed = null, bool shuffleInput = true, int poolSize = 1000) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
out of curiosity, why argument object is bad solution? why we want to flatten that object in set of parameters? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so that transform can be neatly instantiated on one line. Otherwise for every transform a relevant "Argument" object is needed to be created first which is also another option. In reply to: 197909492 [](ancestors = 197909492) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can see why it can be helpful in case if you have only Column object inside argument class, you add one more constructor which is nice to use. But in this particular case, i don't see any advantages of using this constructor, and it makes code maintainability much harder. I don't mind to have constructor which will take only Columns information and will use default parameters for internal argument class, but make sure to proper comment which indicates what if user needs more, it need to use Arguments class In reply to: 197912065 [](ancestors = 197912065,197909492) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. but again, feel free to bring heavy artillery (@TomFinley) In reply to: 197921908 [](ancestors = 197921908,197912065,197909492) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For this case, definitely constructor can be removed. But I have a concern regarding if user has to instantiate an Another option could be public BootstrapSampleTransform(IHostEnvironment env, IDataView input)
: this(env, new Arguments() { }, input)
{
} Yes, for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. TBH I actually rather liked the constructor as it stands -- I do not consider the proposed replacement @Ivanidzo4ka , is the concern with having a convenience constructor with default arguments, that the defaults could diverge? If that is the case, would perhaps prefer constants (either directly as If the point is rather that there should be exactly one way to do this, I'm not entirely certain I agree -- the arguments object may need to exist for the command line parser and entry-points which (being reflection driven) require total uniformity in how things are instantiated, but this bears very little resemblance to how someone, if trying to write a .NET API, would actually write code. If it is a small "shim" as it is now over the arguments, I'm not sure I see the harm, and I see a considerable benefit. #Resolved There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I just don't see much reason to have constructor which duplicate other constructor with one difference: instead of one object we accept all the fields of this object. class(main_option) In reply to: 197983331 [](ancestors = 197983331) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I disagree. Where they can be applied, the new idioms are clearly easier to use and understand. In reply to: 198273557 [](ancestors = 198273557,197983331) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is based on the convenience you get in calling constructor. In my point of view, calling new BootstrapSampleTransform(env, input, Complement, Seed, ShuffleInput, PoolSize); is more convenient and elegant than calling new BootstrapSampleTransform(env, input, new Arguments() { Complement = complement, Seed = seed, ShuffleInput = shuffleInput, PoolSize = poolSize }); In reply to: 198273557 [](ancestors = 198273557,197983331) |
||
: this(env, new Arguments() { Complement = complement, Seed = seed, ShuffleInput = shuffleInput, PoolSize = poolSize }, input) | ||
{ | ||
|
||
} | ||
|
||
private BootstrapSampleTransform(IHost host, ModelLoadContext ctx, IDataView input) | ||
: base(host, input) | ||
{ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -237,6 +237,37 @@ private static VersionInfo GetVersionInfo() | |
|
||
private readonly ColInfoEx[] _exes; | ||
|
||
public static IDataTransform CreateGlobalContrastNormalizer(IHostEnvironment env, IDataView input, params string[] inputColumns) | ||
{ | ||
var inputOutputColumns = new(string inputColumn, string outputColumn)[inputColumns.Length]; | ||
for (int i = 0; i < inputColumns.Length; i++) | ||
{ | ||
inputOutputColumns[i].inputColumn = inputOutputColumns[i].outputColumn = inputColumns[i]; | ||
} | ||
return CreateGlobalContrastNormalizer(env, input, inputOutputColumns); | ||
} | ||
|
||
public static IDataTransform CreateGlobalContrastNormalizer(IHostEnvironment env, IDataView input, params (string inputColumn, string outputColumn)[] inputOutputColumns) | ||
{ | ||
GcnColumn[] cols = new GcnColumn[inputOutputColumns.Length]; | ||
for (int i = 0; i < inputOutputColumns.Length; i++) | ||
{ | ||
cols[i] = new GcnColumn(); | ||
cols[i].Source = inputOutputColumns[i].inputColumn; | ||
cols[i].Name = inputOutputColumns[i].outputColumn; | ||
} | ||
var args = new GcnArguments() | ||
{ | ||
Column = cols | ||
}; | ||
return new LpNormNormalizerTransform(env, args, input); | ||
} | ||
|
||
public static IDataTransform CreateGlobalContrastNormalizer(IHostEnvironment env, IDataView input, GcnArguments args) | ||
{ | ||
return new LpNormNormalizerTransform(env, args, input); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm not certain I see the utility in this specific constructor. #Closed |
||
|
||
/// <summary> | ||
/// Public constructor corresponding to SignatureDataTransform. | ||
/// </summary> | ||
|
@@ -263,9 +294,40 @@ public LpNormNormalizerTransform(IHostEnvironment env, GcnArguments args, IDataV | |
SetMetadata(); | ||
} | ||
|
||
public static IDataTransform CreateLpNormNormalizer(IHostEnvironment env, IDataView input, params string[] inputColumns) | ||
{ | ||
var inputOutputColumns = new(string inputColumn, string outputColumn)[inputColumns.Length]; | ||
for (int i = 0; i < inputColumns.Length; i++) | ||
{ | ||
inputOutputColumns[i].inputColumn = inputOutputColumns[i].outputColumn = inputColumns[i]; | ||
} | ||
return CreateLpNormNormalizer(env, input, inputOutputColumns); | ||
} | ||
|
||
public static IDataTransform CreateLpNormNormalizer(IHostEnvironment env, IDataView input, params (string inputColumn, string outputColumn)[] inputOutputColumns) | ||
{ | ||
Column[] cols = new Column[inputOutputColumns.Length]; | ||
for (int i = 0; i < inputOutputColumns.Length; i++) | ||
{ | ||
cols[i] = new Column(); | ||
cols[i].Source = inputOutputColumns[i].inputColumn; | ||
cols[i].Name = inputOutputColumns[i].outputColumn; | ||
} | ||
var args = new Arguments() | ||
{ | ||
Column = cols | ||
}; | ||
return new LpNormNormalizerTransform(env, args, input); | ||
} | ||
|
||
public static IDataTransform CreateLpNormNormalizer(IHostEnvironment env, IDataView input, Arguments args) | ||
{ | ||
return new LpNormNormalizerTransform(env, args, input); | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This also seems to be of limited utility. #Closed |
||
|
||
public LpNormNormalizerTransform(IHostEnvironment env, Arguments args, IDataView input) | ||
: base(env, RegistrationName, env.CheckRef(args, nameof(args)).Column, | ||
input, TestIsFloatVector) | ||
: base(env, RegistrationName, env.CheckRef(args, nameof(args)).Column, | ||
input, TestIsFloatVector) | ||
{ | ||
Host.AssertNonEmpty(Infos); | ||
Host.Assert(Infos.Length == Utils.Size(args.Column)); | ||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This usage is inconsistent with your usage elsewhere, where the source is first, and the names of the output is second.
I would recommend actually making name of output first, and name of inputs second (as you have done here)... since I think the situation where you have multiple
params
to indicate input columns is more common. Indeed I cannot think of a single case where aparams
describes output columns, but I can think of instances where it describes input columns. #ClosedThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have renamed to "name" and "source" as well to be consistent with their naming in Argument object.
In reply to: 197985474 [](ancestors = 197985474)