-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Remove ISchema in TextLoader.cs and TextLoaderCursor.cs #2140
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -520,26 +520,25 @@ public static ColInfo Create(string name, PrimitiveType itemType, Segment[] segs | |
} | ||
} | ||
|
||
private sealed class Bindings : ISchema | ||
private sealed class Bindings | ||
{ | ||
/// <summary> | ||
/// <see cref="Infos"/>[i] stores the i-th column's name and type loaded from the input text file. | ||
/// </summary> | ||
public readonly ColInfo[] Infos; | ||
public readonly Dictionary<string, int> NameToInfoIndex; | ||
/// <summary> | ||
/// <see cref="_slotNames"/>[i] stores the i-th column's slot names metadata (empty if the column has none). | ||
/// </summary> | ||
private readonly VBuffer<ReadOnlyMemory<char>>[] _slotNames; | ||
// Empty iff either header+ not set in args, or if no header present, or upon load | ||
// there was no header stored in the model. | ||
/// <summary> | ||
/// Empty iff either header+ not set in args, or if no header present, or upon load | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I might prefer something like an actual reference using a
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Now we have
In reply to: 247682367 [](ancestors = 247682367) |
||
/// there was no header stored in the model. | ||
/// </summary> | ||
private readonly ReadOnlyMemory<char> _header; | ||
|
||
private readonly MetadataUtils.MetadataGetter<VBuffer<ReadOnlyMemory<char>>> _getSlotNames; | ||
|
||
public Schema AsSchema { get; } | ||
|
||
private Bindings() | ||
{ | ||
_getSlotNames = GetSlotNames; | ||
} | ||
public Schema OutputSchema { get; } | ||
|
||
public Bindings(TextLoader parent, Column[] cols, IMultiStreamSource headerFile, IMultiStreamSource dataSample) | ||
: this() | ||
{ | ||
Contracts.AssertNonEmpty(cols); | ||
Contracts.AssertValueOrNull(headerFile); | ||
|
@@ -590,14 +589,17 @@ public Bindings(TextLoader parent, Column[] cols, IMultiStreamSource headerFile, | |
int isegOther = -1; | ||
|
||
Infos = new ColInfo[cols.Length]; | ||
NameToInfoIndex = new Dictionary<string, int>(Infos.Length); | ||
|
||
// This dictionary is used only for detecting duplicated column names specified by user. | ||
var nameToInfoIndex = new Dictionary<string, int>(Infos.Length); | ||
|
||
for (int iinfo = 0; iinfo < Infos.Length; iinfo++) | ||
{ | ||
var col = cols[iinfo]; | ||
|
||
ch.CheckNonWhiteSpace(col.Name, nameof(col.Name)); | ||
string name = col.Name.Trim(); | ||
if (iinfo == NameToInfoIndex.Count && NameToInfoIndex.ContainsKey(name)) | ||
if (iinfo == nameToInfoIndex.Count && nameToInfoIndex.ContainsKey(name)) | ||
ch.Info("Duplicate name(s) specified - later columns will hide earlier ones"); | ||
|
||
PrimitiveType itemType; | ||
|
@@ -669,7 +671,7 @@ public Bindings(TextLoader parent, Column[] cols, IMultiStreamSource headerFile, | |
if (iinfoOther != iinfo) | ||
Infos[iinfo] = ColInfo.Create(name, itemType, segs, true); | ||
|
||
NameToInfoIndex[name] = iinfo; | ||
nameToInfoIndex[name] = iinfo; | ||
} | ||
|
||
// Note that segsOther[isegOther] is not a real segment to be included. | ||
|
@@ -734,11 +736,10 @@ public Bindings(TextLoader parent, Column[] cols, IMultiStreamSource headerFile, | |
if (!_header.IsEmpty) | ||
Parser.ParseSlotNames(parent, _header, Infos, _slotNames); | ||
} | ||
AsSchema = Schema.Create(this); | ||
OutputSchema = ComputeOutputSchema(); | ||
} | ||
|
||
public Bindings(ModelLoadContext ctx, TextLoader parent) | ||
: this() | ||
{ | ||
Contracts.AssertValue(ctx); | ||
|
||
|
@@ -760,7 +761,9 @@ public Bindings(ModelLoadContext ctx, TextLoader parent) | |
int cinfo = ctx.Reader.ReadInt32(); | ||
Contracts.CheckDecode(cinfo > 0); | ||
Infos = new ColInfo[cinfo]; | ||
NameToInfoIndex = new Dictionary<string, int>(Infos.Length); | ||
|
||
// This dictionary is used only for detecting duplicated column names specified by user. | ||
var nameToInfoIndex = new Dictionary<string, int>(Infos.Length); | ||
|
||
for (int iinfo = 0; iinfo < cinfo; iinfo++) | ||
{ | ||
|
@@ -808,7 +811,7 @@ public Bindings(ModelLoadContext ctx, TextLoader parent) | |
// of multiple variable segments (since those segments will overlap and overlapping | ||
// segments are illegal). | ||
Infos[iinfo] = ColInfo.Create(name, itemType, segs, false); | ||
NameToInfoIndex[name] = iinfo; | ||
nameToInfoIndex[name] = iinfo; | ||
} | ||
|
||
_slotNames = new VBuffer<ReadOnlyMemory<char>>[Infos.Length]; | ||
|
@@ -818,7 +821,7 @@ public Bindings(ModelLoadContext ctx, TextLoader parent) | |
if (!string.IsNullOrEmpty(result)) | ||
Parser.ParseSlotNames(parent, _header = result.AsMemory(), Infos, _slotNames); | ||
|
||
AsSchema = Schema.Create(this); | ||
OutputSchema = ComputeOutputSchema(); | ||
} | ||
|
||
public void Save(ModelSaveContext ctx) | ||
|
@@ -869,86 +872,29 @@ public void Save(ModelSaveContext ctx) | |
ctx.SaveTextStream("Header.txt", writer => writer.WriteLine(_header.ToString())); | ||
} | ||
|
||
public int ColumnCount | ||
{ | ||
get { return Infos.Length; } | ||
} | ||
|
||
public bool TryGetColumnIndex(string name, out int col) | ||
{ | ||
Contracts.CheckValueOrNull(name); | ||
return NameToInfoIndex.TryGetValue(name, out col); | ||
} | ||
|
||
public string GetColumnName(int col) | ||
{ | ||
Contracts.CheckParam(0 <= col && col < Infos.Length, nameof(col)); | ||
return Infos[col].Name; | ||
} | ||
|
||
public ColumnType GetColumnType(int col) | ||
{ | ||
Contracts.CheckParam(0 <= col && col < Infos.Length, nameof(col)); | ||
return Infos[col].ColType; | ||
} | ||
|
||
public IEnumerable<KeyValuePair<string, ColumnType>> GetMetadataTypes(int col) | ||
{ | ||
Contracts.CheckParam(0 <= col && col < ColumnCount, nameof(col)); | ||
|
||
var names = _slotNames[col]; | ||
if (names.Length > 0) | ||
{ | ||
Contracts.Assert(Infos[col].ColType.VectorSize == names.Length); | ||
yield return MetadataUtils.GetSlotNamesPair(names.Length); | ||
} | ||
} | ||
|
||
public ColumnType GetMetadataTypeOrNull(string kind, int col) | ||
{ | ||
Contracts.CheckNonEmpty(kind, nameof(kind)); | ||
Contracts.CheckParam(0 <= col && col < ColumnCount, nameof(col)); | ||
|
||
switch (kind) | ||
{ | ||
case MetadataUtils.Kinds.SlotNames: | ||
var names = _slotNames[col]; | ||
if (names.Length == 0) | ||
return null; | ||
Contracts.Assert(Infos[col].ColType.VectorSize == names.Length); | ||
return MetadataUtils.GetNamesType(names.Length); | ||
|
||
default: | ||
return null; | ||
} | ||
} | ||
|
||
public void GetMetadata<TValue>(string kind, int col, ref TValue value) | ||
private Schema ComputeOutputSchema() | ||
{ | ||
Contracts.CheckNonEmpty(kind, nameof(kind)); | ||
Contracts.CheckParam(0 <= col && col < ColumnCount, nameof(col)); | ||
var schemaBuilder = new SchemaBuilder(); | ||
|
||
switch (kind) | ||
// Iterate through all loaded columns. The index i indicates the i-th column loaded. | ||
for (int i = 0; i < Infos.Length; ++i) | ||
{ | ||
case MetadataUtils.Kinds.SlotNames: | ||
_getSlotNames.Marshal(col, ref value); | ||
return; | ||
|
||
default: | ||
throw MetadataUtils.ExceptGetMetadata(); | ||
var info = Infos[i]; | ||
// Retrieve the only possible metadata of this class. | ||
var names = _slotNames[i]; | ||
if (names.Length > 0) | ||
{ | ||
// Slot names are present, so attach them to the column as metadata. | ||
var metadataBuilder = new MetadataBuilder(); | ||
metadataBuilder.AddSlotNames(names.Length, (ref VBuffer<ReadOnlyMemory<char>> value) => names.CopyTo(ref value)); | ||
schemaBuilder.AddColumn(info.Name, info.ColType, metadataBuilder.GetMetadata()); | ||
} | ||
else | ||
// Slot names are empty, so add the column without metadata. | ||
schemaBuilder.AddColumn(info.Name, info.ColType); | ||
} | ||
} | ||
|
||
private void GetSlotNames(int col, ref VBuffer<ReadOnlyMemory<char>> dst) | ||
{ | ||
Contracts.Assert(0 <= col && col < ColumnCount); | ||
|
||
var names = _slotNames[col]; | ||
if (names.Length == 0) | ||
throw MetadataUtils.ExceptGetMetadata(); | ||
|
||
Contracts.Assert(Infos[col].ColType.VectorSize == names.Length); | ||
names.CopyTo(ref dst); | ||
return schemaBuilder.GetSchema(); | ||
} | ||
} | ||
|
||
|
@@ -1355,7 +1301,7 @@ public void Save(ModelSaveContext ctx) | |
_bindings.Save(ctx); | ||
} | ||
|
||
public Schema GetOutputSchema() => _bindings.AsSchema; | ||
public Schema GetOutputSchema() => _bindings.OutputSchema; | ||
|
||
public IDataView Read(IMultiStreamSource source) => new BoundLoader(this, source); | ||
|
||
|
@@ -1455,21 +1401,21 @@ public BoundLoader(TextLoader reader, IMultiStreamSource files) | |
// REVIEW: Should we try to support shuffling? | ||
public bool CanShuffle => false; | ||
|
||
public Schema Schema => _reader._bindings.AsSchema; | ||
public Schema Schema => _reader._bindings.OutputSchema; | ||
|
||
public RowCursor GetRowCursor(Func<int, bool> predicate, Random rand = null) | ||
{ | ||
_host.CheckValue(predicate, nameof(predicate)); | ||
_host.CheckValueOrNull(rand); | ||
var active = Utils.BuildArray(_reader._bindings.ColumnCount, predicate); | ||
var active = Utils.BuildArray(_reader._bindings.OutputSchema.Count, predicate); | ||
return Cursor.Create(_reader, _files, active); | ||
} | ||
|
||
public RowCursor[] GetRowCursorSet(Func<int, bool> predicate, int n, Random rand = null) | ||
{ | ||
_host.CheckValue(predicate, nameof(predicate)); | ||
_host.CheckValueOrNull(rand); | ||
var active = Utils.BuildArray(_reader._bindings.ColumnCount, predicate); | ||
var active = Utils.BuildArray(_reader._bindings.OutputSchema.Count, predicate); | ||
return Cursor.CreateSet(_reader, _files, active, n); | ||
} | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not a big deal perhaps since this is not a public comment, but what is a "name type"? #Resolved
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`name type` is changed to `name and type`. Thanks.
In reply to: 247681984 [](ancestors = 247681984)