Skip to content

Commit 24b59e6

Browse files
committed
Remove IRowCursorConsolidator.
1 parent 41d3196 commit 24b59e6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+252
-372
lines changed

src/Microsoft.ML.Core/Data/IDataView.cs

Lines changed: 42 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ internal interface ISchema
6363

6464
/// <summary>
6565
/// The input and output of Query Operators (Transforms). This is the fundamental data pipeline
66-
/// type, comparable to IEnumerable for LINQ.
66+
/// type, comparable to <see cref="IEnumerable{T}"/> for LINQ.
6767
/// </summary>
6868
public interface IDataView
6969
{
@@ -92,7 +92,7 @@ public interface IDataView
9292
RowCursor GetRowCursor(Func<int, bool> needCol, Random rand = null);
9393

9494
/// <summary>
95-
/// This constructs a set of parallel batch cursors. The value n is a recommended limit
95+
/// This constructs a set of parallel batch cursors. The value <paramref name="n"/> is a recommended limit
9696
/// on cardinality. If <paramref name="n"/> is non-positive, this indicates that the caller
9797
/// has no recommendation, and the implementation should have some default behavior to cover
9898
/// this case. Note that this is strictly a recommendation: it is entirely possible that
@@ -104,16 +104,18 @@ public interface IDataView
104104
/// but all rows should be returned by exactly one of the cursors returned from this cursor.
105105
/// The cursors can have their values reconciled downstream through the use of the
106106
/// <see cref="Row.Batch"/> property.
107+
///
108+
/// The typical usage pattern is that a set of cursors is requested, each of them is then
109+
/// given to a set of working threads that consume from them independently while, ultimately,
110+
/// the results are collated by exploiting the ordering of the <see cref="Row.Batch"/>
111+
/// property described above. More typical scenarios will be content with pulling from the single
112+
/// serial cursor of <see cref="GetRowCursor(Func{int, bool}, Random)"/>.
107113
/// </summary>
108-
/// <param name="consolidator">This is an object that can be used to reconcile the
109-
/// returned array of cursors. When the array of cursors is of length 1, it is legal,
110-
/// indeed expected, that this parameter should be null.</param>
111114
/// <param name="needCol">The predicate, where a column is active if this returns true.</param>
112115
/// <param name="n">The suggested degree of parallelism.</param>
113116
/// <param name="rand">An optional <see cref="Random"/> instance; may be null.</param>
114117
/// <returns></returns>
115-
RowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolidator,
116-
Func<int, bool> needCol, int n, Random rand = null);
118+
RowCursor[] GetRowCursorSet(Func<int, bool> needCol, int n, Random rand = null);
117119

118120
/// <summary>
119121
/// Gets an instance of Schema.
@@ -122,20 +124,8 @@ RowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolidator,
122124
}
123125

124126
/// <summary>
125-
/// This is used to consolidate parallel cursors into a single cursor. The object that determines
126-
/// the number of cursors and splits the row "stream" provides the consolidator object.
127-
/// </summary>
128-
public interface IRowCursorConsolidator
129-
{
130-
/// <summary>
131-
/// Create a consolidated cursor from the given parallel cursor set.
132-
/// </summary>
133-
RowCursor CreateCursor(IChannelProvider provider, RowCursor[] inputs);
134-
}
135-
136-
/// <summary>
137-
/// Delegate type to get a value. This can used for efficient access to data in an IRow
138-
/// or IRowCursor.
127+
/// Delegate type to get a value. This can be used for efficient access to data in a <see cref="Row"/>
128+
/// or <see cref="RowCursor"/>.
139129
/// </summary>
140130
public delegate void ValueGetter<TValue>(ref TValue value);
141131

@@ -146,43 +136,50 @@ public interface IRowCursorConsolidator
146136
public abstract class Row : IDisposable
147137
{
148138
/// <summary>
149-
/// This is incremented when the underlying contents changes, giving clients a way to detect change.
150-
/// Generally it's -1 when the object is in an invalid state. In particular, for an <see cref="RowCursor"/>, this is -1
151-
/// when the <see cref="RowCursor.State"/> is <see cref="CursorState.NotStarted"/> or <see cref="CursorState.Done"/>.
139+
/// This is incremented when the underlying contents changes, giving clients a way to detect change. Generally
140+
/// it's -1 when the object is in an invalid state. In particular, for a <see cref="RowCursor"/>, this is -1
141+
/// when the <see cref="RowCursor.State"/> is <see cref="CursorState.NotStarted"/> or <see
142+
/// cref="CursorState.Done"/>.
152143
///
153-
/// Note that this position is not position within the underlying data, but position of this cursor only.
154-
/// If one, for example, opened a set of parallel streaming cursors, or a shuffled cursor, each such cursor's
155-
/// first valid entry would always have position 0.
144+
/// Note that this position is not position within the underlying data, but position of this cursor only. If
145+
/// one, for example, opened a set of parallel streaming cursors, or a shuffled cursor, each such cursor's first
146+
/// valid entry would always have position 0.
156147
/// </summary>
157148
public abstract long Position { get; }
158149

159150
/// <summary>
160-
/// This provides a means for reconciling multiple streams of counted things. Generally, in each stream,
161-
/// batch numbers should be non-decreasing. Furthermore, any given batch number should only appear in one
162-
/// of the streams. Order is determined by batch number. The reconciler ensures that each stream (that is
163-
/// still active) has at least one item available, then takes the item with the smallest batch number.
151+
/// This provides a means for reconciling multiple rows that have been produced generally from
152+
/// <see cref="IDataView.GetRowCursorSet(Func{int, bool}, int, Random)"/>. When getting a set, there is a need
153+
/// to, while allowing parallel processing to proceed, always have an aim that the original order should be
154+
/// recoverable. So: for any cursor implementation, batch numbers should be non-decreasing. Furthermore, any
155+
/// given batch number should only appear in one of the cursors as returned by
156+
/// <see cref="IDataView.GetRowCursorSet(Func{int, bool}, int, Random)"/>. In this way, order is determined by
157+
/// batch number. An operation that reconciles these cursors to produce a consistent single cursoring could do
158+
/// so by drawing from the single cursor, among all cursors in the set, that has the smallest batch number
159+
/// available.
164160
///
165-
/// Note that there is no suggestion that the batches for a particular entry will be consistent from
166-
/// cursoring to cursoring, except for the consistency in resulting in the same overall ordering. The same
167-
/// entry could have different batch numbers from one cursoring to another. There is also no requirement
168-
/// that any given batch number must appear, at all.
161+
/// Note that there is no suggestion that the batches for a particular entry will be consistent from cursoring
162+
/// to cursoring, except for the consistency in resulting in the same overall ordering. The same entry could
163+
/// have different batch numbers from one cursoring to another. There is also no requirement that any given
164+
/// batch number must appear, at all. It is merely a mechanism for recovering ordering from a possibly arbitrary
165+
/// partitioning of the data. It also follows from this, of course, that considering the batch to be a property
166+
/// of the data is completely invalid.
169167
/// </summary>
170168
public abstract long Batch { get; }
171169

172170
/// <summary>
173171
/// A getter for a 128-bit ID value. It is common for objects to serve multiple <see cref="Row"/>
174172
/// instances to iterate over what is supposed to be the same data, for example, in a <see cref="IDataView"/>
175-
/// a cursor set will produce the same data as a serial cursor, just partitioned, and a shuffled cursor
176-
/// will produce the same data as a serial cursor or any other shuffled cursor, only shuffled. The ID
177-
/// exists for applications that need to reconcile which entry is actually which. Ideally this ID should
178-
/// be unique, but for practical reasons, it suffices if collisions are simply extremely improbable.
173+
/// a cursor set will produce the same data as a serial cursor, just partitioned, and a shuffled cursor will
174+
/// produce the same data as a serial cursor or any other shuffled cursor, only shuffled. The ID exists for
175+
/// applications that need to reconcile which entry is actually which. Ideally this ID should be unique, but for
176+
/// practical reasons, it suffices if collisions are simply extremely improbable.
179177
///
180-
/// Note that this ID, while it must be consistent for multiple streams according to the semantics
181-
/// above, is not considered part of the data per se. So, to take the example of a data view specifically,
182-
/// a single data view must render consistent IDs across all cursorings, but there is no suggestion at
183-
/// all that if the "same" data were presented in a different data view (as by, say, being transformed,
184-
/// cached, saved, or whatever), that the IDs between the two different data views would have any
185-
/// discernable relationship.</summary>
178+
/// Note that this ID, while it must be consistent for multiple streams according to the semantics above, is not
179+
/// considered part of the data per se. So, to take the example of a data view specifically, a single data view
180+
/// must render consistent IDs across all cursorings, but there is no suggestion at all that if the "same" data
181+
/// were presented in a different data view (as by, say, being transformed, cached, saved, or whatever), that
182+
/// the IDs between the two different data views would have any discernable relationship.</summary>
186183
public abstract ValueGetter<RowId> GetIdGetter();
187184

188185
/// <summary>

0 commit comments

Comments
 (0)