@@ -63,7 +63,7 @@ internal interface ISchema
63
63
64
64
/// <summary>
65
65
/// The input and output of Query Operators (Transforms). This is the fundamental data pipeline
66
- /// type, comparable to IEnumerable for LINQ.
66
+ /// type, comparable to <see cref="IEnumerable{T}"/> for LINQ.
67
67
/// </summary>
68
68
public interface IDataView
69
69
{
@@ -92,28 +92,28 @@ public interface IDataView
92
92
RowCursor GetRowCursor ( Func < int , bool > needCol , Random rand = null ) ;
93
93
94
94
/// <summary>
95
- /// This constructs a set of parallel batch cursors. The value n is a recommended limit
96
- /// on cardinality. If <paramref name="n"/> is non-positive, this indicates that the caller
97
- /// has no recommendation, and the implementation should have some default behavior to cover
98
- /// this case. Note that this is strictly a recommendation: it is entirely possible that
99
- /// an implementation can return a different number of cursors.
95
+ /// This constructs a set of parallel batch cursors. The value <paramref name="n"/> is a recommended limit on
96
+ /// cardinality. If <paramref name="n"/> is non-positive, this indicates that the caller has no recommendation,
97
+ /// and the implementation should have some default behavior to cover this case. Note that this is strictly a
98
+ /// recommendation: it is entirely possible that an implementation can return a different number of cursors.
100
99
///
101
100
/// The cursors should return the same data as returned through
102
- /// <see cref="GetRowCursor(Func{int, bool}, Random)"/>, except partitioned: no two cursors
103
- /// should return the "same" row as would have been returned through the regular serial cursor,
104
- /// but all rows should be returned by exactly one of the cursors returned from this cursor.
105
- /// The cursors can have their values reconciled downstream through the use of the
106
- /// <see cref="Row.Batch"/> property.
101
+ /// <see cref="GetRowCursor(Func{int, bool}, Random)"/>, except partitioned: no two cursors should return the
102
+ /// "same" row as would have been returned through the regular serial cursor, but all rows should be returned by
103
+ /// exactly one of the cursors returned from this method. The cursors can have their values reconciled
104
+ /// downstream through the use of the <see cref="Row.Batch"/> property.
105
+ ///
106
+ /// The typical usage pattern is that a set of cursors is requested, each of them is then given to a set of
107
+ /// working threads that consume from them independently while, ultimately, the results are collated in
108
+ /// the end by exploiting the ordering of the <see cref="Row.Batch"/> property described above. More typical
109
+ /// scenarios will be content with pulling from the single serial cursor of
110
+ /// <see cref="GetRowCursor(Func{int, bool}, Random)"/>.
107
111
/// </summary>
108
- /// <param name="consolidator">This is an object that can be used to reconcile the
109
- /// returned array of cursors. When the array of cursors is of length 1, it is legal,
110
- /// indeed expected, that this parameter should be null.</param>
111
112
/// <param name="needCol">The predicate, where a column is active if this returns true.</param>
112
113
/// <param name="n">The suggested degree of parallelism.</param>
113
114
/// <param name="rand">An optional <see cref="Random"/> instance; defaults to <c>null</c>.</param>
114
115
/// <returns>The array of parallel cursors constructed for this data view.</returns>
115
- RowCursor [ ] GetRowCursorSet ( out IRowCursorConsolidator consolidator ,
116
- Func < int , bool > needCol , int n , Random rand = null ) ;
116
+ RowCursor [ ] GetRowCursorSet ( Func < int , bool > needCol , int n , Random rand = null ) ;
117
117
118
118
/// <summary>
119
119
/// Gets an instance of Schema.
@@ -122,20 +122,8 @@ RowCursor[] GetRowCursorSet(out IRowCursorConsolidator consolidator,
122
122
}
123
123
124
124
/// <summary>
125
- /// This is used to consolidate parallel cursors into a single cursor. The object that determines
126
- /// the number of cursors and splits the row "stream" provides the consolidator object.
127
- /// </summary>
128
- public interface IRowCursorConsolidator
129
- {
130
- /// <summary>
131
- /// Create a consolidated cursor from the given parallel cursor set.
132
- /// </summary>
133
- RowCursor CreateCursor ( IChannelProvider provider , RowCursor [ ] inputs ) ;
134
- }
135
-
136
- /// <summary>
137
- /// Delegate type to get a value. This can used for efficient access to data in an IRow
138
- /// or IRowCursor.
125
+ /// Delegate type to get a value. This can be used for efficient access to data in a <see cref="Row"/>
126
+ /// or <see cref="RowCursor"/>.
139
127
/// </summary>
140
128
public delegate void ValueGetter < TValue > ( ref TValue value ) ;
141
129
@@ -146,43 +134,54 @@ public interface IRowCursorConsolidator
146
134
public abstract class Row : IDisposable
147
135
{
148
136
/// <summary>
149
- /// This is incremented when the underlying contents changes, giving clients a way to detect change.
150
- /// Generally it's -1 when the object is in an invalid state. In particular, for an <see cref="RowCursor"/>, this is -1
151
- /// when the <see cref="RowCursor.State"/> is <see cref="CursorState.NotStarted"/> or <see cref="CursorState.Done"/>.
137
+ /// This is incremented when the underlying contents change, giving clients a way to detect change. Generally
138
+ /// it's -1 when the object is in an invalid state. In particular, for a <see cref="RowCursor"/>, this is -1
139
+ /// when the <see cref="RowCursor.State"/> is <see cref="CursorState.NotStarted"/> or <see
140
+ /// cref="CursorState.Done"/>.
152
141
///
153
- /// Note that this position is not position within the underlying data, but position of this cursor only.
154
- /// If one, for example, opened a set of parallel streaming cursors, or a shuffled cursor, each such cursor's
155
- /// first valid entry would always have position 0.
142
+ /// Note that this position is not the position within the underlying data, but the position of this cursor only. If
143
+ /// one, for example, opened a set of parallel streaming cursors, or a shuffled cursor, each such cursor's first
144
+ /// valid entry would always have position 0.
156
145
/// </summary>
157
146
public abstract long Position { get ; }
158
147
159
148
/// <summary>
160
- /// This provides a means for reconciling multiple streams of counted things. Generally, in each stream,
161
- /// batch numbers should be non-decreasing. Furthermore, any given batch number should only appear in one
162
- /// of the streams. Order is determined by batch number. The reconciler ensures that each stream (that is
163
- /// still active) has at least one item available, then takes the item with the smallest batch number.
149
+ /// This provides a means for reconciling multiple rows that have been produced generally from
150
+ /// <see cref="IDataView.GetRowCursorSet(Func{int, bool}, int, Random)"/>. When getting a set, there is a need
151
+ /// to, while allowing parallel processing to proceed, always have an aim that the original order should be
152
+ /// recoverable. Note, whether or not a user cares about that original order in one's specific application is
153
+ /// another story altogether (most callers of this as a practical matter do not, otherwise they would not call
154
+ /// it), but at least in principle it should be possible to reconstruct the original order one would get from an
155
+ /// identically configured <see cref="IDataView.GetRowCursor(Func{int, bool}, Random)"/>. So: for any cursor
156
+ /// implementation, batch numbers should be non-decreasing. Furthermore, any given batch number should only
157
+ /// appear in one of the cursors as returned by
158
+ /// <see cref="IDataView.GetRowCursorSet(Func{int, bool}, int, Random)"/>. In this way, order is determined by
159
+ /// batch number. An operation that reconciles these cursors to produce a consistent single cursoring could do
160
+ /// so by drawing from the single cursor, among all cursors in the set, that has the smallest batch number
161
+ /// available.
164
162
///
165
- /// Note that there is no suggestion that the batches for a particular entry will be consistent from
166
- /// cursoring to cursoring, except for the consistency in resulting in the same overall ordering. The same
167
- /// entry could have different batch numbers from one cursoring to another. There is also no requirement
168
- /// that any given batch number must appear, at all.
163
+ /// Note that there is no suggestion that the batches for a particular entry will be consistent from cursoring
164
+ /// to cursoring, except for the consistency in resulting in the same overall ordering. The same entry could
165
+ /// have different batch numbers from one cursoring to another. There is also no requirement that any given
166
+ /// batch number must appear, at all. It is merely a mechanism for recovering ordering from a possibly arbitrary
167
+ /// partitioning of the data. It also follows from this, of course, that considering the batch to be a property
168
+ /// of the data is completely invalid.
169
169
/// </summary>
170
170
public abstract long Batch { get ; }
171
171
172
172
/// <summary>
173
173
/// A getter for a 128-bit ID value. It is common for objects to serve multiple <see cref="Row"/>
174
174
/// instances to iterate over what is supposed to be the same data, for example, in an <see cref="IDataView"/>
175
- /// a cursor set will produce the same data as a serial cursor, just partitioned, and a shuffled cursor
176
- /// will produce the same data as a serial cursor or any other shuffled cursor, only shuffled. The ID
177
- /// exists for applications that need to reconcile which entry is actually which. Ideally this ID should
178
- /// be unique, but for practical reasons, it suffices if collisions are simply extremely improbable.
175
+ /// a cursor set will produce the same data as a serial cursor, just partitioned, and a shuffled cursor will
176
+ /// produce the same data as a serial cursor or any other shuffled cursor, only shuffled. The ID exists for
177
+ /// applications that need to reconcile which entry is actually which. Ideally this ID should be unique, but for
178
+ /// practical reasons, it suffices if collisions are simply extremely improbable.
179
179
///
180
- /// Note that this ID, while it must be consistent for multiple streams according to the semantics
181
- /// above, is not considered part of the data per se. So, to take the example of a data view specifically,
182
- /// a single data view must render consistent IDs across all cursorings, but there is no suggestion at
183
- /// all that if the "same" data were presented in a different data view (as by, say, being transformed,
184
- /// cached, saved, or whatever), that the IDs between the two different data views would have any
185
- /// discernable relationship.</summary>
180
+ /// Note that this ID, while it must be consistent for multiple streams according to the semantics above, is not
181
+ /// considered part of the data per se. So, to take the example of a data view specifically, a single data view
182
+ /// must render consistent IDs across all cursorings, but there is no suggestion at all that if the "same" data
183
+ /// were presented in a different data view (as by, say, being transformed, cached, saved, or whatever), that
184
+ /// the IDs between the two different data views would have any discernible relationship.</summary>
186
185
public abstract ValueGetter < RowId > GetIdGetter ( ) ;
187
186
188
187
/// <summary>
0 commit comments