Skip to content

Commit 36c5b98

Browse files
committed
Senja about why 64.
1 parent f8f89c0 commit 36c5b98

File tree

1 file changed

+8
-1
lines changed

1 file changed

+8
-1
lines changed

src/Microsoft.ML.Data/Data/DataViewUtils.cs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,14 @@ public static bool TryCreateConsolidatingCursor(out RowCursor curs,
135135
if (inputs.Length == 1)
136136
curs = inputs[0];
137137
else
138-
curs = DataViewUtils.ConsolidateGeneric(host, inputs, 64);
138+
{
139+
// We use a somewhat arbitrary batch size of 64 for buffering results from the
140+
// intermediate cursors, since that at least empirically for most datasets seems to
141+
// strike a nice balance between a size large enough to benefit from parallelism but
142+
// small enough so as to not be too onerous to keep in memory.
143+
const int batchSize = 64;
144+
curs = DataViewUtils.ConsolidateGeneric(host, inputs, batchSize);
145+
}
139146
return true;
140147
}
141148

0 commit comments

Comments
 (0)