@@ -142,7 +142,7 @@ public DataFrame Join(DataFrame other, string leftSuffix = "_left", string right
142
142
return ret ;
143
143
}
144
144
145
- private static bool IsAnyNullValueInColumns ( IReadOnlyCollection < DataFrameColumn > columns , long index )
145
+ private static bool IsAnyNullValueInColumns ( IReadOnlyCollection < DataFrameColumn > columns , long index )
146
146
{
147
147
foreach ( var column in columns )
148
148
{
@@ -176,19 +176,19 @@ private static HashSet<long> Merge(DataFrame retainedDataFrame, DataFrame supple
176
176
throw new ArgumentNullException ( nameof ( supplemetaryJoinColumnNames ) ) ;
177
177
178
178
if ( retainedJoinColumnNames . Length != supplemetaryJoinColumnNames . Length )
179
- throw new ArgumentException ( Strings . MismatchedArrayLengths , nameof ( retainedJoinColumnNames ) ) ;
180
-
179
+ throw new ArgumentException ( Strings . MismatchedArrayLengths , nameof ( retainedJoinColumnNames ) ) ;
180
+
181
181
182
182
HashSet < long > intersection = calculateIntersection ? new HashSet < long > ( ) : null ;
183
183
184
184
// Get occurrences of values in columns used for join in the retained and supplementary dataframes
185
185
Dictionary < long , ICollection < long > > occurrences = null ;
186
186
Dictionary < long , long > retainedIndicesReverseMapping = null ;
187
-
187
+
188
188
HashSet < long > supplementaryJoinColumnsNullIndices = new HashSet < long > ( ) ;
189
189
190
-
191
- for ( int colNameIndex = 0 ; colNameIndex < retainedJoinColumnNames . Length ; colNameIndex ++ )
190
+
191
+ for ( int colNameIndex = 0 ; colNameIndex < retainedJoinColumnNames . Length ; colNameIndex ++ )
192
192
{
193
193
DataFrameColumn shrinkedRetainedColumn = retainedDataFrame . Columns [ retainedJoinColumnNames [ colNameIndex ] ] ;
194
194
@@ -211,7 +211,7 @@ private static HashSet<long> Merge(DataFrame retainedDataFrame, DataFrame supple
211
211
retainedIndicesReverseMapping = newRetainedIndicesReverseMapping ;
212
212
shrinkedRetainedColumn = shrinkedRetainedColumn . Clone ( new Int64DataFrameColumn ( "Indices" , shrinkedRetainedIndices ) ) ;
213
213
}
214
-
214
+
215
215
DataFrameColumn supplementaryColumn = supplementaryDataFrame . Columns [ supplemetaryJoinColumnNames [ colNameIndex ] ] ;
216
216
217
217
//Find occurrences on current step (join column)
@@ -222,7 +222,7 @@ private static HashSet<long> Merge(DataFrame retainedDataFrame, DataFrame supple
222
222
newOccurrences = newOccurrences . ToDictionary ( kvp => retainedIndicesReverseMapping [ kvp . Key ] , kvp => kvp . Value ) ;
223
223
224
224
supplementaryJoinColumnsNullIndices . UnionWith ( supplementaryColumnNullIndices ) ;
225
-
225
+
226
226
// shrink join result on current column by previous join columns (if any)
227
227
// (we have to remove occurrences that don't exist in previous columns, because JOIN happens only if ALL left and right columns in JOIN are matched)
228
228
if ( occurrences != null )
@@ -242,7 +242,7 @@ private static HashSet<long> Merge(DataFrame retainedDataFrame, DataFrame supple
242
242
243
243
occurrences = newOccurrences ;
244
244
}
245
-
245
+
246
246
retainedRowIndices = new Int64DataFrameColumn ( "RetainedIndices" ) ;
247
247
supplementaryRowIndices = new Int64DataFrameColumn ( "SupplementaryIndices" ) ;
248
248
@@ -280,18 +280,18 @@ private static HashSet<long> Merge(DataFrame retainedDataFrame, DataFrame supple
280
280
}
281
281
}
282
282
else
283
- {
283
+ {
284
284
foreach ( long row in supplementaryJoinColumnsNullIndices )
285
285
{
286
286
retainedRowIndices . Append ( i ) ;
287
287
supplementaryRowIndices . Append ( row ) ;
288
288
}
289
289
}
290
290
}
291
-
291
+
292
292
return intersection ;
293
293
}
294
-
294
+
295
295
public DataFrame Merge ( DataFrame other , string [ ] leftJoinColumns , string [ ] rightJoinColumns , string leftSuffix = "_left" , string rightSuffix = "_right" , JoinAlgorithm joinAlgorithm = JoinAlgorithm . Left )
296
296
{
297
297
if ( other == null )
@@ -335,7 +335,7 @@ public DataFrame Merge(DataFrame other, string[] leftJoinColumns, string[] right
335
335
else if ( joinAlgorithm == JoinAlgorithm . FullOuter )
336
336
{
337
337
//In a full outer join we would like to retain data from both sides, so we do it in 2 steps: first we do a LEFT JOIN and then add the lost data from the RIGHT side
338
-
338
+
339
339
//Step 1
340
340
//Do LEFT JOIN
341
341
isLeftDataFrameRetained = true ;
@@ -347,7 +347,7 @@ public DataFrame Merge(DataFrame other, string[] leftJoinColumns, string[] right
347
347
var retainedJoinColumns = isLeftDataFrameRetained ? leftJoinColumns : rightJoinColumns ;
348
348
349
349
var intersection = Merge ( retainedDataFrame , supplementaryDataFrame , retainedJoinColumns , supplementaryJoinColumns , out retainedRowIndices , out supplementaryRowIndices , calculateIntersection : true ) ;
350
-
350
+
351
351
//Step 2
352
352
//Do RIGHT JOIN to retain all data from supplementary DataFrame too (take into account data intersection from the first step to avoid duplicates)
353
353
for ( long i = 0 ; i < supplementaryDataFrame . Columns . RowCount ; i ++ )
@@ -365,9 +365,9 @@ public DataFrame Merge(DataFrame other, string[] leftJoinColumns, string[] right
365
365
}
366
366
else
367
367
throw new NotImplementedException ( nameof ( joinAlgorithm ) ) ;
368
-
368
+
369
369
DataFrame ret = new DataFrame ( ) ;
370
-
370
+
371
371
//insert columns from left dataframe (this)
372
372
for ( int i = 0 ; i < this . Columns . Count ; i ++ )
373
373
{
0 commit comments