@@ -1388,27 +1388,35 @@ def dask_groupby_agg(
1388
1388
return (result , groups )
1389
1389
1390
1390
1391
def _validate_reindex(
    reindex: bool | None, func, method: T_Method, expected_groups, by_is_dask: bool
) -> bool:
    """Check a user-supplied ``reindex`` and resolve ``None`` to a concrete bool.

    Parameters
    ----------
    reindex : bool or None
        Requested setting. ``None`` means "choose for me" using the rules below.
    func
        The reduction; only inspected through ``_is_arg_reduction(func)``.
    method : T_Method
        Strategy name. The values handled here are ``"blockwise"``,
        ``"map-reduce"``, ``"split-reduce"`` and ``"cohorts"``.
    expected_groups
        Only tested against ``None``.
    by_is_dask : bool
        Only consulted for ``method="map-reduce"`` when ``expected_groups`` is
        ``None``.

    Returns
    -------
    bool
        ``reindex`` unchanged when it was already a bool and passed validation;
        otherwise the default picked by the first matching rule:

        1. ``method="blockwise"`` or an arg reduction -> ``False``
        2. ``expected_groups`` provided, or ``method`` is ``"split-reduce"`` /
           ``"cohorts"`` -> ``True``
        3. ``method="map-reduce"`` -> ``not by_is_dask``

    Raises
    ------
    NotImplementedError
        If ``reindex`` is ``True`` together with an arg reduction or
        ``method="blockwise"``, or if ``reindex`` is (or resolves to) ``False``
        with ``method`` in ``"split-reduce"`` / ``"cohorts"``.
    AssertionError
        If ``reindex`` is still ``None`` after resolution, i.e. no rule matched.
    """
    requires_reindex = method in ("split-reduce", "cohorts")

    if reindex is True:
        if _is_arg_reduction(func):
            raise NotImplementedError("reindex=True is not supported for arg reductions.")
        if method == "blockwise":
            raise NotImplementedError("reindex=True is not supported with method='blockwise'.")

    if reindex is None:
        if method == "blockwise" or _is_arg_reduction(func):
            reindex = False
        elif expected_groups is not None or requires_reindex:
            reindex = True
        elif method == "map-reduce":
            # expected_groups is necessarily None on this branch, so the
            # default depends only on whether `by` is a dask array.
            reindex = not by_is_dask

    # Also reached when the default above resolved to False, e.g. an arg
    # reduction combined with "cohorts" or "split-reduce".
    if requires_reindex and reindex is False:
        raise NotImplementedError(f"reindex=False is not supported with method={method!r}.")

    # Narrows `bool | None` to `bool` for type checkers and trips if no rule
    # above applied (reindex left as None).
    assert isinstance(reindex, bool)
    return reindex
1413
1421
1414
1422
@@ -1597,7 +1605,6 @@ def groupby_reduce(
1597
1605
"argreductions not supported for engine='flox' yet."
1598
1606
"Try engine='numpy' or engine='numba' instead."
1599
1607
)
1600
- reindex = _validate_reindex (reindex , func , method , expected_groups )
1601
1608
1602
1609
bys = tuple (np .asarray (b ) if not is_duck_array (b ) else b for b in by )
1603
1610
nby = len (bys )
@@ -1606,6 +1613,8 @@ def groupby_reduce(
1606
1613
if method in ["split-reduce" , "cohorts" ] and by_is_dask :
1607
1614
raise ValueError (f"method={ method !r} can only be used when grouping by numpy arrays." )
1608
1615
1616
+ reindex = _validate_reindex (reindex , func , method , expected_groups , by_is_dask )
1617
+
1609
1618
if not is_duck_array (array ):
1610
1619
array = np .asarray (array )
1611
1620
is_bool_array = np .issubdtype (array .dtype , bool )
0 commit comments