Skip to content

Commit 2a6665e

Browse files
jbrockmendel authored and Nico Cernek committed
BUG: Fix TypeError raised in libreduction (pandas-dev#28643)
1 parent 12e00d3 commit 2a6665e

File tree

3 files changed

+42
-22
lines changed

3 files changed

+42
-22
lines changed

pandas/_libs/reduction.pyx

+17-9
Original file line number | Diff line number | Diff line change
@@ -15,15 +15,15 @@ from numpy cimport (ndarray,
1515
cnp.import_array()
1616

1717
cimport pandas._libs.util as util
18-
from pandas._libs.lib import maybe_convert_objects, values_from_object
18+
from pandas._libs.lib import maybe_convert_objects
1919

2020

2121
cdef _get_result_array(object obj, Py_ssize_t size, Py_ssize_t cnt):
2222

2323
if (util.is_array(obj) or
2424
(isinstance(obj, list) and len(obj) == cnt) or
2525
getattr(obj, 'shape', None) == (cnt,)):
26-
raise ValueError('function does not reduce')
26+
raise ValueError('Function does not reduce')
2727

2828
return np.empty(size, dtype='O')
2929

@@ -103,7 +103,7 @@ cdef class Reducer:
103103
ndarray arr, result, chunk
104104
Py_ssize_t i, incr
105105
flatiter it
106-
bint has_labels
106+
bint has_labels, has_ndarray_labels
107107
object res, name, labels, index
108108
object cached_typ=None
109109

@@ -113,14 +113,18 @@ cdef class Reducer:
113113
chunk.data = arr.data
114114
labels = self.labels
115115
has_labels = labels is not None
116+
has_ndarray_labels = util.is_array(labels)
116117
has_index = self.index is not None
117118
incr = self.increment
118119

119120
try:
120121
for i in range(self.nresults):
121122

122-
if has_labels:
123+
if has_ndarray_labels:
123124
name = util.get_value_at(labels, i)
125+
elif has_labels:
126+
# labels is an ExtensionArray
127+
name = labels[i]
124128
else:
125129
name = None
126130

@@ -362,7 +366,8 @@ cdef class SeriesGrouper:
362366

363367
def get_result(self):
364368
cdef:
365-
ndarray arr, result
369+
# Define result to avoid UnboundLocalError
370+
ndarray arr, result = None
366371
ndarray[int64_t] labels, counts
367372
Py_ssize_t i, n, group_size, lab
368373
object res
@@ -428,6 +433,9 @@ cdef class SeriesGrouper:
428433
islider.reset()
429434
vslider.reset()
430435

436+
if result is None:
437+
raise ValueError("No result.")
438+
431439
if result.dtype == np.object_:
432440
result = maybe_convert_objects(result)
433441

@@ -639,11 +647,11 @@ def compute_reduction(arr, f, axis=0, dummy=None, labels=None):
639647
"""
640648

641649
if labels is not None:
642-
if labels._has_complex_internals:
643-
raise Exception('Cannot use shortcut')
650+
# Caller is responsible for ensuring we don't have MultiIndex
651+
assert not labels._has_complex_internals
644652

645-
# pass as an ndarray
646-
labels = values_from_object(labels)
653+
# pass as an ndarray/ExtensionArray
654+
labels = labels._values
647655

648656
reducer = Reducer(arr, f, axis=axis, dummy=dummy, labels=labels)
649657
return reducer.get_result()

pandas/core/apply.py

+24-8
Original file line number | Diff line number | Diff line change
@@ -223,10 +223,12 @@ def apply_empty_result(self):
223223

224224
def apply_raw(self):
225225
""" apply to the values as a numpy array """
226-
227226
try:
228227
result = libreduction.compute_reduction(self.values, self.f, axis=self.axis)
229-
except Exception:
228+
except ValueError as err:
229+
if "Function does not reduce" not in str(err):
230+
# catch only ValueError raised intentionally in libreduction
231+
raise
230232
result = np.apply_along_axis(self.f, self.axis, self.values)
231233

232234
# TODO: mixed type case
@@ -273,24 +275,38 @@ def apply_standard(self):
273275
if (
274276
self.result_type in ["reduce", None]
275277
and not self.dtypes.apply(is_extension_type).any()
278+
# Disallow complex_internals since libreduction shortcut
279+
# cannot handle MultiIndex
280+
and not self.agg_axis._has_complex_internals
276281
):
277282

278-
# Create a dummy Series from an empty array
279-
from pandas import Series
280-
281283
values = self.values
282284
index = self.obj._get_axis(self.axis)
283285
labels = self.agg_axis
284286
empty_arr = np.empty(len(index), dtype=values.dtype)
285-
dummy = Series(empty_arr, index=index, dtype=values.dtype)
287+
288+
# Preserve subclass for e.g. test_subclassed_apply
289+
dummy = self.obj._constructor_sliced(
290+
empty_arr, index=index, dtype=values.dtype
291+
)
286292

287293
try:
288294
result = libreduction.compute_reduction(
289295
values, self.f, axis=self.axis, dummy=dummy, labels=labels
290296
)
291-
return self.obj._constructor_sliced(result, index=labels)
292-
except Exception:
297+
except ValueError as err:
298+
if "Function does not reduce" not in str(err):
299+
# catch only ValueError raised intentionally in libreduction
300+
raise
301+
except TypeError:
302+
# e.g. test_apply_ignore_failures we just ignore
303+
if not self.ignore_failures:
304+
raise
305+
except ZeroDivisionError:
306+
# reached via numexpr; fall back to python implementation
293307
pass
308+
else:
309+
return self.obj._constructor_sliced(result, index=labels)
294310

295311
# compute the result using the series generator
296312
self.apply_series_generator()

pandas/tests/groupby/test_groupby.py

+1-5
Original file line number | Diff line number | Diff line change
@@ -775,11 +775,7 @@ def test_omit_nuisance(df):
775775

776776
# won't work with axis = 1
777777
grouped = df.groupby({"A": 0, "C": 0, "D": 1, "E": 1}, axis=1)
778-
msg = (
779-
r'\("unsupported operand type\(s\) for \+: '
780-
"'Timestamp' and 'float'\""
781-
r", 'occurred at index 0'\)"
782-
)
778+
msg = r'\("unsupported operand type\(s\) for \+: ' "'Timestamp' and 'float'\", 0"
783779
with pytest.raises(TypeError, match=msg):
784780
grouped.agg(lambda x: x.sum(0, numeric_only=False))
785781

0 commit comments

Comments
 (0)