Skip to content

Commit 04356be

Browse files
authored
Fix binary operations on attrs for Series and DataFrame (#59636)
* Fix binary operators on attrs for Series and DataFrame * Add tests for Series and DataFrame for attrs binary operations * Add getattr as other might not possess attrs as attribute * Fix some tests in pandas/tests/generic/test_finalize.py::test_binops * remove xfail tests related to attrs * Add all_binary_operators * use __finalize__ for attrs bin ops in base.py * use __finalize__ for attrs bin ops in frame.py * use __finalize__ for attrs bin ops in series.py * ENH: Ensure attrs are copied from other in Series arithmetic operations. Moved from base.py to series.py * REF: Refactor binary operation tests for DataFrame and Series attributes * REF: Enhance _construct_result method to accept 'other' parameter for improved attribute handling * REF: Simplify test_attrs_binary_operations by parameterizing left and right inputs * Refactor DataFrame and Series methods to improve attribute handling and finalize behavior * Refactor DataFrame alignment logic to improve attribute consistency * Refactor DataFrame and Series methods to enhance attribute finalization logic * Clean and remove unnecessry checks * Fix: Prioritizing False if self.flags.allows_duplicate_labels or other.flags.allows_duplicate_labels is False
1 parent b8abe06 commit 04356be

File tree

6 files changed

+40
-68
lines changed

6 files changed

+40
-68
lines changed

pandas/core/base.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1480,9 +1480,9 @@ def _arith_method(self, other, op):
14801480
with np.errstate(all="ignore"):
14811481
result = ops.arithmetic_op(lvalues, rvalues, op)
14821482

1483-
return self._construct_result(result, name=res_name)
1483+
return self._construct_result(result, name=res_name, other=other)
14841484

1485-
def _construct_result(self, result, name):
1485+
def _construct_result(self, result, name, other):
14861486
"""
14871487
Construct an appropriately-wrapped result from the ArrayLike result
14881488
of an arithmetic-like operation.

pandas/core/frame.py

+8-9
Original file line numberDiff line numberDiff line change
@@ -7879,7 +7879,7 @@ def _cmp_method(self, other, op):
78797879

78807880
# See GH#4537 for discussion of scalar op behavior
78817881
new_data = self._dispatch_frame_op(other, op, axis=axis)
7882-
return self._construct_result(new_data)
7882+
return self._construct_result(new_data, other=other)
78837883

78847884
def _arith_method(self, other, op):
78857885
if self._should_reindex_frame_op(other, op, 1, None, None):
@@ -7892,7 +7892,7 @@ def _arith_method(self, other, op):
78927892

78937893
with np.errstate(all="ignore"):
78947894
new_data = self._dispatch_frame_op(other, op, axis=axis)
7895-
return self._construct_result(new_data)
7895+
return self._construct_result(new_data, other=other)
78967896

78977897
_logical_method = _arith_method
78987898

@@ -8088,8 +8088,7 @@ def _align_for_op(
80888088
80898089
Parameters
80908090
----------
8091-
left : DataFrame
8092-
right : Any
8091+
other : Any
80938092
axis : int
80948093
flex : bool or None, default False
80958094
Whether this is a flex op, in which case we reindex.
@@ -8208,7 +8207,6 @@ def to_series(right):
82088207
level=level,
82098208
)
82108209
right = left._maybe_align_series_as_frame(right, axis)
8211-
82128210
return left, right
82138211

82148212
def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt):
@@ -8237,7 +8235,7 @@ def _maybe_align_series_as_frame(self, series: Series, axis: AxisInt):
82378235
index=self.index,
82388236
columns=self.columns,
82398237
dtype=rvalues.dtype,
8240-
)
8238+
).__finalize__(series)
82418239

82428240
def _flex_arith_method(
82438241
self, other, op, *, axis: Axis = "columns", level=None, fill_value=None
@@ -8269,9 +8267,9 @@ def _flex_arith_method(
82698267

82708268
new_data = self._dispatch_frame_op(other, op)
82718269

8272-
return self._construct_result(new_data)
8270+
return self._construct_result(new_data, other=other)
82738271

8274-
def _construct_result(self, result) -> DataFrame:
8272+
def _construct_result(self, result, other) -> DataFrame:
82758273
"""
82768274
Wrap the result of an arithmetic, comparison, or logical operation.
82778275
@@ -8288,6 +8286,7 @@ def _construct_result(self, result) -> DataFrame:
82888286
# non-unique columns case
82898287
out.columns = self.columns
82908288
out.index = self.index
8289+
out = out.__finalize__(other)
82918290
return out
82928291

82938292
def __divmod__(self, other) -> tuple[DataFrame, DataFrame]:
@@ -8308,7 +8307,7 @@ def _flex_cmp_method(self, other, op, *, axis: Axis = "columns", level=None):
83088307
self, other = self._align_for_op(other, axis, flex=True, level=level)
83098308

83108309
new_data = self._dispatch_frame_op(other, op, axis=axis)
8311-
return self._construct_result(new_data)
8310+
return self._construct_result(new_data, other=other)
83128311

83138312
@Appender(ops.make_flex_doc("eq", "dataframe"))
83148313
def eq(self, other, axis: Axis = "columns", level=None) -> DataFrame:

pandas/core/generic.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -6076,8 +6076,10 @@ def __finalize__(self, other, method: str | None = None, **kwargs) -> Self:
60766076
# One could make the deepcopy unconditionally, but a deepcopy
60776077
# of an empty dict is 50x more expensive than the empty check.
60786078
self.attrs = deepcopy(other.attrs)
6079-
6080-
self.flags.allows_duplicate_labels = other.flags.allows_duplicate_labels
6079+
self.flags.allows_duplicate_labels = (
6080+
self.flags.allows_duplicate_labels
6081+
and other.flags.allows_duplicate_labels
6082+
)
60816083
# For subclasses using _metadata.
60826084
for name in set(self._metadata) & set(other._metadata):
60836085
assert isinstance(name, str)

pandas/core/indexes/base.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -7148,10 +7148,10 @@ def _logical_method(self, other, op):
71487148
rvalues = extract_array(other, extract_numpy=True, extract_range=True)
71497149

71507150
res_values = ops.logical_op(lvalues, rvalues, op)
7151-
return self._construct_result(res_values, name=res_name)
7151+
return self._construct_result(res_values, name=res_name, other=other)
71527152

71537153
@final
7154-
def _construct_result(self, result, name):
7154+
def _construct_result(self, result, name, other):
71557155
if isinstance(result, tuple):
71567156
return (
71577157
Index(result[0], name=name, dtype=result[0].dtype),

pandas/core/series.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -5858,7 +5858,7 @@ def _cmp_method(self, other, op):
58585858

58595859
res_values = ops.comparison_op(lvalues, rvalues, op)
58605860

5861-
return self._construct_result(res_values, name=res_name)
5861+
return self._construct_result(res_values, name=res_name, other=other)
58625862

58635863
def _logical_method(self, other, op):
58645864
res_name = ops.get_op_result_name(self, other)
@@ -5868,7 +5868,7 @@ def _logical_method(self, other, op):
58685868
rvalues = extract_array(other, extract_numpy=True, extract_range=True)
58695869

58705870
res_values = ops.logical_op(lvalues, rvalues, op)
5871-
return self._construct_result(res_values, name=res_name)
5871+
return self._construct_result(res_values, name=res_name, other=other)
58725872

58735873
def _arith_method(self, other, op):
58745874
self, other = self._align_for_op(other)
@@ -5930,11 +5930,15 @@ def _binop(self, other: Series, func, level=None, fill_value=None) -> Series:
59305930
result = func(this_vals, other_vals)
59315931

59325932
name = ops.get_op_result_name(self, other)
5933-
out = this._construct_result(result, name)
5933+
5934+
out = this._construct_result(result, name, other)
59345935
return cast(Series, out)
59355936

59365937
def _construct_result(
5937-
self, result: ArrayLike | tuple[ArrayLike, ArrayLike], name: Hashable
5938+
self,
5939+
result: ArrayLike | tuple[ArrayLike, ArrayLike],
5940+
name: Hashable,
5941+
other: AnyArrayLike | DataFrame,
59385942
) -> Series | tuple[Series, Series]:
59395943
"""
59405944
Construct an appropriately-labelled Series from the result of an op.
@@ -5943,6 +5947,7 @@ def _construct_result(
59435947
----------
59445948
result : ndarray or ExtensionArray
59455949
name : Label
5950+
other : Series, DataFrame or array-like
59465951
59475952
Returns
59485953
-------
@@ -5952,8 +5957,8 @@ def _construct_result(
59525957
if isinstance(result, tuple):
59535958
# produced by divmod or rdivmod
59545959

5955-
res1 = self._construct_result(result[0], name=name)
5956-
res2 = self._construct_result(result[1], name=name)
5960+
res1 = self._construct_result(result[0], name=name, other=other)
5961+
res2 = self._construct_result(result[1], name=name, other=other)
59575962

59585963
# GH#33427 assertions to keep mypy happy
59595964
assert isinstance(res1, Series)
@@ -5965,6 +5970,7 @@ def _construct_result(
59655970
dtype = getattr(result, "dtype", None)
59665971
out = self._constructor(result, index=self.index, dtype=dtype, copy=False)
59675972
out = out.__finalize__(self)
5973+
out = out.__finalize__(other)
59685974

59695975
# Set the result's name after __finalize__ is called because __finalize__
59705976
# would set it back to self.name

pandas/tests/generic/test_finalize.py

+12-47
Original file line numberDiff line numberDiff line change
@@ -427,53 +427,6 @@ def test_binops(request, args, annotate, all_binary_operators):
427427
if annotate == "right" and isinstance(right, int):
428428
pytest.skip("right is an int and doesn't support .attrs")
429429

430-
if not (isinstance(left, int) or isinstance(right, int)) and annotate != "both":
431-
if not all_binary_operators.__name__.startswith("r"):
432-
if annotate == "right" and isinstance(left, type(right)):
433-
request.applymarker(
434-
pytest.mark.xfail(
435-
reason=f"{all_binary_operators} doesn't work when right has "
436-
f"attrs and both are {type(left)}"
437-
)
438-
)
439-
if not isinstance(left, type(right)):
440-
if annotate == "left" and isinstance(left, pd.Series):
441-
request.applymarker(
442-
pytest.mark.xfail(
443-
reason=f"{all_binary_operators} doesn't work when the "
444-
"objects are different Series has attrs"
445-
)
446-
)
447-
elif annotate == "right" and isinstance(right, pd.Series):
448-
request.applymarker(
449-
pytest.mark.xfail(
450-
reason=f"{all_binary_operators} doesn't work when the "
451-
"objects are different Series has attrs"
452-
)
453-
)
454-
else:
455-
if annotate == "left" and isinstance(left, type(right)):
456-
request.applymarker(
457-
pytest.mark.xfail(
458-
reason=f"{all_binary_operators} doesn't work when left has "
459-
f"attrs and both are {type(left)}"
460-
)
461-
)
462-
if not isinstance(left, type(right)):
463-
if annotate == "right" and isinstance(right, pd.Series):
464-
request.applymarker(
465-
pytest.mark.xfail(
466-
reason=f"{all_binary_operators} doesn't work when the "
467-
"objects are different Series has attrs"
468-
)
469-
)
470-
elif annotate == "left" and isinstance(left, pd.Series):
471-
request.applymarker(
472-
pytest.mark.xfail(
473-
reason=f"{all_binary_operators} doesn't work when the "
474-
"objects are different Series has attrs"
475-
)
476-
)
477430
if annotate in {"left", "both"} and not isinstance(left, int):
478431
left.attrs = {"a": 1}
479432
if annotate in {"right", "both"} and not isinstance(right, int):
@@ -497,6 +450,18 @@ def test_binops(request, args, annotate, all_binary_operators):
497450
assert result.attrs == {"a": 1}
498451

499452

453+
@pytest.mark.parametrize("left", [pd.Series, pd.DataFrame])
454+
@pytest.mark.parametrize("right", [pd.Series, pd.DataFrame])
455+
def test_attrs_binary_operations(all_binary_operators, left, right):
456+
# GH 51607
457+
attrs = {"a": 1}
458+
left = left([1])
459+
left.attrs = attrs
460+
right = right([2])
461+
assert all_binary_operators(left, right).attrs == attrs
462+
assert all_binary_operators(right, left).attrs == attrs
463+
464+
500465
# ----------------------------------------------------------------------------
501466
# Accessors
502467

0 commit comments

Comments
 (0)