Skip to content

BUG: Catch overflow in both directions for checked add #14453

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions asv_bench/benchmarks/algorithms.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,14 @@ def setup(self):
self.int = pd.Int64Index(np.arange(N).repeat(5))
self.float = pd.Float64Index(np.random.randn(N).repeat(5))

# Convenience naming.
self.checked_add = pd.core.nanops._checked_add_with_arr

self.arr = np.arange(1000000)
self.arrpos = np.arange(1000000)
self.arrneg = np.arange(-1000000, 0)
self.arrmixed = np.array([1, -1]).repeat(500000)

def time_int_factorize(self):
self.int.factorize()

Expand All @@ -29,3 +37,21 @@ def time_int_duplicated(self):

def time_float_duplicated(self):
self.float.duplicated()

def time_add_overflow_pos_scalar(self):
self.checked_add(self.arr, 1)

def time_add_overflow_neg_scalar(self):
self.checked_add(self.arr, -1)

def time_add_overflow_zero_scalar(self):
self.checked_add(self.arr, 0)

def time_add_overflow_pos_arr(self):
self.checked_add(self.arr, self.arrpos)

def time_add_overflow_neg_arr(self):
self.checked_add(self.arr, self.arrneg)

def time_add_overflow_mixed_arr(self):
self.checked_add(self.arr, self.arrmixed)
13 changes: 12 additions & 1 deletion asv_bench/benchmarks/timedelta.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .pandas_vb_common import *
from pandas import to_timedelta
from pandas import to_timedelta, Timestamp


class timedelta_convert_int(object):
Expand Down Expand Up @@ -47,3 +47,14 @@ def time_timedelta_convert_coerce(self):

def time_timedelta_convert_ignore(self):
to_timedelta(self.arr, errors='ignore')


class timedelta_add_overflow(object):
goal_time = 0.2

def setup(self):
self.td = to_timedelta(np.arange(1000000))
self.ts = Timestamp('2000')

def test_add_td_ts(self):
self.td + self.ts
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.19.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ Bug Fixes


- Bug in localizing an ambiguous timezone when a boolean is passed (:issue:`14402`)
- Bug in ``TimedeltaIndex`` addition with a Datetime-like object where addition overflow in the negative direction was not being caught (:issue:`14068`, :issue:`14453`)



Expand Down
37 changes: 33 additions & 4 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import pandas.hashtable as _hash
from pandas import compat, lib, algos, tslib
from pandas.compat.numpy import _np_version_under1p10
from pandas.types.common import (_ensure_int64, _ensure_object,
_ensure_float64, _get_dtype,
is_float, is_scalar,
Expand Down Expand Up @@ -829,9 +830,37 @@ def _checked_add_with_arr(arr, b):
Raises
------
OverflowError if any x + y exceeds the maximum int64 value.
OverflowError if any x + y exceeds the maximum or minimum int64 value.
"""
if (np.iinfo(np.int64).max - b < arr).any():
raise OverflowError("Python int too large to "
"convert to C long")
# For performance reasons, we broadcast 'b' to the new array 'b2'
# so that it has the same size as 'arr'.
if _np_version_under1p10:
if lib.isscalar(b):
b2 = np.empty(arr.shape)
b2.fill(b)
else:
b2 = b
else:
b2 = np.broadcast_to(b, arr.shape)

# gh-14324: For each element in 'arr' and its corresponding element
# in 'b2', we check the sign of the element in 'b2'. If it is positive,
# we then check whether its sum with the element in 'arr' exceeds
# np.iinfo(np.int64).max. If so, we have an overflow error. If it
# it is negative, we then check whether its sum with the element in
# 'arr' exceeds np.iinfo(np.int64).min. If so, we have an overflow
# error as well.
mask1 = b2 > 0
mask2 = b2 < 0

if not mask1.any():
to_raise = (np.iinfo(np.int64).min - b2 > arr).any()
elif not mask2.any():
to_raise = (np.iinfo(np.int64).max - b2 < arr).any()
else:
to_raise = ((np.iinfo(np.int64).max - b2[mask1] < arr[mask1]).any() or
(np.iinfo(np.int64).min - b2[mask2] > arr[mask2]).any())

if to_raise:
raise OverflowError("Overflow in int64 addition")
return arr + b
9 changes: 8 additions & 1 deletion pandas/tests/test_nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1004,13 +1004,20 @@ def prng(self):

def test_int64_add_overflow():
# see gh-14068
msg = "too (big|large) to convert"
msg = "Overflow in int64 addition"
m = np.iinfo(np.int64).max
n = np.iinfo(np.int64).min

with tm.assertRaisesRegexp(OverflowError, msg):
nanops._checked_add_with_arr(np.array([m, m]), m)
with tm.assertRaisesRegexp(OverflowError, msg):
nanops._checked_add_with_arr(np.array([m, m]), np.array([m, m]))
with tm.assertRaisesRegexp(OverflowError, msg):
nanops._checked_add_with_arr(np.array([n, n]), n)
with tm.assertRaisesRegexp(OverflowError, msg):
nanops._checked_add_with_arr(np.array([n, n]), np.array([n, n]))
with tm.assertRaisesRegexp(OverflowError, msg):
nanops._checked_add_with_arr(np.array([m, n]), np.array([n, n]))
with tm.assertRaisesRegexp(OverflowError, msg):
with tm.assert_produces_warning(RuntimeWarning):
nanops._checked_add_with_arr(np.array([m, m]),
Expand Down
2 changes: 2 additions & 0 deletions pandas/tseries/tests/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -1957,6 +1957,8 @@ def test_add_overflow(self):
to_timedelta(106580, 'D') + Timestamp('2000')
with tm.assertRaisesRegexp(OverflowError, msg):
Timestamp('2000') + to_timedelta(106580, 'D')

msg = "Overflow in int64 addition"
with tm.assertRaisesRegexp(OverflowError, msg):
to_timedelta([106580], 'D') + Timestamp('2000')
with tm.assertRaisesRegexp(OverflowError, msg):
Expand Down