-
-
Notifications
You must be signed in to change notification settings - Fork 18.6k
Fix 40420: Interpret NaN in clip() as no bound. #40927
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
66f8331
7b0b69c
ff821c1
9ec3355
768a5eb
66708d2
a1f990f
e7acdeb
0bcb70d
4c2b3c5
751c8a9
c84df44
3e86f4a
1900542
51b9006
c3523c3
9c033bd
4c02269
ccede57
137b8d0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -7341,8 +7341,6 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): | |
return self._clip_with_scalar(None, threshold, inplace=inplace) | ||
return self._clip_with_scalar(threshold, None, inplace=inplace) | ||
|
||
subset = method(threshold, axis=axis) | isna(self) | ||
|
||
# GH #15390 | ||
# In order for where method to work, the threshold must | ||
# be transformed to NDFrame from other array like structure. | ||
|
@@ -7351,6 +7349,18 @@ def _clip_with_one_bound(self, threshold, method, axis, inplace): | |
threshold = self._constructor(threshold, index=self.index) | ||
else: | ||
threshold = align_method_FRAME(self, threshold, axis, flex=None)[1] | ||
|
||
# GH 40420 | ||
# Treat missing thresholds as no bounds, not clipping the values | ||
if is_list_like(threshold): | ||
fill_value = np.inf if method.__name__ == "le" else -np.inf | ||
threshold_inf = threshold.fillna(fill_value) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This causes an issue when working with datetime series (see #44785), since downstream it will try to compare the float fill value (`np.inf` / `-np.inf`) to a Timestamp object. There's probably a more robust solution, but a potential quick/easy fix would be to check the dtype here and use an appropriate alternative to np.inf (e.g. pd.Timestamp.max/min). |
||
else: | ||
threshold_inf = threshold | ||
|
||
subset = method(threshold_inf, axis=axis) | isna(self) | ||
|
||
# GH 40420 | ||
return self.where(subset, threshold, axis=axis, inplace=inplace) | ||
|
||
@overload | ||
|
@@ -7482,10 +7492,12 @@ def clip( | |
---------- | ||
lower : float or array_like, default None | ||
Minimum threshold value. All values below this | ||
threshold will be set to it. | ||
threshold will be set to it. A missing | ||
threshold (e.g. `NA`) will not clip the value. | ||
upper : float or array_like, default None | ||
Maximum threshold value. All values above this | ||
threshold will be set to it. | ||
threshold will be set to it. A missing | ||
threshold (e.g. `NA`) will not clip the value. | ||
axis : int or str axis name, optional | ||
Align object with lower and upper along the given axis. | ||
inplace : bool, default False | ||
|
@@ -7546,6 +7558,25 @@ def clip( | |
2 0 3 | ||
3 6 8 | ||
4 5 3 | ||
|
||
Clips using specific lower threshold per column element, with missing values: | ||
|
||
>>> t = pd.Series([2, -4, np.NaN, 6, 3]) | ||
>>> t | ||
0 2.0 | ||
1 -4.0 | ||
2 NaN | ||
3 6.0 | ||
4 3.0 | ||
dtype: float64 | ||
|
||
>>> df.clip(t, axis=0) | ||
col_0 col_1 | ||
0 9 2 | ||
1 -3 -4 | ||
2 0 6 | ||
3 6 8 | ||
4 5 3 | ||
""" | ||
inplace = validate_bool_kwarg(inplace, "inplace") | ||
|
||
|
@@ -7558,9 +7589,17 @@ def clip( | |
# so ignore | ||
# GH 19992 | ||
# numpy doesn't drop a list-like bound containing NaN | ||
if not is_list_like(lower) and np.any(isna(lower)): | ||
isna_lower = isna(lower) | ||
if not is_list_like(lower): | ||
if np.any(isna_lower): | ||
lower = None | ||
elif np.all(isna_lower): | ||
lower = None | ||
if not is_list_like(upper) and np.any(isna(upper)): | ||
isna_upper = isna(upper) | ||
if not is_list_like(upper): | ||
if np.any(isna_upper): | ||
upper = None | ||
elif np.all(isna_upper): | ||
upper = None | ||
|
||
# GH 2747 (arguments were reversed) | ||
|
Uh oh!
There was an error while loading. Please reload this page.