Skip to content

Commit 5736b96

Browse files
Restrict clipping of DataFrame.corr only when cov=False (#61214)
* clip correlation coefficient between -1 and 1 * Added test to check if corr within bounds * Added tuple to mistyped parameter * Transferred np.clip to algos.nancorr * Clip covxy / divisor instead of result * Clip covxy / divisor within nogil * Added whatsnew note * Replaced long entry with single entry * restricted clipping to when cov=False * Added test for covariance * Changed name of test variables * fixed test case for cov --------- Co-authored-by: John Hendricks <[email protected]>
1 parent af16382 commit 5736b96

File tree

2 files changed

+15
-4
lines changed

2 files changed

+15
-4
lines changed

pandas/_libs/algos.pyx

+5-4
Original file line numberDiff line numberDiff line change
@@ -391,10 +391,11 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
391391
# clip `covxy / divisor` to ensure coeff is within bounds
392392
if divisor != 0:
393393
val = covxy / divisor
394-
if val > 1.0:
395-
val = 1.0
396-
elif val < -1.0:
397-
val = -1.0
394+
if not cov:
395+
if val > 1.0:
396+
val = 1.0
397+
elif val < -1.0:
398+
val = -1.0
398399
result[xi, yi] = result[yi, xi] = val
399400
else:
400401
result[xi, yi] = result[yi, xi] = NaN

pandas/tests/frame/methods/test_cov_corr.py

+10
Original file line numberDiff line numberDiff line change
@@ -497,3 +497,13 @@ def test_corr_within_bounds(self):
497497
corr_matrix = df2.corr()
498498
assert corr_matrix.min().min() >= -1.0
499499
assert corr_matrix.max().max() <= 1.0
500+
501+
def test_cov_with_missing_values(self):
502+
df = DataFrame({"A": [1, 2, None, 4], "B": [2, 4, None, 9]})
503+
expected = DataFrame(
504+
{"A": [2.333333, 5.500000], "B": [5.5, 13.0]}, index=["A", "B"]
505+
)
506+
result1 = df.cov()
507+
result2 = df.dropna().cov()
508+
tm.assert_frame_equal(result1, expected)
509+
tm.assert_frame_equal(result2, expected)

0 commit comments

Comments
 (0)