@@ -1253,6 +1253,166 @@ def nancorr_spearman(ndarray[float64_t, ndim=2] mat, Py_ssize_t minp=1):
1253
1253
return result
1254
1254
1255
1255
1256
+ # ----------------------------------------------------------------------
1257
+ # Rolling correlation
1258
+
1259
def roll_corr(ndarray[double_t] x, ndarray[double_t] y, int win, int minp,
              int ddof=1):
    """
    Rolling Pearson correlation of ``x`` and ``y`` over a window of ``win``
    positions.

    Numerically stable implementation using a Welford-like method, with
    detection of exactly matching sequences and exactly zero denominators.

    Parameters
    ----------
    x, y : ndarray[double_t]
        Input series. A position counts as an observation only when both
        ``x[i]`` and ``y[i]`` are not NaN. ``y`` is indexed with the same
        positions as ``x``, so it must be at least as long as ``x``.
    win : int
        Window length, in positions (NaN positions included).
    minp : int
        Minimum number of observations required for a non-NaN result; the
        value is first passed through ``_check_minp(win, minp, N)``.
    ddof : int, default 1
        Delta degrees of freedom. It cancels out of the correlation ratio
        and is only used to require ``nobs > ddof``, matching ``roll_cov``
        and ``roll_var``.

    Returns
    -------
    ndarray[double_t]
        Array of ``len(x)`` values. An entry is NaN when the window holds
        fewer than ``minp`` observations, when ``nobs <= ddof``, or when
        either series has a zero sum of squared deviations in the window.
        It is exactly 1.0 when every observed pair in the window satisfies
        ``x == y`` (and the series is not constant).
    """
    cdef double val_x, val_y, prev_x, prev_y, rep_x = NaN, rep_y = NaN
    cdef double mean_x = 0, mean_y = 0, ssqdm_x = 0, ssqdm_y = 0
    cdef double sproddm_xy = 0, delta_x, delta_y
    cdef Py_ssize_t nobs = 0, nrep_x = 0, nrep_y = 0, ndup = 0, i, N = len(x)
    cdef bint val_not_nan, prev_not_nan

    cdef ndarray[double_t] output = np.empty(N, dtype=float)

    minp = _check_minp(win, minp, N)

    for i from 0 <= i < N:
        val_x = x[i]
        val_y = y[i]
        # NaN is the only value that compares unequal to itself
        val_not_nan = val_x == val_x and val_y == val_y
        if i < win:
            # Window is still filling up: nothing leaves it yet
            prev_x = prev_y = NaN
            prev_not_nan = 0
        else:
            prev_x = x[i - win]
            prev_y = y[i - win]
            prev_not_nan = prev_x == prev_x and prev_y == prev_y

        # First, count the number of observations and consecutive repeats.
        # nrep_x / nrep_y count the trailing run of identical x / y values
        # (rep_x / rep_y); ndup counts the trailing run of pairs with x == y.
        # A run that is as long as nobs spans the whole window.
        if prev_not_nan:
            # prev is not NaN, removing an observation...
            if nobs == nrep_x:
                # ...and removing a repeat from x
                nrep_x -= 1
                if nrep_x == 0:
                    rep_x = NaN
            if nobs == nrep_y:
                # ...and removing a repeat from y
                nrep_y -= 1
                if nrep_y == 0:
                    rep_y = NaN
            if nobs == ndup:
                # ...and removing a duplicate
                ndup -= 1
            nobs -= 1
        if val_not_nan:
            # val is not NaN, adding an observation
            if val_x == rep_x:
                # ...and adding a repeat to x
                nrep_x += 1
            else:
                # ...and resetting x repeats
                nrep_x = 1
                rep_x = val_x
            if val_y == rep_y:
                # ...and adding a repeat to y
                nrep_y += 1
            else:
                # ...and resetting y repeats
                nrep_y = 1
                rep_y = val_y
            if val_x == val_y:
                # ...and adding a duplicate
                ndup += 1
            else:
                # ...and resetting duplicates
                ndup = 0
            nobs += 1

        # Then, compute the new mean, sums of squared differences from the
        # mean and sum of product of differences from the mean
        if nobs == nrep_x and nobs == nrep_y:
            # Both series are constant over the window (or it is empty):
            # every accumulator is known exactly, so reset rounding drift
            if nobs > 0:
                mean_x = rep_x
                mean_y = rep_y
            else:
                mean_x = mean_y = 0
            ssqdm_x = ssqdm_y = sproddm_xy = 0
        elif val_not_nan:
            # Adding one observation...
            if prev_not_nan:
                # ...and removing another
                delta_x = val_x - prev_x
                prev_x -= mean_x
                if nobs == nrep_x:
                    mean_x = rep_x
                    val_x = 0
                    ssqdm_x = 0
                else:
                    mean_x += delta_x / nobs
                    val_x -= mean_x
                    ssqdm_x += (val_x + prev_x) * delta_x
                delta_y = val_y - prev_y
                prev_y -= mean_y
                if nobs == nrep_y:
                    mean_y = rep_y
                    val_y = 0
                    ssqdm_y = 0
                else:
                    mean_y += delta_y / nobs
                    val_y -= mean_y
                    ssqdm_y += (val_y + prev_y) * delta_y
                if nobs == nrep_x or nobs == nrep_y:
                    # One series is constant over the window, so the sum of
                    # cross products is zero by definition
                    sproddm_xy = 0
                else:
                    sproddm_xy += (delta_x * (val_y + prev_y) +
                                   delta_y * (val_x + prev_x)) * 0.5
            else:
                # ...and not removing any
                if nobs == nrep_x:
                    delta_x = 0
                    mean_x = rep_x
                    ssqdm_x = 0
                else:
                    delta_x = val_x - mean_x
                    mean_x += delta_x / nobs
                    ssqdm_x += delta_x * (val_x - mean_x)
                if nobs == nrep_y:
                    delta_y = 0
                    mean_y = rep_y
                    ssqdm_y = 0
                else:
                    delta_y = val_y - mean_y
                    mean_y += delta_y / nobs
                    ssqdm_y += delta_y * (val_y - mean_y)
                if nobs == nrep_x or nobs == nrep_y:
                    sproddm_xy = 0
                else:
                    sproddm_xy += ((val_x - mean_x) * delta_y +
                                   (val_y - mean_y) * delta_x) * 0.5
        elif prev_not_nan:
            # Adding no new observation, but removing one
            if nobs == nrep_x:
                mean_x = rep_x
                ssqdm_x = 0
            else:
                delta_x = prev_x - mean_x
                mean_x -= delta_x / nobs
                ssqdm_x -= delta_x * (prev_x - mean_x)
            if nobs == nrep_y:
                mean_y = rep_y
                ssqdm_y = 0
            else:
                delta_y = prev_y - mean_y
                mean_y -= delta_y / nobs
                ssqdm_y -= delta_y * (prev_y - mean_y)
            if nobs == nrep_x or nobs == nrep_y:
                # The removed observation may have been the only one that
                # differed from the rest. Zeroing its delta and applying the
                # symmetric update would remove only half of its
                # contribution and leave a stale cross product for later
                # windows; the exact value here is zero.
                sproddm_xy = 0
            else:
                sproddm_xy -= ((prev_x - mean_x) * delta_y +
                               (prev_y - mean_y) * delta_x) * 0.5
        # Correlation is unchanged if no observation is added or removed

        # Finally, compute and write the rolling correlation to output.
        # nobs <= ddof is rejected for consistency with roll_cov / roll_var,
        # whose denominators (nobs - ddof) would be non-positive.
        if nobs < minp or nobs <= ddof or ssqdm_x <= 0 or ssqdm_y <= 0:
            output[i] = NaN
        elif nobs == ndup:
            # Every observed pair has x == y: correlation is exactly one
            output[i] = 1.0
        else:
            output[i] = sproddm_xy / sqrt(ssqdm_x * ssqdm_y)

    return output
1414
+
1415
+
1256
1416
# ----------------------------------------------------------------------
1257
1417
# Rolling covariance
1258
1418
@@ -1261,8 +1421,8 @@ def roll_cov(ndarray[double_t] x, ndarray[double_t] y, int win, int minp,
1261
1421
"""
1262
1422
Numerically stable implementation using a Welford-like method.
1263
1423
"""
1264
- cdef double val_x, val_y, prev_x, prev_y, delta_x, delta_y, temp_x, temp_y
1265
- cdef double mean_x = 0 , mean_y = 0 , proddm_xy = 0
1424
+ cdef double val_x, val_y, prev_x, prev_y, delta_x, delta_y
1425
+ cdef double mean_x = 0 , mean_y = 0 , sproddm_xy = 0
1266
1426
cdef Py_ssize_t nobs = 0 , i, N = len (x)
1267
1427
cdef bint val_not_nan, prev_not_nan
1268
1428
@@ -1294,16 +1454,17 @@ def roll_cov(ndarray[double_t] x, ndarray[double_t] y, int win, int minp,
1294
1454
prev_y -= mean_y
1295
1455
mean_y += delta_y / nobs
1296
1456
val_y -= mean_y
1297
- proddm_xy += (delta_x * (val_y + prev_y) + delta_y * (val_x + prev_x)) / 2
1457
+ sproddm_xy += (delta_x * (val_y + prev_y) +
1458
+ delta_y * (val_x + prev_x)) * 0.5
1298
1459
else :
1299
1460
# ...and not removing any
1300
1461
nobs += 1
1301
1462
delta_x = val_x - mean_x
1302
1463
mean_x += delta_x / nobs
1303
1464
delta_y = val_y - mean_y
1304
1465
mean_y += delta_y / nobs
1305
- proddm_xy += ((val_x - mean_x) * delta_y +
1306
- (val_y - mean_y) * delta_x) * 0.5
1466
+ sproddm_xy += ((val_x - mean_x) * delta_y +
1467
+ (val_y - mean_y) * delta_x) * 0.5
1307
1468
elif prev_not_nan:
1308
1469
# Adding no new observation, but removing one
1309
1470
nobs -= 1
@@ -1312,15 +1473,15 @@ def roll_cov(ndarray[double_t] x, ndarray[double_t] y, int win, int minp,
1312
1473
mean_x -= delta_x / nobs
1313
1474
delta_y = prev_y - mean_y
1314
1475
mean_y -= delta_y / nobs
1315
- proddm_xy -= ((prev_x - mean_x) * delta_y +
1316
- (prev_y - mean_y) * delta_x) * 0.5
1476
+ sproddm_xy -= ((prev_x - mean_x) * delta_y +
1477
+ (prev_y - mean_y) * delta_x) * 0.5
1317
1478
else :
1318
- mean_x = mean_y = proddm_xy = 0
1479
+ mean_x = mean_y = sproddm_xy = 0
1319
1480
# Covariance is unchanged if no observation is added or removed
1320
1481
1321
1482
# Finally, compute and write the rolling covariance to output
1322
1483
if nobs >= minp and nobs > ddof:
1323
- output[i] = proddm_xy / (nobs - ddof)
1484
+ output[i] = sproddm_xy / (nobs - ddof)
1324
1485
else :
1325
1486
output[i] = NaN
1326
1487
@@ -1334,7 +1495,7 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
1334
1495
"""
1335
1496
Numerically stable implementation using Welford's method.
1336
1497
"""
1337
- cdef double val, prev, mean_x = 0 , ssqdm_x = 0 , nobs = 0 , delta, out
1498
+ cdef double val, prev, mean_x = 0 , ssqdm_x = 0 , nobs = 0 , delta
1338
1499
cdef Py_ssize_t i
1339
1500
cdef Py_ssize_t N = len (input )
1340
1501
@@ -1350,7 +1511,7 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
1350
1511
# Adding one observation...
1351
1512
if prev == prev:
1352
1513
# ...and removing another
1353
- delta val - prev
1514
+ delta = val - prev
1354
1515
prev -= mean_x
1355
1516
mean_x += delta / nobs
1356
1517
val -= mean_x
@@ -1362,7 +1523,7 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
1362
1523
mean_x += delta / nobs
1363
1524
ssqdm_x += delta * (val - mean_x)
1364
1525
elif prev == prev:
1365
- # adding no new observation, but removing one
1526
+ # Adding no new observation, but removing one
1366
1527
nobs -= 1
1367
1528
if nobs:
1368
1529
delta = prev - mean_x
@@ -1372,13 +1533,14 @@ def roll_var(ndarray[double_t] input, int win, int minp, int ddof=1):
1372
1533
mean_x = ssqdm_x = 0
1373
1534
# Variance is unchanged if no observation is added or removed
1374
1535
1536
+ # Sum of squared differences from the mean must be non-negative
1537
+ ssqdm_x = 0 if ssqdm_x < 0 else ssqdm_x
1538
+
1539
+ # Finally, compute and write the rolling variance to output
1375
1540
if nobs >= minp and nobs > ddof:
1376
- out = ssqdm_x / (nobs - ddof)
1377
- out = 0 if out < 0 else out
1541
+ output[i] = ssqdm_x / (nobs - ddof)
1378
1542
else :
1379
- out = NaN
1380
-
1381
- output[i] = out
1543
+ output[i] = NaN
1382
1544
1383
1545
return output
1384
1546
0 commit comments