From 7bfbe54ad180b92b2c31c994c81bc0ef1e460c0b Mon Sep 17 00:00:00 2001
From: Joe Ranalli <jar339@psu.edu>
Date: Fri, 9 Apr 2021 09:08:11 -0400
Subject: [PATCH 1/3] Add a theoretical DWT test to ensure that the sum of
 wavelets equals the original signal.

(cherry picked from commit 942cf0f59cfc445cb90fd144a82c4245733023a5)
---
 pvlib/tests/test_scaling.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/pvlib/tests/test_scaling.py b/pvlib/tests/test_scaling.py
index 09c81ae3c7..3ae5abd5be 100644
--- a/pvlib/tests/test_scaling.py
+++ b/pvlib/tests/test_scaling.py
@@ -97,6 +97,14 @@ def test_compute_wavelet_series(clear_sky_index, time,
     assert_almost_equal(wavelet[0:3, 5000:5005], expect_wavelet)
 
 
+def test_compute_wavelet_dwttheory(clear_sky_index, time,
+                                   expect_tmscale, expect_wavelet):
+    # Confirm detail coeffs sum to original signal
+    csi_series = pd.Series(clear_sky_index, index=time)
+    wavelet, tmscale = scaling._compute_wavelet(csi_series)
+    assert_almost_equal(np.sum(wavelet, 0), csi_series)
+
+
 def test_compute_wavelet_series_numindex(clear_sky_index, time,
                                          expect_tmscale, expect_wavelet):
     dtindex = pd.to_datetime(time, unit='s')

From 2c62ad719d760a207b7ec8dabcdee39927224faa Mon Sep 17 00:00:00 2001
From: Joe Ranalli <jar339@psu.edu>
Date: Fri, 9 Apr 2021 14:20:09 -0400
Subject: [PATCH 2/3] Change test expected values to self-referential derived
 outputs following the revised algorithm. Note that there's no independent
 theoretical verification for these values.

(cherry picked from commit e86ad55b1d22da64486c454dcea628d149008e10)
---
 pvlib/tests/test_scaling.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/pvlib/tests/test_scaling.py b/pvlib/tests/test_scaling.py
index 3ae5abd5be..1bf9ee8031 100644
--- a/pvlib/tests/test_scaling.py
+++ b/pvlib/tests/test_scaling.py
@@ -48,21 +48,21 @@ def positions():
 @pytest.fixture
 def expect_tmscale():
     # Expected timescales for dt = 1
-    return [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096]
+    return [2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 4096]
 
 
 @pytest.fixture
 def expect_wavelet():
-    # Expected wavelet for indices 5000:5004 for clear_sky_index above (Matlab)
-    return np.array([[-0.025, 0.05, 0., -0.05, 0.025],
-                     [0.025, 0., 0., 0., -0.025],
-                     [0., 0., 0., 0., 0.]])
+    # Expected wavelet for indices 5000:5004 for clear_sky_index above (Manual)
+    return np.array([[0., 0., 0.05, -0.1, 0.05],
+                     [0., -0.025, 0.05, 0., -0.05],
+                     [0., 0.025, 0., 0., 0.]])
 
 
 @pytest.fixture
 def expect_cs_smooth():
     # Expected smoothed clear sky index for indices 5000:5004 (Matlab)
-    return np.array([1., 1.0289, 1., 0.9711, 1.])
+    return np.array([1., 1., 1.0577, 0.9423, 1.])
 
 
 def test_latlon_to_xy_zero():

From 1fa4a8061af39a48f17c2890910253b02cfcc8ad Mon Sep 17 00:00:00 2001
From: Joe Ranalli <jar339@psu.edu>
Date: Fri, 9 Apr 2021 14:20:41 -0400
Subject: [PATCH 3/3] Updates to the _compute_wavelet algorithm to make the
 coefficients match their intended levels.

(cherry picked from commit 01fe853863cbb5fbb6fdadc507a5257668ef59dc)
---
 pvlib/scaling.py | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/pvlib/scaling.py b/pvlib/scaling.py
index b6a6caaf66..b05424e8f2 100644
--- a/pvlib/scaling.py
+++ b/pvlib/scaling.py
@@ -159,7 +159,12 @@ def latlon_to_xy(coordinates):
 
 def _compute_wavelet(clearsky_index, dt=None):
     """
-    Compute the wavelet transform on the input clear_sky time series.
+    Compute the wavelet transform on the input clear_sky time series. Uses a
+    Haar wavelet based on moving average. Output resembles a Stationary Wavelet
+    Transform (SWT), but has shifted time positions on the averages relative to
+    output from other wavelet codes. Returns one level of approximation
+    coefficient (CAn) and n levels of detail coefficients (CD1, CD2, ...,
+    CDn-1, CDn). Maximum scale is 4096 s (12 levels at a signal dt of 1s).
 
     Parameters
     ----------
@@ -174,7 +179,8 @@ def _compute_wavelet(clearsky_index, dt=None):
     Returns
     -------
     wavelet: numeric
-        The individual wavelets for the time series
+        The individual wavelets for the time series. Format follows increasing
+        scale (decreasing frequency): [CD1, CD2, ..., CDn, CAn]
 
     tmscales: numeric
         The timescales associated with the wavelets in seconds [s]
@@ -211,29 +217,31 @@ def _compute_wavelet(clearsky_index, dt=None):
     min_tmscale = np.ceil(np.log(dt)/np.log(2))  # Minimum wavelet timescale
     max_tmscale = int(12 - min_tmscale)  # maximum wavelet timescale
 
-    tmscales = np.zeros(max_tmscale)
-    csi_mean = np.zeros([max_tmscale, len(cs_long)])
-    # Loop for all time scales we will consider
-    for i in np.arange(0, max_tmscale):
-        j = i+1
-        tmscales[i] = 2**j * dt  # Wavelet integration time scale
-        intvlen = 2**j  # Wavelet integration time series interval
+    # Compute approximation coefficients via moving average (Haar wavelet)
+    csi_mean = np.zeros([max_tmscale+1, len(cs_long)])
+    csi_mean[0, :] = cs_long.values.flatten()  # CA0, the original signal
+    for i in np.arange(1, max_tmscale+1):
+        intvlen = 2**i  # Wavelet integration time series interval
         # Rolling average, retains only lower frequencies than interval
         df = cs_long.rolling(window=intvlen, center=True, min_periods=1).mean()
         # Fill nan's in both directions
         df = df.fillna(method='bfill').fillna(method='ffill')
         # Pop values back out of the dataframe and store
-        csi_mean[i, :] = df.values.flatten()
+        csi_mean[i, :] = df.values.flatten()  # Approximation Coeffs CA0,CA1,...
 
-    # Calculate the wavelets by isolating the rolling mean frequency ranges
+    # Calculate detail coeffs by difference between approximation levels
     wavelet_long = np.zeros(csi_mean.shape)
-    for i in np.arange(0, max_tmscale-1):
+    tmscales = np.zeros(max_tmscale+1)
+    for i in np.arange(0, max_tmscale):
+        # Compute detail coeffs, beginning with CD1 through CDn
         wavelet_long[i, :] = csi_mean[i, :] - csi_mean[i+1, :]
-    wavelet_long[max_tmscale-1, :] = csi_mean[max_tmscale-1, :]  # Lowest freq
+        tmscales[i] = 2**(i+1) * dt  # Wavelet integration time scale
+    wavelet_long[max_tmscale, :] = csi_mean[max_tmscale, :]  # Lowest freq (CAn)
+    tmscales[max_tmscale] = tmscales[max_tmscale-1]  # CAn and CDn share scale
 
     # Clip off the padding and just return the original time window
-    wavelet = np.zeros([max_tmscale, len(vals)])
-    for i in np.arange(0, max_tmscale):
-        wavelet[i, :] = wavelet_long[i, len(vals)+1: 2*len(vals)+1]
+    wavelet = np.zeros([max_tmscale+1, len(vals)])
+    for i in np.arange(0, max_tmscale+1):
+        wavelet[i, :] = wavelet_long[i, len(vals): 2*len(vals)]
 
     return wavelet, tmscales