Implemented waveform alignment algorithm.

kblaschke · kblaschke · commit 36fb09ce3091 · 2023-11-24T21:00:40.000+01:00
Also consolidated the waveform sample count constant to keep it aligned over all classes.
diff --git a/src/libprojectM/Audio/AudioConstants.hpp b/src/libprojectM/Audio/AudioConstants.hpp
@@ -1,10 +1,16 @@
 #pragma once
 
+#include <array>
+
 namespace libprojectM {
 namespace Audio {
 
-static constexpr int WaveformSamples = 576; //!< Number of waveform data samples available for rendering a frame.
-static constexpr int SpectrumSamples = 512; //!< Number of spectrum analyzer samples.
+static constexpr int AudioBufferSamples = 576; //!< Number of waveform data samples stored in the buffer for analysis.
+static constexpr int WaveformSamples = 480;    //!< Number of waveform data samples available for rendering a frame.
+static constexpr int SpectrumSamples = 512;    //!< Number of spectrum analyzer samples.
+
+using WaveformBuffer = std::array<float, AudioBufferSamples>; //!< Buffer with waveform data. Only the first WaveformSamples number of samples are valid.
+using SpectrumBuffer = std::array<float, SpectrumSamples>;    //!< Buffer with spectrum data.
 
 } // namespace Audio
 } // namespace libprojectM
diff --git a/src/libprojectM/Audio/CMakeLists.txt b/src/libprojectM/Audio/CMakeLists.txt
@@ -9,6 +9,8 @@ add_library(Audio OBJECT
         PCM.hpp
         Loudness.cpp
         Loudness.hpp
+        WaveformAligner.cpp
+        WaveformAligner.hpp
         )
 
 target_include_directories(Audio
diff --git a/src/libprojectM/Audio/PCM.cpp b/src/libprojectM/Audio/PCM.cpp
@@ -21,7 +21,7 @@ void PCM::AddToBuffer(
 
     for (size_t i = 0; i < sampleCount; i++)
     {
-        size_t const bufferOffset = (m_start + i) % WaveformSamples;
+        size_t const bufferOffset = (m_start + i) % AudioBufferSamples;
         m_inputBufferL[bufferOffset] = 128.0f * (static_cast<float>(samples[0 + i * channels]) - float(signalOffset)) / float(signalAmplitude);
         if (channels > 1)
         {
@@ -32,7 +32,7 @@ void PCM::AddToBuffer(
             m_inputBufferR[bufferOffset] = m_inputBufferL[bufferOffset];
         }
     }
-    m_start = (m_start + sampleCount) % WaveformSamples;
+    m_start = (m_start + sampleCount) % AudioBufferSamples;
 }
 
 void PCM::Add(float const* const samples, uint32_t channels, size_t const count)
@@ -53,16 +53,19 @@ void PCM::UpdateFrameAudioData(double secondsSinceLastFrame, uint32_t frame)
     // 1. Copy audio data from input buffer
     CopyNewWaveformData();
 
-    // 2. Align waveforms
-
-    // 3. Update spectrum analyzer data for both channels
+    // 2. Update spectrum analyzer data for both channels
     UpdateFftChannel(0);
     UpdateFftChannel(1);
 
+    // 3. Align waveforms
+    m_alignL.Align(m_waveformL);
+    m_alignR.Align(m_waveformR);
+
     // 4. Update beat detection values
     m_bass.Update(m_spectrumL, secondsSinceLastFrame, frame);
     m_middles.Update(m_spectrumL, secondsSinceLastFrame, frame);
     m_treble.Update(m_spectrumL, secondsSinceLastFrame, frame);
+
 }
 
 auto PCM::GetFrameAudioData() const -> FrameAudioData
@@ -92,14 +95,14 @@ void PCM::UpdateFftChannel(size_t const channel)
 {
     assert(channel == 0 || channel == 1);
 
-    std::vector<float> waveformSamples(WaveformSamples);
+    std::vector<float> waveformSamples(AudioBufferSamples);
     std::vector<float> spectrumValues;
 
     auto const& from = channel == 0 ? m_waveformL : m_waveformR;
     auto& spectrum = channel == 0 ? m_spectrumL : m_spectrumR;
 
     size_t oldI{0};
-    for (size_t i = 0; i < WaveformSamples; i++)
+    for (size_t i = 0; i < AudioBufferSamples; i++)
     {
         // Damp the input into the FFT a bit, to reduce high-frequency noise:
         waveformSamples[i] = 0.5f * (from[i] + from[oldI]);
@@ -114,11 +117,11 @@ void PCM::UpdateFftChannel(size_t const channel)
 void PCM::CopyNewWaveformData()
 {
     const auto& copyChannel =
-        [](size_t start, const std::array<float, WaveformSamples>& inputSamples, std::array<float, WaveformSamples>& outputSamples)
+        [](size_t start, const std::array<float, AudioBufferSamples>& inputSamples, std::array<float, AudioBufferSamples>& outputSamples)
     {
-        for (size_t i = 0; i < WaveformSamples; i++)
+        for (size_t i = 0; i < AudioBufferSamples; i++)
         {
-            outputSamples[i] = inputSamples[(start + i) % WaveformSamples];
+            outputSamples[i] = inputSamples[(start + i) % AudioBufferSamples];
         }
     };
 
@@ -127,5 +130,6 @@ void PCM::CopyNewWaveformData()
     copyChannel(bufferStartIndex, m_inputBufferR, m_waveformR);
 }
 
+
 } // namespace Audio
 } // namespace libprojectM
diff --git a/src/libprojectM/Audio/PCM.hpp b/src/libprojectM/Audio/PCM.hpp
@@ -8,14 +8,13 @@
 #pragma once
 
 #include "AudioConstants.hpp"
-
 #include "FrameAudioData.hpp"
 #include "Loudness.hpp"
 #include "MilkdropFFT.hpp"
+#include "WaveformAligner.hpp"
 
 #include <projectM-4/projectM_export.h>
 
-#include <array>
 #include <atomic>
 #include <cstdint>
 #include <cstdlib>
@@ -89,20 +88,24 @@ class PROJECTM_EXPORT PCM
     void CopyNewWaveformData();
 
     // External input buffer
-    std::array<float, WaveformSamples> m_inputBufferL{0.f}; //!< Circular buffer for left-channel PCM data.
-    std::array<float, WaveformSamples> m_inputBufferR{0.f}; //!< Circular buffer for right-channel PCM data.
-    std::atomic<size_t> m_start{0};                         //!< Circular buffer start index.
+    WaveformBuffer m_inputBufferL{0.f}; //!< Circular buffer for left-channel PCM data.
+    WaveformBuffer m_inputBufferR{0.f}; //!< Circular buffer for right-channel PCM data.
+    std::atomic<size_t> m_start{0};     //!< Circular buffer start index.
 
     // Frame waveform data
-    std::array<float, WaveformSamples> m_waveformL{0.f}; //!< Left-channel waveform data, aligned.
-    std::array<float, WaveformSamples> m_waveformR{0.f}; //!< Right-channel waveform data, aligned.
+    WaveformBuffer m_waveformL{0.f}; //!< Left-channel waveform data, aligned. Only the first WaveformSamples number of samples are valid.
+    WaveformBuffer m_waveformR{0.f}; //!< Right-channel waveform data, aligned. Only the first WaveformSamples number of samples are valid.
 
     // Frame spectrum data
-    std::array<float, SpectrumSamples> m_spectrumL{0.f}; //!< Left-channel spectrum data.
-    std::array<float, SpectrumSamples> m_spectrumR{0.f}; //!< Right-channel spectrum data.
+    SpectrumBuffer m_spectrumL{0.f}; //!< Left-channel spectrum data.
+    SpectrumBuffer m_spectrumR{0.f}; //!< Right-channel spectrum data.
 
     MilkdropFFT m_fft{WaveformSamples, SpectrumSamples, true}; //!< Spectrum analyzer instance.
 
+    // Alignment data
+    WaveformAligner m_alignL; //!< Left-channel waveform alignment.
+    WaveformAligner m_alignR; //!< Right-channel waveform alignment.
+
     // Frame beat detection values
     Loudness m_bass{Loudness::Band::Bass};       //!< Beat detection/volume for the "bass" band.
     Loudness m_middles{Loudness::Band::Middles}; //!< Beat detection/volume for the "middles" band.
diff --git a/src/libprojectM/Audio/WaveformAligner.cpp b/src/libprojectM/Audio/WaveformAligner.cpp
@@ -0,0 +1,177 @@
+#include "WaveformAligner.hpp"
+
+#include <cmath>
+#include <iterator>
+
+namespace libprojectM {
+namespace Audio {
+
+/**
+ * Derives the number of mip levels (octaves) from the headroom between the audio buffer size and
+ * the rendered waveform size, then sizes all per-octave tables accordingly.
+ */
+WaveformAligner::WaveformAligner()
+{
+    // Octave count is floor(log2(headroom)), capped at an upper limit.
+    static const size_t octaveLimit{10};
+    static const size_t availableOctaves{static_cast<size_t>(std::floor(std::log(AudioBufferSamples - WaveformSamples) / std::log(2.0f)))};
+    m_octaves = availableOctaves;
+    if (m_octaves > octaveLimit)
+    {
+        m_octaves = octaveLimit;
+    }
+
+    // One entry per octave in every table.
+    m_octaveSamples.resize(m_octaves);
+    m_octaveSampleSpacing.resize(m_octaves);
+    m_aligmentWeights.resize(m_octaves);
+    m_firstNonzeroWeights.resize(m_octaves);
+    m_lastNonzeroWeights.resize(m_octaves);
+    m_oldWaveformMips.resize(m_octaves);
+
+    // Level 0 covers the full buffer; each following level halves both the sample count and the spacing.
+    m_octaveSamples[0] = AudioBufferSamples;
+    m_octaveSampleSpacing[0] = AudioBufferSamples - WaveformSamples;
+    for (size_t level = 1; level < m_octaves; ++level)
+    {
+        size_t const parent = level - 1;
+        m_octaveSamples[level] = m_octaveSamples[parent] / 2;
+        m_octaveSampleSpacing[level] = m_octaveSampleSpacing[parent] / 2;
+    }
+}
+
+/**
+ * Shifts the new waveform so that it best matches the waveform returned for the previous frame.
+ *
+ * The best offset is searched coarse-to-fine over a mip chain of the new waveform: the coarsest
+ * octave tests all of its offsets, each finer octave only tests a few offsets around the best match
+ * of the octave above it. For every tested offset, the weighted absolute difference between the
+ * shifted new waveform and the previous (already aligned) waveform is summed up, and the offset
+ * with the lowest sum wins.
+ *
+ * Does nothing if fewer than four octaves are available.
+ *
+ * @param[in,out] newWaveform The new waveform to be aligned. If a positive offset is found, the
+ *                            samples are moved to the front and the tail behind the first
+ *                            WaveformSamples samples is set to zero.
+ */
+void WaveformAligner::Align(WaveformBuffer& newWaveform)
+{
+    if (m_octaves < 4)
+    {
+        return;
+    }
+
+    // Fills "mips" with the mip chain of "waveform": level 0 is the waveform itself, each further
+    // level averages adjacent sample pairs of the level before it.
+    auto const buildMips = [this](WaveformBuffer const& waveform, std::vector<WaveformBuffer>& mips) {
+        mips.resize(m_octaves);
+        mips[0] = waveform;
+        for (size_t octave = 1; octave < m_octaves; octave++)
+        {
+            for (size_t sample = 0; sample < m_octaveSamples[octave]; sample++)
+            {
+                mips[octave][sample] = 0.5f * (mips[octave - 1][sample * 2] + mips[octave - 1][sample * 2 + 1]);
+            }
+        }
+    };
+
+    int alignOffset{};
+
+    std::vector<WaveformBuffer> newWaveformMips;
+    buildMips(newWaveform, newWaveformMips);
+
+    // The weights only depend on the octave layout, so they are calculated once on the first call.
+    if (!m_alignWaveReady)
+    {
+        m_alignWaveReady = true;
+        for (size_t octave = 0; octave < m_octaves; octave++)
+        {
+            // For example:
+            //  m_octaveSampleSpacing[octave] == 4
+            //  m_octaveSamples[octave] == 36
+            //  (so we test 32 samples, w/4 offsets)
+            size_t const compareSamples = m_octaveSamples[octave] - m_octaveSampleSpacing[octave];
+
+            // The non-zero range is tracked while the weights are written, so no weight is read
+            // outside [0, compareSamples). It stays empty (first > last) if every weight is zero.
+            size_t firstNonzero{1};
+            size_t lastNonzero{0};
+            bool foundNonzero{false};
+
+            for (size_t sample = 0; sample < compareSamples; sample++)
+            {
+                auto& weight = m_aligmentWeights[octave][sample];
+
+                // Start with pyramid-shaped PDF, from 0..1..0
+                if (sample < compareSamples / 2)
+                {
+                    weight = static_cast<float>(sample * 2) / static_cast<float>(compareSamples);
+                }
+                else
+                {
+                    weight = static_cast<float>((compareSamples - 1 - sample) * 2) / static_cast<float>(compareSamples);
+                }
+
+                // TWEAK how much the center matters, vs. the edges:
+                weight = (weight - 0.8f) * 5.0f + 0.8f;
+
+                // Clip
+                if (weight > 1.0f)
+                {
+                    weight = 1.0f;
+                }
+                if (weight < 0.0f)
+                {
+                    weight = 0.0f;
+                }
+
+                if (weight > 0.0f)
+                {
+                    if (!foundNonzero)
+                    {
+                        firstNonzero = sample;
+                        foundNonzero = true;
+                    }
+                    lastNonzero = sample;
+                }
+            }
+
+            m_firstNonzeroWeights[octave] = firstNonzero;
+            m_lastNonzeroWeights[octave] = lastNonzero;
+        }
+    }
+
+    // Offset search range [sample1, sample2) for the current octave. The coarsest octave tests all of its offsets.
+    int sample1{};
+    int sample2{static_cast<int>(m_octaveSampleSpacing[m_octaves - 1])};
+
+    // Find best match for alignment
+    for (int octave = static_cast<int>(m_octaves) - 1; octave >= 0; octave--)
+    {
+        int lowestErrorOffset{-1};
+        float lowestErrorAmount{};
+
+        for (int sample = sample1; sample < sample2; sample++)
+        {
+            float errorSum{};
+
+            // Only the new waveform is shifted by the tested offset. The previous waveform is the
+            // fixed reference and is always read from its start.
+            for (size_t i = m_firstNonzeroWeights[octave]; i <= m_lastNonzeroWeights[octave]; i++)
+            {
+                errorSum += std::abs((newWaveformMips[octave][i + static_cast<size_t>(sample)] - m_oldWaveformMips[octave][i]) * m_aligmentWeights[octave][i]);
+            }
+
+            if (lowestErrorOffset == -1 || errorSum < lowestErrorAmount)
+            {
+                lowestErrorOffset = sample;
+                lowestErrorAmount = errorSum;
+            }
+        }
+
+        // Now use 'lowestErrorOffset' to guide bounds of search in next octave:
+        //  m_octaveSampleSpacing[octave] == 8
+        //  m_octaveSamples[octave] == 72
+        //     -say 'lowestErrorOffset' was 2
+        //     -that corresponds to samples 4 & 5 of the next octave
+        //     -also, expand about this by 2 samples?  YES.
+        //  (so we'd test 64 samples, w/8->4 offsets)
+        if (octave > 0)
+        {
+            sample1 = lowestErrorOffset * 2 - 1;
+            sample2 = lowestErrorOffset * 2 + 2 + 1;
+            if (sample1 < 0)
+            {
+                sample1 = 0;
+            }
+            if (sample2 > static_cast<int>(m_octaveSampleSpacing[octave - 1]))
+            {
+                sample2 = static_cast<int>(m_octaveSampleSpacing[octave - 1]);
+            }
+        }
+        else
+        {
+            alignOffset = lowestErrorOffset;
+        }
+    }
+
+    // Finally, apply the results by scooting the aligned samples so that they start at index 0.
+    if (alignOffset > 0)
+    {
+        auto const offset = static_cast<size_t>(alignOffset);
+        for (size_t sample = 0; sample < static_cast<size_t>(WaveformSamples); sample++)
+        {
+            newWaveform[sample] = newWaveform[sample + offset];
+        }
+
+        // Set remaining samples to zero.
+        std::fill_n(newWaveform.begin() + WaveformSamples, AudioBufferSamples - WaveformSamples, 0.0f);
+    }
+
+    // Store the mip levels of the aligned waveform as the reference for the next frame, so the next
+    // search matches against what was actually returned for this frame.
+    buildMips(newWaveform, m_oldWaveformMips);
+}
+
+
+} // namespace Audio
+} // namespace libprojectM
diff --git a/src/libprojectM/Audio/WaveformAligner.hpp b/src/libprojectM/Audio/WaveformAligner.hpp
@@ -0,0 +1,55 @@
+/**
+ * @file WaveformAligner.hpp
+ * @brief Mip-based waveform alignment algorithm
+ *
+ * Calculates the absolute error between the previous and current waveforms over several octaves
+ * and sample offsets, then shifts the new waveform forward to best align with the previous frame.
+ * This keeps similar features in place instead of randomly jumping around on each frame, which makes
+ * for a smoother-looking waveform visualization.
+ */
+
+#pragma once
+
+#include "AudioConstants.hpp"
+
+#include <cstddef>
+#include <vector>
+
+namespace libprojectM {
+namespace Audio {
+
+/**
+ * @brief Mip-based waveform alignment algorithm
+ *
+ * Calculates the absolute error between the previous and current waveforms over several octaves
+ * and sample offsets, then shifts the new waveform forward to best align with the previous frame.
+ * This keeps similar features in place instead of randomly jumping around on each frame, which makes
+ * for a smoother-looking waveform visualization.
+ */
+class WaveformAligner
+{
+public:
+    WaveformAligner();
+
+    /**
+     * @brief Aligns waveforms to a best-fit match to the previous frame.
+     * @param[in,out] newWaveform The new waveform to be aligned.
+     */
+    void Align(WaveformBuffer& newWaveform);
+
+private:
+    bool m_alignWaveReady{false}; //!< Set once Align() has calculated the alignment weights.
+
+    std::vector<std::array<float, AudioBufferSamples>> m_aligmentWeights; //!< Per-octave sample weights applied to the alignment error sum.
+
+    size_t m_octaves{};                        //!< Number of mip-levels/octaves.
+    std::vector<size_t> m_octaveSamples;       //!< Samples per octave.
+    std::vector<size_t> m_octaveSampleSpacing; //!< Number of candidate alignment offsets per octave (buffer headroom, halved for each octave).
+
+    std::vector<WaveformBuffer> m_oldWaveformMips; //!< Mip levels of the previous frame's waveform.
+    std::vector<size_t> m_firstNonzeroWeights;     //!< First non-zero weight sample index for each octave.
+    std::vector<size_t> m_lastNonzeroWeights;      //!< Last non-zero weight sample index for each octave.
+};
+
+} // namespace Audio
+} // namespace libprojectM
diff --git a/src/libprojectM/MilkdropPreset/Constants.hpp b/src/libprojectM/MilkdropPreset/Constants.hpp
@@ -12,5 +12,4 @@ static constexpr int TVarCount = 8; //!< Number of T variables available.
 static constexpr int CustomWaveformCount = 4; //!< Number of custom waveforms (expression-driven) which can be used in a preset.
 static constexpr int CustomShapeCount = 4; //!< Number of custom shapes (expression-driven) which can be used in a preset.
 
-static constexpr int RenderWaveformSamples = 480; //!< Number of custom waveform data samples available for rendering a frame.
 static constexpr int WaveformMaxPoints = 512; //!< Maximum number of waveform points.
diff --git a/src/libprojectM/MilkdropPreset/CustomWaveform.cpp b/src/libprojectM/MilkdropPreset/CustomWaveform.cpp
diff --git a/src/libprojectM/MilkdropPreset/Waveform.cpp b/src/libprojectM/MilkdropPreset/Waveform.cpp
diff --git a/tests/libprojectM/PCMTest.cpp b/tests/libprojectM/PCMTest.cpp

Original file line number	Diff line number	Diff line change
`@@ -9,6 +9,8 @@ add_library(Audio OBJECT`
`9`	`9`	`PCM.hpp`
`10`	`10`	`Loudness.cpp`
`11`	`11`	`Loudness.hpp`
	`12`	`+ WaveformAligner.cpp`
	`13`	`+ WaveformAligner.hpp`
`12`	`14`	`)`
`13`	`15`
`14`	`16`	`target_include_directories(Audio`
Original file line number	Diff line number	Diff line change
`@@ -21,7 +21,7 @@ void PCM::AddToBuffer(`
`21`	`21`
`22`	`22`	`for (size_t i = 0; i < sampleCount; i++)`
`23`	`23`	`{`
`24`		`- size_t const bufferOffset = (m_start + i) % WaveformSamples;`
	`24`	`+ size_t const bufferOffset = (m_start + i) % AudioBufferSamples;`
`25`	`25`	`m_inputBufferL[bufferOffset] = 128.0f * (static_cast<float>(samples[0 + i * channels]) - float(signalOffset)) / float(signalAmplitude);`
`26`	`26`	`if (channels > 1)`
`27`	`27`	`{`
`@@ -32,7 +32,7 @@ void PCM::AddToBuffer(`
`32`	`32`	`m_inputBufferR[bufferOffset] = m_inputBufferL[bufferOffset];`
`33`	`33`	`}`
`34`	`34`	`}`
`35`		`- m_start = (m_start + sampleCount) % WaveformSamples;`
	`35`	`+ m_start = (m_start + sampleCount) % AudioBufferSamples;`
`36`	`36`	`}`
`37`	`37`
`38`	`38`	`void PCM::Add(float const* const samples, uint32_t channels, size_t const count)`
`@@ -53,16 +53,19 @@ void PCM::UpdateFrameAudioData(double secondsSinceLastFrame, uint32_t frame)`
`53`	`53`	`// 1. Copy audio data from input buffer`
`54`	`54`	`CopyNewWaveformData();`
`55`	`55`
`56`		`- // 2. Align waveforms`
`57`		`-`
`58`		`- // 3. Update spectrum analyzer data for both channels`
	`56`	`+ // 2. Update spectrum analyzer data for both channels`
`59`	`57`	`UpdateFftChannel(0);`
`60`	`58`	`UpdateFftChannel(1);`
`61`	`59`
	`60`	`+ // 3. Align waveforms`
	`61`	`+ m_alignL.Align(m_waveformL);`
	`62`	`+ m_alignR.Align(m_waveformR);`
	`63`	`+`
`62`	`64`	`// 4. Update beat detection values`
`63`	`65`	`m_bass.Update(m_spectrumL, secondsSinceLastFrame, frame);`
`64`	`66`	`m_middles.Update(m_spectrumL, secondsSinceLastFrame, frame);`
`65`	`67`	`m_treble.Update(m_spectrumL, secondsSinceLastFrame, frame);`
	`68`	`+`
`66`	`69`	`}`
`67`	`70`
`68`	`71`	`auto PCM::GetFrameAudioData() const -> FrameAudioData`
`@@ -92,14 +95,14 @@ void PCM::UpdateFftChannel(size_t const channel)`
`92`	`95`	`{`
`93`	`96`	`assert(channel == 0 \|\| channel == 1);`
`94`	`97`
`95`		`- std::vector<float> waveformSamples(WaveformSamples);`
	`98`	`+ std::vector<float> waveformSamples(AudioBufferSamples);`
`96`	`99`	`std::vector<float> spectrumValues;`
`97`	`100`
`98`	`101`	`auto const& from = channel == 0 ? m_waveformL : m_waveformR;`
`99`	`102`	`auto& spectrum = channel == 0 ? m_spectrumL : m_spectrumR;`
`100`	`103`
`101`	`104`	`size_t oldI{0};`
`102`		`- for (size_t i = 0; i < WaveformSamples; i++)`
	`105`	`+ for (size_t i = 0; i < AudioBufferSamples; i++)`
`103`	`106`	`{`
`104`	`107`	`// Damp the input into the FFT a bit, to reduce high-frequency noise:`
`105`	`108`	`waveformSamples[i] = 0.5f * (from[i] + from[oldI]);`
`@@ -114,11 +117,11 @@ void PCM::UpdateFftChannel(size_t const channel)`
`114`	`117`	`void PCM::CopyNewWaveformData()`
`115`	`118`	`{`
`116`	`119`	`const auto& copyChannel =`
`117`		`- [](size_t start, const std::array<float, WaveformSamples>& inputSamples, std::array<float, WaveformSamples>& outputSamples)`
	`120`	`+ [](size_t start, const std::array<float, AudioBufferSamples>& inputSamples, std::array<float, AudioBufferSamples>& outputSamples)`
`118`	`121`	`{`
`119`		`- for (size_t i = 0; i < WaveformSamples; i++)`
	`122`	`+ for (size_t i = 0; i < AudioBufferSamples; i++)`
`120`	`123`	`{`
`121`		`- outputSamples[i] = inputSamples[(start + i) % WaveformSamples];`
	`124`	`+ outputSamples[i] = inputSamples[(start + i) % AudioBufferSamples];`
`122`	`125`	`}`
`123`	`126`	`};`
`124`	`127`
`@@ -127,5 +130,6 @@ void PCM::CopyNewWaveformData()`
`127`	`130`	`copyChannel(bufferStartIndex, m_inputBufferR, m_waveformR);`
`128`	`131`	`}`
`129`	`132`
	`133`	`+`
`130`	`134`	`} // namespace Audio`
`131`	`135`	`} // namespace libprojectM`