added spectral_coverage and filter_width_factor

DrMarc · DrMarc · commit cc885927303c · 2025-04-06T10:44:17.000+02:00
diff --git a/docs/sounds.rst b/docs/sounds.rst
@@ -215,10 +215,12 @@ existing matplotlib.pyplot axis supplied with the :attr:`axis` argument.
 .. _spectral_features:
 
 You can also extract common features from sounds, such as the :meth:`.crest_factor` (a measure of how 'peaky'
-the waveform is), or the average :meth:`.onset_slope` (a measure of how fast the on-ramps in the sound are---important
-for sound localization). Features of the spectral content are bundled in the :meth:`.spectral_feature` method.
-It can compute spectral centroid, flux, flatness, and rolloff, either for an entire sound (suitable for stationary
-sounds), or for successive time windows (frames, suitable for time-varying sounds).
+the waveform is), the average :meth:`.onset_slope` (a measure of how fast the on-ramps in the sound are---important
+for sound localization), or the :meth:`.spectral_coverage` (the fraction of the spectrogram containing energy as a measure of the masking ability of a sound).
+
+Features of the spectral content are bundled in the :meth:`.spectral_feature` method. It can compute spectral
+centroid, flux, flatness, and rolloff, either for an entire sound (suitable for stationary sounds), or for
+successive time windows (frames, suitable for time-varying sounds).
 * The centroid is a measure of the center of mass of a spectrum (i.e. the 'center' frequency).
 * The flux measures how quickly the power spectrum is changing by comparing the power spectrum for one frame against the
 power spectrum from the previous frame; flatness measures how tone-like a sound is, as opposed to being noise-like, and
diff --git a/slab/filter.py b/slab/filter.py
@@ -296,8 +296,7 @@ def tf(self, channels='all', n_bins=None, show=True, axis=None):
             return w, h
 
     @staticmethod
-    # TODO: oversampling factor needed for cochleagram!
-    def cos_filterbank(length=5000, bandwidth=1/3, low_cutoff=0, high_cutoff=None, pass_bands=False, n_filters=None, samplerate=None):
+    def cos_filterbank(length=5000, bandwidth=1/3, low_cutoff=0, high_cutoff=None, pass_bands=False, n_filters=None, filter_width_factor=1, samplerate=None):
         """
         Generate a set of Fourier filters. Each filter's transfer function is given by the positive phase of a
         cosine wave. The amplitude of the cosine is that filters central frequency. Following the organization of the
@@ -314,8 +313,9 @@ def cos_filterbank(length=5000, bandwidth=1/3, low_cutoff=0, high_cutoff=None, p
             pass_bands (bool): Whether to include a half cosine at the filter bank's lower and upper edge frequency.
                 If True, allows reconstruction of original bandwidth when collapsing subbands.
             n_filters (int | None): Number of filters. When this is not None, the `bandwidth` argument is ignored.
+            filter_width_factor (float): Multiplier for the width of the filters. Default is 1; use smaller values to make the filter coverage sparser (undersampled) and larger values to make it denser (oversampled).
             samplerate (int | None): the samplerate of the sound that the filter shall be applied to.
-                If None, use the default samplerate.s
+                If None, use the default samplerate.
         Examples::
 
             sig = slab.Sound.pinknoise(samplerate=44100)
@@ -335,14 +335,15 @@ def cos_filterbank(length=5000, bandwidth=1/3, low_cutoff=0, high_cutoff=None, p
         n_freqs = len(freq_bins)
         center_freqs, bandwidth, erb_spacing = Filter._center_freqs(
             low_cutoff=low_cutoff, high_cutoff=high_cutoff, bandwidth=bandwidth, pass_bands=pass_bands, n_filters=n_filters)
+        erb_spacing = erb_spacing * filter_width_factor
         n_filters = len(center_freqs)
         filts = numpy.zeros((n_freqs, n_filters))
         freqs_erb = Filter._freq2erb(freq_bins)
         for i in range(n_filters):
             l = center_freqs[i] - erb_spacing
             h = center_freqs[i] + erb_spacing
             avg = center_freqs[i]  # center of filter
-            width = erb_spacing * 2  # width of filter
+            width = erb_spacing * 2 # width of filter
             filts[(freqs_erb > l) & (freqs_erb < h), i] = numpy.cos(
                 (freqs_erb[(freqs_erb > l) & (freqs_erb < h)] - avg) / width * numpy.pi)
         return Filter(data=filts, samplerate=samplerate, fir='TF')
diff --git a/slab/sound.py b/slab/sound.py
@@ -1180,7 +1180,7 @@ def spectrogram(self, window_dur=0.005, dyn_range=120, upper_frequency=None, oth
         else:
             return freqs, times, power
 
-    def cochleagram(self, bandwidth=1 / 5, n_bands=None, show=True, axis=None):
+    def cochleagram(self, bandwidth=1/5, n_bands=None, filter_width_factor=1, show=True, axis=None):
         """
         Computes a cochleagram of the sound by filtering with a bank of cosine-shaped filters
         and applying a cube-root compression to the resulting envelopes. The number of bands
@@ -1191,16 +1191,19 @@ def cochleagram(self, bandwidth=1 / 5, n_bands=None, show=True, axis=None):
             bandwidth (float): filter bandwidth in octaves.
             n_bands (int | None): number of bands in the cochleagram. If this is not
                 None, the `bandwidth` argument is ignored.
-            show (bool): whether to show the plot right after drawing. Note that if show is False and no `axis` is
-                passed, no plot will be created
+            filter_width_factor (float): Multiplier for the width of the filters. Default is 1; use
+                higher values to make the filter coverage denser (oversampled).
+            show (bool): whether to show the plot right after drawing. Note that if show is False
+                and no `axis` is passed, no plot will be created
             axis (matplotlib.axes.Axes | None): axis to plot to. If None create a new plot.
         Returns:
-            (None | numpy.ndarray): If `show == True` or an axis was passed, a plot is drawn and nothing is returned.
-                Else, an array with the envelope is returned.
+            (None | numpy.ndarray): If `show == True` or an axis was passed, a plot is drawn and
+                nothing is returned. Else, an array with the envelope is returned.
         """
         fbank = Filter.cos_filterbank(bandwidth=bandwidth, low_cutoff=20,
                                       high_cutoff=None, n_filters=n_bands,
-                                      samplerate=self.samplerate)
+                                      filter_width_factor=filter_width_factor,
+                                      pass_bands=True, samplerate=self.samplerate)
         freqs = fbank.filter_bank_center_freqs()
         subbands = fbank.apply(self.channel(0))
         envs = subbands.envelope()
@@ -1342,24 +1345,30 @@ def spectral_feature(self, feature='centroid', mean='rms', frame_duration=None,
             out_all = Signal(data=out_all, samplerate=self.samplerate)  # cast as Signal
         return out_all
 
-    def vocode(self, bandwidth=1 / 3):
+    def vocode(self, bandwidth=1/3, filter_width_factor=1):
         """
         Returns a noise vocoded version of the sound by computing the envelope in different frequency subbands,
         filling these envelopes with noise, and collapsing the subbands into one sound. This removes most spectral
         information but retains temporal information in a speech sound.
 
         Arguments:
             bandwidth (float): width of the subbands in octaves.
+            filter_width_factor (float): Multiplier for the width of the filters. Default is 1;
+                use smaller values to make the filter coverage sparser and larger values to make
+                it denser. Intended to keep energetic masking constant when changing bandwidth.
         Returns:
             (slab.Sound): a vocoded copy of the sound.
         """
-        fbank = Filter.cos_filterbank(length=self.n_samples, bandwidth=bandwidth,
-                                      low_cutoff=30, pass_bands=True, samplerate=self.samplerate)
+        fbank = Filter.cos_filterbank(length=self.n_samples, bandwidth=bandwidth, low_cutoff=30,
+                                      pass_bands=True, samplerate=self.samplerate)
         subbands = fbank.apply(self.channel(0))
         envs = subbands.envelope()
         envs.data[envs.data < 1e-9] = 0  # remove small values that cause waring with numpy.power
         noise = Sound.whitenoise(duration=self.n_samples,
                                  samplerate=self.samplerate)  # make white noise
+        fbank = Filter.cos_filterbank(length=self.n_samples, bandwidth=bandwidth, low_cutoff=30,
+                                      pass_bands=True, filter_width_factor=filter_width_factor,
+                                      samplerate=self.samplerate)
         subbands_noise = fbank.apply(noise)  # divide into same subbands as sound
         subbands_noise *= envs  # apply envelopes
         subbands_noise.level = subbands.level
@@ -1402,6 +1411,36 @@ def onset_slope(self):
         norm = hist / hist.sum()  # normalize histogram so that it sums to 1
         return numpy.sum(bin_centers * norm)  # compute centroid of histogram
 
+    def spectral_coverage(self, threshold=-50, low_cutoff=20, high_cutoff=None):
+        """
+        Computes the fraction of a sound's spectrogram bins which exceed a certain threshold
+        relative to the sound's rms level. The default threshold is -50 dB.
+        When threshold is set to 'otsu', a value is automatically determined to optimally
+        split the spectrogram level histogram using Otsu's method [Otsu 1979, IEEE].
+
+        Arguments:
+            threshold (int | float | str): threshold for 'foreground' regions in the cochleagram,
+                or 'otsu' to determine the threshold automatically. Default -50.
+            low_cutoff (int | float): lower frequency edge of the spectrum to be taken into account.
+            high_cutoff (int | float | None): higher frequency edge of the spectrum to be taken into account.
+
+        Returns:
+            (float): The spectral coverage provided by the sound between 0 and 1, 1 indicating full coverage.
+        """
+        def otsu_var(data,th): # helper function to compute Otsu interclass variance
+            return numpy.nansum(
+                [numpy.mean(cls) * numpy.var(data,where=cls) for cls in [data>=th,data<th]])
+        fbank = Filter.cos_filterbank(low_cutoff=low_cutoff, high_cutoff=high_cutoff,
+                                      filter_width_factor=0.75,
+                                      pass_bands=True, samplerate=self.samplerate)
+        subbands = fbank.apply(self.channel(0))
+        envs = subbands.envelope(kind='dB').data
+        if threshold == 'otsu':
+            threshold = min(
+                range(int(numpy.min(envs)) + 1, int(numpy.max(envs))),
+                key=lambda th: otsu_var(envs,th) )
+        coverage = numpy.where(envs>threshold,1,0).sum()/envs.size
+        return coverage
+
     def frames(self, duration=1024):
         """
         A generator that steps through the sound in overlapping, windowed frames.