Skip to content

Commit cc88592

Browse files
committed
added spectral_coverage and filter_width_factor
1 parent 0d10322 commit cc88592

File tree

3 files changed

+59
-17
lines changed

3 files changed

+59
-17
lines changed

docs/sounds.rst

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -215,10 +215,12 @@ existing matplotlib.pyplot axis supplied with the :attr:`axis` argument.
215215
.. _spectral_features:
216216

217217
You can also extract common features from sounds, such as the :meth:`.crest_factor` (a measure of how 'peaky'
218-
the waveform is), or the average :meth:`.onset_slope` (a measure of how fast the on-ramps in the sound are---important
219-
for sound localization). Features of the spectral content are bundled in the :meth:`.spectral_feature` method.
220-
It can compute spectral centroid, flux, flatness, and rolloff, either for an entire sound (suitable for stationary
221-
sounds), or for successive time windows (frames, suitable for time-varying sounds).
218+
the waveform is), the average :meth:`.onset_slope` (a measure of how fast the on-ramps in the sound are---important
219+
for sound localization), or the :meth:`.spectral_coverage` (the fraction of the spectrogram containing energy as a measure of the masking ability of a sound).
220+
221+
Features of the spectral content are bundled in the :meth:`.spectral_feature` method. It can compute spectral
222+
centroid, flux, flatness, and rolloff, either for an entire sound (suitable for stationary sounds), or for
223+
successive time windows (frames, suitable for time-varying sounds).
222224
* The centroid is a measure of the center of mass of a spectrum (i.e. the 'center' frequency).
223225
* The flux measures how quickly the power spectrum is changing by comparing the power spectrum for one frame against the
224226
power spectrum from the previous frame; flatness measures how tone-like a sound is, as opposed to being noise-like, and

slab/filter.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -296,8 +296,7 @@ def tf(self, channels='all', n_bins=None, show=True, axis=None):
296296
return w, h
297297

298298
@staticmethod
299-
# TODO: oversampling factor needed for cochleagram!
300-
def cos_filterbank(length=5000, bandwidth=1/3, low_cutoff=0, high_cutoff=None, pass_bands=False, n_filters=None, samplerate=None):
299+
def cos_filterbank(length=5000, bandwidth=1/3, low_cutoff=0, high_cutoff=None, pass_bands=False, n_filters=None, filter_width_factor=1, samplerate=None):
301300
"""
302301
Generate a set of Fourier filters. Each filter's transfer function is given by the positive phase of a
303302
cosine wave. The amplitude of the cosine is that filter's central frequency. Following the organization of the
@@ -314,8 +313,9 @@ def cos_filterbank(length=5000, bandwidth=1/3, low_cutoff=0, high_cutoff=None, p
314313
pass_bands (bool): Whether to include a half cosine at the filter bank's lower and upper edge frequency.
315314
If True, allows reconstruction of original bandwidth when collapsing subbands.
316315
n_filters (int | None): Number of filters. When this is not None, the `bandwidth` argument is ignored.
316+
filter_width_factor (float): Multiplier for the width of the filters. Default is 1; use smaller values to make the filter coverage sparser (undersampled) and larger values to make it denser (oversampled).
317317
samplerate (int | None): the samplerate of the sound that the filter shall be applied to.
318-
If None, use the default samplerate.s
318+
If None, use the default samplerate.
319319
Examples::
320320
321321
sig = slab.Sound.pinknoise(samplerate=44100)
@@ -335,14 +335,15 @@ def cos_filterbank(length=5000, bandwidth=1/3, low_cutoff=0, high_cutoff=None, p
335335
n_freqs = len(freq_bins)
336336
center_freqs, bandwidth, erb_spacing = Filter._center_freqs(
337337
low_cutoff=low_cutoff, high_cutoff=high_cutoff, bandwidth=bandwidth, pass_bands=pass_bands, n_filters=n_filters)
338+
erb_spacing = erb_spacing * filter_width_factor
338339
n_filters = len(center_freqs)
339340
filts = numpy.zeros((n_freqs, n_filters))
340341
freqs_erb = Filter._freq2erb(freq_bins)
341342
for i in range(n_filters):
342343
l = center_freqs[i] - erb_spacing
343344
h = center_freqs[i] + erb_spacing
344345
avg = center_freqs[i] # center of filter
345-
width = erb_spacing * 2 # width of filter
346+
width = erb_spacing * 2 # width of filter
346347
filts[(freqs_erb > l) & (freqs_erb < h), i] = numpy.cos(
347348
(freqs_erb[(freqs_erb > l) & (freqs_erb < h)] - avg) / width * numpy.pi)
348349
return Filter(data=filts, samplerate=samplerate, fir='TF')

slab/sound.py

Lines changed: 48 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1180,7 +1180,7 @@ def spectrogram(self, window_dur=0.005, dyn_range=120, upper_frequency=None, oth
11801180
else:
11811181
return freqs, times, power
11821182

1183-
def cochleagram(self, bandwidth=1 / 5, n_bands=None, show=True, axis=None):
1183+
def cochleagram(self, bandwidth=1/5, n_bands=None, filter_width_factor=1, show=True, axis=None):
11841184
"""
11851185
Computes a cochleagram of the sound by filtering with a bank of cosine-shaped filters
11861186
and applying a cube-root compression to the resulting envelopes. The number of bands
@@ -1191,16 +1191,19 @@ def cochleagram(self, bandwidth=1 / 5, n_bands=None, show=True, axis=None):
11911191
bandwidth (float): filter bandwidth in octaves.
11921192
n_bands (int | None): number of bands in the cochleagram. If this is not
11931193
None, the `bandwidth` argument is ignored.
1194-
show (bool): whether to show the plot right after drawing. Note that if show is False and no `axis` is
1195-
passed, no plot will be created
1194+
filter_width_factor (float): Default 1; use higher values to make the filter coverage
1195+
denser (oversampled).
1196+
show (bool): whether to show the plot right after drawing. Note that if show is False
1197+
and no `axis` is passed, no plot will be created
11961198
axis (matplotlib.axes.Axes | None): axis to plot to. If None create a new plot.
11971199
Returns:
1198-
(None | numpy.ndarray): If `show == True` or an axis was passed, a plot is drawn and nothing is returned.
1199-
Else, an array with the envelope is returned.
1200+
(None | numpy.ndarray): If `show == True` or an axis was passed, a plot is drawn and
1201+
nothing is returned. Else, an array with the envelope is returned.
12001202
"""
12011203
fbank = Filter.cos_filterbank(bandwidth=bandwidth, low_cutoff=20,
12021204
high_cutoff=None, n_filters=n_bands,
1203-
samplerate=self.samplerate)
1205+
filter_width_factor=filter_width_factor,
1206+
pass_bands=True, samplerate=self.samplerate)
12041207
freqs = fbank.filter_bank_center_freqs()
12051208
subbands = fbank.apply(self.channel(0))
12061209
envs = subbands.envelope()
@@ -1342,24 +1345,30 @@ def spectral_feature(self, feature='centroid', mean='rms', frame_duration=None,
13421345
out_all = Signal(data=out_all, samplerate=self.samplerate) # cast as Signal
13431346
return out_all
13441347

1345-
def vocode(self, bandwidth=1 / 3):
1348+
def vocode(self, bandwidth=1/3, filter_width_factor=1):
13461349
"""
13471350
Returns a noise vocoded version of the sound by computing the envelope in different frequency subbands,
13481351
filling these envelopes with noise, and collapsing the subbands into one sound. This removes most spectral
13491352
information but retains temporal information in a speech sound.
13501353
13511354
Arguments:
13521355
bandwidth (float): width of the subbands in octaves.
1356+
filter_width_factor (float): Multiplier for the width of the filters. Default is 1;
1357+
use smaller values to make the filter coverage sparser and larger values to make
1358+
it denser. Intended to keep energetic masking constant when changing bandwidth.
13531359
Returns:
13541360
(slab.Sound): a vocoded copy of the sound.
13551361
"""
1356-
fbank = Filter.cos_filterbank(length=self.n_samples, bandwidth=bandwidth,
1357-
low_cutoff=30, pass_bands=True, samplerate=self.samplerate)
1362+
fbank = Filter.cos_filterbank(length=self.n_samples, bandwidth=bandwidth, low_cutoff=30,
1363+
pass_bands=True, samplerate=self.samplerate)
13581364
subbands = fbank.apply(self.channel(0))
13591365
envs = subbands.envelope()
13601366
envs.data[envs.data < 1e-9] = 0 # remove small values that cause warning with numpy.power
13611367
noise = Sound.whitenoise(duration=self.n_samples,
13621368
samplerate=self.samplerate) # make white noise
1369+
fbank = Filter.cos_filterbank(length=self.n_samples, bandwidth=bandwidth, low_cutoff=30,
1370+
pass_bands=True, filter_width_factor=filter_width_factor,
1371+
samplerate=self.samplerate)
13631372
subbands_noise = fbank.apply(noise) # divide into same subbands as sound
13641373
subbands_noise *= envs # apply envelopes
13651374
subbands_noise.level = subbands.level
@@ -1402,6 +1411,36 @@ def onset_slope(self):
14021411
norm = hist / hist.sum() # normalize histogram so that it sums to 1
14031412
return numpy.sum(bin_centers * norm) # compute centroid of histogram
14041413

1414+
def spectral_coverage(self, threshold=-50, low_cutoff=20, high_cutoff=None):
1415+
"""
1416+
Computes the fraction of a sound's spectrogram bins which exceed a certain threshold
1417+
relative to the sound's rms level. The default threshold is -50 dB.
1418+
When threshold is set to 'otsu', a value is automatically determined to optimally
1419+
split the spectrogram level histogram using Otsu's method [Otsu 1979, IEEE].
1420+
1421+
Arguments:
1422+
threshold (int | float | str): threshold in dB for 'foreground' regions in the cochleagram, or 'otsu' to determine the threshold automatically. Default -50.
1423+
low_cutoff (int | float): lower frequency edge of the spectrum to be taken into account.
1424+
high_cutoff (int | float): higher frequency edge of the spectrum to be taken into account.
1425+
1426+
Returns:
1427+
(float): The spectral coverage provided by the sound between 0 and 1, 1 indicating full coverage.
1428+
"""
1429+
def otsu_var(data,th): # helper function to compute Otsu intraclass (weighted within-class) variance, which is minimized
1430+
return numpy.nansum(
1431+
[numpy.mean(cls) * numpy.var(data,where=cls) for cls in [data>=th,data<th]])
1432+
fbank = Filter.cos_filterbank(low_cutoff=low_cutoff, high_cutoff=high_cutoff,
1433+
filter_width_factor=0.75,
1434+
pass_bands=True, samplerate=self.samplerate)
1435+
subbands = fbank.apply(self.channel(0))
1436+
envs = subbands.envelope(kind='dB').data
1437+
if threshold == 'otsu':
1438+
threshold = min(
1439+
range(int(numpy.min(envs)) + 1, int(numpy.max(envs))),
1440+
key=lambda th: otsu_var(envs,th) )
1441+
coverage = numpy.where(envs>threshold,1,0).sum()/envs.size
1442+
return coverage
1443+
14051444
def frames(self, duration=1024):
14061445
"""
14071446
A generator that steps through the sound in overlapping, windowed frames.

0 commit comments

Comments
 (0)