
Commit 7c44200

change folder name
1 parent 438ed2e commit 7c44200


68 files changed: +8905 −1 lines changed

AcousticEchoCancellation/LMS.py

+116
@@ -0,0 +1,116 @@
"""
@FileName: LMS.py
@Description: Implement LMS AEC
@Author: Ryuk
@CreateDate: 2020/07/01
@LastEditTime: 2020/07/04
@LastEditors: Please set LastEditors
@Version: v0.1
"""

import librosa
import soundfile as sf
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt


far, sr = librosa.load("./far.wav", sr=16000)
near, sr = librosa.load("./near.wav", sr=16000)

L = 128             # number of filter taps
N = len(far)        # number of iterations
T = 0.92            # double-talk detection threshold
lambda_DTD = 0.95   # DTD smoothing coefficient
DTDbegin = 20000    # sample index at which DTD starts

w = np.zeros(L)
xin = np.zeros(L)

# DTD-related state
varMIC = np.zeros(N)
r_em = np.zeros(N)

x = far
d = near

mu = 0.014
y = np.zeros(N)
e = np.zeros(N)
threshold = np.zeros(N)
decision_statistic = np.zeros(N)
powerD = np.zeros(N)
powerE = np.zeros(N)
ERLE = np.zeros(N)

for i in tqdm(range(N)):
    # shift the input buffer and insert the newest far-end sample
    for j in range(L - 1, 0, -1):
        xin[j] = xin[j - 1]

    # LMS
    xin[0] = x[i]

    y[i] = np.dot(w, xin)
    error = d[i] - y[i]
    e[i] = error
    wtemp = w + np.multiply(2 * mu * error, xin)

    # DTD: only adapt the filter when no double-talk is detected
    threshold[i] = T
    if i < DTDbegin:
        w = wtemp
    else:
        r_em[i] = lambda_DTD * r_em[i - 1] + (1 - lambda_DTD) * e[i] * d[i]
        varMIC[i] = np.sqrt(lambda_DTD * (varMIC[i - 1] ** 2) + (1 - lambda_DTD) * d[i] * d[i])
        decision_statistic[i] = 1 - (r_em[i] / varMIC[i]) ** 2

        if decision_statistic[i] > threshold[i]:
            w = wtemp

    # ERLE
    powerD[i] = np.abs(d[i]) ** 2  # power of microphone signal
    powerE[i] = np.abs(e[i]) ** 2  # power of error signal
    # note: entries after index i are still zero here, so each window mean
    # effectively reduces to the instantaneous ratio powerD[i] / powerE[i]
    ERLE[i] = 10 * np.log10(np.mean(powerD[i:i + L]) / np.mean(powerE[i:i + L]))

# plots
time = np.arange(0, len(far)) * (1.0 / sr)
fig = plt.figure(figsize=(10, 16))

# near
ax = fig.add_subplot(6, 1, 1)
ax.plot(time, near, 'b')
plt.ylabel("Near")
plt.tight_layout()

# far
ax = fig.add_subplot(6, 1, 2)
ax.plot(time, far, 'b')
plt.ylabel("Far")
plt.tight_layout()

# output
ax = fig.add_subplot(6, 1, 3)
ax.plot(time, y, 'b')
plt.ylabel("Output")
plt.tight_layout()

# error
ax = fig.add_subplot(6, 1, 4)
ax.plot(time, e, 'b')
plt.ylabel("Error")
plt.tight_layout()

# decision statistic
ax = fig.add_subplot(6, 1, 5)
ax.plot(time, decision_statistic, 'b')
plt.axhline(y=T, ls=":", c="red")
plt.ylabel("Decision_statistic")
plt.tight_layout()

# ERLE
ax = fig.add_subplot(6, 1, 6)
ax.plot(time, ERLE, 'b')
plt.ylabel("ERLE")
plt.tight_layout()

plt.show()
# librosa.output.write_wav was removed in librosa 0.8; soundfile is the usual replacement
sf.write("./output.wav", (near - y).astype(np.float32), sr)

AcousticEchoCancellation/README.md

+1
@@ -0,0 +1 @@
This is a simple acoustic echo cancellation demo with an LMS filter.
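For reference, the adaptation loop in LMS.py above is the standard LMS recursion. In the notation of the script:

$$y(n) = \mathbf{w}^T \mathbf{x}(n), \qquad e(n) = d(n) - y(n), \qquad \mathbf{w} \leftarrow \mathbf{w} + 2\mu\, e(n)\, \mathbf{x}(n)$$

where $\mathbf{x}(n)$ holds the last $L = 128$ far-end samples, $d(n)$ is the microphone signal, and $\mu = 0.014$. The filter is frozen whenever the double-talk decision statistic $1 - (r_{em}/\sigma_d)^2$ falls below the threshold $T = 0.92$, and performance is reported as $\mathrm{ERLE} = 10\log_{10}\!\left(E[d^2]/E[e^2]\right)$ in dB.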

AcousticEchoCancellation/far.wav

1.52 MB
Binary file not shown.

AcousticEchoCancellation/near.wav

1.52 MB
Binary file not shown.

AcousticEchoCancellation/output.wav

3.04 MB
Binary file not shown.

CommandRecognition/README.md

+3
@@ -0,0 +1,3 @@
Command recognition using a CNN.

usage: python train.py or sh train.sh
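train.py itself is not part of this commit view. Purely as an illustrative sketch of a command-recognition CNN (the layer sizes, class count, and 40-bin log-mel input are assumptions, not taken from the repo):

import torch
import torch.nn as nn

class CommandCNN(nn.Module):
    """Hypothetical CNN over log-mel spectrograms (n_mels x frames)."""
    def __init__(self, n_classes=10):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
            nn.Conv2d(16, 32, kernel_size=3, padding=1), nn.ReLU(), nn.MaxPool2d(2),
        )
        self.classifier = nn.Sequential(
            nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(32, n_classes)
        )

    def forward(self, x):            # x: (batch, 1, n_mels, frames)
        return self.classifier(self.features(x))

model = CommandCNN(n_classes=10)
dummy = torch.randn(4, 1, 40, 101)   # batch of four one-second clips
print(model(dummy).shape)            # -> torch.Size([4, 10])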
+1
@@ -0,0 +1 @@
Environmental sound classification baseline using XGBoost as the classifier. Three features are used: MFCC, spectral contrast and ZCR.
+88
@@ -0,0 +1,88 @@
"""
@FileName: baseline.py
@Description: Implement baseline for ESC
@Author: Ryuk
@CreateDate: 2020/08/02
@LastEditTime: 2020/08/02
@LastEditors: Please set LastEditors
@Version: v0.1
"""


import os
import librosa
import numpy as np
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

RATE = 44100
FRAME = 512
NUM = 400

def compute_mfcc(wav):
    melspectrogram = librosa.feature.melspectrogram(wav, sr=RATE, hop_length=FRAME)
    logamplitude = librosa.core.amplitude_to_db(melspectrogram)
    mfcc = np.mean(librosa.feature.mfcc(S=logamplitude, n_mfcc=13), axis=1)
    return mfcc

def compute_spectral_contrast(wav):
    spectral_contrast = librosa.feature.spectral_contrast(wav, RATE, hop_length=512)
    spectral_contrast = np.mean(spectral_contrast, axis=1)
    return spectral_contrast

def compute_zcr(wav):
    zcr = []
    frames = librosa.util.frame(wav, hop_length=FRAME)
    frames = frames.T
    for i in range(len(frames)):
        zcr.append(np.mean(0.5 * np.abs(np.diff(np.sign(frames[i])))))

    zcr_mean = np.mean(zcr)
    zcr_std = np.std(zcr)
    return np.array([zcr_mean, zcr_std])


path = "D:\\Samples\\ESC-10\\"
folders = os.listdir(path)[:10]
print(folders)

feature = np.zeros([NUM, 22])
label = np.zeros(NUM)
count = 0
for i in tqdm(range(len(folders))):
    dirname = path + str(folders[i])
    wavefiles = os.listdir(dirname)

    for j in range(len(wavefiles)):
        wavname = dirname + "\\" + wavefiles[j]
        wav, RATE = librosa.load(wavname, RATE)
        mfcc = compute_mfcc(wav)
        sc = compute_spectral_contrast(wav)
        zcr = compute_zcr(wav)
        feature[count] = np.hstack([mfcc, sc, zcr])
        label[count] = i
        count += 1

# training
print("Start Training")
X_train, X_test, y_train, y_test = train_test_split(feature, label, test_size=0.3, random_state=2020)
clf = XGBClassifier(
    learning_rate=0.1,
    n_estimators=100,
    max_depth=6,
    min_child_weight=1,
    gamma=0.,
    subsample=0.8,
    colsample_bytree=0.8,  # fixed typo: was "colsample_btree"
    objective='multi:softmax',
    scale_pos_weight=1,
    random_state=2020
)
clf.fit(X_train, y_train)

print("Start Testing")
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("accuracy: %.2f%%" % (accuracy * 100.0))

GenderClassify/README.md

+3
@@ -0,0 +1,3 @@
Gender classification using a bi-LSTM with VAD.

usage: python train.py
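The training script is likewise not part of this view; a minimal sketch of what a bi-LSTM gender classifier over per-frame features could look like (the feature and hidden dimensions are assumptions):

import torch
import torch.nn as nn

class GenderBiLSTM(nn.Module):
    """Hypothetical bi-LSTM gender classifier over per-frame features."""
    def __init__(self, n_features=13, hidden=64):
        super().__init__()
        self.lstm = nn.LSTM(n_features, hidden, batch_first=True, bidirectional=True)
        self.fc = nn.Linear(2 * hidden, 2)  # two classes: male / female

    def forward(self, x):              # x: (batch, frames, n_features)
        out, _ = self.lstm(x)
        return self.fc(out[:, -1, :])  # classify from the last frame's state

logits = GenderBiLSTM()(torch.randn(4, 100, 13))
print(logits.shape)  # torch.Size([4, 2])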

GenderClassify/test_vad.txt

-1
This file was deleted.

Resample/README.md

+1
@@ -0,0 +1 @@
Resample speech with linear interpolation, Lagrange interpolation and sinc interpolation.
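The resampling code itself is not shown in this diff. A minimal sketch of the linear-interpolation variant (the function name and the example rates are assumptions):

import numpy as np

def resample_linear(x, src_rate, dst_rate):
    """Linearly interpolate x from src_rate to dst_rate (a minimal sketch)."""
    duration = len(x) / src_rate
    t_dst = np.arange(0, duration, 1.0 / dst_rate)   # new sample instants
    t_src = np.arange(len(x)) / src_rate             # original sample instants
    return np.interp(t_dst, t_src, x)

x = np.sin(2 * np.pi * 440 * np.arange(16000) / 16000)  # 1 s of 440 Hz at 16 kHz
y = resample_linear(x, 16000, 8000)
print(len(y))  # ~8000 samples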

SoundSourceLocalization/README.md

+1
@@ -0,0 +1 @@
Sound source localization using a TDOA-based algorithm.
@@ -0,0 +1,64 @@
"""
@FileName: SoundSourceLocalization.py
@Description: Implement SoundSourceLocalization
@Author: Ryuk
@CreateDate: 2020/09/01
@LastEditTime: 2020/09/06
@LastEditors: Please set LastEditors
@Version: v0.1
"""

import numpy as np
import librosa
from tqdm import tqdm
import matplotlib.pyplot as plt


def gcc_phat(ref, sig, sr):
    n_point = 2 * ref.shape[0] - 1
    X = np.fft.fft(ref, n_point)
    Y = np.fft.fft(sig, n_point)
    XY = X * np.conj(Y)

    # PHAT weighting: keep only the phase of the cross-spectrum
    c = XY / (abs(X) * abs(Y) + 10e-6)
    c = np.real(np.fft.ifft(c))
    end = len(c)
    center_point = end // 2

    # fft shift
    c = np.hstack((c[center_point + 1:], c[:center_point + 1]))
    lag = np.argmax(abs(c)) - len(ref) + 1
    tau = lag / sr
    return tau


SOUND_SPEED = 340.0
MIC_DISTANCE = 0.15
sample_rate = 48000
MAX_TDOA = MIC_DISTANCE / float(SOUND_SPEED)

org_ref, sr = librosa.load("./data/ref.wav", sr=sample_rate)
org_sig, sr = librosa.load("./data/sig.wav", sr=sample_rate)


ref = librosa.util.frame(org_ref, 1024, 256).T
sig = librosa.util.frame(org_sig, 1024, 256).T
fai = []
for i in tqdm(range(len(ref))):
    tau = gcc_phat(ref[i], sig[i], sample_rate)
    theta = np.arcsin(tau / MAX_TDOA) * 180 / np.pi
    fai.append(theta)


plt.subplot(211)
plt.ylabel('DOA')
plt.xlabel('Frame index')
plt.title('DOA')
plt.plot(fai)
plt.subplot(212)
plt.ylabel('Amplitude')
plt.xlabel('Frame index')
plt.title('Waveform')
plt.plot(org_ref)
plt.tight_layout()
plt.show()
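With microphone spacing $d = 0.15$ m and sound speed $c = 340$ m/s, the largest physical delay between the two channels is $\tau_{\max} = d/c$, so each frame's angle estimate above follows

$$\theta = \arcsin\!\left(\frac{\tau}{\tau_{\max}}\right) = \arcsin\!\left(\frac{\tau c}{d}\right),$$

with $\tau$ taken from the peak of the PHAT-weighted cross-correlation.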

SoundSourceLocalization/data/ref.wav

1.4 MB
Binary file not shown.

SoundSourceLocalization/data/sig.wav

1.4 MB
Binary file not shown.

SpectralSubtraction/README.md

+1
@@ -0,0 +1 @@
Spectral subtraction algorithm for noise reduction.
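The implementation file is not included in this commit view. A minimal sketch of magnitude spectral subtraction, assuming non-overlapping frames, a noise estimate taken from the first few frames, and zero flooring (all assumptions, not the repo's method):

import numpy as np

def spectral_subtraction(noisy, frame_len=256, noise_frames=10):
    """Minimal magnitude spectral subtraction sketch.

    The first `noise_frames` frames are assumed to be noise-only, and
    negative magnitudes are floored at zero (half-wave rectification).
    """
    n = len(noisy) // frame_len * frame_len
    frames = noisy[:n].reshape(-1, frame_len)
    spectra = np.fft.rfft(frames, axis=1)
    mag, phase = np.abs(spectra), np.angle(spectra)

    noise_mag = mag[:noise_frames].mean(axis=0)    # noise magnitude estimate
    clean_mag = np.maximum(mag - noise_mag, 0.0)   # subtract and floor

    clean = np.fft.irfft(clean_mag * np.exp(1j * phase), n=frame_len, axis=1)
    return clean.reshape(-1)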

SpeechAugmentation/README.md

+1
@@ -0,0 +1 @@
Add echo, noise, howling and reverberation to clean speech.

SpeechAugmentation/addNoise.py

+92
@@ -0,0 +1,92 @@
"""
@FileName: addNoise.py
@Description: Implement addNoise
@Author: Ryuk
@CreateDate: 2020/05/11
@LastEditTime: 2020/05/11
@LastEditors: Please set LastEditors
@Version: v0.1
"""
import numpy as np
from scipy import signal
from numpy.linalg import norm


def add_noise(clean, noise, snr):
    if len(noise) > len(clean):
        noise = noise[:len(clean)]
    else:
        times = len(clean) // len(noise)
        noise = np.tile(noise, times)
        padding = [0] * (len(clean) - len(noise))
        noise = np.hstack([noise, padding])

    # scale the noise so the mix has the requested SNR in dB
    noise = noise / norm(noise) * norm(clean) / (10.0 ** (0.05 * snr))
    mix = clean + noise
    return mix


def addEcho(clean, sr, alpha, beta=0.5, delay=0.1, type=1):
    """
    add an echo signal to raw speech
    :param clean: clean speech
    :param sr: sample rate
    :param alpha: parameter for type 1
    :param beta: parameter for type 2
    :param delay: parameter for type 2
    :param type: echo type
    :return: mixed signal
    """
    if type == 1:
        # impulse response with two decaying reflections
        h = [1]
        h.extend([0] * int(alpha * sr))
        h.extend([0.5])
        h.extend([0] * int(alpha * sr))
        h.extend([0.25])
        mix = signal.convolve(clean, h)
    else:
        mix = clean.copy()
        shift = int(delay * sr)
        for i in range(shift, len(clean)):
            mix[i] = beta * clean[i] + (1 - beta) * clean[i - shift]
    return mix


def add_reverberation(clean, alpha=0.8, R=2000):
    b = [0] * (R + 1)
    b[0], b[-1] = alpha, 1
    a = [0] * (R + 1)
    a[0], a[-1] = 1, 0.5
    mix = signal.filtfilt(b, a, clean)
    return mix


def add_howl(clean, K=0.2):
    g = np.loadtxt("./path.txt")       # acoustic feedback path
    c = np.array([0, 0, 0, 0, 1]).T    # forward-path delay
    h = np.zeros(201)
    h[100] = 1

    xs1 = np.zeros(c.shape[0])
    xs2 = np.zeros(g.shape)
    xs3 = np.zeros(h.shape)

    mix = np.zeros(len(clean))
    temp = 0

    for i in range(len(clean)):
        xs1[1:] = xs1[:-1]
        xs1[0] = clean[i] + temp       # microphone picks up speech plus feedback
        mix[i] = K * np.dot(xs1.T, c)

        xs3[1:] = xs3[:-1]
        xs3[0] = mix[i]
        mix[i] = np.dot(xs3.T, h)

        # clip to [-1, 1]
        mix[i] = min(1, mix[i])
        mix[i] = max(-1, mix[i])

        xs2[1:] = xs2[:-1]
        xs2[0] = mix[i]
        temp = np.dot(xs2.T, g)        # signal fed back through the room
    return mix
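The clean.wav, noisy.wav, echoic.wav and howling.wav binaries below suggest a driver along these lines, using the functions above (a sketch only; the actual script is not in this view, and noise.wav, the sample rate, and the SNR value are assumptions):

import librosa
import soundfile as sf

clean, sr = librosa.load("./clean.wav", sr=16000)
noise, _ = librosa.load("./noise.wav", sr=16000)   # hypothetical noise file

sf.write("./noisy.wav", add_noise(clean, noise, snr=10), sr)
sf.write("./echoic.wav", addEcho(clean, sr, alpha=0.3), sr)
sf.write("./howling.wav", add_howl(clean), sr)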

SpeechAugmentation/clean.wav

156 KB
Binary file not shown.

SpeechAugmentation/echoic.wav

312 KB
Binary file not shown.

SpeechAugmentation/howling.wav

312 KB
Binary file not shown.

SpeechAugmentation/noisy.wav

312 KB
Binary file not shown.
