Skip to content

Commit 6d0395d

Browse files
b-loved-dreamer authored and dandhlee committed
fix: migrated samples to speech 2.0.0 (#78)
* I updated the comment on the transcribe_async file to reflect time limitations on local files for the long_running_recognize
* I updated the comment on the transcribe_async file to reflect time limitations on local files for the long_running_recognize
* docs: I updated the comment on the transcribe_async file to reflect time limitations on local files for the long_running_recognize
* chore: I updated the comments on the transcribe_async file to reflect time limitations on local files for the long_running_recognize
* fix: resolved conflicts
* fix: migrated samples to speech 2.0.0
* fix: migrated to speech 2.0.0
* fix: fixed lint issues
1 parent 97f39e7 commit 6d0395d

18 files changed

+182
-208
lines changed

speech/microphone/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
google-cloud-speech==1.3.2
1+
google-cloud-speech==2.0.0
22
pyaudio==0.2.11
33
six==1.15.0
44

speech/microphone/transcribe_streaming_infinite.py

Lines changed: 35 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,9 @@
4141
SAMPLE_RATE = 16000
4242
CHUNK_SIZE = int(SAMPLE_RATE / 10) # 100ms
4343

44-
RED = "\033[0;31m"
45-
GREEN = "\033[0;32m"
46-
YELLOW = "\033[0;33m"
44+
RED = '\033[0;31m'
45+
GREEN = '\033[0;32m'
46+
YELLOW = '\033[0;33m'
4747

4848

4949
def get_current_time():
@@ -123,14 +123,12 @@ def generator(self):
123123
if self.bridging_offset > self.final_request_end_time:
124124
self.bridging_offset = self.final_request_end_time
125125

126-
chunks_from_ms = round(
127-
(self.final_request_end_time - self.bridging_offset)
128-
/ chunk_time
129-
)
126+
chunks_from_ms = round((self.final_request_end_time -
127+
self.bridging_offset) / chunk_time)
130128

131-
self.bridging_offset = round(
132-
(len(self.last_audio_input) - chunks_from_ms) * chunk_time
133-
)
129+
self.bridging_offset = (round((
130+
len(self.last_audio_input) - chunks_from_ms)
131+
* chunk_time))
134132

135133
for i in range(chunks_from_ms, len(self.last_audio_input)):
136134
data.append(self.last_audio_input[i])
@@ -159,7 +157,7 @@ def generator(self):
159157
except queue.Empty:
160158
break
161159

162-
yield b"".join(data)
160+
yield b''.join(data)
163161

164162

165163
def listen_print_loop(responses, stream):
@@ -195,45 +193,42 @@ def listen_print_loop(responses, stream):
195193
transcript = result.alternatives[0].transcript
196194

197195
result_seconds = 0
198-
result_nanos = 0
196+
result_micros = 0
199197

200198
if result.result_end_time.seconds:
201199
result_seconds = result.result_end_time.seconds
202200

203-
if result.result_end_time.nanos:
204-
result_nanos = result.result_end_time.nanos
201+
if result.result_end_time.microseconds:
202+
result_micros = result.result_end_time.microseconds
205203

206-
stream.result_end_time = int((result_seconds * 1000) + (result_nanos / 1000000))
204+
stream.result_end_time = int((result_seconds * 1000) + (result_micros / 1000))
207205

208-
corrected_time = (
209-
stream.result_end_time
210-
- stream.bridging_offset
211-
+ (STREAMING_LIMIT * stream.restart_counter)
212-
)
206+
corrected_time = (stream.result_end_time - stream.bridging_offset
207+
+ (STREAMING_LIMIT * stream.restart_counter))
213208
# Display interim results, but with a carriage return at the end of the
214209
# line, so subsequent lines will overwrite them.
215210

216211
if result.is_final:
217212

218213
sys.stdout.write(GREEN)
219-
sys.stdout.write("\033[K")
220-
sys.stdout.write(str(corrected_time) + ": " + transcript + "\n")
214+
sys.stdout.write('\033[K')
215+
sys.stdout.write(str(corrected_time) + ': ' + transcript + '\n')
221216

222217
stream.is_final_end_time = stream.result_end_time
223218
stream.last_transcript_was_final = True
224219

225220
# Exit recognition if any of the transcribed phrases could be
226221
# one of our keywords.
227-
if re.search(r"\b(exit|quit)\b", transcript, re.I):
222+
if re.search(r'\b(exit|quit)\b', transcript, re.I):
228223
sys.stdout.write(YELLOW)
229-
sys.stdout.write("Exiting...\n")
224+
sys.stdout.write('Exiting...\n')
230225
stream.closed = True
231226
break
232227

233228
else:
234229
sys.stdout.write(RED)
235-
sys.stdout.write("\033[K")
236-
sys.stdout.write(str(corrected_time) + ": " + transcript + "\r")
230+
sys.stdout.write('\033[K')
231+
sys.stdout.write(str(corrected_time) + ': ' + transcript + '\r')
237232

238233
stream.last_transcript_was_final = False
239234

@@ -245,39 +240,34 @@ def main():
245240
config = speech.RecognitionConfig(
246241
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
247242
sample_rate_hertz=SAMPLE_RATE,
248-
language_code="en-US",
249-
max_alternatives=1,
250-
)
243+
language_code='en-US',
244+
max_alternatives=1)
251245
streaming_config = speech.StreamingRecognitionConfig(
252-
config=config, interim_results=True
253-
)
246+
config=config,
247+
interim_results=True)
254248

255249
mic_manager = ResumableMicrophoneStream(SAMPLE_RATE, CHUNK_SIZE)
256250
print(mic_manager.chunk_size)
257251
sys.stdout.write(YELLOW)
258252
sys.stdout.write('\nListening, say "Quit" or "Exit" to stop.\n\n')
259-
sys.stdout.write("End (ms) Transcript Results/Status\n")
260-
sys.stdout.write("=====================================================\n")
253+
sys.stdout.write('End (ms) Transcript Results/Status\n')
254+
sys.stdout.write('=====================================================\n')
261255

262256
with mic_manager as stream:
263257

264258
while not stream.closed:
265259
sys.stdout.write(YELLOW)
266-
sys.stdout.write(
267-
"\n" + str(STREAMING_LIMIT * stream.restart_counter) + ": NEW REQUEST\n"
268-
)
260+
sys.stdout.write('\n' + str(
261+
STREAMING_LIMIT * stream.restart_counter) + ': NEW REQUEST\n')
269262

270263
stream.audio_input = []
271264
audio_generator = stream.generator()
272265

273-
requests = (
274-
speech.StreamingRecognizeRequest(audio_content=content)
275-
for content in audio_generator
276-
)
266+
requests = (speech.StreamingRecognizeRequest(
267+
audio_content=content)for content in audio_generator)
277268

278-
responses = client.streaming_recognize(
279-
requests=requests, config=streaming_config
280-
)
269+
responses = client.streaming_recognize(streaming_config,
270+
requests)
281271

282272
# Now, put the transcription responses to use.
283273
listen_print_loop(responses, stream)
@@ -291,11 +281,11 @@ def main():
291281
stream.restart_counter = stream.restart_counter + 1
292282

293283
if not stream.last_transcript_was_final:
294-
sys.stdout.write("\n")
284+
sys.stdout.write('\n')
295285
stream.new_stream = True
296286

297287

298-
if __name__ == "__main__":
288+
if __name__ == '__main__':
299289

300290
main()
301291

speech/microphone/transcribe_streaming_mic.py

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import sys
3333

3434
from google.cloud import speech
35+
3536
import pyaudio
3637
from six.moves import queue
3738

@@ -42,7 +43,6 @@
4243

4344
class MicrophoneStream(object):
4445
"""Opens a recording stream as a generator yielding the audio chunks."""
45-
4646
def __init__(self, rate, chunk):
4747
self._rate = rate
4848
self._chunk = chunk
@@ -57,10 +57,8 @@ def __enter__(self):
5757
format=pyaudio.paInt16,
5858
# The API currently only supports 1-channel (mono) audio
5959
# https://goo.gl/z757pE
60-
channels=1,
61-
rate=self._rate,
62-
input=True,
63-
frames_per_buffer=self._chunk,
60+
channels=1, rate=self._rate,
61+
input=True, frames_per_buffer=self._chunk,
6462
# Run the audio stream asynchronously to fill the buffer object.
6563
# This is necessary so that the input device's buffer doesn't
6664
# overflow while the calling thread makes network requests, etc.
@@ -105,7 +103,7 @@ def generator(self):
105103
except queue.Empty:
106104
break
107105

108-
yield b"".join(data)
106+
yield b''.join(data)
109107

110108

111109
def listen_print_loop(responses):
@@ -143,10 +141,10 @@ def listen_print_loop(responses):
143141
#
144142
# If the previous result was longer than this one, we need to print
145143
# some extra spaces to overwrite the previous result
146-
overwrite_chars = " " * (num_chars_printed - len(transcript))
144+
overwrite_chars = ' ' * (num_chars_printed - len(transcript))
147145

148146
if not result.is_final:
149-
sys.stdout.write(transcript + overwrite_chars + "\r")
147+
sys.stdout.write(transcript + overwrite_chars + '\r')
150148
sys.stdout.flush()
151149

152150
num_chars_printed = len(transcript)
@@ -156,8 +154,8 @@ def listen_print_loop(responses):
156154

157155
# Exit recognition if any of the transcribed phrases could be
158156
# one of our keywords.
159-
if re.search(r"\b(exit|quit)\b", transcript, re.I):
160-
print("Exiting..")
157+
if re.search(r'\b(exit|quit)\b', transcript, re.I):
158+
print('Exiting..')
161159
break
162160

163161
num_chars_printed = 0
@@ -166,33 +164,28 @@ def listen_print_loop(responses):
166164
def main():
167165
# See http://g.co/cloud/speech/docs/languages
168166
# for a list of supported languages.
169-
language_code = "en-US" # a BCP-47 language tag
167+
language_code = 'en-US' # a BCP-47 language tag
170168

171169
client = speech.SpeechClient()
172170
config = speech.RecognitionConfig(
173171
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
174172
sample_rate_hertz=RATE,
175-
language_code=language_code,
176-
)
173+
language_code=language_code)
177174
streaming_config = speech.StreamingRecognitionConfig(
178-
config=config, interim_results=True
179-
)
175+
config=config,
176+
interim_results=True)
180177

181178
with MicrophoneStream(RATE, CHUNK) as stream:
182179
audio_generator = stream.generator()
183-
requests = (
184-
speech.StreamingRecognizeRequest(audio_content=content)
185-
for content in audio_generator
186-
)
180+
requests = (speech.StreamingRecognizeRequest(audio_content=content)
181+
for content in audio_generator)
187182

188-
responses = client.streaming_recognize(
189-
requests=requests, config=streaming_config
190-
)
183+
responses = client.streaming_recognize(streaming_config, requests)
191184

192185
# Now, put the transcription responses to use.
193186
listen_print_loop(responses)
194187

195188

196-
if __name__ == "__main__":
189+
if __name__ == '__main__':
197190
main()
198191
# [END speech_transcribe_streaming_mic]

speech/microphone/transcribe_streaming_mic_test.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
import mock
2020

21-
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
21+
RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
2222

2323

2424
class MockPyAudio(object):
@@ -32,9 +32,8 @@ def open(self, stream_callback, rate, *args, **kwargs):
3232
self.rate = rate
3333
self.closed = threading.Event()
3434
self.stream_thread = threading.Thread(
35-
target=self.stream_audio,
36-
args=(self.audio_filename, stream_callback, self.closed),
37-
)
35+
target=self.stream_audio, args=(
36+
self.audio_filename, stream_callback, self.closed))
3837
self.stream_thread.start()
3938
return self
4039

@@ -48,25 +47,23 @@ def terminate(self):
4847
pass
4948

5049
def stream_audio(self, audio_filename, callback, closed, num_frames=512):
51-
with open(audio_filename, "rb") as audio_file:
50+
with open(audio_filename, 'rb') as audio_file:
5251
while not closed.is_set():
5352
# Approximate realtime by sleeping for the appropriate time for
5453
# the requested number of frames
5554
time.sleep(num_frames / float(self.rate))
5655
# audio is 16-bit samples, whereas python byte is 8-bit
5756
num_bytes = 2 * num_frames
58-
chunk = audio_file.read(num_bytes) or b"\0" * num_bytes
57+
chunk = audio_file.read(num_bytes) or b'\0' * num_bytes
5958
callback(chunk, None, None, None)
6059

6160

62-
@mock.patch.dict(
63-
"sys.modules",
64-
pyaudio=mock.MagicMock(PyAudio=MockPyAudio(os.path.join(RESOURCES, "quit.raw"))),
65-
)
61+
@mock.patch.dict('sys.modules', pyaudio=mock.MagicMock(
62+
PyAudio=MockPyAudio(os.path.join(RESOURCES, 'quit.raw'))))
6663
def test_main(capsys):
6764
import transcribe_streaming_mic
6865

6966
transcribe_streaming_mic.main()
7067
out, err = capsys.readouterr()
7168

72-
assert re.search(r"quit", out, re.DOTALL | re.I)
69+
assert re.search(r'quit', out, re.DOTALL | re.I)

0 commit comments

Comments (0)