Skip to content

Commit 717edb3

Browse files
dizcology authored and nnegrey committed
add speech api multichannel samples (#2003)
* add speech api multichannel samples * update copyright year * test with multichannel audio data * flake * update comment
1 parent 0496a35 commit 717edb3

File tree

3 files changed

+134
-4
lines changed

3 files changed

+134
-4
lines changed

speech/cloud-client/transcribe_enhanced_model.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,15 @@
2222
"""
2323

2424
import argparse
25-
import io
2625

2726

2827
def transcribe_file_with_enhanced_model(path):
2928
"""Transcribe the given audio file using an enhanced model."""
3029
# [START speech_transcribe_enhanced_model]
31-
import io
32-
30+
import io
31+
3332
from google.cloud import speech
34-
33+
3534
client = speech.SpeechClient()
3635

3736
# path = 'resources/commercial_mono.wav'
Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright 2019 Google Inc. All Rights Reserved.
4+
#
5+
# Licensed under the Apache License, Version 2.0 (the "License");
6+
# you may not use this file except in compliance with the License.
7+
# You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing, software
12+
# distributed under the License is distributed on an "AS IS" BASIS,
13+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
# See the License for the specific language governing permissions and
15+
# limitations under the License.
16+
17+
"""Google Cloud Speech API sample that demonstrates multichannel recognition.
18+
19+
Example usage:
20+
python transcribe_multichannel.py resources/multi.wav
21+
python transcribe_multichannel.py \
22+
gs://cloud-samples-tests/speech/multi.wav
23+
"""
24+
25+
import argparse
26+
27+
28+
def transcribe_file_with_multichannel(speech_file):
    """Run synchronous multichannel recognition on a local audio file.

    The file is read in full and its bytes are sent inline with the request.
    The request declares 2-channel LINEAR16 audio at 44100 Hz in US English
    and asks for each channel to be recognized separately. For every result
    in the response, the first alternative's transcript and the result's
    channel tag are printed.

    Args:
        speech_file: Path of the local audio file to transcribe.
    """
    # [START speech_transcribe_multichannel]
    from google.cloud import speech
    speech_client = speech.SpeechClient()

    with open(speech_file, 'rb') as source:
        raw_audio = source.read()

    request_audio = speech.types.RecognitionAudio(content=raw_audio)

    # Ask for each of the 2 declared channels to be recognized separately.
    linear16 = speech.enums.RecognitionConfig.AudioEncoding.LINEAR16
    request_config = speech.types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=44100,
        language_code='en-US',
        audio_channel_count=2,
        enable_separate_recognition_per_channel=True)

    response = speech_client.recognize(request_config, request_audio)

    for index, result in enumerate(response.results):
        # Only the first alternative of each result is reported.
        first_alt = result.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}'.format(index))
        print(u'Transcript: {}'.format(first_alt.transcript))
        print(u'Channel Tag: {}'.format(result.channel_tag))
    # [END speech_transcribe_multichannel]
56+
57+
58+
def transcribe_gcs_with_multichannel(gcs_uri):
    """Run synchronous multichannel recognition on audio referenced by URI.

    The audio is not read locally; only its URI is placed in the request.
    The request declares 2-channel LINEAR16 audio at 44100 Hz in US English
    and asks for each channel to be recognized separately. For every result
    in the response, the first alternative's transcript and the result's
    channel tag are printed.

    Args:
        gcs_uri: URI of the audio object, e.g. a 'gs://bucket/object' path.
    """
    # [START speech_transcribe_multichannel_gcs]
    from google.cloud import speech
    speech_client = speech.SpeechClient()

    request_audio = speech.types.RecognitionAudio(uri=gcs_uri)

    # Ask for each of the 2 declared channels to be recognized separately.
    linear16 = speech.enums.RecognitionConfig.AudioEncoding.LINEAR16
    request_config = speech.types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=44100,
        language_code='en-US',
        audio_channel_count=2,
        enable_separate_recognition_per_channel=True)

    response = speech_client.recognize(request_config, request_audio)

    for index, result in enumerate(response.results):
        # Only the first alternative of each result is reported.
        first_alt = result.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}'.format(index))
        print(u'Transcript: {}'.format(first_alt.transcript))
        print(u'Channel Tag: {}'.format(result.channel_tag))
    # [END speech_transcribe_multichannel_gcs]
83+
84+
85+
if __name__ == '__main__':
    # The module docstring doubles as the --help text, so keep its layout.
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    cli.add_argument(
        'path', help='File or GCS path for audio file to be recognized')
    audio_path = cli.parse_args().path

    # Paths with the gs:// scheme go to the URI-based sample; anything else
    # is treated as a local file.
    handler = (
        transcribe_gcs_with_multichannel
        if audio_path.startswith('gs://')
        else transcribe_file_with_multichannel)
    handler(audio_path)
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
# Copyright 2019, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import os
15+
16+
from transcribe_multichannel import (
17+
transcribe_file_with_multichannel,
18+
transcribe_gcs_with_multichannel)
19+
20+
RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')
21+
22+
23+
def test_transcribe_multichannel_file(capsys):
    """Check the local-file sample prints the expected phrase."""
    audio_path = os.path.join(RESOURCES, 'multi.wav')
    transcribe_file_with_multichannel(audio_path)

    # Only stdout is inspected; stderr is captured and discarded.
    printed, _ = capsys.readouterr()

    assert 'how are you doing' in printed
29+
30+
31+
def test_transcribe_multichannel_gcs(capsys):
    """Check the URI-based sample prints the expected phrase."""
    audio_uri = 'gs://cloud-samples-data/speech/multi.wav'
    transcribe_gcs_with_multichannel(audio_uri)

    # Only stdout is inspected; stderr is captured and discarded.
    printed, _ = capsys.readouterr()

    assert 'how are you doing' in printed

0 commit comments

Comments
 (0)