Skip to content

add speech api multichannel samples #2003

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Feb 22, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions speech/cloud-client/transcribe_multichannel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/usr/bin/env python

# Copyright 2019 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Google Cloud Speech API sample that demonstrates transcribing audio
with separate recognition per channel (multichannel).
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Comment update

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


Example usage:
python transcribe_multichannel.py resources/audio.raw
python transcribe_multichannel.py \
gs://cloud-samples-tests/speech/Google_Gnome.wav
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we put this in the storage-samples project?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

"""

import argparse
import io


def transcribe_file_with_multichannel(speech_file):
    """Recognize a local audio file with per-channel recognition enabled.

    The whole file is read into memory, sent in a single ``recognize``
    call, and the first alternative of every returned result is printed
    together with the result's channel tag.

    Args:
        speech_file: Path of the audio file on the local filesystem.
    """
    # [START speech_transcribe_multichannel]
    from google.cloud import speech
    speech_client = speech.SpeechClient()

    with open(speech_file, 'rb') as source:
        payload = source.read()

    recognition_audio = speech.types.RecognitionAudio(content=payload)

    linear16 = speech.enums.RecognitionConfig.AudioEncoding.LINEAR16
    recognition_config = speech.types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=16000,
        language_code='en-US',
        # NOTE(review): a channel count of 1 leaves only one channel to
        # recognize separately -- confirm against the sample audio.
        audio_channel_count=1,
        enable_separate_recognition_per_channel=True)

    reply = speech_client.recognize(recognition_config, recognition_audio)

    for index, item in enumerate(reply.results):
        # Only the first alternative of each result is reported.
        best = item.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}'.format(index))
        print(u'Transcript: {}'.format(best.transcript))
        print(u'Channel Tag: {}'.format(item.channel_tag))
    # [END speech_transcribe_multichannel]


def transcribe_gcs_with_multichannel(gcs_uri):
    """Recognize an audio file referenced by URI with per-channel
    recognition enabled.

    The URI is handed to the API as-is (nothing is downloaded here), a
    single ``recognize`` call is made, and the first alternative of every
    returned result is printed together with the result's channel tag.

    Args:
        gcs_uri: URI of the audio file, e.g. ``gs://bucket/object``.
    """
    # [START speech_transcribe_multichannel_gcs]
    from google.cloud import speech
    speech_client = speech.SpeechClient()

    recognition_audio = speech.types.RecognitionAudio(uri=gcs_uri)

    linear16 = speech.enums.RecognitionConfig.AudioEncoding.LINEAR16
    recognition_config = speech.types.RecognitionConfig(
        encoding=linear16,
        sample_rate_hertz=16000,
        language_code='en-US',
        # NOTE(review): a channel count of 1 leaves only one channel to
        # recognize separately -- confirm against the sample audio.
        audio_channel_count=1,
        enable_separate_recognition_per_channel=True)

    reply = speech_client.recognize(recognition_config, recognition_audio)

    for index, item in enumerate(reply.results):
        # Only the first alternative of each result is reported.
        best = item.alternatives[0]
        print('-' * 20)
        print('First alternative of result {}'.format(index))
        print(u'Transcript: {}'.format(best.transcript))
        print(u'Channel Tag: {}'.format(item.channel_tag))
    # [END speech_transcribe_multichannel_gcs]


if __name__ == '__main__':
    # The module docstring doubles as the --help description; the raw
    # formatter keeps its example-usage lines laid out as written.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__)
    cli.add_argument(
        'path', help='File or GCS path for audio file to be recognized')
    target = cli.parse_args().path

    # Paths with the gs:// scheme go to the URI-based sample; anything
    # else is treated as a local file.
    handler = (
        transcribe_gcs_with_multichannel
        if target.startswith('gs://')
        else transcribe_file_with_multichannel)
    handler(target)
38 changes: 38 additions & 0 deletions speech/cloud-client/transcribe_multichannel_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright 2019, Google, Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os

from transcribe_multichannel import (
transcribe_file_with_multichannel,
transcribe_gcs_with_multichannel)

# Directory of audio fixtures, resolved relative to this test module.
RESOURCES = os.path.join(os.path.dirname(__file__), 'resources')


def test_transcribe_multichannel_file(capsys):
    """The local-file sample prints the expected transcript phrase."""
    expected = 'OK Google stream stranger things from Netflix to my TV'
    audio_path = os.path.join(RESOURCES, 'Google_Gnome.wav')

    transcribe_file_with_multichannel(audio_path)

    # Only stdout matters here; stderr is read but ignored.
    stdout, _ = capsys.readouterr()
    assert expected in stdout


def test_transcribe_multichannel_gcs(capsys):
    """The GCS sample prints the expected transcript phrase."""
    expected = 'OK Google stream stranger things from Netflix to my TV'
    audio_uri = 'gs://cloud-samples-tests/speech/Google_Gnome.wav'

    transcribe_gcs_with_multichannel(audio_uri)

    # Only stdout matters here; stderr is read but ignored.
    stdout, _ = capsys.readouterr()
    assert expected in stdout