Adding async sample code to cloud speech. (#404)

xinjiez · Jon Wayne Parrott · commit ea476db5976f · 2016-07-11T10:12:02.000-07:00
diff --git a/speech/api/requirements-speech_grpc.txt b/speech/api/requirements-speech_grpc.txt
@@ -1,4 +1,4 @@
 gcloud==0.17.0
 grpcio==0.15.0
 PyAudio==0.2.9
-grpc-google-cloud-speech==1.0.4
+grpc-google-cloud-speech-v1beta1==1.0.1
diff --git a/speech/api/speech_async_grpc.py b/speech/api/speech_async_grpc.py
@@ -0,0 +1,115 @@
+#!/usr/bin/env python
+# Copyright (C) 2016 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#            http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Sample that transcribes a FLAC audio file stored in Google Cloud Storage,
+using async GRPC."""
+
+import argparse
+import time
+
+from gcloud.credentials import get_credentials
+from google.cloud.speech.v1beta1 import cloud_speech_pb2
+from google.longrunning import operations_grpc_pb2
+from grpc.beta import implementations
+
+# Keep the request alive for this many seconds
+DEADLINE_SECS = 10
+SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'
+
+
+def make_channel(host, port):
+    """Build a secure channel that sends Google credentials on each call.
+
+    Args:
+        host: hostname of the endpoint to connect to.
+        port: port of the endpoint to connect to.
+
+    Returns:
+        The result of ``implementations.secure_channel`` for host/port, using
+        default SSL credentials composed with a bearer-token auth plugin.
+    """
+    # Default SSL settings are all an https call needs.
+    transport_creds = implementations.ssl_channel_credentials(
+        None, None, None)
+
+    # Application default credentials from the environment, scoped for the
+    # speech API.
+    scoped_creds = get_credentials().create_scoped([SPEECH_SCOPE])
+    # The access token is read once, here; the plugin below sends this same
+    # header value with every call.
+    token = scoped_creds.get_access_token().access_token
+    auth_header = ('Authorization', 'Bearer ' + token)
+
+    def inject_auth(_, callback):
+        return callback([auth_header], None)
+
+    call_creds = implementations.metadata_call_credentials(
+        inject_auth, name='google_creds')
+
+    # Both ssl and google auth are needed, so combine the two.
+    return implementations.secure_channel(
+        host, port,
+        implementations.composite_channel_credentials(
+            transport_creds, call_creds))
+
+
+def main(input_uri, encoding, sample_rate):
+    """Transcribe an audio file in Cloud Storage using AsyncRecognize.
+
+    Starts the long running operation, polls it once a second until it is
+    done, then prints the recognition results.
+
+    Args:
+        input_uri: uri of the audio file, sent as RecognitionAudio.uri.
+        encoding: audio encoding, sent as RecognitionConfig.encoding.
+        sample_rate: sample rate, sent as RecognitionConfig.sample_rate.
+
+    Raises:
+        RuntimeError: if the operation finishes with an error instead of a
+            response.
+    """
+    channel = make_channel('speech.googleapis.com', 443)
+    speech_service = cloud_speech_pb2.beta_create_Speech_stub(channel)
+    # The method and parameters can be inferred from the proto from which the
+    # grpc client lib was generated. See:
+    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
+    operation = speech_service.AsyncRecognize(
+        cloud_speech_pb2.AsyncRecognizeRequest(
+            config=cloud_speech_pb2.RecognitionConfig(
+                encoding=encoding,
+                sample_rate=sample_rate,
+            ),
+            audio=cloud_speech_pb2.RecognitionAudio(
+                uri=input_uri,
+            )
+        ), DEADLINE_SECS)
+
+    # Print the longrunning operation handle.
+    print(operation)
+
+    # Construct a long running operation endpoint.
+    operations_service = operations_grpc_pb2.beta_create_Operations_stub(
+        channel)
+
+    name = operation.name
+    # The same request is sent on every poll, so build it once.
+    get_request = operations_grpc_pb2.GetOperationRequest(name=name)
+
+    while True:
+        # Give the server a second to process before polling again.
+        print('Waiting for server processing...')
+        time.sleep(1)
+        # Get the long running operation with response.
+        operation = operations_service.GetOperation(
+            get_request, DEADLINE_SECS)
+
+        if operation.done:
+            break
+
+    # A finished operation carries either `error` or `response`. Unpacking
+    # the response of a failed operation would only print an empty result,
+    # so report the failure instead.
+    if operation.HasField('error'):
+        raise RuntimeError(
+            'Operation {} failed: {}'.format(name, operation.error.message))
+
+    # Print the recognition results.
+    results = cloud_speech_pb2.AsyncRecognizeResponse()
+    operation.response.Unpack(results)
+    print(results)
+
+
+def _gcs_uri(text):
+    """Return text unchanged if it is a gs:// uri; used as an argparse type.
+
+    Raises:
+        argparse.ArgumentTypeError: if text does not start with 'gs://'.
+    """
+    if text.startswith('gs://'):
+        return text
+    raise argparse.ArgumentTypeError(
+        'Cloud Storage uri must be of the form gs://bucket/path/')
+
+
+if __name__ == '__main__':
+    # Command line: the gs:// uri to transcribe, plus optional audio format
+    # settings that are forwarded to main().
+    parser = argparse.ArgumentParser()
+    parser.add_argument('input_uri', type=_gcs_uri)
+    parser.add_argument(
+        '--encoding', default='FLAC', choices=[
+            'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'],
+        help='How the audio file is encoded. See {}#L67'.format(
+            'https://github.com/googleapis/googleapis/blob/master/'
+            'google/cloud/speech/v1beta1/cloud_speech.proto'))
+    # Without type=int a value given on the command line would stay a
+    # string, unlike the integer default.
+    parser.add_argument('--sample_rate', type=int, default=16000)
+
+    args = parser.parse_args()
+    main(args.input_uri, args.encoding, args.sample_rate)
diff --git a/speech/api/speech_async_grpc_test.py b/speech/api/speech_async_grpc_test.py
@@ -0,0 +1,38 @@
+# Copyright 2016, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import sys
+
+import pytest
+from speech_async_grpc import _gcs_uri
+from speech_async_grpc import main
+
+
+@pytest.mark.skipif(
+        sys.version_info[0] >= 3,
+        reason=("grpc doesn't yet support python3 "
+                'https://github.com/grpc/grpc/issues/282'))
+def test_main(cloud_config, capsys):
+    """Run the sample on the test bucket's audio and check the transcript."""
+    main(
+        'gs://{}/speech/audio.flac'.format(cloud_config.storage_bucket),
+        'FLAC', 16000)
+
+    captured_out, _ = capsys.readouterr()
+    transcript = re.compile(
+        r'how old is the Brooklyn Bridge', re.DOTALL | re.I)
+    assert transcript.search(captured_out)
+
+
+def test_gcs_uri():
+    """_gcs_uri returns gs:// uris unchanged and rejects anything else."""
+    # Imported locally because this module does not import argparse.
+    # _gcs_uri raises ArgumentTypeError, which is not a ValueError subclass,
+    # so expecting ValueError here could never pass.
+    import argparse
+
+    assert _gcs_uri('gs://bucket/path') == 'gs://bucket/path'
+    with pytest.raises(argparse.ArgumentTypeError):
+        _gcs_uri('/local/path')
diff --git a/speech/api/speech_async_rest.py b/speech/api/speech_async_rest.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Google Cloud Speech API sample application using the REST API for async
+batch processing."""
+
+# [START import_libraries]
+import argparse
+import base64
+import json
+import time
+
+from googleapiclient import discovery
+from oauth2client.client import GoogleCredentials
+# [END import_libraries]
+
+
+# [START authenticating]
+DISCOVERY_URL = ('https://{api}.googleapis.com/$discovery/rest?'
+                 'version={apiVersion}')
+
+
+def get_speech_service():
+    """Build a client for the speech API, version v1beta1.
+
+    Uses application default credentials, provided by the env variable
+    GOOGLE_APPLICATION_CREDENTIALS.
+    """
+    default_creds = GoogleCredentials.get_application_default()
+    scoped_creds = default_creds.create_scoped(
+        ['https://www.googleapis.com/auth/cloud-platform'])
+
+    return discovery.build(
+        'speech', 'v1beta1',
+        discoveryServiceUrl=DISCOVERY_URL,
+        credentials=scoped_creds)
+# [END authenticating]
+
+
+def main(speech_file):
+    """Transcribe the given audio file asynchronously.
+
+    Sends the audio inline in an asyncrecognize request, polls the returned
+    operation once a second until it is done, then prints the results as
+    JSON.
+
+    Args:
+        speech_file: the name of the audio file.
+
+    Raises:
+        RuntimeError: if the operation finishes with an error.
+    """
+    # [START construct_request]
+    with open(speech_file, 'rb') as speech:
+        # Base64 encode the binary audio file for inclusion in the request.
+        speech_content = base64.b64encode(speech.read())
+
+    service = get_speech_service()
+    service_request = service.speech().asyncrecognize(
+        body={
+            'config': {
+                'encoding': 'LINEAR16',
+                'sampleRate': 16000
+            },
+            'audio': {
+                'content': speech_content.decode('UTF-8')
+            }
+        })
+    # [END construct_request]
+    # [START send_request]
+    response = service_request.execute()
+    print(json.dumps(response))
+    # [END send_request]
+
+    name = response['name']
+    # Construct a GetOperation request.
+    service_request = service.operations().get(name=name)
+
+    while True:
+        # Give the server a second to process before polling again.
+        print('Waiting for server processing...')
+        time.sleep(1)
+        # Get the long running operation with response.
+        response = service_request.execute()
+
+        # 'done' can be absent from the reply, so do not index it directly.
+        if response.get('done'):
+            break
+
+    # A finished operation that carries 'error' has no results to print;
+    # report the failure instead of hitting a KeyError below.
+    if 'error' in response:
+        raise RuntimeError(
+            'Operation {} failed: {}'.format(
+                name, json.dumps(response['error'])))
+
+    # NOTE(review): 'results' appears to be omitted when nothing was
+    # transcribed -- confirm against the API. Print an empty list then.
+    print(json.dumps(response.get('response', {}).get('results', [])))
+
+
+# [START run_application]
+if __name__ == '__main__':
+    # Command line: a single positional argument naming the audio file.
+    cli = argparse.ArgumentParser()
+    cli.add_argument(
+        'speech_file', help='Full path of audio file to be recognized')
+    main(cli.parse_args().speech_file)
+    # [END run_application]
diff --git a/speech/api/speech_async_rest_test.py b/speech/api/speech_async_rest_test.py
@@ -0,0 +1,23 @@
+# Copyright 2016, Google, Inc.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+
+from speech_async_rest import main
+
+
+def test_main(resource, capsys):
+    """Run the sample on the audio.raw resource and check the transcript."""
+    audio_path = resource('audio.raw')
+    main(audio_path)
+
+    captured_out, _ = capsys.readouterr()
+    transcript = re.compile(
+        r'how old is the Brooklyn Bridge', re.DOTALL | re.I)
+    assert transcript.search(captured_out)
diff --git a/speech/api/speech_grpc.py b/speech/api/speech_grpc.py
diff --git a/speech/api/speech_grpc_test.py b/speech/api/speech_grpc_test.py
@@ -14,8 +14,8 @@
 import sys
 
 import pytest
-from speech_gcs import _gcs_uri
-from speech_gcs import main
+from speech_grpc import _gcs_uri
+from speech_grpc import main
 
 
 @pytest.mark.skipif(