Skip to content

Commit ea476db

Browse files
xinjiezJon Wayne Parrott
authored and
Jon Wayne Parrott
committed
Adding async sample code to cloud speech. (#404)
1 parent d1912d4 commit ea476db

7 files changed

+277
-3
lines changed
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
gcloud==0.17.0
22
grpcio==0.15.0
33
PyAudio==0.2.9
4-
grpc-google-cloud-speech==1.0.4
4+
grpc-google-cloud-speech-v1beta1==1.0.1

speech/api/speech_async_grpc.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
#!/usr/bin/env python
2+
# Copyright (C) 2016 Google Inc.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
"""Sample that transcribes a FLAC audio file stored in Google Cloud Storage,
17+
using async GRPC."""
18+
19+
import argparse
20+
import time
21+
22+
from gcloud.credentials import get_credentials
23+
from google.cloud.speech.v1beta1 import cloud_speech_pb2
24+
from google.longrunning import operations_grpc_pb2
25+
from grpc.beta import implementations
26+
27+
# Deadline, in seconds, passed with every gRPC call made by main(): the
# AsyncRecognize request and each GetOperation poll.
28+
DEADLINE_SECS = 10
29+
# OAuth scope requested when make_channel() scopes the credentials.
SPEECH_SCOPE = 'https://www.googleapis.com/auth/cloud-platform'
30+
31+
32+
def make_channel(host, port):
    """Return a secure gRPC channel to ``host:port`` carrying Google auth.

    The channel combines default SSL transport credentials with per-call
    metadata credentials that add an ``Authorization: Bearer <token>``
    header. The access token is fetched once, while the channel is being
    built, from the environment's credentials scoped to SPEECH_SCOPE.

    Args:
        host: host name of the service to connect to.
        port: port of the service to connect to.

    Returns:
        The channel created by ``implementations.secure_channel``.
    """
    # Transport security: SSL with the library defaults.
    transport_creds = implementations.ssl_channel_credentials(
        None, None, None)

    # Credentials discovered from the environment, limited to the scope
    # this sample needs.
    scoped_creds = get_credentials().create_scoped([SPEECH_SCOPE])
    access_token = scoped_creds.get_access_token().access_token
    auth_entry = ('Authorization', 'Bearer ' + access_token)

    def add_auth_metadata(_, callback):
        # Metadata plugin: hand the auth header to gRPC, reporting no error.
        return callback([auth_entry], None)

    call_creds = implementations.metadata_call_credentials(
        add_auth_metadata, name='google_creds')

    # One credentials object covering both SSL and Google auth.
    channel_creds = implementations.composite_channel_credentials(
        transport_creds, call_creds)

    return implementations.secure_channel(host, port, channel_creds)
52+
53+
54+
def main(input_uri, encoding, sample_rate):
    """Transcribe audio in Cloud Storage using AsyncRecognize over gRPC.

    Sends an AsyncRecognize request, prints the long-running operation it
    returns, polls that operation once per second until it is done and
    finally prints the unpacked AsyncRecognizeResponse.

    Args:
        input_uri: URI of the audio, used as ``RecognitionAudio.uri``.
        encoding: value for ``RecognitionConfig.encoding``.
        sample_rate: value for ``RecognitionConfig.sample_rate``.

    Raises:
        RuntimeError: if the operation completes with an error instead of
            a response.
    """
    channel = make_channel('speech.googleapis.com', 443)
    service = cloud_speech_pb2.beta_create_Speech_stub(channel)
    # The method and parameters can be inferred from the proto from which the
    # grpc client lib was generated. See:
    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
    response = service.AsyncRecognize(cloud_speech_pb2.AsyncRecognizeRequest(
        config=cloud_speech_pb2.RecognitionConfig(
            encoding=encoding,
            sample_rate=sample_rate,
        ),
        audio=cloud_speech_pb2.RecognitionAudio(
            uri=input_uri,
        )
    ), DEADLINE_SECS)

    # Print the longrunning operation handle.
    print(response)

    # Construct a long running operation endpoint.
    service = operations_grpc_pb2.beta_create_Operations_stub(channel)

    name = response.name

    while True:
        # Give the server a second to process before polling again.
        print('Waiting for server processing...')
        time.sleep(1)
        # Get the long running operation with response.
        response = service.GetOperation(
            operations_grpc_pb2.GetOperationRequest(name=name),
            DEADLINE_SECS)

        if response.done:
            break

    # A finished operation holds either an error or a response. Without this
    # check a failed recognition would print an empty result and look like a
    # success.
    if response.HasField('error'):
        raise RuntimeError(
            'Speech recognition failed (code {}): {}'.format(
                response.error.code, response.error.message))

    # Print the recognition results.
    results = cloud_speech_pb2.AsyncRecognizeResponse()
    response.response.Unpack(results)
    print(results)
94+
95+
96+
def _gcs_uri(text):
    """Validate a Cloud Storage URI for use as an argparse ``type``.

    Args:
        text: the candidate URI string.

    Returns:
        ``text`` unchanged when it starts with ``gs://``.

    Raises:
        argparse.ArgumentTypeError: if ``text`` does not start with
            ``gs://``.
    """
    if text.startswith('gs://'):
        return text
    raise argparse.ArgumentTypeError(
        'Cloud Storage uri must be of the form gs://bucket/path/')
101+
102+
103+
if __name__ == '__main__':
    # Command line entry point: parse the arguments and run the sample.
    parser = argparse.ArgumentParser()
    parser.add_argument('input_uri', type=_gcs_uri)
    parser.add_argument(
        '--encoding', default='FLAC', choices=[
            'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'],
        help='How the audio file is encoded. See {}#L67'.format(
            'https://github.com/googleapis/googleapis/blob/master/'
            'google/cloud/speech/v1beta1/cloud_speech.proto'))
    # type=int makes a rate given on the command line an integer, like the
    # default. Without it argparse would pass the raw string on to main().
    parser.add_argument('--sample_rate', type=int, default=16000)

    args = parser.parse_args()
    main(args.input_uri, args.encoding, args.sample_rate)

speech/api/speech_async_grpc_test.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Copyright 2016, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import re
15+
import sys
16+
17+
import pytest
18+
from speech_async_grpc import _gcs_uri
19+
from speech_async_grpc import main
20+
21+
22+
@pytest.mark.skipif(
    sys.version_info >= (3, 0),
    reason=("grpc doesn't yet support python3 "
            'https://github.com/grpc/grpc/issues/282'))
def test_main(cloud_config, capsys):
    """Run the async gRPC sample on the bucket's FLAC file and check stdout."""
    bucket = cloud_config.storage_bucket
    main('gs://{}/speech/audio.flac'.format(bucket), 'FLAC', 16000)

    stdout, _ = capsys.readouterr()
    # The transcript is matched case-insensitively anywhere in the output.
    expected = re.compile(
        r'how old is the Brooklyn Bridge', re.DOTALL | re.I)
    assert expected.search(stdout)
33+
34+
35+
def test_gcs_uri():
    """_gcs_uri returns gs:// URIs unchanged and rejects anything else."""
    # Imported locally because this test module has no top-level
    # ``import argparse``.
    import argparse

    assert _gcs_uri('gs://bucket/path') == 'gs://bucket/path'
    # _gcs_uri raises argparse.ArgumentTypeError, which derives from
    # Exception rather than ValueError, so expecting ValueError never passes.
    with pytest.raises(argparse.ArgumentTypeError):
        _gcs_uri('/local/path')

speech/api/speech_async_rest.py

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
#!/usr/bin/env python
2+
# Copyright 2016 Google Inc. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
"""Google Cloud Speech API sample application using the REST API for async
16+
batch processing."""
17+
18+
# [START import_libraries]
19+
import argparse
20+
import base64
21+
import json
22+
import time
23+
24+
from googleapiclient import discovery
25+
from oauth2client.client import GoogleCredentials
26+
# [END import_libraries]
27+
28+
29+
# [START authenticating]
30+
# URL template for the API discovery document; get_speech_service() passes
# it to discovery.build() as discoveryServiceUrl.
DISCOVERY_URL = ('https://{api}.googleapis.com/$discovery/rest?'
31+
'version={apiVersion}')
32+
33+
34+
# Application default credentials provided by env variable
35+
# GOOGLE_APPLICATION_CREDENTIALS
36+
def get_speech_service():
    """Build a client for the ``speech`` API, version ``v1beta1``.

    Uses the application default credentials, scoped to the cloud-platform
    scope, and the discovery document located via DISCOVERY_URL.

    Returns:
        The service object returned by ``discovery.build``.
    """
    scopes = ['https://www.googleapis.com/auth/cloud-platform']
    default_credentials = GoogleCredentials.get_application_default()
    scoped_credentials = default_credentials.create_scoped(scopes)

    return discovery.build(
        'speech',
        'v1beta1',
        credentials=scoped_credentials,
        discoveryServiceUrl=DISCOVERY_URL)
43+
# [END authenticating]
44+
45+
46+
def main(speech_file):
    """Transcribe the given audio file asynchronously.

    Sends the base64-encoded file contents in an asyncrecognize request,
    prints the returned operation, polls it once per second until it is
    done and prints the recognition results as JSON.

    Args:
        speech_file: the name of the audio file.

    Raises:
        RuntimeError: if the finished operation reports an error.
    """
    # [START construct_request]
    with open(speech_file, 'rb') as speech:
        # Base64 encode the binary audio file for inclusion in the request.
        speech_content = base64.b64encode(speech.read())

    service = get_speech_service()
    service_request = service.speech().asyncrecognize(
        body={
            'config': {
                'encoding': 'LINEAR16',
                'sampleRate': 16000
            },
            'audio': {
                'content': speech_content.decode('UTF-8')
            }
        })
    # [END construct_request]
    # [START send_request]
    response = service_request.execute()
    print(json.dumps(response))
    # [END send_request]

    name = response['name']
    # Construct a GetOperation request.
    service_request = service.operations().get(name=name)

    while True:
        # Give the server a second to process before polling again.
        print('Waiting for server processing...')
        time.sleep(1)
        # Get the long running operation with response.
        response = service_request.execute()

        if response.get('done'):
            break

    # A finished operation may carry 'error' instead of 'response'. Report it
    # clearly rather than failing with a KeyError on the lookup below.
    if 'error' in response:
        raise RuntimeError(
            'Speech recognition failed: {}'.format(
                json.dumps(response['error'])))

    # Fall back to an empty list when no 'results' key is present, so that
    # case prints "[]" instead of raising KeyError.
    print(json.dumps(response.get('response', {}).get('results', [])))
89+
90+
91+
# [START run_application]
92+
if __name__ == '__main__':
    # Command line entry point: one positional argument, the audio file.
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument(
        'speech_file',
        help='Full path of audio file to be recognized')
    cli_args = arg_parser.parse_args()
    main(cli_args.speech_file)
98+
# [END run_application]

speech/api/speech_async_rest_test.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Copyright 2016, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import re
15+
16+
from speech_async_rest import main
17+
18+
19+
def test_main(resource, capsys):
    """Run the async REST sample on audio.raw and check the transcript."""
    audio_path = resource('audio.raw')
    main(audio_path)

    stdout, _ = capsys.readouterr()
    # The transcript is matched case-insensitively anywhere in the output.
    expected = re.compile(
        r'how old is the Brooklyn Bridge', re.DOTALL | re.I)
    assert expected.search(stdout)
File renamed without changes.

speech/api/speech_gcs_test.py renamed to speech/api/speech_grpc_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@
1414
import sys
1515

1616
import pytest
17-
from speech_gcs import _gcs_uri
18-
from speech_gcs import main
17+
from speech_grpc import _gcs_uri
18+
from speech_grpc import main
1919

2020

2121
@pytest.mark.skipif(

0 commit comments

Comments
 (0)