
Commit 43c8ec6

Author: Jon Wayne Parrott (committed)

Revert "Changing cloud speech code samples to work with v1beta1 (#399)"

This reverts commit bfef34d.

1 parent: bfef34d · commit: 43c8ec6

6 files changed: +51, -50 lines

speech/api/grpc_auth.py

Whitespace-only changes.
Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 gcloud==0.17.0
 grpcio==0.14.0
 PyAudio==0.2.9
-grpc-google-cloud-speech-v1beta1==1.0.0
+grpc-google-cloud-speech==1.0.4
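The only substantive change above is the dependency pin: the v1beta1-specific gRPC package is dropped in favor of grpc-google-cloud-speech==1.0.4, presumably the package that provides the google.cloud.speech.v1 generated protos the samples below import. A minimal sanity-check sketch (not part of the sample), assuming that pin is installed:

# Sketch: confirm the v1 generated classes used throughout this commit load.
from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech

# Constructing an empty request just proves the generated message is present.
request = cloud_speech.RecognizeRequest()
print(type(request).__name__)  # -> 'RecognizeRequest'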

speech/api/speech_gcs.py

Lines changed: 13 additions & 10 deletions
@@ -18,7 +18,7 @@
 import argparse

 from gcloud.credentials import get_credentials
-from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
+from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
 from grpc.beta import implementations

 # Keep the request alive for this many seconds
@@ -48,23 +48,25 @@ def make_channel(host, port):
     return implementations.secure_channel(host, port, composite_channel)


-def main(input_uri, encoding, sample_rate):
+def main(input_uri, output_uri, encoding, sample_rate):
     service = cloud_speech.beta_create_Speech_stub(
         make_channel('speech.googleapis.com', 443))
     # The method and parameters can be inferred from the proto from which the
     # grpc client lib was generated. See:
-    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
-    response = service.SyncRecognize(cloud_speech.SyncRecognizeRequest(
-        config=cloud_speech.RecognitionConfig(
+    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto
+    response = service.NonStreamingRecognize(cloud_speech.RecognizeRequest(
+        initial_request=cloud_speech.InitialRecognizeRequest(
             encoding=encoding,
             sample_rate=sample_rate,
+            output_uri=output_uri,
         ),
-        audio=cloud_speech.RecognitionAudio(
+        audio_request=cloud_speech.AudioRequest(
             uri=input_uri,
         )
     ), DEADLINE_SECS)
-    # Print the recognition results.
-    print(response.results)
+    # This shouldn't actually print anything, since the transcription is output
+    # to the GCS uri specified
+    print(response.responses)


 def _gcs_uri(text):
@@ -75,15 +77,16 @@ def _gcs_uri(text):


 PROTO_URL = ('https://github.com/googleapis/googleapis/blob/master/'
-             'google/cloud/speech/v1beta1/cloud_speech.proto')
+             'google/cloud/speech/v1/cloud_speech.proto')
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('input_uri', type=_gcs_uri)
+    parser.add_argument('output_uri', type=_gcs_uri)
     parser.add_argument(
         '--encoding', default='FLAC', choices=[
             'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'],
         help='How the audio file is encoded. See {}#L67'.format(PROTO_URL))
     parser.add_argument('--sample_rate', default=16000)

     args = parser.parse_args()
-    main(args.input_uri, args.encoding, args.sample_rate)
+    main(args.input_uri, args.output_uri, args.encoding, args.sample_rate)
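Condensed, this revert switches speech_gcs.py from the v1beta1 SyncRecognize call back to the v1 NonStreamingRecognize call, which takes an output_uri and writes the transcript to GCS instead of returning it inline. Below is a hedged sketch of that reverted call path; it assumes the sample's make_channel() helper and DEADLINE_SECS constant shown above, and the gs:// URIs are placeholders.

# Sketch of the reverted v1 flow (assumes make_channel and DEADLINE_SECS
# from speech_gcs.py above; gs:// URIs are placeholders).
from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech

service = cloud_speech.beta_create_Speech_stub(
    make_channel('speech.googleapis.com', 443))
response = service.NonStreamingRecognize(cloud_speech.RecognizeRequest(
    initial_request=cloud_speech.InitialRecognizeRequest(
        encoding='FLAC',
        sample_rate=16000,
        # v1 writes the transcript to this GCS object instead of returning it.
        output_uri='gs://your-bucket/speech/audio.txt',
    ),
    audio_request=cloud_speech.AudioRequest(
        uri='gs://your-bucket/speech/audio.flac',
    ),
), DEADLINE_SECS)
# Expect an empty responses list here; the transcript lands at output_uri.
print(response.responses)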

speech/api/speech_gcs_test.py

Lines changed: 3 additions & 3 deletions
@@ -11,7 +11,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import re
 import sys

 import pytest
@@ -25,11 +24,12 @@
             'https://github.com/grpc/grpc/issues/282'))
 def test_main(cloud_config, capsys):
     input_uri = 'gs://{}/speech/audio.flac'.format(cloud_config.storage_bucket)
+    output_uri = 'gs://{}/speech/audio.txt'.format(cloud_config.storage_bucket)

-    main(input_uri, 'FLAC', 16000)
+    main(input_uri, output_uri, 'FLAC', 16000)

     out, err = capsys.readouterr()
-    assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)
+    assert '[]\n' == out


 def test_gcs_uri():
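With the revert, test_main no longer greps stdout for a transcript; it only checks that the printed response list is empty ('[]\n'), since v1 delivers the transcript to the GCS output_uri. If you wanted to verify the transcript itself, a hypothetical follow-up (not part of this test) could fetch the output object with the gcloud==0.17.0 storage client pinned in requirements; the bucket and object names below are placeholders.

# Hypothetical check, not part of the sample: read the transcript object that
# v1 wrote to output_uri, using the pinned gcloud storage client.
from gcloud import storage

client = storage.Client()
bucket = client.get_bucket('your-bucket')
blob = bucket.get_blob('speech/audio.txt')
if blob is not None:
    # The object holds the recognition results written by the Speech API.
    print(blob.download_as_string())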

speech/api/speech_rest.py

Lines changed: 4 additions & 4 deletions
@@ -40,7 +40,7 @@ def get_speech_service():
     credentials.authorize(http)

     return discovery.build(
-        'speech', 'v1beta1', http=http, discoveryServiceUrl=DISCOVERY_URL)
+        'speech', 'v1', http=http, discoveryServiceUrl=DISCOVERY_URL)
 # [END authenticating]


@@ -57,13 +57,13 @@ def main(speech_file):
         speech_content = base64.b64encode(speech.read())

     service = get_speech_service()
-    service_request = service.speech().syncrecognize(
+    service_request = service.speech().recognize(
         body={
-            'config': {
+            'initialRequest': {
                 'encoding': 'LINEAR16',
                 'sampleRate': 16000
             },
-            'audio': {
+            'audioRequest': {
                 'content': speech_content.decode('UTF-8')
             }
         })
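For the REST sample the revert is essentially a rename: the discovery client is built against 'v1' instead of 'v1beta1', the method is recognize() rather than syncrecognize(), and the body keys change from config/audio to initialRequest/audioRequest. A small sketch of the v1-shaped request body as the reverted code builds it, with placeholder audio bytes:

# Sketch: the v1-shaped body passed to service.speech().recognize();
# the audio bytes here are placeholders.
import base64
import json

speech_content = base64.b64encode(b'...raw LINEAR16 audio bytes...')
body = {
    'initialRequest': {          # was 'config' in v1beta1
        'encoding': 'LINEAR16',
        'sampleRate': 16000
    },
    'audioRequest': {            # was 'audio' in v1beta1
        'content': speech_content.decode('UTF-8')
    }
}
print(json.dumps(body, indent=2))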

speech/api/speech_streaming.py

Lines changed: 30 additions & 32 deletions
@@ -14,22 +14,20 @@
 # limitations under the License.
 """Sample that streams audio to the Google Cloud Speech API via GRPC."""

-from __future__ import division
-
 import contextlib
 import re
 import threading

 from gcloud.credentials import get_credentials
-from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
+from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
 from google.rpc import code_pb2
 from grpc.beta import implementations
 import pyaudio

 # Audio recording parameters
 RATE = 16000
 CHANNELS = 1
-CHUNK = int(RATE / 10)  # 100ms
+CHUNK = RATE // 10  # 100ms

 # Keep the request alive for this many seconds
 DEADLINE_SECS = 8 * 60 * 60
@@ -45,15 +43,15 @@ def make_channel(host, port):
     creds = get_credentials().create_scoped([SPEECH_SCOPE])
     # Add a plugin to inject the creds into the header
     auth_header = (
-        'Authorization',
-        'Bearer ' + creds.get_access_token().access_token)
+            'Authorization',
+            'Bearer ' + creds.get_access_token().access_token)
     auth_plugin = implementations.metadata_call_credentials(
-        lambda _, cb: cb([auth_header], None),
-        name='google_creds')
+            lambda _, cb: cb([auth_header], None),
+            name='google_creds')

     # compose the two together for both ssl and google auth
     composite_channel = implementations.composite_channel_credentials(
-        ssl_channel, auth_plugin)
+            ssl_channel, auth_plugin)

     return implementations.secure_channel(host, port, composite_channel)

@@ -77,40 +75,41 @@ def record_audio(channels, rate, chunk):


 def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
-    """Yields `StreamingRecognizeRequest`s constructed from a recording audio
-    stream.
+    """Yields `RecognizeRequest`s constructed from a recording audio stream.

     Args:
         stop_audio: A threading.Event object stops the recording when set.
         channels: How many audio channels to record.
         rate: The sampling rate.
         chunk: Buffer audio into chunks of this size before sending to the api.
     """
-    # The initial request must contain metadata about the stream, so the
-    # server knows how to interpret it.
-    recognition_config = cloud_speech.RecognitionConfig(
-        encoding='LINEAR16', sample_rate=rate)
-    streaming_config = cloud_speech.StreamingRecognitionConfig(
-        config=recognition_config,
-        # Note that setting interim_results to True means that you'll likely
-        # get multiple results for the same bit of audio, as the system
-        # re-interprets audio in the context of subsequent audio. However, this
-        # will give us quick results without having to tell the server when to
-        # finalize a piece of audio.
-        interim_results=True, single_utterance=True
-    )
-
-    yield cloud_speech.StreamingRecognizeRequest(
-        streaming_config=streaming_config)
-
     with record_audio(channels, rate, chunk) as audio_stream:
+        # The initial request must contain metadata about the stream, so the
+        # server knows how to interpret it.
+        metadata = cloud_speech.InitialRecognizeRequest(
+            encoding='LINEAR16', sample_rate=rate,
+            # Note that setting interim_results to True means that you'll
+            # likely get multiple results for the same bit of audio, as the
+            # system re-interprets audio in the context of subsequent audio.
+            # However, this will give us quick results without having to tell
+            # the server when to finalize a piece of audio.
+            interim_results=True, continuous=False,
+        )
+        data = audio_stream.read(chunk)
+        audio_request = cloud_speech.AudioRequest(content=data)
+
+        yield cloud_speech.RecognizeRequest(
+            initial_request=metadata,
+            audio_request=audio_request)
+
         while not stop_audio.is_set():
             data = audio_stream.read(chunk)
             if not data:
                 raise StopIteration()
-
             # Subsequent requests can all just have the content
-            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
+            audio_request = cloud_speech.AudioRequest(content=data)
+
+            yield cloud_speech.RecognizeRequest(audio_request=audio_request)


 def listen_print_loop(recognize_stream):
@@ -137,8 +136,7 @@ def main():
             make_channel('speech.googleapis.com', 443)) as service:
         try:
             listen_print_loop(
-                service.StreamingRecognize(
-                    request_stream(stop_audio), DEADLINE_SECS))
+                service.Recognize(request_stream(stop_audio), DEADLINE_SECS))
         finally:
             # Stop the request stream once we're done with the loop - otherwise
             # it'll keep going in the thread that the grpc lib makes for it..
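In the reverted streaming sample there is no separate streaming_config message: the first RecognizeRequest bundles the InitialRecognizeRequest metadata with the first audio chunk, and every later request carries only an audio_request, all fed to service.Recognize(). A sketch of that request pattern, driven by an in-memory list of chunks so it needs neither pyaudio nor a live microphone; the chunk bytes are placeholders and request_stream_from_chunks is a hypothetical helper, not part of the sample.

# Sketch of the reverted v1 streaming request pattern over canned chunks.
from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech


def request_stream_from_chunks(chunks, rate=16000):
    chunks = iter(chunks)
    # v1 folds the stream metadata into the first request, alongside the
    # first chunk of audio.
    first_chunk = next(chunks)
    yield cloud_speech.RecognizeRequest(
        initial_request=cloud_speech.InitialRecognizeRequest(
            encoding='LINEAR16', sample_rate=rate,
            interim_results=True, continuous=False),
        audio_request=cloud_speech.AudioRequest(content=first_chunk))
    # Every subsequent request carries only audio content.
    for data in chunks:
        yield cloud_speech.RecognizeRequest(
            audio_request=cloud_speech.AudioRequest(content=data))


requests = list(request_stream_from_chunks([b'\x00' * 3200, b'\x00' * 3200]))
print(len(requests))  # -> 2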
