
Commit 43c8ec6

Author: Jon Wayne Parrott (committed)

Revert "Changing cloud speech code samples to work with v1beta1 (#399)"

This reverts commit bfef34d.

1 parent: bfef34d · commit: 43c8ec6

6 files changed: +51, -50 lines

speech/api/grpc_auth.py

Whitespace-only changes.
Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
 gcloud==0.17.0
 grpcio==0.14.0
 PyAudio==0.2.9
-grpc-google-cloud-speech-v1beta1==1.0.0
+grpc-google-cloud-speech==1.0.4
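The only substantive change above is the dependency pin: the v1beta1-specific gRPC package is dropped in favor of grpc-google-cloud-speech==1.0.4, presumably the package that provides the google.cloud.speech.v1 generated protos the samples below import. A minimal sanity-check sketch (not part of the sample), assuming that pin is installed:

# Sketch: confirm the v1 generated classes used throughout this commit load.
from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech

# Constructing an empty request just proves the generated message is present.
request = cloud_speech.RecognizeRequest()
print(type(request).__name__)  # -> 'RecognizeRequest'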

speech/api/speech_gcs.py

Lines changed: 13 additions & 10 deletions
@@ -18,7 +18,7 @@
 import argparse

 from gcloud.credentials import get_credentials
-from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
+from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
 from grpc.beta import implementations

 # Keep the request alive for this many seconds
@@ -48,23 +48,25 @@ def make_channel(host, port):
     return implementations.secure_channel(host, port, composite_channel)


-def main(input_uri, encoding, sample_rate):
+def main(input_uri, output_uri, encoding, sample_rate):
     service = cloud_speech.beta_create_Speech_stub(
         make_channel('speech.googleapis.com', 443))
     # The method and parameters can be inferred from the proto from which the
     # grpc client lib was generated. See:
-    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1beta1/cloud_speech.proto
-    response = service.SyncRecognize(cloud_speech.SyncRecognizeRequest(
-        config=cloud_speech.RecognitionConfig(
+    # https://github.com/googleapis/googleapis/blob/master/google/cloud/speech/v1/cloud_speech.proto
+    response = service.NonStreamingRecognize(cloud_speech.RecognizeRequest(
+        initial_request=cloud_speech.InitialRecognizeRequest(
             encoding=encoding,
             sample_rate=sample_rate,
+            output_uri=output_uri,
         ),
-        audio=cloud_speech.RecognitionAudio(
+        audio_request=cloud_speech.AudioRequest(
             uri=input_uri,
         )
     ), DEADLINE_SECS)
-    # Print the recognition results.
-    print(response.results)
+    # This shouldn't actually print anything, since the transcription is output
+    # to the GCS uri specified
+    print(response.responses)


 def _gcs_uri(text):
@@ -75,15 +77,16 @@ def _gcs_uri(text):


 PROTO_URL = ('https://github.com/googleapis/googleapis/blob/master/'
-             'google/cloud/speech/v1beta1/cloud_speech.proto')
+             'google/cloud/speech/v1/cloud_speech.proto')
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('input_uri', type=_gcs_uri)
+    parser.add_argument('output_uri', type=_gcs_uri)
     parser.add_argument(
         '--encoding', default='FLAC', choices=[
             'LINEAR16', 'FLAC', 'MULAW', 'AMR', 'AMR_WB'],
         help='How the audio file is encoded. See {}#L67'.format(PROTO_URL))
     parser.add_argument('--sample_rate', default=16000)

     args = parser.parse_args()
-    main(args.input_uri, args.encoding, args.sample_rate)
+    main(args.input_uri, args.output_uri, args.encoding, args.sample_rate)
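Condensed, this revert switches speech_gcs.py from the v1beta1 SyncRecognize call back to the v1 NonStreamingRecognize call, which takes an output_uri and writes the transcript to GCS instead of returning it inline. Below is a hedged sketch of that reverted call path; it assumes the sample's make_channel() helper and DEADLINE_SECS constant shown above, and the gs:// URIs are placeholders.

# Sketch of the reverted v1 flow (assumes make_channel and DEADLINE_SECS
# from speech_gcs.py above; gs:// URIs are placeholders).
from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech

service = cloud_speech.beta_create_Speech_stub(
    make_channel('speech.googleapis.com', 443))
response = service.NonStreamingRecognize(cloud_speech.RecognizeRequest(
    initial_request=cloud_speech.InitialRecognizeRequest(
        encoding='FLAC',
        sample_rate=16000,
        # v1 writes the transcript to this GCS object instead of returning it.
        output_uri='gs://your-bucket/speech/audio.txt',
    ),
    audio_request=cloud_speech.AudioRequest(
        uri='gs://your-bucket/speech/audio.flac',
    ),
), DEADLINE_SECS)
# Expect an empty responses list here; the transcript lands at output_uri.
print(response.responses)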

speech/api/speech_gcs_test.py

Lines changed: 3 additions & 3 deletions
@@ -11,7 +11,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import re
 import sys

 import pytest
@@ -25,11 +24,12 @@
             'https://github.com/grpc/grpc/issues/282'))
 def test_main(cloud_config, capsys):
     input_uri = 'gs://{}/speech/audio.flac'.format(cloud_config.storage_bucket)
+    output_uri = 'gs://{}/speech/audio.txt'.format(cloud_config.storage_bucket)

-    main(input_uri, 'FLAC', 16000)
+    main(input_uri, output_uri, 'FLAC', 16000)

     out, err = capsys.readouterr()
-    assert re.search(r'how old is the Brooklyn Bridge', out, re.DOTALL | re.I)
+    assert '[]\n' == out


 def test_gcs_uri():
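With the revert, test_main no longer greps stdout for a transcript; it only checks that the printed response list is empty ('[]\n'), since v1 delivers the transcript to the GCS output_uri. If you wanted to verify the transcript itself, a hypothetical follow-up (not part of this test) could fetch the output object with the gcloud==0.17.0 storage client pinned in requirements; the bucket and object names below are placeholders.

# Hypothetical check, not part of the sample: read the transcript object that
# v1 wrote to output_uri, using the pinned gcloud storage client.
from gcloud import storage

client = storage.Client()
bucket = client.get_bucket('your-bucket')
blob = bucket.get_blob('speech/audio.txt')
if blob is not None:
    # The object holds the recognition results written by the Speech API.
    print(blob.download_as_string())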

speech/api/speech_rest.py

Lines changed: 4 additions & 4 deletions
@@ -40,7 +40,7 @@ def get_speech_service():
     credentials.authorize(http)

     return discovery.build(
-        'speech', 'v1beta1', http=http, discoveryServiceUrl=DISCOVERY_URL)
+        'speech', 'v1', http=http, discoveryServiceUrl=DISCOVERY_URL)
 # [END authenticating]


@@ -57,13 +57,13 @@ def main(speech_file):
         speech_content = base64.b64encode(speech.read())

     service = get_speech_service()
-    service_request = service.speech().syncrecognize(
+    service_request = service.speech().recognize(
         body={
-            'config': {
+            'initialRequest': {
                 'encoding': 'LINEAR16',
                 'sampleRate': 16000
             },
-            'audio': {
+            'audioRequest': {
                 'content': speech_content.decode('UTF-8')
             }
         })
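For the REST sample the revert is essentially a rename: the discovery client is built against 'v1' instead of 'v1beta1', the method is recognize() rather than syncrecognize(), and the body keys change from config/audio to initialRequest/audioRequest. A small sketch of the v1-shaped request body as the reverted code builds it, with placeholder audio bytes:

# Sketch: the v1-shaped body passed to service.speech().recognize();
# the audio bytes here are placeholders.
import base64
import json

speech_content = base64.b64encode(b'...raw LINEAR16 audio bytes...')
body = {
    'initialRequest': {          # was 'config' in v1beta1
        'encoding': 'LINEAR16',
        'sampleRate': 16000
    },
    'audioRequest': {            # was 'audio' in v1beta1
        'content': speech_content.decode('UTF-8')
    }
}
print(json.dumps(body, indent=2))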

speech/api/speech_streaming.py

Lines changed: 30 additions & 32 deletions
@@ -14,22 +14,20 @@
 # limitations under the License.
 """Sample that streams audio to the Google Cloud Speech API via GRPC."""

-from __future__ import division
-
 import contextlib
 import re
 import threading

 from gcloud.credentials import get_credentials
-from google.cloud.speech.v1beta1 import cloud_speech_pb2 as cloud_speech
+from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech
 from google.rpc import code_pb2
 from grpc.beta import implementations
 import pyaudio

 # Audio recording parameters
 RATE = 16000
 CHANNELS = 1
-CHUNK = int(RATE / 10)  # 100ms
+CHUNK = RATE // 10  # 100ms

 # Keep the request alive for this many seconds
 DEADLINE_SECS = 8 * 60 * 60
@@ -45,15 +43,15 @@ def make_channel(host, port):
     creds = get_credentials().create_scoped([SPEECH_SCOPE])
     # Add a plugin to inject the creds into the header
     auth_header = (
-        'Authorization',
-        'Bearer ' + creds.get_access_token().access_token)
+            'Authorization',
+            'Bearer ' + creds.get_access_token().access_token)
     auth_plugin = implementations.metadata_call_credentials(
-        lambda _, cb: cb([auth_header], None),
-        name='google_creds')
+            lambda _, cb: cb([auth_header], None),
+            name='google_creds')

     # compose the two together for both ssl and google auth
     composite_channel = implementations.composite_channel_credentials(
-        ssl_channel, auth_plugin)
+            ssl_channel, auth_plugin)

     return implementations.secure_channel(host, port, composite_channel)

@@ -77,40 +75,41 @@ def record_audio(channels, rate, chunk):


 def request_stream(stop_audio, channels=CHANNELS, rate=RATE, chunk=CHUNK):
-    """Yields `StreamingRecognizeRequest`s constructed from a recording audio
-    stream.
+    """Yields `RecognizeRequest`s constructed from a recording audio stream.

     Args:
         stop_audio: A threading.Event object stops the recording when set.
         channels: How many audio channels to record.
         rate: The sampling rate.
         chunk: Buffer audio into chunks of this size before sending to the api.
     """
-    # The initial request must contain metadata about the stream, so the
-    # server knows how to interpret it.
-    recognition_config = cloud_speech.RecognitionConfig(
-        encoding='LINEAR16', sample_rate=rate)
-    streaming_config = cloud_speech.StreamingRecognitionConfig(
-        config=recognition_config,
-        # Note that setting interim_results to True means that you'll likely
-        # get multiple results for the same bit of audio, as the system
-        # re-interprets audio in the context of subsequent audio. However, this
-        # will give us quick results without having to tell the server when to
-        # finalize a piece of audio.
-        interim_results=True, single_utterance=True
-    )
-
-    yield cloud_speech.StreamingRecognizeRequest(
-        streaming_config=streaming_config)
-
     with record_audio(channels, rate, chunk) as audio_stream:
+        # The initial request must contain metadata about the stream, so the
+        # server knows how to interpret it.
+        metadata = cloud_speech.InitialRecognizeRequest(
+            encoding='LINEAR16', sample_rate=rate,
+            # Note that setting interim_results to True means that you'll
+            # likely get multiple results for the same bit of audio, as the
+            # system re-interprets audio in the context of subsequent audio.
+            # However, this will give us quick results without having to tell
+            # the server when to finalize a piece of audio.
+            interim_results=True, continuous=False,
+        )
+        data = audio_stream.read(chunk)
+        audio_request = cloud_speech.AudioRequest(content=data)
+
+        yield cloud_speech.RecognizeRequest(
+            initial_request=metadata,
+            audio_request=audio_request)
+
         while not stop_audio.is_set():
             data = audio_stream.read(chunk)
             if not data:
                 raise StopIteration()
-
             # Subsequent requests can all just have the content
-            yield cloud_speech.StreamingRecognizeRequest(audio_content=data)
+            audio_request = cloud_speech.AudioRequest(content=data)
+
+            yield cloud_speech.RecognizeRequest(audio_request=audio_request)


 def listen_print_loop(recognize_stream):
@@ -137,8 +136,7 @@ def main():
             make_channel('speech.googleapis.com', 443)) as service:
         try:
             listen_print_loop(
-                service.StreamingRecognize(
-                    request_stream(stop_audio), DEADLINE_SECS))
+                service.Recognize(request_stream(stop_audio), DEADLINE_SECS))
         finally:
             # Stop the request stream once we're done with the loop - otherwise
             # it'll keep going in the thread that the grpc lib makes for it..
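In the reverted streaming sample there is no separate streaming_config message: the first RecognizeRequest bundles the InitialRecognizeRequest metadata with the first audio chunk, and every later request carries only an audio_request, all fed to service.Recognize(). A sketch of that request pattern, driven by an in-memory list of chunks so it needs neither pyaudio nor a live microphone; the chunk bytes are placeholders and request_stream_from_chunks is a hypothetical helper, not part of the sample.

# Sketch of the reverted v1 streaming request pattern over canned chunks.
from google.cloud.speech.v1 import cloud_speech_pb2 as cloud_speech


def request_stream_from_chunks(chunks, rate=16000):
    chunks = iter(chunks)
    # v1 folds the stream metadata into the first request, alongside the
    # first chunk of audio.
    first_chunk = next(chunks)
    yield cloud_speech.RecognizeRequest(
        initial_request=cloud_speech.InitialRecognizeRequest(
            encoding='LINEAR16', sample_rate=rate,
            interim_results=True, continuous=False),
        audio_request=cloud_speech.AudioRequest(content=first_chunk))
    # Every subsequent request carries only audio content.
    for data in chunks:
        yield cloud_speech.RecognizeRequest(
            audio_request=cloud_speech.AudioRequest(content=data))


requests = list(request_stream_from_chunks([b'\x00' * 3200, b'\x00' * 3200]))
print(len(requests))  # -> 2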
