Skip to content

Commit 7acd247

Browse files
committed
Alternate strategy, to just pass back responses.
1 parent a49643f commit 7acd247

File tree

14 files changed

+219
-285
lines changed

14 files changed

+219
-285
lines changed

docs/speech-streaming.rst

Lines changed: 9 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,33 +1,23 @@
1-
Speech StreamingResponseContainer
2-
=================================
1+
Streaming Speech Response
2+
=========================
33

4-
.. automodule:: google.cloud.speech.streaming.container
4+
.. automodule:: google.cloud.speech.streaming_response
55
:members:
66
:undoc-members:
77
:show-inheritance:
88

9-
Speech Streaming Request helpers
10-
================================
9+
Streaming Speech Result
10+
=======================
1111

12-
.. automodule:: google.cloud.speech.streaming.request
12+
.. automodule:: google.cloud.speech.streaming_result
1313
:members:
1414
:undoc-members:
1515
:show-inheritance:
1616

17-
Speech StreamingSpeechResponse
18-
==============================
17+
Streaming Endpointer Type
18+
=========================
1919

20-
.. automodule:: google.cloud.speech.streaming.response
21-
:members:
22-
:undoc-members:
23-
:show-inheritance:
24-
25-
26-
27-
Speech StreamingSpeechResult
28-
============================
29-
30-
.. automodule:: google.cloud.speech.streaming.result
20+
.. automodule:: google.cloud.speech.endpointer_type
3121
:members:
3222
:undoc-members:
3323
:show-inheritance:

docs/speech-usage.rst

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -51,10 +51,9 @@ See: `Speech Asynchronous Recognize`_
5151
5252
>>> import time
5353
>>> from google.cloud import speech
54-
>>> from google.cloud.speech.encoding import Encoding
5554
>>> client = speech.Client()
5655
>>> sample = client.sample(source_uri='gs://my-bucket/recording.flac',
57-
... encoding=Encoding.LINEAR16,
56+
... encoding=speech.Encoding.LINEAR16,
5857
... sample_rate=44100)
5958
>>> operation = client.async_recognize(sample, max_alternatives=2)
6059
>>> retry_count = 100
@@ -82,10 +81,9 @@ Great Britain.
8281
.. code-block:: python
8382
8483
>>> from google.cloud import speech
85-
>>> from google.cloud.speech.encoding import Encoding
8684
>>> client = speech.Client()
8785
>>> sample = client.sample(source_uri='gs://my-bucket/recording.flac',
88-
... encoding=Encoding.FLAC,
86+
... encoding=speech.Encoding.FLAC,
8987
... sample_rate=44100)
9088
>>> operation = client.async_recognize(sample, max_alternatives=2)
9189
>>> alternatives = client.sync_recognize(
@@ -107,10 +105,9 @@ Example of using the profanity filter.
107105
.. code-block:: python
108106
109107
>>> from google.cloud import speech
110-
>>> from google.cloud.speech.encoding import Encoding
111108
>>> client = speech.Client()
112109
>>> sample = client.sample(source_uri='gs://my-bucket/recording.flac',
113-
... encoding=Encoding.FLAC,
110+
... encoding=speech.Encoding.FLAC,
114111
... sample_rate=44100)
115112
>>> alternatives = client.sync_recognize(sample, max_alternatives=1,
116113
... profanity_filter=True)
@@ -129,10 +126,9 @@ words to the vocabulary of the recognizer.
129126
.. code-block:: python
130127
131128
>>> from google.cloud import speech
132-
>>> from google.cloud.speech.encoding import Encoding
133129
>>> client = speech.Client()
134130
>>> sample = client.sample(source_uri='gs://my-bucket/recording.flac',
135-
... encoding=Encoding.FLAC,
131+
... encoding=speech.Encoding.FLAC,
136132
... sample_rate=44100)
137133
>>> hints = ['hi', 'good afternoon']
138134
>>> alternatives = client.sync_recognize(sample, max_alternatives=2,
@@ -161,12 +157,11 @@ data to possible text alternatives on the fly.
161157
162158
>>> import io
163159
>>> from google.cloud import speech
164-
>>> from google.cloud.speech.encoding import Encoding
165160
>>> client = speech.Client()
166161
>>> with io.open('./hello.wav', 'rb') as stream:
167-
>>> sample = client.sample(stream=stream, encoding=Encoding.LINEAR16,
162+
... sample = client.sample(stream=stream, encoding=speech.Encoding.LINEAR16,
168163
... sample_rate=16000)
169-
>>> for response in client.stream_recognize(sample):
164+
... for response in client.stream_recognize(sample):
170165
... print(response.transcript)
171166
hello
172167
... print(response.is_final)
@@ -182,12 +177,11 @@ result(s) are returned.
182177
183178
>>> import io
184179
>>> from google.cloud import speech
185-
>>> from google.cloud.speech.encoding import Encoding
186180
>>> client = speech.Client()
187181
>>> with io.open('./hello.wav', 'rb') as stream:
188-
>>> sample = client.sample(stream=stream, encoding=Encoding.LINEAR16,
182+
... sample = client.sample(stream=stream, encoding=speech.Encoding.LINEAR16,
189183
... sample_rate=16000)
190-
>>> for response in client.stream_recognize(sample,
184+
... for response in client.stream_recognize(sample,
191185
... interim_results=True):
192186
... print(response.transcript)
193187
hell
@@ -211,9 +205,9 @@ See: `Single Utterance`_
211205
.. code-block:: python
212206
213207
>>> with io.open('./hello_pause_goodbye.wav', 'rb') as stream:
214-
>>> sample = client.sample(stream=stream, encoding=Encoding.LINEAR16,
208+
... sample = client.sample(stream=stream, encoding=speech.Encoding.LINEAR16,
215209
... sample_rate=16000)
216-
>>> stream_container = client.stream_recognize(sample,
210+
... stream_container = client.stream_recognize(sample,
217211
... single_utterance=True)
218212
>>> print(stream_container.get_full_text())
219213
hello

speech/google/cloud/speech/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@
1616

1717
from google.cloud.speech.client import Client
1818
from google.cloud.speech.connection import Connection
19+
from google.cloud.speech.encoding import Encoding

speech/google/cloud/speech/client.py

Lines changed: 165 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,17 @@
2424
from google.cloud.speech.connection import Connection
2525
from google.cloud.speech.encoding import Encoding
2626
from google.cloud.speech.operation import Operation
27-
from google.cloud.speech.streaming.request import _make_request_stream
2827
from google.cloud.speech.sample import Sample
29-
from google.cloud.speech.streaming.response import StreamingSpeechResponse
28+
from google.cloud.speech.streaming_response import StreamingSpeechResponse
3029

3130
try:
3231
from google.cloud.gapic.speech.v1beta1.speech_api import SpeechApi
32+
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
33+
RecognitionConfig)
34+
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
35+
StreamingRecognitionConfig)
36+
from google.cloud.grpc.speech.v1beta1.cloud_speech_pb2 import (
37+
StreamingRecognizeRequest)
3338
except ImportError: # pragma: NO COVER
3439
_HAVE_GAX = False
3540
else:
@@ -284,16 +289,9 @@ def stream_recognize(self, sample, language_code=None,
284289
with the is_final=false flag). If false or
285290
omitted, only is_final=true result(s) are
286291
returned.
287-
288-
:rtype: :class:`~streaming.StreamingResponseContainer`
289-
:returns: An instance of ``StreamingReponseContainer``.
290-
291292
"""
292293
if not _USE_GAX:
293-
raise EnvironmentError('GRPC is required to use this API.')
294-
295-
if sample.stream.closed:
296-
raise ValueError('Stream is closed.')
294+
raise EnvironmentError('gRPC is required to use this API.')
297295

298296
requests = _make_request_stream(sample, language_code=language_code,
299297
max_alternatives=max_alternatives,
@@ -379,3 +377,160 @@ def _build_request_data(sample, language_code=None, max_alternatives=None,
379377
}
380378

381379
return data
380+
381+
382+
def _make_request_stream(sample, language_code=None, max_alternatives=None,
383+
profanity_filter=None, speech_context=None,
384+
single_utterance=None, interim_results=None):
385+
"""Generate stream of requests from sample.
386+
387+
:type sample: :class:`~google.cloud.speech.sample.Sample`
388+
:param sample: Instance of ``Sample`` containing audio information.
389+
390+
:type language_code: str
391+
:param language_code: (Optional) The language of the supplied audio as
392+
BCP-47 language tag. Example: ``'en-GB'``.
393+
If omitted, defaults to ``'en-US'``.
394+
395+
:type max_alternatives: int
396+
:param max_alternatives: (Optional) Maximum number of recognition
397+
hypotheses to be returned. The server may
398+
return fewer than maxAlternatives.
399+
Valid values are 0-30. A value of 0 or 1
400+
will return a maximum of 1. Defaults to 1.
401+
402+
:type profanity_filter: bool
403+
:param profanity_filter: If True, the server will attempt to filter
404+
out profanities, replacing all but the
405+
initial character in each filtered word with
406+
asterisks, e.g. ``'f***'``. If False or
407+
omitted, profanities won't be filtered out.
408+
409+
:type speech_context: list
410+
:param speech_context: A list of strings (max 50) containing words and
411+
phrases "hints" so that the speech recognition
412+
is more likely to recognize them. This can be
413+
used to improve the accuracy for specific words
414+
and phrases. This can also be used to add new
415+
words to the vocabulary of the recognizer.
416+
417+
:type single_utterance: boolean
418+
:param single_utterance: [Optional] If false or omitted, the recognizer
419+
will perform continuous recognition
420+
(continuing to process audio even if the user
421+
pauses speaking) until the client closes the
422+
output stream (gRPC API) or when the maximum
423+
time limit has been reached. Multiple
424+
SpeechRecognitionResults with the is_final
425+
flag set to true may be returned.
426+
427+
If true, the recognizer will detect a single
428+
spoken utterance. When it detects that the
429+
user has paused or stopped speaking, it will
430+
return an END_OF_UTTERANCE event and cease
431+
recognition. It will return no more than one
432+
SpeechRecognitionResult with the is_final flag
433+
set to true.
434+
435+
:type interim_results: boolean
436+
:param interim_results: [Optional] If true, interim results (tentative
437+
hypotheses) may be returned as they become
438+
available (these interim results are indicated
439+
with the is_final=false flag). If false or
440+
omitted, only is_final=true result(s) are
441+
returned.
442+
"""
443+
config_request = _make_streaming_config(
444+
sample, language_code=language_code, max_alternatives=max_alternatives,
445+
profanity_filter=profanity_filter, speech_context=speech_context,
446+
single_utterance=single_utterance, interim_results=interim_results)
447+
448+
# The config request MUST go first and not contain any audio data.
449+
yield config_request
450+
451+
while True:
452+
data = sample.stream.read(sample.chunk_size)
453+
if not data:
454+
break
455+
# Optimize the request data size to around 100ms.
456+
yield StreamingRecognizeRequest(audio_content=data)
457+
458+
459+
def _make_streaming_config(sample, language_code,
460+
max_alternatives, profanity_filter,
461+
speech_context, single_utterance,
462+
interim_results):
463+
"""Build streaming configuration.
464+
465+
:type sample: :class:`~google.cloud.speech.sample.Sample`
466+
:param sample: Instance of ``Sample`` containing audio information.
467+
468+
:type language_code: str
469+
:param language_code: (Optional) The language of the supplied audio as
470+
BCP-47 language tag. Example: ``'en-GB'``.
471+
If omitted, defaults to ``'en-US'``.
472+
473+
:type max_alternatives: int
474+
:param max_alternatives: (Optional) Maximum number of recognition
475+
hypotheses to be returned. The server may
476+
return fewer than maxAlternatives.
477+
Valid values are 0-30. A value of 0 or 1
478+
will return a maximum of 1. Defaults to 1.
479+
480+
:type profanity_filter: bool
481+
:param profanity_filter: If True, the server will attempt to filter
482+
out profanities, replacing all but the
483+
initial character in each filtered word with
484+
asterisks, e.g. ``'f***'``. If False or
485+
omitted, profanities won't be filtered out.
486+
487+
:type speech_context: list
488+
:param speech_context: A list of strings (max 50) containing words and
489+
phrases "hints" so that the speech recognition
490+
is more likely to recognize them. This can be
491+
used to improve the accuracy for specific words
492+
and phrases. This can also be used to add new
493+
words to the vocabulary of the recognizer.
494+
495+
:type single_utterance: boolean
496+
:param single_utterance: [Optional] If false or omitted, the recognizer
497+
will perform continuous recognition
498+
(continuing to process audio even if the user
499+
pauses speaking) until the client closes the
500+
output stream (gRPC API) or when the maximum
501+
time limit has been reached. Multiple
502+
SpeechRecognitionResults with the is_final
503+
flag set to true may be returned.
504+
505+
If true, the recognizer will detect a single
506+
spoken utterance. When it detects that the
507+
user has paused or stopped speaking, it will
508+
return an END_OF_UTTERANCE event and cease
509+
recognition. It will return no more than one
510+
SpeechRecognitionResult with the is_final flag
511+
set to true.
512+
513+
:type interim_results: boolean
514+
:param interim_results: [Optional] If true, interim results (tentative
515+
hypotheses) may be returned as they become
516+
available (these interim results are indicated
517+
with the is_final=false flag). If false or
518+
omitted, only is_final=true result(s) are
519+
returned.
520+
521+
:rtype: :class:`~StreamingRecognizeRequest`
522+
:returns: Instance of ``StreamingRecognizeRequest`` carrying the ``StreamingRecognitionConfig``.
523+
"""
524+
config = RecognitionConfig(
525+
encoding=sample.encoding, sample_rate=sample.sample_rate,
526+
language_code=language_code, max_alternatives=max_alternatives,
527+
profanity_filter=profanity_filter, speech_context=speech_context)
528+
529+
streaming_config = StreamingRecognitionConfig(
530+
config=config, single_utterance=single_utterance,
531+
interim_results=interim_results)
532+
533+
config_request = StreamingRecognizeRequest(
534+
streaming_config=streaming_config)
535+
536+
return config_request

speech/google/cloud/speech/sample.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ class Sample(object):
5252

5353
def __init__(self, content=None, source_uri=None, stream=None,
5454
encoding=None, sample_rate=None):
55-
if [content, source_uri, stream].count(None) != 2:
55+
if (content, source_uri, stream).count(None) != 2:
5656
raise ValueError('Supply only one of \'content\', \'source_uri\''
5757
' or stream.')
5858

speech/google/cloud/speech/streaming/endpointer_type.py

Lines changed: 0 additions & 14 deletions
This file was deleted.

0 commit comments

Comments
 (0)