Skip to content

Commit cc2137d

Browse files
nitsanshai, AJ Morozoff, and parthea
authored
docs(samples): Update code samples for adaptation and VAD (#462)
Co-authored-by: AJ Morozoff <[email protected]> Co-authored-by: Anthonios Partheniou <[email protected]>
1 parent bcc47c1 commit cc2137d

15 files changed

+390
-17
lines changed

speech/snippets/adaptation_v2_custom_class_reference.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def adaptation_v2_custom_class_reference(project_id, recognizer_id, phrase_set_i
4444
request = cloud_speech.CreateCustomClassRequest(
4545
parent=f"projects/{project_id}/locations/global",
4646
custom_class_id=custom_class_id,
47-
custom_class=cloud_speech.CustomClass(items=[{"value": "Keem"}]))
47+
custom_class=cloud_speech.CustomClass(items=[{"value": "fare"}]))
4848

4949
operation = client.create_custom_class(request=request)
5050
custom_class = operation.result()
@@ -70,10 +70,6 @@ def adaptation_v2_custom_class_reference(project_id, recognizer_id, phrase_set_i
7070
auto_decoding_config={}, adaptation=adaptation
7171
)
7272

73-
print(custom_class)
74-
print(phrase_set)
75-
print(config)
76-
7773
request = cloud_speech.RecognizeRequest(
7874
recognizer=recognizer.name, config=config, content=content
7975
)

speech/snippets/adaptation_v2_custom_class_reference_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,11 +48,11 @@ def test_adaptation_v2_custom_class_reference(capsys):
4848
phrase_set_id = "phrase-set-" + str(uuid4())
4949
custom_class_id = "custom-class-" + str(uuid4())
5050
response = adaptation_v2_custom_class_reference.adaptation_v2_custom_class_reference(
51-
project_id, recognizer_id, phrase_set_id, custom_class_id, os.path.join(RESOURCES, "baby_keem.wav")
51+
project_id, recognizer_id, phrase_set_id, custom_class_id, os.path.join(RESOURCES, "fair.wav")
5252
)
5353

5454
assert re.search(
55-
r"play Baby Keem",
55+
r"the word is fare",
5656
response.results[0].alternatives[0].transcript,
5757
re.DOTALL | re.I,
5858
)

speech/snippets/adaptation_v2_inline_custom_class.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ def adaptation_v2_inline_custom_class(project_id, recognizer_id, audio_file):
4141
content = f.read()
4242

4343
# Build inline phrase set to produce a more accurate transcript
44-
phrase_set = cloud_speech.PhraseSet(phrases=[{"value": "${keem}", "boost": 20}])
45-
custom_class = cloud_speech.CustomClass(name="keem", items=[{"value": "Keem"}])
44+
phrase_set = cloud_speech.PhraseSet(phrases=[{"value": "${fare}", "boost": 20}])
45+
custom_class = cloud_speech.CustomClass(name="fare", items=[{"value": "fare"}])
4646
adaptation = cloud_speech.SpeechAdaptation(
4747
phrase_sets=[
4848
cloud_speech.SpeechAdaptation.AdaptationPhraseSet(

speech/snippets/adaptation_v2_inline_custom_class_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,11 @@ def test_adaptation_v2_inline_custom_class(capsys):
3434

3535
recognizer_id = "recognizer-" + str(uuid4())
3636
response = adaptation_v2_inline_custom_class.adaptation_v2_inline_custom_class(
37-
project_id, recognizer_id, os.path.join(RESOURCES, "baby_keem.wav")
37+
project_id, recognizer_id, os.path.join(RESOURCES, "fair.wav")
3838
)
3939

4040
assert re.search(
41-
r"play Baby Keem",
41+
r"the word is fare",
4242
response.results[0].alternatives[0].transcript,
4343
re.DOTALL | re.I,
4444
)

speech/snippets/adaptation_v2_inline_phrase_set.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def adaptation_v2_inline_phrase_set(project_id, recognizer_id, audio_file):
4141
content = f.read()
4242

4343
# Build inline phrase set to produce a more accurate transcript
44-
phrase_set = cloud_speech.PhraseSet(phrases=[{"value": "Keem", "boost": 10}])
44+
phrase_set = cloud_speech.PhraseSet(phrases=[{"value": "fare", "boost": 10}])
4545
adaptation = cloud_speech.SpeechAdaptation(
4646
phrase_sets=[
4747
cloud_speech.SpeechAdaptation.AdaptationPhraseSet(

speech/snippets/adaptation_v2_inline_phrase_set_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,11 +34,11 @@ def test_adaptation_v2_inline_phrase_set(capsys):
3434

3535
recognizer_id = "recognizer-" + str(uuid4())
3636
response = adaptation_v2_inline_phrase_set.adaptation_v2_inline_phrase_set(
37-
project_id, recognizer_id, os.path.join(RESOURCES, "baby_keem.wav")
37+
project_id, recognizer_id, os.path.join(RESOURCES, "fair.wav")
3838
)
3939

4040
assert re.search(
41-
r"play Baby Keem",
41+
r"the word is fare",
4242
response.results[0].alternatives[0].transcript,
4343
re.DOTALL | re.I,
4444
)

speech/snippets/adaptation_v2_phrase_set_reference.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def adaptation_v2_phrase_set_reference(project_id, recognizer_id, phrase_set_id,
4444
request = cloud_speech.CreatePhraseSetRequest(
4545
parent=f"projects/{project_id}/locations/global",
4646
phrase_set_id=phrase_set_id,
47-
phrase_set=cloud_speech.PhraseSet(phrases=[{"value": "Keem", "boost": 10}]))
47+
phrase_set=cloud_speech.PhraseSet(phrases=[{"value": "fare", "boost": 10}]))
4848

4949
operation = client.create_phrase_set(request=request)
5050
phrase_set = operation.result()

speech/snippets/adaptation_v2_phrase_set_reference_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,11 @@ def test_adaptation_v2_phrase_set_reference(capsys):
4141
recognizer_id = "recognizer-" + str(uuid4())
4242
phrase_set_id = "phrase-set-" + str(uuid4())
4343
response = adaptation_v2_phrase_set_reference.adaptation_v2_phrase_set_reference(
44-
project_id, recognizer_id, phrase_set_id, os.path.join(RESOURCES, "baby_keem.wav")
44+
project_id, recognizer_id, phrase_set_id, os.path.join(RESOURCES, "fair.wav")
4545
)
4646

4747
assert re.search(
48-
r"play Baby Keem",
48+
r"the word is fare",
4949
response.results[0].alternatives[0].transcript,
5050
re.DOTALL | re.I,
5151
)
Binary file not shown.
-158 KB
Binary file not shown.

speech/snippets/resources/fair.wav

60.5 KB
Binary file not shown.
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# Copyright 2022 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
import argparse
17+
18+
# [START speech_transcribe_streaming_voice_activity_events]
19+
import io
20+
21+
from google.cloud.speech_v2 import SpeechClient
22+
from google.cloud.speech_v2.types import cloud_speech
23+
24+
25+
def transcribe_streaming_voice_activity_events(project_id, recognizer_id, audio_file):
    """Stream an audio file for recognition with voice activity events enabled.

    Creates a recognizer named ``recognizer_id`` under the ``global`` location
    of ``project_id``, sends the contents of ``audio_file`` to it as a stream
    of requests, and prints speech begin/end events and transcripts as the
    responses arrive. The recognizer is not deleted here; cleaning it up is
    left to the caller.

    Args:
        project_id: Project in which the recognizer is created.
        recognizer_id: ID to assign to the newly created recognizer.
        audio_file: Path of the audio file to read and stream.

    Returns:
        A list of every response received from ``streaming_recognize``, in
        arrival order.
    """
    # Instantiates a client
    client = SpeechClient()

    request = cloud_speech.CreateRecognizerRequest(
        parent=f"projects/{project_id}/locations/global",
        recognizer_id=recognizer_id,
        recognizer=cloud_speech.Recognizer(
            language_codes=["en-US"], model="latest_long"
        ),
    )

    # Creates a Recognizer
    operation = client.create_recognizer(request=request)
    recognizer = operation.result()

    # Reads a file as bytes
    with io.open(audio_file, "rb") as f:
        content = f.read()

    # In practice, stream should be a generator yielding chunks of audio data.
    # Clamp the chunk size to at least one byte: for inputs shorter than five
    # bytes the integer division yields 0, and range() rejects a zero step.
    chunk_length = max(1, len(content) // 5)
    stream = [
        content[start : start + chunk_length]
        for start in range(0, len(content), chunk_length)
    ]
    audio_requests = (
        cloud_speech.StreamingRecognizeRequest(audio=audio) for audio in stream
    )

    recognition_config = cloud_speech.RecognitionConfig(auto_decoding_config={})

    # Sets the flag to enable voice activity events
    streaming_features = cloud_speech.StreamingRecognitionFeatures(
        enable_voice_activity_events=True
    )
    streaming_config = cloud_speech.StreamingRecognitionConfig(
        config=recognition_config, streaming_features=streaming_features
    )

    config_request = cloud_speech.StreamingRecognizeRequest(
        recognizer=recognizer.name, streaming_config=streaming_config
    )

    def requests(config, audio):
        # The configuration request is sent first, followed by the audio.
        yield config
        yield from audio

    # Transcribes the audio into text
    responses_iterator = client.streaming_recognize(
        requests=requests(config_request, audio_requests)
    )

    event_types = cloud_speech.StreamingRecognizeResponse.SpeechEventType
    responses = []
    for response in responses_iterator:
        responses.append(response)
        # A response carries a single event type, so the checks are exclusive.
        if response.speech_event_type == event_types.SPEECH_ACTIVITY_BEGIN:
            print("Speech started.")
        elif response.speech_event_type == event_types.SPEECH_ACTIVITY_END:
            print("Speech ended.")
        for result in response.results:
            print(f"Transcript: {result.alternatives[0].transcript}")

    return responses
95+
# [END speech_transcribe_streaming_voice_activity_events]
96+
97+
98+
if __name__ == "__main__":
    # Command-line entry point: three positional arguments are parsed and
    # handed straight to the sample function.
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=__doc__,
    )
    positional_arguments = (
        ("project_id", "project to create recognizer in"),
        ("recognizer_id", "name of recognizer to create"),
        ("audio_file", "audio file to stream"),
    )
    for argument_name, argument_help in positional_arguments:
        cli.add_argument(argument_name, help=argument_help)
    parsed = cli.parse_args()
    transcribe_streaming_voice_activity_events(
        parsed.project_id, parsed.recognizer_id, parsed.audio_file
    )
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
# Copyright 2022, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import os
15+
import re
16+
from uuid import uuid4
17+
18+
from google.cloud.speech_v2 import SpeechClient
19+
from google.cloud.speech_v2.types import cloud_speech
20+
21+
import transcribe_streaming_voice_activity_events
22+
23+
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
24+
25+
26+
def delete_recognizer(name):
    """Issue a delete request for the recognizer with resource name ``name``."""
    SpeechClient().delete_recognizer(
        request=cloud_speech.DeleteRecognizerRequest(name=name)
    )
30+
31+
32+
def test_transcribe_streaming_voice_activity_events(capsys):
    """Check the streaming sample reports speech activity and a transcript.

    Runs the sample against ``resources/audio.wav`` with a uniquely named
    recognizer, then asserts that the first response is a speech-begin event
    and that the combined transcript contains the expected phrase. The
    recognizer is deleted afterwards whether or not the assertions pass.
    """
    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")

    recognizer_id = "recognizer-" + str(uuid4())
    responses = transcribe_streaming_voice_activity_events.transcribe_streaming_voice_activity_events(
        project_id, recognizer_id, os.path.join(RESOURCES, "audio.wav")
    )

    # The sample call has returned, so the recognizer exists from here on;
    # the finally clause keeps a failed assertion from leaking it.
    try:
        transcript = "".join(
            result.alternatives[0].transcript
            for response in responses
            for result in response.results
        )

        assert (
            responses[0].speech_event_type
            == cloud_speech.StreamingRecognizeResponse.SpeechEventType.SPEECH_ACTIVITY_BEGIN
        )

        assert re.search(
            r"how old is the Brooklyn Bridge",
            transcript,
            re.DOTALL | re.I,
        )
    finally:
        delete_recognizer(
            f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
        )

0 commit comments

Comments
 (0)