Skip to content

Commit d1e7642

Browse files
nitsanshaiNitsan Shai
and
Nitsan Shai
authored
samples: Code samples for Speech-to-Text V2 (#460)
Co-authored-by: Nitsan Shai <[email protected]>
1 parent 2cee8e4 commit d1e7642

17 files changed

+882
-4
lines changed
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
# Copyright 2022 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
# [START speech_adaptation_v2_custom_class_reference]
17+
import io
18+
19+
from google.cloud.speech_v2 import SpeechClient
20+
from google.cloud.speech_v2.types import cloud_speech
21+
22+
23+
def adaptation_v2_custom_class_reference(project_id, recognizer_id, phrase_set_id, custom_class_id, audio_file):
    """Transcribe an audio file using a PhraseSet that references a stored CustomClass.

    The function creates three resources under
    ``projects/{project_id}/locations/global`` -- a Recognizer, a CustomClass
    with the single item "Keem", and a PhraseSet whose only phrase refers to
    that CustomClass by resource name -- and then sends one recognize request
    that points at the PhraseSet. None of the created resources are deleted
    here; that is left to the caller.

    Args:
        project_id: Project used to build the parent path of every resource.
        recognizer_id: ID passed to the create-recognizer request.
        phrase_set_id: ID passed to the create-phrase-set request.
        custom_class_id: ID passed to the create-custom-class request.
        audio_file: Path of the audio file; it is read in binary mode and
            sent as the request content.

    Returns:
        The response object returned by ``client.recognize``.
    """
    # Instantiate the API client.
    speech = SpeechClient()

    # Create the Recognizer and block until the returned operation yields it.
    recognizer_request = cloud_speech.CreateRecognizerRequest(
        parent=f"projects/{project_id}/locations/global",
        recognizer_id=recognizer_id,
        recognizer=cloud_speech.Recognizer(
            language_codes=["en-US"],
            model="latest_short",
        ),
    )
    recognizer = speech.create_recognizer(request=recognizer_request).result()

    # Load the raw audio bytes.
    with io.open(audio_file, "rb") as audio:
        audio_bytes = audio.read()

    # Persist a CustomClass so that phrases can refer to it by name.
    class_request = cloud_speech.CreateCustomClassRequest(
        parent=f"projects/{project_id}/locations/global",
        custom_class_id=custom_class_id,
        custom_class=cloud_speech.CustomClass(items=[{"value": "Keem"}]),
    )
    custom_class = speech.create_custom_class(request=class_request).result()

    # Persist a PhraseSet whose phrase is a "${<custom class name>}" reference.
    class_reference = {"value": f"${{{custom_class.name}}}", "boost": 20}
    phrase_set_request = cloud_speech.CreatePhraseSetRequest(
        parent=f"projects/{project_id}/locations/global",
        phrase_set_id=phrase_set_id,
        phrase_set=cloud_speech.PhraseSet(phrases=[class_reference]),
    )
    phrase_set = speech.create_phrase_set(request=phrase_set_request).result()

    # Point the recognition config at the stored PhraseSet.
    stored_set = cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
        phrase_set=phrase_set.name
    )
    adaptation = cloud_speech.SpeechAdaptation(phrase_sets=[stored_set])
    config = cloud_speech.RecognitionConfig(
        auto_decoding_config={},
        adaptation=adaptation,
    )

    # Show what was created before running recognition.
    for created in (custom_class, phrase_set, config):
        print(created)

    # Transcribe the audio.
    recognize_request = cloud_speech.RecognizeRequest(
        recognizer=recognizer.name,
        config=config,
        content=audio_bytes,
    )
    response = speech.recognize(request=recognize_request)

    for result in response.results:
        top_alternative = result.alternatives[0]
        print("Transcript: {}".format(top_alternative.transcript))

    return response
88+
# [END speech_adaptation_v2_custom_class_reference]
89+
90+
91+
if __name__ == "__main__":
    # The sample function has five required positional parameters, so calling
    # it with no arguments raises TypeError. Read them from the command line.
    import argparse

    parser = argparse.ArgumentParser(
        description="Transcribe audio using a PhraseSet that references a CustomClass."
    )
    parser.add_argument("project_id", help="Google Cloud project ID")
    parser.add_argument("recognizer_id", help="ID for the Recognizer to create")
    parser.add_argument("phrase_set_id", help="ID for the PhraseSet to create")
    parser.add_argument("custom_class_id", help="ID for the CustomClass to create")
    parser.add_argument("audio_file", help="Path to the audio file to transcribe")
    args = parser.parse_args()

    adaptation_v2_custom_class_reference(
        args.project_id,
        args.recognizer_id,
        args.phrase_set_id,
        args.custom_class_id,
        args.audio_file,
    )
Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
# Copyright 2022, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import os
15+
import re
16+
from uuid import uuid4
17+
18+
from google.cloud.speech_v2 import SpeechClient
19+
from google.cloud.speech_v2.types import cloud_speech
20+
21+
import adaptation_v2_custom_class_reference
22+
23+
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
24+
25+
26+
def delete_recognizer(name):
    """Send a delete request for the Recognizer with resource name ``name``.

    A new client is created for the call, and whatever the client call
    returns is discarded.
    """
    SpeechClient().delete_recognizer(
        request=cloud_speech.DeleteRecognizerRequest(name=name)
    )
30+
31+
32+
def delete_phrase_set(name):
    """Send a delete request for the PhraseSet with resource name ``name``.

    A new client is created for the call, and whatever the client call
    returns is discarded.
    """
    SpeechClient().delete_phrase_set(
        request=cloud_speech.DeletePhraseSetRequest(name=name)
    )
36+
37+
38+
def delete_custom_class(name):
    """Send a delete request for the CustomClass with resource name ``name``.

    A new client is created for the call, and whatever the client call
    returns is discarded.
    """
    SpeechClient().delete_custom_class(
        request=cloud_speech.DeleteCustomClassRequest(name=name)
    )
42+
43+
44+
def test_adaptation_v2_custom_class_reference(capsys):
    """Run the custom-class-reference sample and check the transcript.

    The sample creates a Recognizer, a PhraseSet and a CustomClass with
    freshly generated IDs. Once the sample has returned, their deletion is
    registered on an ExitStack so cleanup still runs when the transcript
    assertion fails (or ``response.results`` is empty) instead of leaking
    the resources in the project.
    """
    from contextlib import ExitStack

    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")

    recognizer_id = "recognizer-" + str(uuid4())
    phrase_set_id = "phrase-set-" + str(uuid4())
    custom_class_id = "custom-class-" + str(uuid4())
    response = adaptation_v2_custom_class_reference.adaptation_v2_custom_class_reference(
        project_id,
        recognizer_id,
        phrase_set_id,
        custom_class_id,
        os.path.join(RESOURCES, "baby_keem.wav"),
    )

    with ExitStack() as cleanup:
        # ExitStack runs callbacks last-in-first-out, so they are registered
        # in reverse to keep the deletion order recognizer -> phrase set ->
        # custom class. Every callback runs even if an earlier one raises.
        cleanup.callback(
            delete_custom_class,
            f"projects/{project_id}/locations/global/customClasses/{custom_class_id}",
        )
        cleanup.callback(
            delete_phrase_set,
            f"projects/{project_id}/locations/global/phraseSets/{phrase_set_id}",
        )
        cleanup.callback(
            delete_recognizer,
            f"projects/{project_id}/locations/global/recognizers/{recognizer_id}",
        )

        assert re.search(
            r"play Baby Keem",
            response.results[0].alternatives[0].transcript,
            re.DOTALL | re.I,
        )
Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# Copyright 2022 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
# [START speech_adaptation_v2_inline_custom_class]
17+
import io
18+
19+
from google.cloud.speech_v2 import SpeechClient
20+
from google.cloud.speech_v2.types import cloud_speech
21+
22+
23+
def adaptation_v2_inline_custom_class(project_id, recognizer_id, audio_file):
    """Transcribe an audio file using an inline PhraseSet and inline CustomClass.

    A Recognizer is created under ``projects/{project_id}/locations/global``.
    The recognize request then carries its adaptation data inline: a
    CustomClass named "keem" with the single item "Keem", and a PhraseSet
    whose only phrase is the reference "${keem}" with boost 20. The
    Recognizer is not deleted here; that is left to the caller.

    Args:
        project_id: Project used to build the Recognizer's parent path.
        recognizer_id: ID passed to the create-recognizer request.
        audio_file: Path of the audio file; it is read in binary mode and
            sent as the request content.

    Returns:
        The response object returned by ``client.recognize``.
    """
    # Instantiate the API client.
    speech = SpeechClient()

    # Create the Recognizer and block until the returned operation yields it.
    recognizer_request = cloud_speech.CreateRecognizerRequest(
        parent=f"projects/{project_id}/locations/global",
        recognizer_id=recognizer_id,
        recognizer=cloud_speech.Recognizer(
            language_codes=["en-US"],
            model="latest_short",
        ),
    )
    recognizer = speech.create_recognizer(request=recognizer_request).result()

    # Load the raw audio bytes.
    with io.open(audio_file, "rb") as audio:
        audio_bytes = audio.read()

    # Build the inline phrase set and the custom class its phrase refers to.
    inline_phrases = cloud_speech.PhraseSet(
        phrases=[{"value": "${keem}", "boost": 20}]
    )
    keem_class = cloud_speech.CustomClass(name="keem", items=[{"value": "Keem"}])
    inline_set = cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
        inline_phrase_set=inline_phrases
    )
    adaptation = cloud_speech.SpeechAdaptation(
        phrase_sets=[inline_set],
        custom_classes=[keem_class],
    )
    config = cloud_speech.RecognitionConfig(
        auto_decoding_config={},
        adaptation=adaptation,
    )

    # Transcribe the audio.
    recognize_request = cloud_speech.RecognizeRequest(
        recognizer=recognizer.name,
        config=config,
        content=audio_bytes,
    )
    response = speech.recognize(request=recognize_request)

    for result in response.results:
        top_alternative = result.alternatives[0]
        print("Transcript: {}".format(top_alternative.transcript))

    return response
69+
# [END speech_adaptation_v2_inline_custom_class]
70+
71+
72+
if __name__ == "__main__":
    # The sample function has three required positional parameters, so
    # calling it with no arguments raises TypeError. Read them from the
    # command line.
    import argparse

    parser = argparse.ArgumentParser(
        description="Transcribe audio using an inline PhraseSet and CustomClass."
    )
    parser.add_argument("project_id", help="Google Cloud project ID")
    parser.add_argument("recognizer_id", help="ID for the Recognizer to create")
    parser.add_argument("audio_file", help="Path to the audio file to transcribe")
    args = parser.parse_args()

    adaptation_v2_inline_custom_class(
        args.project_id, args.recognizer_id, args.audio_file
    )
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
# Copyright 2022, Google, Inc.
2+
# Licensed under the Apache License, Version 2.0 (the "License");
3+
# you may not use this file except in compliance with the License.
4+
# You may obtain a copy of the License at
5+
#
6+
# http://www.apache.org/licenses/LICENSE-2.0
7+
#
8+
# Unless required by applicable law or agreed to in writing, software
9+
# distributed under the License is distributed on an "AS IS" BASIS,
10+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
# See the License for the specific language governing permissions and
12+
# limitations under the License.
13+
14+
import os
15+
import re
16+
from uuid import uuid4
17+
18+
from google.cloud.speech_v2 import SpeechClient
19+
from google.cloud.speech_v2.types import cloud_speech
20+
21+
import adaptation_v2_inline_custom_class
22+
23+
RESOURCES = os.path.join(os.path.dirname(__file__), "resources")
24+
25+
26+
def delete_recognizer(name):
    """Send a delete request for the Recognizer with resource name ``name``.

    A new client is created for the call, and whatever the client call
    returns is discarded.
    """
    SpeechClient().delete_recognizer(
        request=cloud_speech.DeleteRecognizerRequest(name=name)
    )
30+
31+
32+
def test_adaptation_v2_inline_custom_class(capsys):
    """Run the inline-custom-class sample and check the transcript.

    The sample creates a Recognizer with a freshly generated ID. Its
    deletion sits in a ``finally`` clause so it still runs when the
    transcript assertion fails (or ``response.results`` is empty) instead
    of leaking the Recognizer in the project.
    """
    project_id = os.getenv("GOOGLE_CLOUD_PROJECT")

    recognizer_id = "recognizer-" + str(uuid4())
    response = adaptation_v2_inline_custom_class.adaptation_v2_inline_custom_class(
        project_id, recognizer_id, os.path.join(RESOURCES, "baby_keem.wav")
    )

    try:
        assert re.search(
            r"play Baby Keem",
            response.results[0].alternatives[0].transcript,
            re.DOTALL | re.I,
        )
    finally:
        # The Recognizer exists once the sample has returned, so always remove it.
        delete_recognizer(
            f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
        )
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# Copyright 2022 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
# [START speech_adaptation_v2_inline_phrase_set]
17+
import io
18+
19+
from google.cloud.speech_v2 import SpeechClient
20+
from google.cloud.speech_v2.types import cloud_speech
21+
22+
23+
def adaptation_v2_inline_phrase_set(project_id, recognizer_id, audio_file):
    """Transcribe an audio file using an inline PhraseSet.

    A Recognizer is created under ``projects/{project_id}/locations/global``.
    The recognize request then carries an inline PhraseSet containing the
    single phrase "Keem" with boost 10. The Recognizer is not deleted here;
    that is left to the caller.

    Args:
        project_id: Project used to build the Recognizer's parent path.
        recognizer_id: ID passed to the create-recognizer request.
        audio_file: Path of the audio file; it is read in binary mode and
            sent as the request content.

    Returns:
        The response object returned by ``client.recognize``.
    """
    # Instantiate the API client.
    speech = SpeechClient()

    # Create the Recognizer and block until the returned operation yields it.
    recognizer_request = cloud_speech.CreateRecognizerRequest(
        parent=f"projects/{project_id}/locations/global",
        recognizer_id=recognizer_id,
        recognizer=cloud_speech.Recognizer(
            language_codes=["en-US"],
            model="latest_short",
        ),
    )
    recognizer = speech.create_recognizer(request=recognizer_request).result()

    # Load the raw audio bytes.
    with io.open(audio_file, "rb") as audio:
        audio_bytes = audio.read()

    # Build the inline phrase set that is sent along with the request.
    inline_phrases = cloud_speech.PhraseSet(
        phrases=[{"value": "Keem", "boost": 10}]
    )
    inline_set = cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
        inline_phrase_set=inline_phrases
    )
    adaptation = cloud_speech.SpeechAdaptation(phrase_sets=[inline_set])
    config = cloud_speech.RecognitionConfig(
        auto_decoding_config={},
        adaptation=adaptation,
    )

    # Transcribe the audio.
    recognize_request = cloud_speech.RecognizeRequest(
        recognizer=recognizer.name,
        config=config,
        content=audio_bytes,
    )
    response = speech.recognize(request=recognize_request)

    for result in response.results:
        top_alternative = result.alternatives[0]
        print("Transcript: {}".format(top_alternative.transcript))

    return response
67+
# [END speech_adaptation_v2_inline_phrase_set]
68+
69+
70+
if __name__ == "__main__":
    # The sample function has three required positional parameters, so
    # calling it with no arguments raises TypeError. Read them from the
    # command line.
    import argparse

    parser = argparse.ArgumentParser(
        description="Transcribe audio using an inline PhraseSet."
    )
    parser.add_argument("project_id", help="Google Cloud project ID")
    parser.add_argument("recognizer_id", help="ID for the Recognizer to create")
    parser.add_argument("audio_file", help="Path to the audio file to transcribe")
    args = parser.parse_args()

    adaptation_v2_inline_phrase_set(
        args.project_id, args.recognizer_id, args.audio_file
    )

0 commit comments

Comments
 (0)