Skip to content

Commit 7ef57df

Browse files
authored
docs(samples): More STT v2 samples (#10443)
* docs(samples): More STT v2 samples

Added samples for:
* Enabling CMEK
* BatchRecognize (GCS output, inline output, dynamic batching, multiple files)
* Multiple languages recognition
* Various recognizer features
1 parent da33589 commit 7ef57df

21 files changed

+1228
-9
lines changed

speech/snippets/create_recognizer.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,11 @@ def create_recognizer(project_id: str, recognizer_id: str) -> cloud_speech.Recog
2727
request = cloud_speech.CreateRecognizerRequest(
2828
parent=f"projects/{project_id}/locations/global",
2929
recognizer_id=recognizer_id,
30-
recognizer=cloud_speech.Recognizer(language_codes=["en-US"], model="long"),
30+
recognizer=cloud_speech.Recognizer(
31+
default_recognition_config=cloud_speech.RecognitionConfig(
32+
language_codes=["en-US"], model="long"
33+
),
34+
),
3135
)
3236

3337
operation = client.create_recognizer(request=request)

speech/snippets/create_recognizer_test.py

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,19 @@ def delete_recognizer(name: str) -> None:
3131

3232

3333
@Retry()
34-
def test_create_recognizer(capsys: pytest.CaptureFixture) -> None:
34+
def test_create_recognizer(
35+
capsys: pytest.CaptureFixture, request: pytest.FixtureRequest
36+
) -> None:
3537
project_id = os.getenv("GOOGLE_CLOUD_PROJECT")
38+
recognizer_id = "recognizer-" + str(uuid4())
3639

37-
recognizer = create_recognizer.create_recognizer(
38-
project_id, "recognizer-" + str(uuid4())
39-
)
40-
delete_recognizer(recognizer.name)
40+
def cleanup():
41+
delete_recognizer(
42+
f"projects/{project_id}/locations/global/recognizers/{recognizer_id}"
43+
)
44+
45+
request.addfinalizer(cleanup)
46+
47+
recognizer = create_recognizer.create_recognizer(project_id, recognizer_id)
48+
49+
assert recognizer_id in recognizer.name

speech/snippets/enable_cmek.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
import argparse
17+
18+
# [START speech_enable_cmek]
19+
from google.cloud.speech_v2 import SpeechClient
20+
from google.cloud.speech_v2.types import cloud_speech
21+
22+
23+
def enable_cmek(
    project_id: str,
    kms_key_name: str,
) -> cloud_speech.Config:
    """Enable CMEK for Speech-to-Text in a project's ``global`` location.

    Args:
        project_id: The Google Cloud project ID.
        kms_key_name: Resource path of the Cloud KMS key to store on the
            Speech-to-Text config.

    Returns:
        The updated Config resource returned by ``update_config``.
    """
    # Instantiates a client
    client = SpeechClient()

    request = cloud_speech.UpdateConfigRequest(
        config=cloud_speech.Config(
            name=f"projects/{project_id}/locations/global/config",
            kms_key_name=kms_key_name,
        ),
        # Only the KMS key field is sent for update.
        update_mask={"paths": ["kms_key_name"]},
    )

    # Updates the KMS key for the project and region.
    response = client.update_config(request=request)

    print(f"Updated KMS key: {response.kms_key_name}")

    return response
45+
46+
47+
# [END speech_enable_cmek]
48+
49+
50+
if __name__ == "__main__":
    # Command-line entry point: both arguments are positional and required.
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    for arg_name, arg_help in (
        ("project_id", "GCP Project ID"),
        ("kms_key_name", "Resource path of a KMS key"),
    ):
        cli.add_argument(arg_name, help=arg_help)

    parsed = cli.parse_args()
    enable_cmek(parsed.project_id, parsed.kms_key_name)

speech/snippets/enable_cmek_test.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# https://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
17+
from google.api_core.retry import Retry
18+
19+
import enable_cmek
20+
21+
22+
@Retry()
def test_enable_cmek() -> None:
    """Call the sample with an empty key name and check the echoed key."""
    updated_config = enable_cmek.enable_cmek(
        os.getenv("GOOGLE_CLOUD_PROJECT"),
        "",
    )

    # The response is expected to carry the same (empty) key name we sent.
    assert updated_config.kms_key_name == ""
speech/snippets/transcribe_batch_dynamic_batching_v2.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
import argparse
17+
18+
# [START speech_transcribe_batch_dynamic_batching_v2]
19+
from google.cloud.speech_v2 import SpeechClient
20+
from google.cloud.speech_v2.types import cloud_speech
21+
22+
23+
def transcribe_batch_dynamic_batching_v2(
    project_id: str,
    gcs_uri: str,
) -> cloud_speech.BatchRecognizeResults:
    """Transcribes audio from a Google Cloud Storage URI using dynamic batching.

    Args:
        project_id: The Google Cloud project ID.
        gcs_uri: The Google Cloud Storage URI.

    Returns:
        The BatchRecognizeResults message holding the inline transcript
        produced for ``gcs_uri``.
    """
    # Instantiates a client
    client = SpeechClient()

    config = cloud_speech.RecognitionConfig(
        auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
        language_codes=["en-US"],
        model="long",
    )

    file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=gcs_uri)

    request = cloud_speech.BatchRecognizeRequest(
        recognizer=f"projects/{project_id}/locations/global/recognizers/_",
        config=config,
        files=[file_metadata],
        # Ask for the transcript inline in the operation response.
        recognition_output_config=cloud_speech.RecognitionOutputConfig(
            inline_response_config=cloud_speech.InlineOutputConfig(),
        ),
        processing_strategy=cloud_speech.BatchRecognizeRequest.ProcessingStrategy.DYNAMIC_BATCHING,
    )

    # Transcribes the audio into text
    operation = client.batch_recognize(request=request)

    print("Waiting for operation to complete...")
    response = operation.result(timeout=120)

    # Per-file results are keyed by the input URI; look it up once.
    transcript = response.results[gcs_uri].transcript

    for result in transcript.results:
        print(f"Transcript: {result.alternatives[0].transcript}")

    return transcript
67+
68+
69+
# [END speech_transcribe_batch_dynamic_batching_v2]
70+
71+
72+
if __name__ == "__main__":
    # Command-line entry point: both arguments are positional and required.
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    for arg_name, arg_help in (
        ("project_id", "GCP Project ID"),
        ("gcs_uri", "URI to GCS file"),
    ):
        cli.add_argument(arg_name, help=arg_help)

    parsed = cli.parse_args()
    transcribe_batch_dynamic_batching_v2(parsed.project_id, parsed.gcs_uri)
speech/snippets/transcribe_batch_dynamic_batching_v2_test.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
import re
17+
18+
from flaky import flaky
19+
20+
import pytest
21+
22+
import transcribe_batch_dynamic_batching_v2
23+
24+
25+
_TEST_AUDIO_FILE_PATH = "gs://cloud-samples-data/speech/audio.flac"
26+
27+
28+
@flaky(max_runs=10, min_passes=1)
def test_transcribe_batch_dynamic_batching_v2(
    capsys: pytest.CaptureFixture,
) -> None:
    """Run the dynamic-batching sample and check the first transcript."""
    sample = transcribe_batch_dynamic_batching_v2.transcribe_batch_dynamic_batching_v2
    batch_results = sample(os.getenv("GOOGLE_CLOUD_PROJECT"), _TEST_AUDIO_FILE_PATH)

    # Only the top alternative of the first result is checked.
    first_transcript = batch_results.results[0].alternatives[0].transcript

    assert re.search(
        r"how old is the Brooklyn Bridge",
        first_transcript,
        re.DOTALL | re.I,
    )
speech/snippets/transcribe_batch_gcs_input_gcs_output_v2.py

Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
16+
import argparse
17+
18+
# [START speech_transcribe_batch_gcs_input_gcs_output_v2]
19+
import re
20+
21+
from google.cloud import storage
22+
from google.cloud.speech_v2 import SpeechClient
23+
from google.cloud.speech_v2.types import cloud_speech
24+
25+
26+
def transcribe_batch_gcs_input_gcs_output_v2(
    project_id: str,
    gcs_uri: str,
    gcs_output_path: str,
) -> cloud_speech.BatchRecognizeResults:
    """Transcribes audio from a Google Cloud Storage URI.

    The transcript is written to Cloud Storage by the service and then
    downloaded and parsed here.

    Args:
        project_id: The Google Cloud project ID.
        gcs_uri: The Google Cloud Storage URI.
        gcs_output_path: The Cloud Storage URI to which to write the transcript.

    Returns:
        The BatchRecognizeResults message.

    Raises:
        ValueError: If the result URI reported for ``gcs_uri`` is not a
            ``gs://bucket/object`` URI.
    """
    # Instantiates a client
    client = SpeechClient()

    config = cloud_speech.RecognitionConfig(
        auto_decoding_config=cloud_speech.AutoDetectDecodingConfig(),
        language_codes=["en-US"],
        model="long",
    )

    file_metadata = cloud_speech.BatchRecognizeFileMetadata(uri=gcs_uri)

    request = cloud_speech.BatchRecognizeRequest(
        recognizer=f"projects/{project_id}/locations/global/recognizers/_",
        config=config,
        files=[file_metadata],
        recognition_output_config=cloud_speech.RecognitionOutputConfig(
            gcs_output_config=cloud_speech.GcsOutputConfig(
                uri=gcs_output_path,
            ),
        ),
    )

    # Transcribes the audio into text
    operation = client.batch_recognize(request=request)

    print("Waiting for operation to complete...")
    response = operation.result(timeout=120)

    file_results = response.results[gcs_uri]

    print(f"Operation finished. Fetching results from {file_results.uri}...")
    uri_match = re.match(r"gs://([^/]+)/(.*)", file_results.uri)
    if uri_match is None:
        # Without this guard, a missing or malformed URI fails with an opaque
        # AttributeError on None.
        raise ValueError(
            f"Expected a gs:// result URI for {gcs_uri}, got {file_results.uri!r}"
        )
    output_bucket, output_object = uri_match.group(1, 2)

    # Instantiates a Cloud Storage client
    storage_client = storage.Client()

    # Fetch results from Cloud Storage
    bucket = storage_client.bucket(output_bucket)
    blob = bucket.blob(output_object)
    results_bytes = blob.download_as_bytes()
    batch_recognize_results = cloud_speech.BatchRecognizeResults.from_json(
        results_bytes, ignore_unknown_fields=True
    )

    for result in batch_recognize_results.results:
        print(f"Transcript: {result.alternatives[0].transcript}")

    return batch_recognize_results
91+
92+
93+
# [END speech_transcribe_batch_gcs_input_gcs_output_v2]
94+
95+
96+
if __name__ == "__main__":
    # Command-line entry point: all three arguments are positional and required.
    cli = argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    for arg_name, arg_help in (
        ("project_id", "GCP Project ID"),
        ("gcs_uri", "URI to GCS file"),
        ("gcs_output_path", "GCS URI to which to write the transcript"),
    ):
        cli.add_argument(arg_name, help=arg_help)

    parsed = cli.parse_args()
    transcribe_batch_gcs_input_gcs_output_v2(
        parsed.project_id, parsed.gcs_uri, parsed.gcs_output_path
    )

0 commit comments

Comments
 (0)