Skip to content

Commit a132b09

Browse files
rogers140 authored and telpirion committed
docs(samples): add sample code for StreamingAnalyzeContent for live transcription. (#575)
1 parent fee4a65 commit a132b09

File tree

2 files changed

+149
-0
lines changed

2 files changed

+149
-0
lines changed
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Copyright 2022 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
import uuid
17+
18+
import pytest
19+
20+
import conversation_management
21+
import conversation_profile_management
22+
import participant_management
23+
24+
25+
# GCP project the samples run against, read from the environment.
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
# Sample recording located under "resources" next to this module.
AUDIO_FILE_PATH = (
    f"{os.path.realpath(os.path.dirname(__file__))}/resources/book_a_room.wav"
)
29+
30+
31+
@pytest.fixture
def conversation_profile_display_name():
    """Provide a display name made unique with a random UUID suffix."""
    unique_suffix = uuid.uuid4()
    return f"sample_conversation_profile_{unique_suffix}"
34+
35+
36+
@pytest.fixture
def conversation_profile_id(conversation_profile_display_name):
    """Create a conversation profile, yield its id, then delete it."""
    # Set up: create the profile under the generated display name.
    created = conversation_profile_management.create_conversation_profile_article_faq(
        project_id=PROJECT_ID,
        display_name=conversation_profile_display_name,
    )
    # The id is whatever follows "conversationProfiles/" in the resource name.
    name_parts = created.name.split("conversationProfiles/")
    profile_id = name_parts[1].rstrip()

    yield profile_id

    # Tear down: delete the conversation profile.
    conversation_profile_management.delete_conversation_profile(
        PROJECT_ID, profile_id
    )
51+
52+
53+
@pytest.fixture
def conversation_id(conversation_profile_id):
    """Create a conversation, yield its id, then mark it completed."""
    # Set up: create a conversation bound to the profile fixture.
    created = conversation_management.create_conversation(
        project_id=PROJECT_ID,
        conversation_profile_id=conversation_profile_id,
    )
    # The id is whatever follows "conversations/" in the resource name.
    name_parts = created.name.split("conversations/")
    new_conversation_id = name_parts[1].rstrip()

    yield new_conversation_id

    # Tear down: complete the conversation.
    conversation_management.complete_conversation(
        project_id=PROJECT_ID,
        conversation_id=new_conversation_id,
    )
65+
66+
67+
@pytest.fixture
def participant_id(conversation_id):
    """Create an END_USER participant in the conversation and yield its id."""
    created = participant_management.create_participant(
        project_id=PROJECT_ID,
        conversation_id=conversation_id,
        role="END_USER",
    )
    # The id is whatever follows "participants/" in the resource name.
    name_parts = created.name.split("participants/")
    new_participant_id = name_parts[1].rstrip()
    yield new_participant_id
74+
75+
76+
# Test live transcription with streaming_analyze_content.
def test_analyze_content_audio_stream(capsys, conversation_id, participant_id):
    """Stream the sample recording and check the printed transcript."""
    # Call StreamingAnalyzeContent to transcribe the audio.
    participant_management.analyze_content_audio_stream(
        project_id=PROJECT_ID,
        conversation_id=conversation_id,
        participant_id=participant_id,
        audio_file_path=AUDIO_FILE_PATH,
    )
    # The sample prints its transcripts to stdout.
    captured = capsys.readouterr()
    assert "book a room" in captured.out

dialogflow/participant_management.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,3 +109,66 @@ def analyze_content_text(project_id, conversation_id, participant_id, text):
109109

110110

111111
# [END dialogflow_analyze_content_text]
112+
113+
# [START dialogflow_analyze_content_audio_stream]
114+
def analyze_content_audio_stream(
    project_id, conversation_id, participant_id, audio_file_path
):
    """Stream a local audio file for a participant and print the transcripts.

    Sends one configuration request followed by the file's bytes in small
    chunks through StreamingAnalyzeContent, then prints the message content
    of every streamed response between two separator lines.

    Args:
        project_id: The GCP project linked with the conversation profile.
        conversation_id: Id of the conversation.
        participant_id: Id of the participant.
        audio_file_path: Path to an audio file containing utterances of
            END_USER. The encoding and sample rate are hard coded below, so
            the bytes are sent as LINEAR16 audio at 16000 Hz.
    """
    # Note: hard coding audio_encoding and sample_rate_hertz for simplicity.
    audio_encoding = dialogflow.AudioEncoding.AUDIO_ENCODING_LINEAR_16
    sample_rate_hertz = 16000
    # Number of bytes read from the file for each audio request.
    chunk_size = 4096

    # Generates requests based on the audio file.
    # NOTE(review): the original sample comment said the first channel is used
    # as END_USER and the second as HUMAN_AGENT by default; nothing in this
    # function selects channels, so confirm that against the API docs.
    def request_generator(participant_path, audio_config, audio_file_path):
        # The first request contains the configuration.
        yield dialogflow.StreamingAnalyzeContentRequest(
            participant=participant_path, audio_config=audio_config
        )

        # Here we are reading small chunks of audio data from a local
        # audio file. In practice these chunks should come from
        # an audio input device.
        with open(audio_file_path, "rb") as audio_file:
            # read() returns b"" at end of file, which is the sentinel that
            # ends the iteration.
            for chunk in iter(lambda: audio_file.read(chunk_size), b""):
                # The later requests contain audio data.
                yield dialogflow.StreamingAnalyzeContentRequest(input_audio=chunk)

    audio_config = dialogflow.InputAudioConfig(
        audio_encoding=audio_encoding,
        language_code="en-US",
        sample_rate_hertz=sample_rate_hertz,
        single_utterance=True,
        model="phone_call",
        # Make sure your project is Dialogflow ES ENTERPRISE_TIER in order to "USE_ENHANCED" model.
        model_variant="USE_ENHANCED",
    )

    # Use the client as a context manager so that any remaining background
    # resources are cleaned up once all responses have been consumed.
    with dialogflow.ParticipantsClient() as client:
        participant_path = client.participant_path(
            project_id, conversation_id, participant_id
        )
        requests = request_generator(participant_path, audio_config, audio_file_path)
        responses = client.streaming_analyze_content(requests=requests)
        print("=" * 20)
        for response in responses:
            print(f'Transcript: "{response.message.content}".')

        print("=" * 20)
173+
174+
# [END dialogflow_analyze_content_audio_stream]

0 commit comments

Comments
 (0)