docs(samples): add sample code for StreamingAnalyzeContent for live transcription. (#575)

rogers140 · telpirion · commit a132b091fa36 · 2023-03-13T19:55:22.000Z
diff --git a/dialogflow/analyze_content_stream_test.py b/dialogflow/analyze_content_stream_test.py
@@ -0,0 +1,86 @@
+# Copyright 2022 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import uuid
+
+import pytest
+
+import conversation_management
+import conversation_profile_management
+import participant_management
+
+
+PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
+# Sample audio stored in the "resources" directory next to this test module.
+_TEST_DIR = os.path.realpath(os.path.dirname(__file__))
+AUDIO_FILE_PATH = f"{_TEST_DIR}/resources/book_a_room.wav"
+
+
+@pytest.fixture
+def conversation_profile_display_name():
+    return "sample_conversation_profile_{}".format(uuid.uuid4())  # fresh UUID per test
+
+
+@pytest.fixture
+def conversation_profile_id(conversation_profile_display_name):
+    """Yield the ID of a temporary conversation profile, deleting it afterwards."""
+    profile = conversation_profile_management.create_conversation_profile_article_faq(
+        project_id=PROJECT_ID, display_name=conversation_profile_display_name
+    )
+
+    # Take the part after "conversationProfiles/" and strip trailing whitespace.
+    resource_name = profile.name
+    profile_id = resource_name.split("conversationProfiles/")[1].rstrip()
+
+    yield profile_id
+
+    # Teardown: delete the profile created above.
+    conversation_profile_management.delete_conversation_profile(PROJECT_ID, profile_id)
+
+
+@pytest.fixture
+def conversation_id(conversation_profile_id):
+    """Yield the ID of a new conversation and complete it on teardown."""
+    conversation = conversation_management.create_conversation(
+        project_id=PROJECT_ID, conversation_profile_id=conversation_profile_id
+    )
+    new_id = conversation.name.split("conversations/")[1].rstrip()
+
+    yield new_id  # the statements below run as teardown
+    conversation_management.complete_conversation(
+        project_id=PROJECT_ID, conversation_id=new_id
+    )
+
+
+@pytest.fixture
+def participant_id(conversation_id):
+    """Yield the ID of an END_USER participant created in the conversation."""
+    participant = participant_management.create_participant(
+        project_id=PROJECT_ID, conversation_id=conversation_id, role="END_USER"
+    )
+    yield participant.name.split("participants/")[1].rstrip()
+
+
+# Test live transcription with streaming_analyze_content.
+def test_analyze_content_audio_stream(capsys, conversation_id, participant_id):
+    """Run the streaming sample and check that the transcript is printed."""
+    participant_management.analyze_content_audio_stream(
+        project_id=PROJECT_ID,
+        conversation_id=conversation_id,
+        participant_id=participant_id,
+        audio_file_path=AUDIO_FILE_PATH,
+    )
+    captured = capsys.readouterr()
+    assert "book a room" in captured.out
diff --git a/dialogflow/participant_management.py b/dialogflow/participant_management.py
@@ -109,3 +109,66 @@ def analyze_content_text(project_id, conversation_id, participant_id, text):
 
 
 # [END dialogflow_analyze_content_text]
+
+# [START dialogflow_analyze_content_audio_stream]
+def analyze_content_audio_stream(project_id, conversation_id, participant_id, audio_file_path):
+    """Stream an audio file for END_USER through StreamingAnalyzeContent.
+
+    Every streamed response is printed as a 'Transcript: "..."' line, framed by
+    two separator lines.
+
+    Args:
+        project_id: The GCP project linked with the conversation profile.
+        conversation_id: Id of the conversation.
+        participant_id: Id of the participant.
+        audio_file_path: Path of the audio file containing utterances of
+            END_USER. It is declared to the API as LINEAR16 audio sampled at
+            16000 Hz.
+    """
+    # Initialize the client used to send requests. It only needs to be created
+    # once and can be reused for multiple requests. After completing all of your
+    # requests, call its "__exit__()" method to clean up any remaining background
+    # resources, or use the client as a context manager instead.
+    client = dialogflow.ParticipantsClient()
+    participant_path = client.participant_path(
+        project_id, conversation_id, participant_id
+    )
+
+    def request_generator(config, path, chunk_size=4096):
+        """Yield the configuration request, then one request per audio chunk."""
+        # The first request carries only the participant and the audio config.
+        yield dialogflow.StreamingAnalyzeContentRequest(
+            participant=participant_path, audio_config=config
+        )
+
+        # All later requests carry audio data. The chunks are read from a local
+        # file here; in practice they should come from an audio input device.
+        with open(path, "rb") as audio_file:
+            # iter() with a b"" sentinel stops at the first empty read (EOF).
+            for chunk in iter(lambda: audio_file.read(chunk_size), b""):
+                yield dialogflow.StreamingAnalyzeContentRequest(input_audio=chunk)
+
+    # Note: audio_encoding and sample_rate_hertz are hard coded for simplicity.
+    # Make sure your project is Dialogflow ES ENTERPRISE_TIER in order to use
+    # the "USE_ENHANCED" model variant.
+    audio_config = dialogflow.InputAudioConfig(
+        audio_encoding=dialogflow.AudioEncoding.AUDIO_ENCODING_LINEAR_16,
+        language_code='en-US',
+        sample_rate_hertz=16000,
+        single_utterance=True,
+        model='phone_call',
+        model_variant='USE_ENHANCED'
+    )
+
+    responses = client.streaming_analyze_content(
+        requests=request_generator(audio_config, audio_file_path)
+    )
+
+    separator = "=" * 20
+    print(separator)
+    for response in responses:
+        print(f'Transcript: "{response.message.content}".')
+
+    print(separator)
+
+# [END dialogflow_analyze_content_audio_stream]