Add samples for speech diarization ga (auto-punctuation samples alrea… (#1744)

nnegrey · kurtisvg · commit 816eb785db15 · 2019-11-19T08:45:33.000-08:00
diff --git a/speech/cloud-client/pom.xml b/speech/cloud-client/pom.xml
@@ -40,7 +40,7 @@
     <dependency>
       <groupId>com.google.cloud</groupId>
       <artifactId>google-cloud-speech</artifactId>
-      <version>1.21.0</version>
+      <version>1.22.0</version>
     </dependency>
     <!-- [END speech_quickstart_dependencies] -->
     <dependency>
diff --git a/speech/cloud-client/src/main/java/com/example/speech/TranscribeDiarization.java b/speech/cloud-client/src/main/java/com/example/speech/TranscribeDiarization.java
@@ -0,0 +1,99 @@
+/*
+ * Copyright 2019 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+// [START speech_transcribe_diarization]
+
+import com.google.cloud.speech.v1.RecognitionAudio;
+import com.google.cloud.speech.v1.RecognitionConfig;
+import com.google.cloud.speech.v1.RecognizeResponse;
+import com.google.cloud.speech.v1.SpeakerDiarizationConfig;
+import com.google.cloud.speech.v1.SpeechClient;
+import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
+import com.google.cloud.speech.v1.WordInfo;
+import com.google.protobuf.ByteString;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+class TranscribeDiarization {
+
+  static void transcribeDiarization() throws IOException {
+    // TODO(developer): Replace these variables before running the sample.
+    String fileName = "resources/commercial_mono.wav";
+    transcribeDiarization(fileName);
+  }
+
+  /**
+   * Transcribes the given audio file using speaker diarization and prints each speaker turn.
+   */
+  static void transcribeDiarization(String fileName) throws IOException {
+    Path path = Paths.get(fileName);
+    byte[] content = Files.readAllBytes(path);
+
+    // Initialize client that will be used to send requests. This client only needs to be created
+    // once, and can be reused for multiple requests. After completing all of your requests, call
+    // the "close" method on the client to safely clean up any remaining background resources.
+    try (SpeechClient client = SpeechClient.create()) {
+      // Get the contents of the local audio file
+      RecognitionAudio recognitionAudio =
+              RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
+      SpeakerDiarizationConfig speakerDiarizationConfig = SpeakerDiarizationConfig.newBuilder()
+              .setEnableSpeakerDiarization(true)
+              .setMinSpeakerCount(2)
+              .setMaxSpeakerCount(2)
+              .build();
+      // Configure request to enable Speaker diarization
+      RecognitionConfig config = RecognitionConfig.newBuilder()
+              .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
+              .setLanguageCode("en-US")
+              .setSampleRateHertz(8000)
+              .setDiarizationConfig(speakerDiarizationConfig)
+              .build();
+
+      // Perform the transcription request
+      RecognizeResponse recognizeResponse = client.recognize(config, recognitionAudio);
+      if (recognizeResponse.getResultsCount() == 0) {
+        // An empty response has no last result; getResults(count - 1) below would throw.
+        System.out.println("No transcription results returned.");
+        return;
+      }
+      // Speaker Tags are only included in the last result object, which has only one alternative.
+      SpeechRecognitionAlternative alternative =
+              recognizeResponse.getResults(
+                      recognizeResponse.getResultsCount() - 1).getAlternatives(0);
+      // The alternative is made up of WordInfo objects that contain the speaker_tag. Consecutive
+      // words with the same tag share a line; a change of tag starts a new "Speaker N:" line.
+      StringBuilder speakerWords = new StringBuilder();
+      int currentSpeakerTag = 0;
+      for (WordInfo wordInfo : alternative.getWordsList()) {
+        boolean firstWord = speakerWords.length() == 0;
+        if (!firstWord && currentSpeakerTag == wordInfo.getSpeakerTag()) {
+          speakerWords.append(' ').append(wordInfo.getWord());
+        } else {
+          currentSpeakerTag = wordInfo.getSpeakerTag();
+          speakerWords.append(firstWord ? "" : "\n").append(
+                  String.format("Speaker %d: %s", currentSpeakerTag, wordInfo.getWord()));
+        }
+      }
+      System.out.println(speakerWords);
+    }
+  }
+}
+// [END speech_transcribe_diarization]
diff --git a/speech/cloud-client/src/main/java/com/example/speech/TranscribeDiarizationGcs.java b/speech/cloud-client/src/main/java/com/example/speech/TranscribeDiarizationGcs.java
@@ -0,0 +1,101 @@
+/*
+ * Copyright 2019 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+// [START speech_transcribe_diarization_gcs]
+
+import com.google.api.gax.longrunning.OperationFuture;
+import com.google.cloud.speech.v1.LongRunningRecognizeMetadata;
+import com.google.cloud.speech.v1.LongRunningRecognizeResponse;
+import com.google.cloud.speech.v1.RecognitionAudio;
+import com.google.cloud.speech.v1.RecognitionConfig;
+import com.google.cloud.speech.v1.SpeakerDiarizationConfig;
+import com.google.cloud.speech.v1.SpeechClient;
+import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
+import com.google.cloud.speech.v1.WordInfo;
+
+import java.io.IOException;
+import java.util.concurrent.ExecutionException;
+
+public class TranscribeDiarizationGcs {
+
+  static void transcribeDiarizationGcs() throws IOException, ExecutionException,
+          InterruptedException {
+    // TODO(developer): Replace these variables before running the sample.
+    String gcsUri = "gs://cloud-samples-data/speech/commercial_mono.wav";
+    transcribeDiarizationGcs(gcsUri);
+  }
+
+  /**
+   * Transcribe the given Cloud Storage audio file using speaker diarization. Waits for the
+   * long-running operation to finish, then prints one line per speaker turn.
+   */
+  public static void transcribeDiarizationGcs(String gcsUri) throws IOException,
+          ExecutionException, InterruptedException {
+    // Initialize client that will be used to send requests. This client only needs to be created
+    // once, and can be reused for multiple requests. After completing all of your requests, call
+    // the "close" method on the client to safely clean up any remaining background resources.
+    try (SpeechClient speechClient = SpeechClient.create()) {
+      SpeakerDiarizationConfig speakerDiarizationConfig = SpeakerDiarizationConfig.newBuilder()
+              .setEnableSpeakerDiarization(true)
+              .setMinSpeakerCount(2)
+              .setMaxSpeakerCount(2)
+              .build();
+      // Configure request to enable Speaker diarization
+      RecognitionConfig config =
+              RecognitionConfig.newBuilder()
+                      .setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
+                      .setLanguageCode("en-US")
+                      .setSampleRateHertz(8000)
+                      .setDiarizationConfig(speakerDiarizationConfig)
+                      .build();
+      // Set the remote path for the audio file
+      RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
+
+      // Use non-blocking call for getting file transcription
+      OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> future =
+              speechClient.longRunningRecognizeAsync(config, audio);
+      System.out.println("Waiting for response...");
+
+      LongRunningRecognizeResponse response = future.get();
+      if (response.getResultsCount() == 0) {
+        // An empty response has no last result; getResults(count - 1) below would throw.
+        System.out.println("No transcription results returned.");
+        return;
+      }
+      // Speaker Tags are only included in the last result object, which has only one alternative.
+      SpeechRecognitionAlternative alternative =
+              response.getResults(response.getResultsCount() - 1).getAlternatives(0);
+      // The alternative is made up of WordInfo objects that contain the speaker_tag. Consecutive
+      // words with the same tag share a line; a change of tag starts a new "Speaker N:" line.
+      StringBuilder speakerWords = new StringBuilder();
+      int currentSpeakerTag = 0;
+      for (WordInfo wordInfo : alternative.getWordsList()) {
+        boolean firstWord = speakerWords.length() == 0;
+        if (!firstWord && currentSpeakerTag == wordInfo.getSpeakerTag()) {
+          speakerWords.append(' ').append(wordInfo.getWord());
+        } else {
+          currentSpeakerTag = wordInfo.getSpeakerTag();
+          speakerWords.append(firstWord ? "" : "\n").append(
+                  String.format("Speaker %d: %s", currentSpeakerTag, wordInfo.getWord()));
+        }
+      }
+      System.out.println(speakerWords);
+    }
+  }
+}
+// [END speech_transcribe_diarization_gcs]
diff --git a/speech/cloud-client/src/test/java/com/example/speech/TranscribeDiarizationIT.java b/speech/cloud-client/src/test/java/com/example/speech/TranscribeDiarizationIT.java
@@ -0,0 +1,84 @@
+/*
+ * Copyright 2018 Google Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.example.speech;
+
+import static com.google.common.truth.Truth.assertThat;
+import static junit.framework.TestCase.assertNotNull;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.concurrent.ExecutionException;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Integration tests for the speech Transcribe Diarization samples. */
+@RunWith(JUnit4.class)
+@SuppressWarnings("checkstyle:abbreviationaswordinname")
+public class TranscribeDiarizationIT {
+  private ByteArrayOutputStream bout;
+  private PrintStream originalOut; // saved in setUp so tearDown can restore System.out
+
+  // The path to the audio file to transcribe
+  private final String recognitionAudioFile = "./resources/commercial_mono.wav";
+
+  private static void requireEnvVar(String varName) {
+    // junit.framework assertNotNull takes (message, object): the message must come first.
+    String message =
+            String.format("Environment variable '%s' is required to perform these tests.", varName);
+    assertNotNull(message, System.getenv(varName));
+  }
+
+  @BeforeClass
+  public static void checkRequirements() {
+    requireEnvVar("GOOGLE_APPLICATION_CREDENTIALS");
+  }
+
+  @Before
+  public void setUp() {
+    originalOut = System.out;
+    bout = new ByteArrayOutputStream();
+    System.setOut(new PrintStream(bout)); // capture what the samples print
+  }
+
+  @After
+  public void tearDown() {
+    System.setOut(originalOut); // never leave System.out null for later code in this JVM
+  }
+
+  @Test
+  public void testDiarization() throws IOException {
+    TranscribeDiarization.transcribeDiarization(recognitionAudioFile);
+    String got = bout.toString();
+    assertThat(got).contains("Speaker 1: I'm here");
+    assertThat(got).contains("Speaker 2: Hi, I'd like to buy a");
+  }
+
+  @Test
+  public void testDiarizationGcs() throws IOException, ExecutionException, InterruptedException {
+    TranscribeDiarizationGcs.transcribeDiarizationGcs(
+            "gs://cloud-samples-data/speech/commercial_mono.wav");
+    String got = bout.toString();
+    assertThat(got).contains("Speaker 1: I'm here");
+    assertThat(got).contains("Speaker 2: Hi, I'd like to buy a");
+  }
+}