Skip to content

Commit 4c18563

Browse files
nnegrey authored and chingor13 committed
samples: Add samples for speech diarization ga (auto-punctuation samples alrea… (#1744)
1 parent 3c274eb commit 4c18563

File tree

3 files changed

+284
-0
lines changed

3 files changed

+284
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
/*
2+
* Copyright 2019 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.speech;
18+
19+
// [START speech_transcribe_diarization]
20+
21+
import com.google.cloud.speech.v1.RecognitionAudio;
22+
import com.google.cloud.speech.v1.RecognitionConfig;
23+
import com.google.cloud.speech.v1.RecognizeResponse;
24+
import com.google.cloud.speech.v1.SpeakerDiarizationConfig;
25+
import com.google.cloud.speech.v1.SpeechClient;
26+
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
27+
import com.google.cloud.speech.v1.WordInfo;
28+
import com.google.protobuf.ByteString;
29+
30+
import java.io.IOException;
31+
import java.nio.file.Files;
32+
import java.nio.file.Path;
33+
import java.nio.file.Paths;
34+
35+
class TranscribeDiarization {
36+
37+
static void transcribeDiarization() throws IOException {
38+
// TODO(developer): Replace these variables before running the sample.
39+
String fileName = "resources/commercial_mono.wav";
40+
transcribeDiarization(fileName);
41+
}
42+
43+
// Transcribe the given audio file using speaker diarization.
44+
static void transcribeDiarization(String fileName) throws IOException {
45+
Path path = Paths.get(fileName);
46+
byte[] content = Files.readAllBytes(path);
47+
48+
// Initialize client that will be used to send requests. This client only needs to be created
49+
// once, and can be reused for multiple requests. After completing all of your requests, call
50+
// the "close" method on the client to safely clean up any remaining background resources.
51+
try (SpeechClient client = SpeechClient.create()) {
52+
// Get the contents of the local audio file
53+
RecognitionAudio recognitionAudio =
54+
RecognitionAudio.newBuilder().setContent(ByteString.copyFrom(content)).build();
55+
SpeakerDiarizationConfig speakerDiarizationConfig = SpeakerDiarizationConfig.newBuilder()
56+
.setEnableSpeakerDiarization(true)
57+
.setMinSpeakerCount(2)
58+
.setMaxSpeakerCount(2)
59+
.build();
60+
// Configure request to enable Speaker diarization
61+
RecognitionConfig config = RecognitionConfig.newBuilder()
62+
.setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
63+
.setLanguageCode("en-US")
64+
.setSampleRateHertz(8000)
65+
.setDiarizationConfig(speakerDiarizationConfig)
66+
.build();
67+
68+
// Perform the transcription request
69+
RecognizeResponse recognizeResponse = client.recognize(config, recognitionAudio);
70+
71+
// Speaker Tags are only included in the last result object, which has only one alternative.
72+
SpeechRecognitionAlternative alternative =
73+
recognizeResponse.getResults(
74+
recognizeResponse.getResultsCount() - 1).getAlternatives(0);
75+
// The alternative is made up of WordInfo objects that contain the speaker_tag.
76+
WordInfo wordInfo = alternative.getWords(0);
77+
int currentSpeakerTag = wordInfo.getSpeakerTag();
78+
// For each word, get all the words associated with one speaker, once the speaker changes,
79+
// add a new line with the new speaker and their spoken words.
80+
StringBuilder speakerWords = new StringBuilder(
81+
String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
82+
for (int i = 1; i < alternative.getWordsCount(); i++) {
83+
wordInfo = alternative.getWords(i);
84+
if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
85+
speakerWords.append(" ");
86+
speakerWords.append(wordInfo.getWord());
87+
} else {
88+
speakerWords.append(
89+
String.format("\nSpeaker %d: %s",
90+
wordInfo.getSpeakerTag(),
91+
wordInfo.getWord()));
92+
currentSpeakerTag = wordInfo.getSpeakerTag();
93+
}
94+
}
95+
System.out.println(speakerWords.toString());
96+
}
97+
}
98+
}
99+
// [END speech_transcribe_diarization]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
/*
2+
* Copyright 2019 Google LLC
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.speech;
18+
19+
// [START speech_transcribe_diarization_gcs]
20+
21+
import com.google.api.gax.longrunning.OperationFuture;
22+
import com.google.cloud.speech.v1.LongRunningRecognizeMetadata;
23+
import com.google.cloud.speech.v1.LongRunningRecognizeResponse;
24+
import com.google.cloud.speech.v1.RecognitionAudio;
25+
import com.google.cloud.speech.v1.RecognitionConfig;
26+
import com.google.cloud.speech.v1.SpeakerDiarizationConfig;
27+
import com.google.cloud.speech.v1.SpeechClient;
28+
import com.google.cloud.speech.v1.SpeechRecognitionAlternative;
29+
import com.google.cloud.speech.v1.WordInfo;
30+
31+
import java.io.IOException;
32+
import java.util.concurrent.ExecutionException;
33+
34+
public class TranscribeDiarizationGcs {
35+
36+
static void transcribeDiarizationGcs() throws IOException, ExecutionException,
37+
InterruptedException {
38+
// TODO(developer): Replace these variables before running the sample.
39+
String gcsUri = "gs://cloud-samples-data/speech/commercial_mono.wav";
40+
transcribeDiarizationGcs(gcsUri);
41+
}
42+
43+
// Transcribe the give gcs file using speaker diarization
44+
public static void transcribeDiarizationGcs(String gcsUri) throws IOException,
45+
ExecutionException, InterruptedException {
46+
// Initialize client that will be used to send requests. This client only needs to be created
47+
// once, and can be reused for multiple requests. After completing all of your requests, call
48+
// the "close" method on the client to safely clean up any remaining background resources.
49+
try (SpeechClient speechClient = SpeechClient.create()) {
50+
SpeakerDiarizationConfig speakerDiarizationConfig = SpeakerDiarizationConfig.newBuilder()
51+
.setEnableSpeakerDiarization(true)
52+
.setMinSpeakerCount(2)
53+
.setMaxSpeakerCount(2)
54+
.build();
55+
// Configure request to enable Speaker diarization
56+
RecognitionConfig config =
57+
RecognitionConfig.newBuilder()
58+
.setEncoding(RecognitionConfig.AudioEncoding.LINEAR16)
59+
.setLanguageCode("en-US")
60+
.setSampleRateHertz(8000)
61+
.setDiarizationConfig(speakerDiarizationConfig)
62+
.build();
63+
// Set the remote path for the audio file
64+
RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();
65+
66+
// Use non-blocking call for getting file transcription
67+
OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> future =
68+
speechClient.longRunningRecognizeAsync(config, audio);
69+
System.out.println("Waiting for response...");
70+
71+
// Speaker Tags are only included in the last result object, which has only one alternative.
72+
LongRunningRecognizeResponse response = future.get();
73+
SpeechRecognitionAlternative alternative =
74+
response.getResults(
75+
response.getResultsCount() - 1)
76+
.getAlternatives(0);
77+
// The alternative is made up of WordInfo objects that contain the speaker_tag.
78+
WordInfo wordInfo = alternative.getWords(0);
79+
int currentSpeakerTag = wordInfo.getSpeakerTag();
80+
// For each word, get all the words associated with one speaker, once the speaker changes,
81+
// add a new line with the new speaker and their spoken words.
82+
StringBuilder speakerWords = new StringBuilder(
83+
String.format("Speaker %d: %s", wordInfo.getSpeakerTag(), wordInfo.getWord()));
84+
for (int i = 1; i < alternative.getWordsCount(); i++) {
85+
wordInfo = alternative.getWords(i);
86+
if (currentSpeakerTag == wordInfo.getSpeakerTag()) {
87+
speakerWords.append(" ");
88+
speakerWords.append(wordInfo.getWord());
89+
} else {
90+
speakerWords.append(
91+
String.format("\nSpeaker %d: %s",
92+
wordInfo.getSpeakerTag(),
93+
wordInfo.getWord()));
94+
currentSpeakerTag = wordInfo.getSpeakerTag();
95+
}
96+
}
97+
System.out.println(speakerWords.toString());
98+
}
99+
}
100+
}
101+
// [END speech_transcribe_diarization_gcs]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
/*
2+
* Copyright 2018 Google Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.example.speech;
18+
19+
import static com.google.common.truth.Truth.assertThat;
20+
import static junit.framework.TestCase.assertNotNull;
21+
22+
import java.io.ByteArrayOutputStream;
23+
import java.io.IOException;
24+
import java.io.PrintStream;
25+
import java.util.concurrent.ExecutionException;
26+
27+
import org.junit.After;
28+
import org.junit.Before;
29+
import org.junit.BeforeClass;
30+
import org.junit.Test;
31+
import org.junit.runner.RunWith;
32+
import org.junit.runners.JUnit4;
33+
34+
// Tests for speech Transcribe Diarization samples.
35+
@RunWith(JUnit4.class)
36+
@SuppressWarnings("checkstyle:abbreviationaswordinname")
37+
public class TranscribeDiarizationIT {
38+
private ByteArrayOutputStream bout;
39+
private PrintStream out;
40+
41+
// The path to the audio file to transcribe
42+
private String recognitionAudioFile = "./resources/commercial_mono.wav";
43+
44+
private static void requireEnvVar(String varName) {
45+
assertNotNull(
46+
System.getenv(varName),
47+
"Environment variable '%s' is required to perform these tests.".format(varName)
48+
);
49+
}
50+
51+
@BeforeClass
52+
public static void checkRequirements() {
53+
requireEnvVar("GOOGLE_APPLICATION_CREDENTIALS");
54+
}
55+
56+
@Before
57+
public void setUp() {
58+
bout = new ByteArrayOutputStream();
59+
out = new PrintStream(bout);
60+
System.setOut(out);
61+
}
62+
63+
@After
64+
public void tearDown() {
65+
System.setOut(null);
66+
}
67+
68+
@Test
69+
public void testDiarization() throws IOException {
70+
TranscribeDiarization.transcribeDiarization(recognitionAudioFile);
71+
String got = bout.toString();
72+
assertThat(got).contains("Speaker 1: I'm here");
73+
assertThat(got).contains("Speaker 2: Hi, I'd like to buy a");
74+
}
75+
76+
@Test
77+
public void testDiarizationGcs() throws IOException, ExecutionException, InterruptedException {
78+
TranscribeDiarizationGcs.transcribeDiarizationGcs(
79+
"gs://cloud-samples-data/speech/commercial_mono.wav");
80+
String got = bout.toString();
81+
assertThat(got).contains("Speaker 1: I'm here");
82+
assertThat(got).contains("Speaker 2: Hi, I'd like to buy a");
83+
}
84+
}

0 commit comments

Comments
 (0)