Skip to content

Commit 0873c66

Browse files
docs(samples): refactors the export-to-gcs sample (#737)
* docs(samples): refactors the export-to-gcs sample * docs(samples): refactors the export-to-gcs sample * docs(samples): fixed lint * docs(samples): updated test file * docs(samples): added imports * docs(samples): added dependency to global pom * docs(samples): added dependency to global pom * docs(samples): added dependency to global pom * docs(samples): updated pom * docs(samples): fixed null error * docs(samples): refactored test
1 parent 8c6d16a commit 0873c66

File tree

4 files changed

+76
-9
lines changed

4 files changed

+76
-9
lines changed

speech/.bash_profile

Whitespace-only changes.

speech/snippets/pom.xml

+5
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,11 @@
3838
</dependencyManagement>
3939

4040
<dependencies>
41+
<dependency>
42+
<groupId>org.json</groupId>
43+
<artifactId>json</artifactId>
44+
<version>20210307</version>
45+
</dependency>
4146
<dependency>
4247
<groupId>com.google.cloud</groupId>
4348
<artifactId>google-cloud-speech</artifactId>

speech/snippets/src/main/java/com/example/speech/ExportToStorageBeta.java

+51-4
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,34 @@
2727
import com.google.cloud.speech.v1p1beta1.RecognitionConfig.AudioEncoding;
2828
import com.google.cloud.speech.v1p1beta1.SpeechClient;
2929
import com.google.cloud.speech.v1p1beta1.TranscriptOutputConfig;
30+
import com.google.cloud.storage.Blob;
31+
import com.google.cloud.storage.BlobId;
32+
import com.google.cloud.storage.Storage;
33+
import com.google.cloud.storage.StorageOptions;
34+
import com.google.protobuf.util.JsonFormat;
3035
import java.io.IOException;
3136
import java.util.concurrent.ExecutionException;
3237
import java.util.stream.Collectors;
38+
import org.json.JSONObject;
3339

3440
public class ExportToStorageBeta {
3541

3642
public static void main(String[] args) throws Exception {
3743
String inputUri = "gs://YOUR_BUCKET_ID/path/to/your/audio_file.wav";
3844
String outputStorageUri = "gs://YOUR_BUCKET_ID/output_dir_prefix/";
45+
String objectName = "YOUR_OBJECT_NAME";
46+
String bucketName = "YOUR_BUCKET_ID";
3947
String encoding = "LINEAR16"; // encoding of the audio
4048
int sampleRateHertz = 8000;
4149
String languageCode = "en-US"; // language code BCP-47_LANGUAGE_CODE_OF_AUDIO
42-
exportToStorage(inputUri, outputStorageUri, encoding, sampleRateHertz, languageCode);
50+
exportToStorage(
51+
inputUri,
52+
outputStorageUri,
53+
encoding,
54+
sampleRateHertz,
55+
languageCode,
56+
bucketName,
57+
objectName);
4358
}
4459

4560
// Exports the recognized output to specified GCS destination.
@@ -48,7 +63,9 @@ public static void exportToStorage(
4863
String outputStorageUri,
4964
String encoding,
5065
int sampleRateHertz,
51-
String languageCode)
66+
String languageCode,
67+
String bucketName,
68+
String objectName)
5269
throws IOException, ExecutionException, InterruptedException {
5370
// Initialize client that will be used to send requests. This client only needs to be created
5471
// once, and can be reused for multiple requests. After completing all of your requests, call
@@ -58,6 +75,9 @@ public static void exportToStorage(
5875

5976
AudioEncoding audioEncoding = AudioEncoding.valueOf(encoding);
6077

78+
// Instantiates a Cloud Storage client
79+
Storage storage = StorageOptions.getDefaultInstance().getService();
80+
6181
// Pass in the URI of the Cloud Storage bucket to hold the transcription
6282
TranscriptOutputConfig outputConfig =
6383
TranscriptOutputConfig.newBuilder().setGcsUri(outputStorageUri).build();
@@ -80,12 +100,39 @@ public static void exportToStorage(
80100
speechClient.longRunningRecognizeAsync(request);
81101

82102
System.out.println("Waiting for operation to complete...");
83-
LongRunningRecognizeResponse response = future.get();
103+
future.get();
104+
105+
// Get blob given bucket and object name
106+
Blob blob = storage.get(BlobId.of(bucketName, objectName));
107+
108+
// Extract byte contents from blob
109+
byte[] bytes = blob.getContent();
110+
111+
// Get decoded representation
112+
String decoded = new String(bytes, "UTF-8");
113+
114+
// Create json object
115+
JSONObject jsonObject = new JSONObject(decoded);
116+
117+
// Get json string
118+
String json = jsonObject.toString();
119+
120+
// Specify the protobuf message type
121+
LongRunningRecognizeResponse.Builder builder = LongRunningRecognizeResponse.newBuilder();
122+
123+
// Construct a parser
124+
JsonFormat.Parser parser = JsonFormat.parser().ignoringUnknownFields();
125+
126+
// Parses from JSON into a protobuf message.
127+
parser.merge(json, builder);
128+
129+
// Get the converted values
130+
LongRunningRecognizeResponse storageResponse = builder.build();
84131

85132
System.out.println("Results saved to specified output Cloud Storage bucket.");
86133

87134
String output =
88-
response.getResultsList().stream()
135+
storageResponse.getResultsList().stream()
89136
.map(result -> String.valueOf(result.getAlternatives(0).getTranscript()))
90137
.collect(Collectors.joining("\n"));
91138
System.out.printf("Transcription: %s", output);

speech/snippets/src/test/java/com/example/speech/ExportToStorageBetaTest.java

+20-5
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import com.google.api.gax.paging.Page;
2222
import com.google.cloud.storage.Blob;
23+
import com.google.cloud.storage.BucketInfo;
2324
import com.google.cloud.storage.Storage;
2425
import com.google.cloud.storage.StorageOptions;
2526
import java.io.ByteArrayOutputStream;
@@ -34,25 +35,32 @@ public class ExportToStorageBetaTest {
3435
private static final String PROJECT_ID = System.getenv("GOOGLE_CLOUD_PROJECT");
3536
private static final String AUDIO_STORAGE_URI =
3637
"gs://cloud-samples-data/speech/commercial_mono.wav";
37-
private static final String PREFIX = "EXPORT_TEST_OUTPUTS";
38+
private static final String BUCKET_PREFIX = "EXPORT_TRANSCRIPT_OUTPUT";
39+
private static final String UNIQUE_ID =
40+
UUID.randomUUID().toString().replace("-", "").substring(0, 8);
41+
private static String BUCKET_NAME = String.format("speech-%s", UNIQUE_ID);
3842
private static final String OUTPUT_STORAGE_URI =
39-
String.format("gs://%s/%s/%s/", PROJECT_ID, PREFIX, UUID.randomUUID());
43+
String.format("gs://%s/%s/", BUCKET_NAME, BUCKET_PREFIX);
4044
private static final String ENCODING = "LINEAR16";
4145
private static final String LANGUAGE_CODE = "en-US";
46+
private static Storage storage = StorageOptions.getDefaultInstance().getService();
4247

4348
private static final int SAMPLE_RATE_HERTZ = 8000;
4449

4550
private ByteArrayOutputStream bout;
4651
private PrintStream originalPrintStream;
4752
private PrintStream out;
4853

54+
private static void createBucket() {
55+
storage.create(BucketInfo.of(BUCKET_NAME));
56+
}
57+
4958
private static void cleanUpBucket() {
50-
Storage storage = StorageOptions.getDefaultInstance().getService();
5159
Page<Blob> blobs =
5260
storage.list(
5361
PROJECT_ID,
5462
Storage.BlobListOption.currentDirectory(),
55-
Storage.BlobListOption.prefix(PREFIX));
63+
Storage.BlobListOption.prefix(BUCKET_PREFIX));
5664

5765
deleteDirectory(storage, blobs);
5866
}
@@ -73,6 +81,7 @@ private static void deleteDirectory(Storage storage, Page<Blob> blobs) {
7381

7482
@Before
7583
public void setUp() {
84+
createBucket();
7685
bout = new ByteArrayOutputStream();
7786
out = new PrintStream(bout);
7887
originalPrintStream = System.out;
@@ -89,7 +98,13 @@ public void tearDown() {
8998
@Test
9099
public void testExportToStorageBeta() throws Exception {
91100
ExportToStorageBeta.exportToStorage(
92-
AUDIO_STORAGE_URI, OUTPUT_STORAGE_URI, ENCODING, SAMPLE_RATE_HERTZ, LANGUAGE_CODE);
101+
AUDIO_STORAGE_URI,
102+
OUTPUT_STORAGE_URI,
103+
ENCODING,
104+
SAMPLE_RATE_HERTZ,
105+
LANGUAGE_CODE,
106+
BUCKET_NAME,
107+
BUCKET_PREFIX);
93108
String got = bout.toString();
94109
assertThat(got).contains("Transcription:");
95110
}

0 commit comments

Comments
 (0)