Skip to content

Commit e171e54

Browse files
nirupa-kumarShabirmean
authored andcommitted
samples: Automl (#1158)
* Test push * Vision AutoML * Vision AutoML updates + Translate AutoML * Translate README fixes * Fixing Kokoro failure issue * Language AutoML * Vision AutoML * Translate AutoML files added * Triggering tests * Triggering tests
1 parent 33f6536 commit e171e54

File tree

6 files changed

+1054
-0
lines changed

6 files changed

+1054
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,314 @@
1+
/*
2+
* Copyright 2018 Google Inc.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package com.google.cloud.translate.automl;
18+
19+
// Imports the Google Cloud client library
20+
import com.google.cloud.automl.v1beta1.AutoMlClient;
21+
import com.google.cloud.automl.v1beta1.Dataset;
22+
import com.google.cloud.automl.v1beta1.DatasetName;
23+
import com.google.cloud.automl.v1beta1.GcsSource;
24+
import com.google.cloud.automl.v1beta1.GcsSource.Builder;
25+
import com.google.cloud.automl.v1beta1.InputConfig;
26+
import com.google.cloud.automl.v1beta1.ListDatasetsRequest;
27+
import com.google.cloud.automl.v1beta1.LocationName;
28+
import com.google.cloud.automl.v1beta1.TranslationDatasetMetadata;
29+
import com.google.protobuf.Empty;
30+
31+
import java.io.IOException;
32+
import java.io.PrintStream;
33+
34+
import net.sourceforge.argparse4j.ArgumentParsers;
35+
import net.sourceforge.argparse4j.inf.ArgumentParser;
36+
import net.sourceforge.argparse4j.inf.ArgumentParserException;
37+
import net.sourceforge.argparse4j.inf.Namespace;
38+
import net.sourceforge.argparse4j.inf.Subparser;
39+
import net.sourceforge.argparse4j.inf.Subparsers;
40+
41+
/**
42+
* Google Cloud AutoML Translate API sample application. Example usage: mvn package exec:java
43+
* -Dexec.mainClass='com.google.cloud.translate.automl.DatasetApi' -Dexec.args='create_dataset
44+
* test_dataset en ja'
45+
*/
46+
public class DatasetApi {
47+
48+
// [START automl_translation_create_dataset]
49+
/**
50+
* Demonstrates using the AutoML client to create a dataset
51+
*
52+
* @param projectId the Google Cloud Project ID.
53+
* @param computeRegion the Region name. (e.g., "us-central1").
54+
* @param datasetName the name of the dataset to be created.
55+
* @param source the Source language
56+
* @param target the Target language
57+
* @throws IOException on Input/Output errors.
58+
*/
59+
public static void createDataset(
60+
String projectId, String computeRegion, String datasetName, String source, String target)
61+
throws IOException {
62+
// Instantiates a client
63+
AutoMlClient client = AutoMlClient.create();
64+
65+
// A resource that represents Google Cloud Platform location.
66+
LocationName projectLocation = LocationName.of(projectId, computeRegion);
67+
68+
// Specify the source and target language.
69+
TranslationDatasetMetadata translationDatasetMetadata =
70+
TranslationDatasetMetadata.newBuilder()
71+
.setSourceLanguageCode(source)
72+
.setTargetLanguageCode(target)
73+
.build();
74+
75+
// Set dataset name and dataset metadata.
76+
Dataset myDataset =
77+
Dataset.newBuilder()
78+
.setDisplayName(datasetName)
79+
.setTranslationDatasetMetadata(translationDatasetMetadata)
80+
.build();
81+
82+
// Create a dataset with the dataset metadata in the region.
83+
Dataset dataset = client.createDataset(projectLocation, myDataset);
84+
85+
// Display the dataset information.
86+
System.out.println(String.format("Dataset name: %s", dataset.getName()));
87+
System.out.println(
88+
String.format(
89+
"Dataset id: %s",
90+
dataset.getName().split("/")[dataset.getName().split("/").length - 1]));
91+
System.out.println(String.format("Dataset display name: %s", dataset.getDisplayName()));
92+
System.out.println("Translation dataset Metadata:");
93+
System.out.println(
94+
String.format(
95+
"\tSource language code: %s",
96+
dataset.getTranslationDatasetMetadata().getSourceLanguageCode()));
97+
System.out.println(
98+
String.format(
99+
"\tTarget language code: %s",
100+
dataset.getTranslationDatasetMetadata().getTargetLanguageCode()));
101+
System.out.println("Dataset create time:");
102+
System.out.println(String.format("\tseconds: %s", dataset.getCreateTime().getSeconds()));
103+
System.out.println(String.format("\tnanos: %s", dataset.getCreateTime().getNanos()));
104+
}
105+
// [END automl_translation_create_dataset]
106+
107+
// [START automl_translation_list_datasets]
108+
/**
109+
* Demonstrates using the AutoML client to list all datasets.
110+
*
111+
* @param projectId the Google Cloud Project ID.
112+
* @param computeRegion the Region name. (e.g., "us-central1").
113+
* @param filter the Filter expression.
114+
* @throws Exception on AutoML Client errors
115+
*/
116+
public static void listDatasets(String projectId, String computeRegion, String filter)
117+
throws IOException {
118+
// Instantiates a client
119+
AutoMlClient client = AutoMlClient.create();
120+
121+
// A resource that represents Google Cloud Platform location.
122+
LocationName projectLocation = LocationName.of(projectId, computeRegion);
123+
124+
ListDatasetsRequest request =
125+
ListDatasetsRequest.newBuilder()
126+
.setParent(projectLocation.toString())
127+
.setFilter(filter)
128+
.build();
129+
130+
// List all the datasets available in the region by applying filter.
131+
System.out.println("List of datasets:");
132+
for (Dataset dataset : client.listDatasets(request).iterateAll()) {
133+
// Display the dataset information
134+
System.out.println(String.format("\nDataset name: %s", dataset.getName()));
135+
System.out.println(
136+
String.format(
137+
"Dataset id: %s",
138+
dataset.getName().split("/")[dataset.getName().split("/").length - 1]));
139+
System.out.println(String.format("Dataset display name: %s", dataset.getDisplayName()));
140+
System.out.println("Translation dataset metadata:");
141+
System.out.println(
142+
String.format(
143+
"\tSource language code: %s",
144+
dataset.getTranslationDatasetMetadata().getSourceLanguageCode()));
145+
System.out.println(
146+
String.format(
147+
"\tTarget language code: %s",
148+
dataset.getTranslationDatasetMetadata().getTargetLanguageCode()));
149+
System.out.println("Dataset create time:");
150+
System.out.println(String.format("\tseconds: %s", dataset.getCreateTime().getSeconds()));
151+
System.out.println(String.format("\tnanos: %s", dataset.getCreateTime().getNanos()));
152+
}
153+
}
154+
// [END automl_translation_list_datasets]
155+
156+
// [START automl_translation_get_dataset]
157+
/**
158+
* Demonstrates using the AutoML client to get a dataset by ID.
159+
*
160+
* @param projectId the Google Cloud Project ID.
161+
* @param computeRegion the Region name. (e.g., "us-central1").
162+
* @param datasetId the Id of the dataset.
163+
* @throws Exception on AutoML Client errors
164+
*/
165+
public static void getDataset(String projectId, String computeRegion, String datasetId)
166+
throws Exception {
167+
// Instantiates a client
168+
AutoMlClient client = AutoMlClient.create();
169+
170+
// Get the complete path of the dataset.
171+
DatasetName datasetFullId = DatasetName.of(projectId, computeRegion, datasetId);
172+
173+
// Get all the information about a given dataset.
174+
Dataset dataset = client.getDataset(datasetFullId);
175+
176+
// Display the dataset information
177+
System.out.println(String.format("Dataset name: %s", dataset.getName()));
178+
System.out.println(
179+
String.format(
180+
"Dataset id: %s",
181+
dataset.getName().split("/")[dataset.getName().split("/").length - 1]));
182+
System.out.println(String.format("Dataset display name: %s", dataset.getDisplayName()));
183+
System.out.println("Translation dataset metadata:");
184+
System.out.println(
185+
String.format(
186+
"\tSource language code: %s",
187+
dataset.getTranslationDatasetMetadata().getSourceLanguageCode()));
188+
System.out.println(
189+
String.format(
190+
"\tTarget language code: %s",
191+
dataset.getTranslationDatasetMetadata().getTargetLanguageCode()));
192+
System.out.println("Dataset create time:");
193+
System.out.println(String.format("\tseconds: %s", dataset.getCreateTime().getSeconds()));
194+
System.out.println(String.format("\tnanos: %s", dataset.getCreateTime().getNanos()));
195+
}
196+
// [END automl_translation_get_dataset]
197+
198+
// [START automl_translation_import_data]
199+
/**
200+
* Import sentence pairs to the dataset.
201+
*
202+
* @param projectId the Google Cloud Project ID.
203+
* @param computeRegion the Region name. (e.g., "us-central1").
204+
* @param datasetId the Id of the dataset.
205+
* @param path the remote Path of the training data csv file.
206+
* @throws Exception on AutoML Client errors
207+
*/
208+
public static void importData(
209+
String projectId, String computeRegion, String datasetId, String path) throws Exception {
210+
// Instantiates a client
211+
AutoMlClient client = AutoMlClient.create();
212+
213+
// Get the complete path of the dataset.
214+
DatasetName datasetFullId = DatasetName.of(projectId, computeRegion, datasetId);
215+
216+
Builder gcsSource = GcsSource.newBuilder();
217+
218+
// Get multiple Google Cloud Storage URIs to import data from
219+
String[] inputUris = path.split(",");
220+
for (String inputUri : inputUris) {
221+
gcsSource.addInputUris(inputUri);
222+
}
223+
224+
// Import data from the input URI
225+
InputConfig inputConfig = InputConfig.newBuilder().setGcsSource(gcsSource).build();
226+
System.out.println("Processing import...");
227+
228+
Empty response = client.importDataAsync(datasetFullId, inputConfig).get();
229+
System.out.println(String.format("Dataset imported. %s", response));
230+
}
231+
// [END automl_translation_import_data]
232+
233+
// [START automl_translation_delete_dataset]
234+
/**
235+
* Delete a dataset.
236+
*
237+
* @param projectId the Google Cloud Project ID.
238+
* @param computeRegion the Region name. (e.g., "us-central1").
239+
* @param datasetId the Id of the dataset.
240+
* @throws Exception on AutoML Client errors
241+
*/
242+
public static void deleteDataset(String projectId, String computeRegion, String datasetId)
243+
throws Exception {
244+
// Instantiates a client
245+
AutoMlClient client = AutoMlClient.create();
246+
247+
// Get the full path of the dataset.
248+
DatasetName datasetFullId = DatasetName.of(projectId, computeRegion, datasetId);
249+
250+
// Delete a dataset.
251+
Empty response = client.deleteDatasetAsync(datasetFullId).get();
252+
253+
System.out.println(String.format("Dataset deleted. %s", response));
254+
}
255+
// [END automl_translation_delete_dataset]
256+
257+
public static void main(String[] args) throws Exception {
258+
DatasetApi datasetApi = new DatasetApi();
259+
datasetApi.argsHelper(args, System.out);
260+
}
261+
262+
public static void argsHelper(String[] args, PrintStream out) throws Exception {
263+
ArgumentParser parser = ArgumentParsers.newFor("").build();
264+
Subparsers subparsers = parser.addSubparsers().dest("command");
265+
266+
Subparser createDatasetParser = subparsers.addParser("create_dataset");
267+
createDatasetParser.addArgument("datasetName");
268+
createDatasetParser.addArgument("source");
269+
createDatasetParser.addArgument("target");
270+
271+
Subparser listDatasetParser = subparsers.addParser("list_datasets");
272+
listDatasetParser.addArgument("filter").nargs("?").setDefault("translation_dataset_metadata:*");
273+
274+
Subparser getDatasetParser = subparsers.addParser("get_dataset");
275+
getDatasetParser.addArgument("datasetId");
276+
277+
Subparser importDataParser = subparsers.addParser("import_data");
278+
importDataParser.addArgument("datasetId");
279+
importDataParser.addArgument("path");
280+
281+
Subparser deleteDatasetParser = subparsers.addParser("delete_dataset");
282+
deleteDatasetParser.addArgument("datasetId");
283+
284+
String projectId = System.getenv("PROJECT_ID");
285+
String computeRegion = System.getenv("REGION_NAME");
286+
287+
Namespace ns = null;
288+
try {
289+
ns = parser.parseArgs(args);
290+
if (ns.get("command").equals("create_dataset")) {
291+
createDataset(
292+
projectId,
293+
computeRegion,
294+
ns.getString("datasetName"),
295+
ns.getString("source"),
296+
ns.getString("target"));
297+
}
298+
if (ns.get("command").equals("list_datasets")) {
299+
listDatasets(projectId, computeRegion, ns.getString("filter"));
300+
}
301+
if (ns.get("command").equals("get_dataset")) {
302+
getDataset(projectId, computeRegion, ns.getString("datasetId"));
303+
}
304+
if (ns.get("command").equals("import_data")) {
305+
importData(projectId, computeRegion, ns.getString("datasetId"), ns.getString("path"));
306+
}
307+
if (ns.get("command").equals("delete_dataset")) {
308+
deleteDataset(projectId, computeRegion, ns.getString("datasetId"));
309+
}
310+
} catch (ArgumentParserException e) {
311+
parser.handleError(e);
312+
}
313+
}
314+
}

0 commit comments

Comments
 (0)