diff --git a/datalabeling/README.rst b/datalabeling/README.rst new file mode 100644 index 00000000000..bf5949b8cb7 --- /dev/null +++ b/datalabeling/README.rst @@ -0,0 +1,78 @@ +.. This file is automatically generated. Do not edit this file directly. + +Google Cloud Data Labeling Service Python Samples +=============================================================================== + +.. image:: https://gstatic.com/cloudssh/images/open-btn.png + :target: https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=datalabeling/README.rst + + +This directory contains samples for Google Cloud Data Labeling Service. `Google Cloud Data Labeling Service`_ allows developers to request having human labelers label a collection of data that you plan to use to train a custom machine learning model. + + + + +.. _Google Cloud Data Labeling Service: https://cloud.google.com/data-labeling/docs/ + +Setup +------------------------------------------------------------------------------- + + +Authentication +++++++++++++++ + +This sample requires you to have authentication setup. Refer to the +`Authentication Getting Started Guide`_ for instructions on setting up +credentials for applications. + +.. _Authentication Getting Started Guide: + https://cloud.google.com/docs/authentication/getting-started + +Install Dependencies +++++++++++++++++++++ + +#. Clone python-docs-samples and change directory to the sample directory you want to use. + + .. code-block:: bash + + $ git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git + +#. Install `pip`_ and `virtualenv`_ if you do not already have them. You may want to refer to the `Python Development Environment Setup Guide`_ for Google Cloud Platform for instructions. + + .. _Python Development Environment Setup Guide: + https://cloud.google.com/python/setup + +#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. + + .. 
code-block:: bash + + $ virtualenv env + $ source env/bin/activate + +#. Install the dependencies needed to run the samples. + + .. code-block:: bash + + $ pip install -r requirements.txt + +.. _pip: https://pip.pypa.io/ +.. _virtualenv: https://virtualenv.pypa.io/ + + + +The client library +------------------------------------------------------------------------------- + +This sample uses the `Google Cloud Client Library for Python`_. +You can read the documentation for more details on API usage and use GitHub +to `browse the source`_ and `report issues`_. + +.. _Google Cloud Client Library for Python: + https://googlecloudplatform.github.io/google-cloud-python/ +.. _browse the source: + https://github.com/GoogleCloudPlatform/google-cloud-python +.. _report issues: + https://github.com/GoogleCloudPlatform/google-cloud-python/issues + + +.. _Google Cloud SDK: https://cloud.google.com/sdk/ \ No newline at end of file diff --git a/datalabeling/README.rst.in b/datalabeling/README.rst.in new file mode 100644 index 00000000000..c87a1ff89b4 --- /dev/null +++ b/datalabeling/README.rst.in @@ -0,0 +1,18 @@ +# This file is used to generate README.rst + +product: + name: Google Cloud Data Labeling Service + short_name: Cloud Data Labeling + url: https://cloud.google.com/data-labeling/docs/ + description: > + `Google Cloud Data Labeling Service`_ allows developers to request having + human labelers label a collection of data that you plan to use to train a + custom machine learning model. 
# [START datalabeling_create_annotation_spec_set_beta]
def create_annotation_spec_set(project_id):
    """Create a sample annotation spec set in a Google Cloud project.

    The set is built from two placeholder labels plus a placeholder
    display name and description, and its fields are printed once the
    service has returned it.

    Args:
        project_id: ID of the Google Cloud project to create the set in.

    Returns:
        The annotation spec set returned by the service.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()
    parent = client.project_path(project_id)

    # (display name, description) for each label in the set.
    label_fields = (
        ('label_1', 'label_description_1'),
        ('label_2', 'label_description_2'),
    )
    specs = [
        datalabeling.types.AnnotationSpec(
            display_name=name, description=text)
        for name, text in label_fields
    ]

    spec_set = datalabeling.types.AnnotationSpecSet(
        display_name='YOUR_ANNOTATION_SPEC_SET_DISPLAY_NAME',
        description='YOUR_DESCRIPTION',
        annotation_specs=specs
    )

    created = client.create_annotation_spec_set(parent, spec_set)

    # Resource names look like:
    # projects/{project_id}/annotationSpecSets/{annotation_spec_set_id}
    print('The annotation_spec_set resource name: {}'.format(created.name))
    print('Display name: {}'.format(created.display_name))
    print('Description: {}'.format(created.description))
    print('Annotation specs:')
    for spec in created.annotation_specs:
        print('\tDisplay name: {}'.format(spec.display_name))
        print('\tDescription: {}\n'.format(spec.description))

    return created
# [END datalabeling_create_annotation_spec_set_beta]
@pytest.mark.slow
def test_create_annotation_spec_set(capsys):
    """Create an annotation spec set and check the sample's output.

    The created spec set is always deleted, even when the assertion
    fails, so a failing run does not leave resources in the project.
    """
    response = create_annotation_spec_set.create_annotation_spec_set(
        PROJECT_ID)
    try:
        out, _ = capsys.readouterr()
        assert 'The annotation_spec_set resource name:' in out
    finally:
        # Delete the created annotation spec set.
        client = datalabeling.DataLabelingServiceClient()
        client.delete_annotation_spec_set(response.name)
# [START datalabeling_create_instruction_beta]
def create_instruction(project_id, data_type, instruction_gcs_uri):
    """Create a PDF labeling instruction in a Google Cloud project.

    Args:
        project_id: ID of the Google Cloud project to create it in.
        data_type: Data type the instruction applies to; the command-line
            help lists IMAGE, VIDEO, TEXT and AUDIO.
        instruction_gcs_uri: Google Cloud Storage URI of the PDF file.

    Returns:
        The instruction obtained from the create operation's result.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()

    instruction = datalabeling.types.Instruction(
        display_name='YOUR_INSTRUCTION_DISPLAY_NAME',
        description='YOUR_DESCRIPTION',
        data_type=data_type,
        pdf_instruction=datalabeling.types.PdfInstruction(
            gcs_file_uri=instruction_gcs_uri)
    )

    parent = client.project_path(project_id)
    created = client.create_instruction(parent, instruction).result()

    # Resource names look like:
    # projects/{project_id}/instructions/{instruction_id}
    print('The instruction resource name: {}\n'.format(created.name))
    print('Display name: {}'.format(created.display_name))
    print('Description: {}'.format(created.description))

    timestamp = created.create_time
    print('Create time:')
    print('\tseconds: {}'.format(timestamp.seconds))
    print('\tnanos: {}'.format(timestamp.nanos))

    # Print the enum member's name rather than its numeric value.
    data_type_name = datalabeling.enums.DataType(created.data_type).name
    print('Data type: {}'.format(data_type_name))

    print('Pdf instruction:')
    pdf_uri = created.pdf_instruction.gcs_file_uri
    print('\tGcs file uri: {}'.format(pdf_uri))

    return created
# [END datalabeling_create_instruction_beta]
argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument( + '--project-id', + help='Project ID. Required.', + required=True + ) + + parser.add_argument( + '--data-type', + help='Data type. Only support IMAGE, VIDEO, TEXT and AUDIO. Required.', + required=True + ) + + parser.add_argument( + '--instruction-gcs-uri', + help='The URI of Google Cloud Storage of the instruction. Required.', + required=True + ) + + args = parser.parse_args() + + create_instruction( + args.project_id, + args.data_type, + args.instruction_gcs_uri + ) diff --git a/datalabeling/create_instruction_test.py b/datalabeling/create_instruction_test.py new file mode 100644 index 00000000000..43cf90e0262 --- /dev/null +++ b/datalabeling/create_instruction_test.py @@ -0,0 +1,41 @@ +#!/usr/bin/env python + +# Copyright 2019 Google, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import create_instruction +from google.cloud import datalabeling_v1beta1 as datalabeling +import pytest + +PROJECT_ID = os.getenv('GCLOUD_PROJECT') +INSTRUCTION_GCS_URI = ('gs://cloud-samples-data/datalabeling' + '/instruction/test.pdf') + + +@pytest.mark.slow +def test_create_instruction(capsys): + result = create_instruction.create_instruction( + PROJECT_ID, + 'IMAGE', + INSTRUCTION_GCS_URI + ) + out, _ = capsys.readouterr() + assert 'The instruction resource name: ' in out + + # Delete the created instruction. 
# [START datalabeling_export_data_beta]
def export_data(dataset_resource_name, annotated_dataset_resource_name,
                export_gcs_uri):
    """Exports an annotated dataset as CSV to Google Cloud Storage.

    Args:
        dataset_resource_name: Resource name of the dataset to export.
        annotated_dataset_resource_name: Resource name of the annotated
            dataset within that dataset.
        export_gcs_uri: Google Cloud Storage URI to write the export to.

    Returns:
        The result of the export operation, as import_data does for its
        own operation.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()

    gcs_destination = datalabeling.types.GcsDestination(
        output_uri=export_gcs_uri, mime_type='text/csv')

    output_config = datalabeling.types.OutputConfig(
        gcs_destination=gcs_destination)

    response = client.export_data(
        dataset_resource_name,
        annotated_dataset_resource_name,
        output_config
    )

    # Resolve the operation once and reuse the result for every field.
    result = response.result()

    print('Dataset ID: {}\n'.format(result.dataset))
    print('Output config:')
    print('\tGcs destination:')
    print('\t\tOutput URI: {}\n'.format(
        result.output_config.gcs_destination.output_uri))

    return result
# [END datalabeling_export_data_beta]
parser.add_argument( + '--dataset-resource-name', + help='Dataset resource name. Required.', + required=True + ) + + parser.add_argument( + '--annotated-dataset-resource-name', + help='Annotated Dataset resource name. Required.', + required=True + ) + + parser.add_argument( + '--export-gcs-uri', + help='The export GCS URI. Required.', + required=True + ) + + args = parser.parse_args() + + export_data( + args.dataset_resource_name, + args.annotated_dataset_resource_name, + args.export_gcs_uri + ) diff --git a/datalabeling/import_data.py b/datalabeling/import_data.py new file mode 100644 index 00000000000..a529694128a --- /dev/null +++ b/datalabeling/import_data.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# [START datalabeling_import_data_beta]
def import_data(dataset_resource_name, data_type, input_gcs_uri):
    """Import CSV-listed data from Cloud Storage into a dataset.

    Args:
        dataset_resource_name: Resource name of the target dataset.
        data_type: Type of the data being imported; the command-line
            help lists IMAGE, VIDEO, TEXT and AUDIO.
        input_gcs_uri: Google Cloud Storage URI of the input CSV file.

    Returns:
        The result of the import operation.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    client = datalabeling.DataLabelingServiceClient()

    input_config = datalabeling.types.InputConfig(
        data_type=data_type,
        gcs_source=datalabeling.types.GcsSource(
            input_uri=input_gcs_uri, mime_type='text/csv')
    )

    operation = client.import_data(dataset_resource_name, input_config)
    imported = operation.result()

    # Resource names look like:
    # projects/{project_id}/datasets/{dataset_id}
    print('Dataset resource name: {}\n'.format(imported.dataset))

    return imported
# [END datalabeling_import_data_beta]
@pytest.fixture(scope='function')
def dataset():
    """Yield a temporary dataset and delete it once the test is done."""
    # Set up: create the dataset in the test project.
    temp_dataset = manage_dataset.create_dataset(PROJECT_ID)

    yield temp_dataset

    # Tear down: remove the dataset created above.
    manage_dataset.delete_dataset(temp_dataset.name)
# [START datalabeling_label_image_beta]
def label_image(dataset_resource_name, instruction_resource_name,
                annotation_spec_set_resource_name):
    """Start an image classification labeling operation on a dataset.

    Args:
        dataset_resource_name: Resource name of the dataset to label.
        instruction_resource_name: Resource name of the instruction
            given to the labelers.
        annotation_spec_set_resource_name: Resource name of the
            annotation spec set holding the allowed labels.

    Returns:
        The operation returned by the client; this function does not
        wait for it to finish.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    enums = datalabeling.enums
    types = datalabeling.types
    client = datalabeling.DataLabelingServiceClient()

    human_config = types.HumanAnnotationConfig(
        instruction=instruction_resource_name,
        annotated_dataset_display_name='YOUR_ANNOTATED_DATASET_DISPLAY_NAME',
        label_group='YOUR_LABEL_GROUP',
        replica_count=1
    )

    # Single-label classification, aggregated by majority vote.
    classification_config = types.ImageClassificationConfig(
        annotation_spec_set=annotation_spec_set_resource_name,
        allow_multi_label=False,
        answer_aggregation_type=enums.StringAggregationType.MAJORITY_VOTE
    )

    operation = client.label_image(
        dataset_resource_name,
        human_config,
        enums.LabelImageRequest.Feature.CLASSIFICATION,
        image_classification_config=classification_config
    )

    print('Label_image operation name: {}'.format(operation.operation.name))
    return operation
# [END datalabeling_label_image_beta]
@pytest.fixture(scope='function')
def dataset():
    """Yield a temporary dataset populated with image data.

    The dataset is deleted on teardown, and also when the import step
    fails during setup, so a failed import does not leave it behind.
    """
    # create a temporary dataset
    dataset = manage_dataset.create_dataset(PROJECT_ID)

    try:
        # import some data to it
        import_data.import_data(dataset.name, 'IMAGE', INPUT_GCS_URI)

        yield dataset
    finally:
        # tear down
        manage_dataset.delete_dataset(dataset.name)
# The dataset fixture is listed last in test_label_image because the
# dataset must be deleted before the annotation_spec_set can be deleted.
@pytest.mark.slow
def test_label_image(capsys, annotation_spec_set, instruction, dataset):
    """Start an image labeling operation, check the output, cancel it."""
    operation = label_image.label_image(
        dataset.name, instruction.name, annotation_spec_set.name)

    captured = capsys.readouterr()[0]
    assert 'Label_image operation name: ' in captured
    name_of_operation = operation.operation.name

    # Cancel through the returned operation object first ...
    operation.cancel()
    assert operation.cancelled() is True

    # ... and then again by name through the operations client.
    service_client = datalabeling.DataLabelingServiceClient()
    operations_client = service_client.transport._operations_client
    operations_client.cancel_operation(name_of_operation)
# [START datalabeling_label_text_beta]
def label_text(dataset_resource_name, instruction_resource_name,
               annotation_spec_set_resource_name):
    """Start a text entity extraction labeling operation on a dataset.

    Args:
        dataset_resource_name: Resource name of the dataset to label.
        instruction_resource_name: Resource name of the instruction
            given to the labelers.
        annotation_spec_set_resource_name: Resource name of the
            annotation spec set holding the allowed labels.

    Returns:
        The operation returned by the client; this function does not
        wait for it to finish.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    enums = datalabeling.enums
    types = datalabeling.types
    client = datalabeling.DataLabelingServiceClient()

    human_config = types.HumanAnnotationConfig(
        instruction=instruction_resource_name,
        annotated_dataset_display_name='YOUR_ANNOTATED_DATASET_DISPLAY_NAME',
        label_group='YOUR_LABEL_GROUP',
        replica_count=1
    )

    extraction_config = types.TextEntityExtractionConfig(
        annotation_spec_set=annotation_spec_set_resource_name)

    operation = client.label_text(
        dataset_resource_name,
        human_config,
        enums.LabelTextRequest.Feature.TEXT_ENTITY_EXTRACTION,
        text_entity_extraction_config=extraction_config
    )

    print('Label_text operation name: {}'.format(operation.operation.name))
    return operation
# [END datalabeling_label_text_beta]
@pytest.fixture(scope='function')
def dataset():
    """Yield a temporary dataset populated with text data.

    The dataset is deleted on teardown, and also when the import step
    fails during setup, so a failed import does not leave it behind.
    """
    # create a temporary dataset
    dataset = manage_dataset.create_dataset(PROJECT_ID)

    try:
        # import some data to it
        import_data.import_data(dataset.name, 'TEXT', INPUT_GCS_URI)

        yield dataset
    finally:
        # tear down
        manage_dataset.delete_dataset(dataset.name)
# The dataset fixture is listed last in test_label_text because the
# dataset must be deleted before the annotation_spec_set can be deleted.
@pytest.mark.slow
def test_label_text(capsys, annotation_spec_set, instruction, dataset):
    """Start a text labeling operation, check the output, cancel it."""
    operation = label_text.label_text(
        dataset.name, instruction.name, annotation_spec_set.name)

    captured = capsys.readouterr()[0]
    assert 'Label_text operation name: ' in captured
    name_of_operation = operation.operation.name

    # Cancel through the returned operation object first ...
    operation.cancel()
    assert operation.cancelled() is True

    # ... and then again by name through the operations client.
    service_client = datalabeling.DataLabelingServiceClient()
    operations_client = service_client.transport._operations_client
    operations_client.cancel_operation(name_of_operation)
# [START datalabeling_label_video_beta]
def label_video(dataset_resource_name, instruction_resource_name,
                annotation_spec_set_resource_name):
    """Start a video object tracking labeling operation on a dataset.

    Args:
        dataset_resource_name: Resource name of the dataset to label.
        instruction_resource_name: Resource name of the instruction
            given to the labelers.
        annotation_spec_set_resource_name: Resource name of the
            annotation spec set holding the allowed labels.

    Returns:
        The operation returned by the client; this function does not
        wait for it to finish.
    """
    from google.cloud import datalabeling_v1beta1 as datalabeling
    enums = datalabeling.enums
    types = datalabeling.types
    client = datalabeling.DataLabelingServiceClient()

    human_config = types.HumanAnnotationConfig(
        instruction=instruction_resource_name,
        annotated_dataset_display_name='YOUR_ANNOTATED_DATASET_DISPLAY_NAME',
        label_group='YOUR_LABEL_GROUP',
        replica_count=1
    )

    tracking_config = types.ObjectTrackingConfig(
        annotation_spec_set=annotation_spec_set_resource_name
    )

    operation = client.label_video(
        dataset_resource_name,
        human_config,
        enums.LabelVideoRequest.Feature.OBJECT_TRACKING,
        object_tracking_config=tracking_config
    )

    print('Label_video operation name: {}'.format(operation.operation.name))
    return operation
# [END datalabeling_label_video_beta]
@pytest.fixture(scope='function')
def dataset():
    """Yield a temporary dataset populated with video data.

    The dataset is deleted on teardown, and also when the import step
    fails during setup, so a failed import does not leave it behind.
    """
    # create a temporary dataset
    dataset = manage_dataset.create_dataset(PROJECT_ID)

    try:
        # import some data to it
        import_data.import_data(dataset.name, 'VIDEO', INPUT_GCS_URI)

        yield dataset
    finally:
        # tear down
        manage_dataset.delete_dataset(dataset.name)
+@pytest.fixture(scope='function') +def instruction(): + # create a temporary instruction + instruction = create_instruction.create_instruction( + PROJECT_ID, 'VIDEO', + 'gs://cloud-samples-data/datalabeling/instruction/test.pdf') + + yield instruction + + # tear down + client = datalabeling.DataLabelingServiceClient() + client.delete_instruction(instruction.name) + + +# Passing in dataset as the last argument in test_label_video since it needs +# to be deleted before the annotation_spec_set can be deleted. +@pytest.mark.slow +def test_label_video(capsys, annotation_spec_set, instruction, dataset): + + # Start labeling. + response = label_video.label_video( + dataset.name, + instruction.name, + annotation_spec_set.name + ) + out, _ = capsys.readouterr() + assert 'Label_video operation name: ' in out + operation_name = response.operation.name + + # Cancels the labeling operation. + response.cancel() + assert response.cancelled() is True + + client = datalabeling.DataLabelingServiceClient() + client.transport._operations_client.cancel_operation( + operation_name) diff --git a/datalabeling/manage_dataset.py b/datalabeling/manage_dataset.py new file mode 100644 index 00000000000..a13f5ad2ca0 --- /dev/null +++ b/datalabeling/manage_dataset.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python + +# Copyright 2019 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import argparse + + +# [START datalabeling_create_dataset_beta] +def create_dataset(project_id): + """Creates a dataset for the given Google Cloud project.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + client = datalabeling.DataLabelingServiceClient() + + formatted_project_name = client.project_path(project_id) + + dataset = datalabeling.types.Dataset( + display_name='YOUR_ANNOTATION_SPEC_SET_DISPLAY_NAME', + description='YOUR_DESCRIPTION' + ) + + response = client.create_dataset(formatted_project_name, dataset) + + # The format of resource name: + # projects/{project_id}/datasets/{dataset_id} + print('The dataset resource name: {}\n'.format(response.name)) + print('Display name: {}'.format(response.display_name)) + print('Description: {}'.format(response.description)) + print('Create time:') + print('\tseconds: {}'.format(response.create_time.seconds)) + print('\tnanos: {}'.format(response.create_time.nanos)) + + return response +# [END datalabeling_create_dataset_beta] + + +# [START datalabeling_list_datasets_beta] +def list_datasets(project_id): + """Lists datasets for the given Google Cloud project.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + client = datalabeling.DataLabelingServiceClient() + + formatted_project_name = client.project_path(project_id) + + response = client.list_datasets(formatted_project_name) + for element in response: + # The format of resource name: + # projects/{project_id}/datasets/{dataset_id} + print('The dataset resource name: {}\n'.format(element.name)) + print('Display name: {}'.format(element.display_name)) + print('Description: {}'.format(element.description)) + print('Create time:') + print('\tseconds: {}'.format(element.create_time.seconds)) + print('\tnanos: {}'.format(element.create_time.nanos)) +# [END datalabeling_list_datasets_beta] + + +# [START datalabeling_get_dataset_beta] +def get_dataset(dataset_resource_name): + """Gets a dataset for the given Google Cloud 
project.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + client = datalabeling.DataLabelingServiceClient() + + response = client.get_dataset(dataset_resource_name) + + print('The dataset resource name: {}\n'.format(response.name)) + print('Display name: {}'.format(response.display_name)) + print('Description: {}'.format(response.description)) + print('Create time:') + print('\tseconds: {}'.format(response.create_time.seconds)) + print('\tnanos: {}'.format(response.create_time.nanos)) +# [END datalabeling_get_dataset_beta] + + +# [START datalabeling_delete_dataset_beta] +def delete_dataset(dataset_resource_name): + """Deletes a dataset for the given Google Cloud project.""" + from google.cloud import datalabeling_v1beta1 as datalabeling + client = datalabeling.DataLabelingServiceClient() + + response = client.delete_dataset(dataset_resource_name) + + print('Dataset deleted. {}\n'.format(response)) +# [END datalabeling_delete_dataset_beta] + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + description=__doc__, + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + subparsers = parser.add_subparsers(dest='command') + + create_parser = subparsers.add_parser( + 'create', help='Create a new dataset.') + create_parser.add_argument( + '--project-id', + help='Project ID. Required.', + required=True + ) + + list_parser = subparsers.add_parser('list', help='List all datasets.') + list_parser.add_argument( + '--project-id', + help='Project ID. Required.', + required=True + ) + + get_parser = subparsers.add_parser( + 'get', help='Get a dataset by the dataset resource name.') + get_parser.add_argument( + '--dataset-resource-name', + help='The dataset resource name. Used in the get or delete operation.', + required=True + ) + + delete_parser = subparsers.add_parser( + 'delete', help='Delete a dataset by the dataset resource name.') + delete_parser.add_argument( + '--dataset-resource-name', + help='The dataset resource name. 
Used in the get or delete operation.', + required=True + ) + + args = parser.parse_args() + + if args.command == 'create': + create_dataset(args.project_id) + elif args.command == 'list': + list_datasets(args.project_id) + elif args.command == 'get': + get_dataset(args.dataset_resource_name) + elif args.command == 'delete': + delete_dataset(args.dataset_resource_name) diff --git a/datalabeling/manage_dataset_test.py b/datalabeling/manage_dataset_test.py new file mode 100644 index 00000000000..ac7cd83fae5 --- /dev/null +++ b/datalabeling/manage_dataset_test.py @@ -0,0 +1,63 @@ +#!/usr/bin/env python + +# Copyright 2019 Google, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import os + +import manage_dataset +import pytest + +PROJECT_ID = os.getenv("GCLOUD_PROJECT") + + +@pytest.fixture(scope='function') +def dataset(): + # create a temporary dataset + dataset = manage_dataset.create_dataset(PROJECT_ID) + + yield dataset + + # tear down + manage_dataset.delete_dataset(dataset.name) + + +def test_create_dataset(capsys): + response = manage_dataset.create_dataset(PROJECT_ID) + out, _ = capsys.readouterr() + assert "The dataset resource name:" in out + + # clean up + manage_dataset.delete_dataset(response.name) + + +def test_list_dataset(capsys, dataset): + manage_dataset.list_datasets(PROJECT_ID) + out, _ = capsys.readouterr() + assert dataset.name in out + + +def test_get_dataset(capsys, dataset): + manage_dataset.get_dataset(dataset.name) + out, _ = capsys.readouterr() + assert "The dataset resource name:" in out + + +def test_delete_dataset(capsys): + # Creates a dataset. + response = manage_dataset.create_dataset(PROJECT_ID) + + manage_dataset.delete_dataset(response.name) + out, _ = capsys.readouterr() + assert "Dataset deleted." in out diff --git a/datalabeling/requirements.txt b/datalabeling/requirements.txt new file mode 100644 index 00000000000..6cc7309cddf --- /dev/null +++ b/datalabeling/requirements.txt @@ -0,0 +1 @@ +google-cloud-datalabeling==0.1.1