From f377d8aa26d27f001b76922293f25203bfab6ed4 Mon Sep 17 00:00:00 2001 From: Leah Cole Date: Mon, 16 Dec 2019 11:14:30 -0500 Subject: [PATCH 01/12] fix: correct dataset name, use env var for project --- dlp/risk_test.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dlp/risk_test.py b/dlp/risk_test.py index dafb58523bc..164cd9c6fb8 100644 --- a/dlp/risk_test.py +++ b/dlp/risk_test.py @@ -12,18 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. -from gcp_devrel.testing.flaky import flaky +from flaky import flaky import google.cloud.pubsub import pytest +import os import risk -GCLOUD_PROJECT = "python-docs-samples" -TABLE_PROJECT = "python-docs-samples" +GCLOUD_PROJECT = os.environ.get('GCLOUD_PROJECT') +TABLE_PROJECT = os.environ.get('GCLOUD_PROJECT') TOPIC_ID = "dlp-test" SUBSCRIPTION_ID = "dlp-test-subscription" -DATASET_ID = "integration_tests_dlp" +DATASET_ID = "dlp_test_dataset" UNIQUE_FIELD = "Name" REPEATED_FIELD = "Mystery" NUMERIC_FIELD = "Age" From 352785e06303e86141e7fc7a516a4060d8c0a099 Mon Sep 17 00:00:00 2001 From: Leah Cole Date: Fri, 3 Jan 2020 14:30:12 -0800 Subject: [PATCH 02/12] Add uuids to tests --- dlp/inspect_content_test.py | 15 +++++++++------ dlp/risk_test.py | 9 ++++++--- dlp/templates_test.py | 5 +++-- dlp/triggers_test.py | 7 ++++--- 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/dlp/inspect_content_test.py b/dlp/inspect_content_test.py index 899ed64c3b3..7be4ac68814 100644 --- a/dlp/inspect_content_test.py +++ b/dlp/inspect_content_test.py @@ -13,6 +13,7 @@ # limitations under the License. import os +import uuid from gcp_devrel.testing import eventually_consistent from gcp_devrel.testing.flaky import flaky @@ -26,16 +27,18 @@ import pytest import inspect_content +UNIQUE_STRING = str(uuid.uuid4()).split("-")[0] GCLOUD_PROJECT = os.getenv("GCLOUD_PROJECT") -TEST_BUCKET_NAME = GCLOUD_PROJECT + "-dlp-python-client-test" +TEST_BUCKET_NAME = GCLOUD_PROJECT + "-dlp-python-client-test" + UNIQUE_STRING RESOURCE_DIRECTORY = os.path.join(os.path.dirname(__file__), "resources") RESOURCE_FILE_NAMES = ["test.txt", "test.png", "harmless.txt", "accounts.txt"] -TOPIC_ID = "dlp-test" -SUBSCRIPTION_ID = "dlp-test-subscription" +TOPIC_ID = "dlp-test" + UNIQUE_STRING +SUBSCRIPTION_ID = "dlp-test-subscription" + UNIQUE_STRING DATASTORE_KIND = "DLP test kind" -BIGQUERY_DATASET_ID = "dlp_test_dataset" -BIGQUERY_TABLE_ID = "dlp_test_table" +DATASTORE_NAME = "DLP test object" + UNIQUE_STRING +BIGQUERY_DATASET_ID = "dlp_test_dataset" + UNIQUE_STRING +BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING @pytest.fixture(scope="module") @@ -108,7 +111,7 @@ def datastore_project(): datastore_client = google.cloud.datastore.Client() kind = DATASTORE_KIND - name = "DLP test object" + name = DATASTORE_NAME key = datastore_client.key(kind, name) item = google.cloud.datastore.Entity(key=key) item["payload"] = "My name is Gary Smith and my email is gary@example.com" diff --git a/dlp/risk_test.py b/dlp/risk_test.py index 164cd9c6fb8..b4b8b94ab16 100644 --- a/dlp/risk_test.py +++ b/dlp/risk_test.py @@ -13,6 +13,8 @@ # limitations under the License. from flaky import flaky +import uuid + import google.cloud.pubsub import pytest @@ -20,11 +22,12 @@ import risk +UNIQUE_STRING = str(uuid.uuid4()).split("-")[0] GCLOUD_PROJECT = os.environ.get('GCLOUD_PROJECT') TABLE_PROJECT = os.environ.get('GCLOUD_PROJECT') -TOPIC_ID = "dlp-test" -SUBSCRIPTION_ID = "dlp-test-subscription" -DATASET_ID = "dlp_test_dataset" +TOPIC_ID = "dlp-test" + UNIQUE_STRING +SUBSCRIPTION_ID = "dlp-test-subscription" + UNIQUE_STRING +DATASET_ID = "integration_tests_dlp" + UNIQUE_STRING UNIQUE_FIELD = "Name" REPEATED_FIELD = "Mystery" NUMERIC_FIELD = "Age" diff --git a/dlp/templates_test.py b/dlp/templates_test.py index dff157a9ee6..f63400defa5 100644 --- a/dlp/templates_test.py +++ b/dlp/templates_test.py @@ -13,15 +13,16 @@ # limitations under the License. import os +import uuid import google.api_core.exceptions import google.cloud.storage import templates - +UNIQUE_STRING = str(uuid.uuid4()).split("-")[0] GCLOUD_PROJECT = os.getenv("GCLOUD_PROJECT") -TEST_TEMPLATE_ID = "test-template" +TEST_TEMPLATE_ID = "test-template"+UNIQUE_STRING def test_create_list_and_delete_template(capsys): diff --git a/dlp/triggers_test.py b/dlp/triggers_test.py index 6a9d7d79261..9ecf92e9a37 100644 --- a/dlp/triggers_test.py +++ b/dlp/triggers_test.py @@ -13,6 +13,7 @@ # limitations under the License. import os +import uuid import google.api_core.exceptions import google.cloud.storage @@ -21,12 +22,12 @@ import triggers - +UNIQUE_STRING = str(uuid.uuid4()).split("-")[0] GCLOUD_PROJECT = os.getenv("GCLOUD_PROJECT") -TEST_BUCKET_NAME = GCLOUD_PROJECT + "-dlp-python-client-test" +TEST_BUCKET_NAME = GCLOUD_PROJECT + "-dlp-python-client-test" + UNIQUE_STRING RESOURCE_DIRECTORY = os.path.join(os.path.dirname(__file__), "resources") RESOURCE_FILE_NAMES = ["test.txt", "test.png", "harmless.txt", "accounts.txt"] -TEST_TRIGGER_ID = "test-trigger" +TEST_TRIGGER_ID = "test-trigger" + UNIQUE_STRING @pytest.fixture(scope="module") From 503e6283626f68a26ca6962479d077189d0d2224 Mon Sep 17 00:00:00 2001 From: Leah Cole Date: Fri, 3 Jan 2020 17:50:10 -0800 Subject: [PATCH 03/12] add uuids and fixtures for bq --- dlp/deid.py | 19 ++-- dlp/deid_test.py | 2 +- dlp/inspect_content.py | 7 +- dlp/inspect_content_test.py | 10 ++- dlp/jobs.py | 2 +- dlp/metadata.py | 2 +- dlp/redact.py | 9 +- dlp/redact_test.py | 2 +- dlp/risk.py | 24 ++--- dlp/risk_test.py | 169 +++++++++++++++++++++++++++++------- dlp/templates.py | 5 +- dlp/templates_test.py | 2 +- dlp/triggers.py | 2 +- 13 files changed, 188 insertions(+), 67 deletions(-) diff --git a/dlp/deid.py b/dlp/deid.py index 423e0c26c64..6ca8f86ec1a 100644 --- a/dlp/deid.py +++ b/dlp/deid.py @@ -131,7 +131,7 @@ def deidentify_with_fpe( # Construct FPE configuration dictionary crypto_replace_ffx_fpe_config = { "crypto_key": { - "kms_wrapped": {"wrapped_key": wrapped_key, "crypto_key_name": key_name} + "kms_wrapped": {"wrapped_key": wrapped_key, "crypto_key_name": key_name,} }, "common_alphabet": alphabet, } @@ -176,7 +176,12 @@ def deidentify_with_fpe( # [START dlp_reidentify_fpe] def reidentify_with_fpe( - project, string, alphabet=None, surrogate_type=None, key_name=None, wrapped_key=None + project, + string, + alphabet=None, + surrogate_type=None, + key_name=None, + wrapped_key=None, ): """Uses the Data Loss Prevention API to reidentify sensitive data in a string that was encrypted by Format Preserving Encryption (FPE). @@ -333,7 +338,7 @@ def map_data(value): try: date = datetime.strptime(value, "%m/%d/%Y") return { - "date_value": {"year": date.year, "month": date.month, "day": date.day} + "date_value": {"year": date.year, "month": date.month, "day": date.day,} } except ValueError: return {"string_value": value} @@ -438,7 +443,7 @@ def write_data(data): default=["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"], ) mask_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource." + "project", help="The Google Cloud project id to use as a parent resource.", ) mask_parser.add_argument("item", help="The string to deidentify.") mask_parser.add_argument( @@ -471,7 +476,7 @@ def write_data(data): default=["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"], ) fpe_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource." + "project", help="The Google Cloud project id to use as a parent resource.", ) fpe_parser.add_argument( "item", @@ -513,7 +518,7 @@ def write_data(data): "Encryption (FPE).", ) reid_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource." + "project", help="The Google Cloud project id to use as a parent resource.", ) reid_parser.add_argument( "item", @@ -553,7 +558,7 @@ def write_data(data): help="Deidentify dates in a CSV file by pseudorandomly shifting them.", ) date_shift_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource." + "project", help="The Google Cloud project id to use as a parent resource.", ) date_shift_parser.add_argument( "input_csv_file", diff --git a/dlp/deid_test.py b/dlp/deid_test.py index df9dae418e6..9a84a09c616 100644 --- a/dlp/deid_test.py +++ b/dlp/deid_test.py @@ -78,7 +78,7 @@ def test_deidentify_with_mask_masking_character_specified(capsys): def test_deidentify_with_mask_masking_number_specified(capsys): deid.deidentify_with_mask( - GCLOUD_PROJECT, HARMFUL_STRING, ["US_SOCIAL_SECURITY_NUMBER"], number_to_mask=7 + GCLOUD_PROJECT, HARMFUL_STRING, ["US_SOCIAL_SECURITY_NUMBER"], number_to_mask=7, ) out, _ = capsys.readouterr() diff --git a/dlp/inspect_content.py b/dlp/inspect_content.py index 0c151bf64e7..c2fa2b5af11 100644 --- a/dlp/inspect_content.py +++ b/dlp/inspect_content.py @@ -1121,10 +1121,11 @@ def callback(message): "datastore", help="Inspect files on Google Datastore." ) parser_datastore.add_argument( - "datastore_project", help="The Google Cloud project id of the target Datastore." + "datastore_project", + help="The Google Cloud project id of the target Datastore.", ) parser_datastore.add_argument( - "kind", help='The kind of the Datastore entity to inspect, e.g. "Person".' + "kind", help='The kind of the Datastore entity to inspect, e.g. "Person".', ) parser_datastore.add_argument( "topic_id", @@ -1200,7 +1201,7 @@ def callback(message): "bigquery", help="Inspect files on Google BigQuery." ) parser_bigquery.add_argument( - "bigquery_project", help="The Google Cloud project id of the target table." + "bigquery_project", help="The Google Cloud project id of the target table.", ) parser_bigquery.add_argument( "dataset_id", help="The ID of the target BigQuery dataset." diff --git a/dlp/inspect_content_test.py b/dlp/inspect_content_test.py index 7be4ac68814..cf58ee83138 100644 --- a/dlp/inspect_content_test.py +++ b/dlp/inspect_content_test.py @@ -162,7 +162,10 @@ def test_inspect_string(capsys): test_string = "My name is Gary Smith and my email is gary@example.com" inspect_content.inspect_string( - GCLOUD_PROJECT, test_string, ["FIRST_NAME", "EMAIL_ADDRESS"], include_quote=True + GCLOUD_PROJECT, + test_string, + ["FIRST_NAME", "EMAIL_ADDRESS"], + include_quote=True, ) out, _ = capsys.readouterr() @@ -214,7 +217,10 @@ def test_inspect_string_no_results(capsys): test_string = "Nothing to see here" inspect_content.inspect_string( - GCLOUD_PROJECT, test_string, ["FIRST_NAME", "EMAIL_ADDRESS"], include_quote=True + GCLOUD_PROJECT, + test_string, + ["FIRST_NAME", "EMAIL_ADDRESS"], + include_quote=True, ) out, _ = capsys.readouterr() diff --git a/dlp/jobs.py b/dlp/jobs.py index ec84efbf8f5..9bc3b916b23 100644 --- a/dlp/jobs.py +++ b/dlp/jobs.py @@ -135,7 +135,7 @@ def delete_dlp_job(project, job_name): list_parser.add_argument( "-t", "--type", - choices=["DLP_JOB_TYPE_UNSPECIFIED", "INSPECT_JOB", "RISK_ANALYSIS_JOB"], + choices=["DLP_JOB_TYPE_UNSPECIFIED", "INSPECT_JOB", "RISK_ANALYSIS_JOB",], help='The type of job. API defaults to "INSPECT"', ) diff --git a/dlp/metadata.py b/dlp/metadata.py index 81b8f5e08a4..229ffab0a0a 100644 --- a/dlp/metadata.py +++ b/dlp/metadata.py @@ -55,7 +55,7 @@ def list_info_types(language_code=None, result_filter=None): if __name__ == "__main__": parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - "--language_code", help="The BCP-47 language code to use, e.g. 'en-US'." + "--language_code", help="The BCP-47 language code to use, e.g. 'en-US'.", ) parser.add_argument( "--filter", diff --git a/dlp/redact.py b/dlp/redact.py index e3ff08ec65e..ceaa09012e9 100644 --- a/dlp/redact.py +++ b/dlp/redact.py @@ -30,7 +30,7 @@ def redact_image( - project, filename, output_filename, info_types, min_likelihood=None, mime_type=None + project, filename, output_filename, info_types, min_likelihood=None, mime_type=None, ): """Uses the Data Loss Prevention API to redact protected data in an image. Args: @@ -68,7 +68,10 @@ def redact_image( # Construct the configuration dictionary. Keys which are None may # optionally be omitted entirely. - inspect_config = {"min_likelihood": min_likelihood, "info_types": info_types} + inspect_config = { + "min_likelihood": min_likelihood, + "info_types": info_types, + } # If mime_type is not specified, guess it from the filename. if mime_type is None: @@ -121,7 +124,7 @@ def redact_image( parser.add_argument("filename", help="The path to the file to inspect.") parser.add_argument( - "output_filename", help="The path to which the redacted image will be written." + "output_filename", help="The path to which the redacted image will be written.", ) parser.add_argument( "--project", diff --git a/dlp/redact_test.py b/dlp/redact_test.py index 39875551b12..a33b655d688 100644 --- a/dlp/redact_test.py +++ b/dlp/redact_test.py @@ -36,7 +36,7 @@ def test_redact_image_file(tempdir, capsys): output_filepath = os.path.join(tempdir, "redacted.png") redact.redact_image( - GCLOUD_PROJECT, test_filepath, output_filepath, ["FIRST_NAME", "EMAIL_ADDRESS"] + GCLOUD_PROJECT, test_filepath, output_filepath, ["FIRST_NAME", "EMAIL_ADDRESS"], ) out, _ = capsys.readouterr() diff --git a/dlp/risk.py b/dlp/risk.py index 272d29768dc..05c33a4d467 100644 --- a/dlp/risk.py +++ b/dlp/risk.py @@ -66,7 +66,7 @@ def callback(message): results = job.risk_details.numerical_stats_result print( "Value Range: [{}, {}]".format( - results.min_value.integer_value, results.max_value.integer_value + results.min_value.integer_value, results.max_value.integer_value, ) ) prev_value = None @@ -674,7 +674,7 @@ def map_fields(quasi_id, info_type): numerical_parser = subparsers.add_parser("numerical", help="") numerical_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource." + "project", help="The Google Cloud project id to use as a parent resource.", ) numerical_parser.add_argument( "table_project_id", @@ -685,7 +685,7 @@ def map_fields(quasi_id, info_type): ) numerical_parser.add_argument("table_id", help="The id of the table to inspect.") numerical_parser.add_argument( - "column_name", help="The name of the column to compute risk metrics for." + "column_name", help="The name of the column to compute risk metrics for.", ) numerical_parser.add_argument( "topic_id", @@ -704,7 +704,7 @@ def map_fields(quasi_id, info_type): categorical_parser = subparsers.add_parser("categorical", help="") categorical_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource." + "project", help="The Google Cloud project id to use as a parent resource.", ) categorical_parser.add_argument( "table_project_id", @@ -715,7 +715,7 @@ def map_fields(quasi_id, info_type): ) categorical_parser.add_argument("table_id", help="The id of the table to inspect.") categorical_parser.add_argument( - "column_name", help="The name of the column to compute risk metrics for." + "column_name", help="The name of the column to compute risk metrics for.", ) categorical_parser.add_argument( "topic_id", @@ -737,7 +737,7 @@ def map_fields(quasi_id, info_type): help="Computes the k-anonymity of a column set in a Google BigQuery" "table.", ) k_anonymity_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource." + "project", help="The Google Cloud project id to use as a parent resource.", ) k_anonymity_parser.add_argument( "table_project_id", @@ -757,7 +757,7 @@ def map_fields(quasi_id, info_type): "job completion notifications.", ) k_anonymity_parser.add_argument( - "quasi_ids", nargs="+", help="A set of columns that form a composite key." + "quasi_ids", nargs="+", help="A set of columns that form a composite key.", ) k_anonymity_parser.add_argument( "--timeout", @@ -770,7 +770,7 @@ def map_fields(quasi_id, info_type): help="Computes the l-diversity of a column set in a Google BigQuery" "table.", ) l_diversity_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource." + "project", help="The Google Cloud project id to use as a parent resource.", ) l_diversity_parser.add_argument( "table_project_id", @@ -790,10 +790,10 @@ def map_fields(quasi_id, info_type): "job completion notifications.", ) l_diversity_parser.add_argument( - "sensitive_attribute", help="The column to measure l-diversity relative to." + "sensitive_attribute", help="The column to measure l-diversity relative to.", ) l_diversity_parser.add_argument( - "quasi_ids", nargs="+", help="A set of columns that form a composite key." + "quasi_ids", nargs="+", help="A set of columns that form a composite key.", ) l_diversity_parser.add_argument( "--timeout", @@ -807,7 +807,7 @@ def map_fields(quasi_id, info_type): "BigQuery table.", ) k_map_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource." + "project", help="The Google Cloud project id to use as a parent resource.", ) k_map_parser.add_argument( "table_project_id", @@ -825,7 +825,7 @@ def map_fields(quasi_id, info_type): "job completion notifications.", ) k_map_parser.add_argument( - "quasi_ids", nargs="+", help="A set of columns that form a composite key." + "quasi_ids", nargs="+", help="A set of columns that form a composite key.", ) k_map_parser.add_argument( "-t", diff --git a/dlp/risk_test.py b/dlp/risk_test.py index b4b8b94ab16..dbed70ca93a 100644 --- a/dlp/risk_test.py +++ b/dlp/risk_test.py @@ -16,6 +16,7 @@ import uuid import google.cloud.pubsub +import google.cloud.bigquery import pytest import os @@ -23,16 +24,18 @@ import risk UNIQUE_STRING = str(uuid.uuid4()).split("-")[0] -GCLOUD_PROJECT = os.environ.get('GCLOUD_PROJECT') -TABLE_PROJECT = os.environ.get('GCLOUD_PROJECT') +GCLOUD_PROJECT = os.environ.get("GCLOUD_PROJECT") +TABLE_PROJECT = os.environ.get("GCLOUD_PROJECT") TOPIC_ID = "dlp-test" + UNIQUE_STRING SUBSCRIPTION_ID = "dlp-test-subscription" + UNIQUE_STRING -DATASET_ID = "integration_tests_dlp" + UNIQUE_STRING UNIQUE_FIELD = "Name" REPEATED_FIELD = "Mystery" NUMERIC_FIELD = "Age" STRING_BOOLEAN_FIELD = "Gender" +BIGQUERY_DATASET_ID = "dlp_test_dataset" + UNIQUE_STRING +BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING +BIGQUERY_HARMFUL_TABLE_ID = "harmful" + UNIQUE_STRING # Create new custom topic/subscription @pytest.fixture(scope="module") @@ -66,13 +69,95 @@ def subscription_id(topic_id): subscriber.delete_subscription(subscription_path) +""" +@pytest.fixture(scope="module") +def bigquery_dataset_id(): + # adds bq dataset, yields the id, tears down + bigquery_client = google.cloud.bigquery.Client() + + dataset_ref = bigquery_client.dataset(BIGQUERY_DATASET_ID) + dataset = google.cloud.bigquery.Dataset(dataset_ref) + try: + dataset = bigquery_client.create_dataset(dataset) + except google.api_core.exceptions.Conflict: + dataset = bigquery_client.get_dataset(dataset) + yield BIGQUERY_DATASET_ID + bigquery_client.delete_dataset(dataset_ref, delete_contents=True) + +@pytest.fixture(scope="module") +def bigquery_table_id(bigquery_dataset_id): + bigquery_client = google.cloud.bigquery.Client() + dataset_ref = bigquery_client.dataset(bigquery +""" + + +@pytest.fixture(scope="module") +def bigquery_project(): + # Adds test Bigquery data, yields the project ID and then tears down. + + bigquery_client = google.cloud.bigquery.Client() + + dataset_ref = bigquery_client.dataset(BIGQUERY_DATASET_ID) + dataset = google.cloud.bigquery.Dataset(dataset_ref) + try: + dataset = bigquery_client.create_dataset(dataset) + except google.api_core.exceptions.Conflict: + dataset = bigquery_client.get_dataset(dataset) + table_ref = dataset_ref.table(BIGQUERY_TABLE_ID) + table = google.cloud.bigquery.Table(table_ref) + + harmful_table_ref = dataset_ref.table(BIGQUERY_HARMFUL_TABLE_ID) + harmful_table = google.cloud.bigquery.Table(harmful_table_ref) + + # DO NOT SUBMIT: trim this down once we find out what works + table.schema = ( + google.cloud.bigquery.SchemaField("Name", "STRING"), + google.cloud.bigquery.SchemaField("Comment", "STRING"), + ) + + harmful_table.schema = ( + google.cloud.bigquery.SchemaField("Name", "STRING", "REQUIRED"), + google.cloud.bigquery.SchemaField("TelephoneNumber", "STRING", "REQUIRED"), + google.cloud.bigquery.SchemaField("Mystery", "STRING", "REQUIRED"), + google.cloud.bigquery.SchemaField("Age", "INTEGER", "REQUIRED"), + google.cloud.bigquery.SchemaField("Gender", "STRING"), + google.cloud.bigquery.SchemaField("RegionCode", "STRING"), + ) + + try: + table = bigquery_client.create_table(table) + except google.api_core.exceptions.Conflict: + table = bigquery_client.get_table(table) + + try: + harmful_table = bigquery_client.create_table(harmful_table) + except google.api_core.exceptions.Conflict: + harmful_table = bigquery_client.get_table(harmful_table) + + rows_to_insert = [(u"Gary Smith", u"My email is gary@example.com")] + harmful_rows_to_insert = [ + (u"Gandalf", u"(123) 456-7890", "4231 5555 6781 9876", 27, "Male", "US"), + (u"Dumbledore", u"(313) 337-1337", "6291 8765 1095 7629", 27, "Male", "US"), + (u"Joe", u"(452) 123-1234", "3782 2288 1166 3030", 35, "Male", "US"), + (u"James", u"(567) 890-1234", "8291 3627 8250 1234", 19, "Male", "US"), + (u"Marie", u"(452) 123-1234", "8291 3627 8250 1234", 35, "Female", "US"), + (u"Carrie", u"(567) 890-1234", "2253 5218 4251 4526", 35, "Female", "US"), + ] + + bigquery_client.insert_rows(table, rows_to_insert) + bigquery_client.insert_rows(harmful_table, harmful_rows_to_insert) + yield GCLOUD_PROJECT + + bigquery_client.delete_dataset(dataset_ref, delete_contents=True) + + @flaky -def test_numerical_risk_analysis(topic_id, subscription_id, capsys): +def test_numerical_risk_analysis(topic_id, subscription_id, bigquery_project, capsys): risk.numerical_risk_analysis( GCLOUD_PROJECT, TABLE_PROJECT, - DATASET_ID, - "harmful", + BIGQUERY_DATASET_ID, + BIGQUERY_HARMFUL_TABLE_ID, NUMERIC_FIELD, topic_id, subscription_id, @@ -83,12 +168,14 @@ def test_numerical_risk_analysis(topic_id, subscription_id, capsys): @flaky -def test_categorical_risk_analysis_on_string_field(topic_id, subscription_id, capsys): +def test_categorical_risk_analysis_on_string_field( + topic_id, subscription_id, bigquery_project, capsys +): risk.categorical_risk_analysis( GCLOUD_PROJECT, TABLE_PROJECT, - DATASET_ID, - "harmful", + BIGQUERY_DATASET_ID, + BIGQUERY_HARMFUL_TABLE_ID, UNIQUE_FIELD, topic_id, subscription_id, @@ -100,12 +187,14 @@ def test_categorical_risk_analysis_on_string_field(topic_id, subscription_id, ca @flaky -def test_categorical_risk_analysis_on_number_field(topic_id, subscription_id, capsys): +def test_categorical_risk_analysis_on_number_field( + topic_id, subscription_id, bigquery_project, capsys +): risk.categorical_risk_analysis( GCLOUD_PROJECT, TABLE_PROJECT, - DATASET_ID, - "harmful", + BIGQUERY_DATASET_ID, + BIGQUERY_HARMFUL_TABLE_ID, NUMERIC_FIELD, topic_id, subscription_id, @@ -116,12 +205,14 @@ def test_categorical_risk_analysis_on_number_field(topic_id, subscription_id, ca @flaky -def test_k_anonymity_analysis_single_field(topic_id, subscription_id, capsys): +def test_k_anonymity_analysis_single_field( + topic_id, subscription_id, bigquery_project, capsys +): risk.k_anonymity_analysis( GCLOUD_PROJECT, TABLE_PROJECT, - DATASET_ID, - "harmful", + BIGQUERY_DATASET_ID, + BIGQUERY_HARMFUL_TABLE_ID, topic_id, subscription_id, [NUMERIC_FIELD], @@ -133,12 +224,14 @@ def test_k_anonymity_analysis_single_field(topic_id, subscription_id, capsys): @flaky -def test_k_anonymity_analysis_multiple_fields(topic_id, subscription_id, capsys): +def test_k_anonymity_analysis_multiple_fields( + topic_id, subscription_id, bigquery_project, capsys +): risk.k_anonymity_analysis( GCLOUD_PROJECT, TABLE_PROJECT, - DATASET_ID, - "harmful", + BIGQUERY_DATASET_ID, + BIGQUERY_HARMFUL_TABLE_ID, topic_id, subscription_id, [NUMERIC_FIELD, REPEATED_FIELD], @@ -150,12 +243,14 @@ def test_k_anonymity_analysis_multiple_fields(topic_id, subscription_id, capsys) @flaky -def test_l_diversity_analysis_single_field(topic_id, subscription_id, capsys): +def test_l_diversity_analysis_single_field( + topic_id, subscription_id, bigquery_project, capsys +): risk.l_diversity_analysis( GCLOUD_PROJECT, TABLE_PROJECT, - DATASET_ID, - "harmful", + BIGQUERY_DATASET_ID, + BIGQUERY_HARMFUL_TABLE_ID, topic_id, subscription_id, UNIQUE_FIELD, @@ -169,12 +264,14 @@ def test_l_diversity_analysis_single_field(topic_id, subscription_id, capsys): @flaky -def test_l_diversity_analysis_multiple_field(topic_id, subscription_id, capsys): +def test_l_diversity_analysis_multiple_field( + topic_id, subscription_id, bigquery_project, capsys +): risk.l_diversity_analysis( GCLOUD_PROJECT, TABLE_PROJECT, - DATASET_ID, - "harmful", + BIGQUERY_DATASET_ID, + BIGQUERY_HARMFUL_TABLE_ID, topic_id, subscription_id, UNIQUE_FIELD, @@ -188,12 +285,14 @@ def test_l_diversity_analysis_multiple_field(topic_id, subscription_id, capsys): @flaky -def test_k_map_estimate_analysis_single_field(topic_id, subscription_id, capsys): +def test_k_map_estimate_analysis_single_field( + topic_id, subscription_id, bigquery_project, capsys +): risk.k_map_estimate_analysis( GCLOUD_PROJECT, TABLE_PROJECT, - DATASET_ID, - "harmful", + BIGQUERY_DATASET_ID, + BIGQUERY_HARMFUL_TABLE_ID, topic_id, subscription_id, [NUMERIC_FIELD], @@ -207,12 +306,14 @@ def test_k_map_estimate_analysis_single_field(topic_id, subscription_id, capsys) @flaky -def test_k_map_estimate_analysis_multiple_field(topic_id, subscription_id, capsys): +def test_k_map_estimate_analysis_multiple_field( + topic_id, subscription_id, bigquery_project, capsys +): risk.k_map_estimate_analysis( GCLOUD_PROJECT, TABLE_PROJECT, - DATASET_ID, - "harmful", + BIGQUERY_DATASET_ID, + BIGQUERY_HARMFUL_TABLE_ID, topic_id, subscription_id, [NUMERIC_FIELD, STRING_BOOLEAN_FIELD], @@ -226,13 +327,15 @@ def test_k_map_estimate_analysis_multiple_field(topic_id, subscription_id, capsy @flaky -def test_k_map_estimate_analysis_quasi_ids_info_types_equal(topic_id, subscription_id): +def test_k_map_estimate_analysis_quasi_ids_info_types_equal( + topic_id, subscription_id, bigquery_project +): with pytest.raises(ValueError): risk.k_map_estimate_analysis( GCLOUD_PROJECT, TABLE_PROJECT, - DATASET_ID, - "harmful", + BIGQUERY_DATASET_ID, + BIGQUERY_HARMFUL_TABLE_ID, topic_id, subscription_id, [NUMERIC_FIELD, STRING_BOOLEAN_FIELD], diff --git a/dlp/templates.py b/dlp/templates.py index 9e29245a248..d632958b427 100644 --- a/dlp/templates.py +++ b/dlp/templates.py @@ -68,7 +68,10 @@ def create_inspect_template( "limits": {"max_findings_per_request": max_findings}, } - inspect_template = {"inspect_config": inspect_config, "display_name": display_name} + inspect_template = { + "inspect_config": inspect_config, + "display_name": display_name, + } # Convert the project id into a full resource id. parent = dlp.project_path(project) diff --git a/dlp/templates_test.py b/dlp/templates_test.py index f63400defa5..8ecf8542db6 100644 --- a/dlp/templates_test.py +++ b/dlp/templates_test.py @@ -22,7 +22,7 @@ UNIQUE_STRING = str(uuid.uuid4()).split("-")[0] GCLOUD_PROJECT = os.getenv("GCLOUD_PROJECT") -TEST_TEMPLATE_ID = "test-template"+UNIQUE_STRING +TEST_TEMPLATE_ID = "test-template" + UNIQUE_STRING def test_create_list_and_delete_template(capsys): diff --git a/dlp/triggers.py b/dlp/triggers.py index c786cf6e547..f8624f5492f 100644 --- a/dlp/triggers.py +++ b/dlp/triggers.py @@ -251,7 +251,7 @@ def delete_trigger(project, trigger_id): help="The maximum number of findings to report; 0 = no maximum.", ) parser_create.add_argument( - "--auto_populate_timespan", type=bool, help="Limit scan to new content only." + "--auto_populate_timespan", type=bool, help="Limit scan to new content only.", ) parser_list = subparsers.add_parser("list", help="List all triggers.") From e842941b8446f943a13c57b7caf6fa2090500546 Mon Sep 17 00:00:00 2001 From: Leah Cole Date: Mon, 6 Jan 2020 15:21:36 -0800 Subject: [PATCH 04/12] Add logic to delete job --- dlp/jobs_test.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/dlp/jobs_test.py b/dlp/jobs_test.py index 15417def67c..836ed348233 100644 --- a/dlp/jobs_test.py +++ b/dlp/jobs_test.py @@ -49,8 +49,13 @@ def test_job_name(): full_path = response.name # API expects only job name, not full project path job_name = full_path[full_path.rfind("/") + 1 :] - return job_name + yield job_name + # clean up job if not deleted + try: + dlp.delete_dlp_job(full_path) + except google.cloud.exceptions.NotFound: + print("Issue during teardown, missing job") def test_list_dlp_jobs(capsys): jobs.list_dlp_jobs(GCLOUD_PROJECT) From e8e30198bd458d6cda78e97803184d4f4c71412f Mon Sep 17 00:00:00 2001 From: Leah Cole Date: Mon, 6 Jan 2020 15:25:10 -0800 Subject: [PATCH 05/12] ran black --- dlp/jobs_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dlp/jobs_test.py b/dlp/jobs_test.py index 836ed348233..d95eaa42cc6 100644 --- a/dlp/jobs_test.py +++ b/dlp/jobs_test.py @@ -53,9 +53,10 @@ def test_job_name(): # clean up job if not deleted try: - dlp.delete_dlp_job(full_path) + dlp.delete_dlp_job(full_path) except google.cloud.exceptions.NotFound: - print("Issue during teardown, missing job") + print("Issue during teardown, missing job") + def test_list_dlp_jobs(capsys): jobs.list_dlp_jobs(GCLOUD_PROJECT) From bc72e88d2e2c2762f3435ee06b4fecafbf495df5 Mon Sep 17 00:00:00 2001 From: Leah Cole Date: Mon, 6 Jan 2020 15:32:19 -0800 Subject: [PATCH 06/12] Run black with line length --- dlp/deid.py | 44 +++++++++++++++------ dlp/deid_test.py | 5 ++- dlp/inspect_content.py | 14 +++++-- dlp/inspect_content_test.py | 12 ++++-- dlp/jobs.py | 16 ++++++-- dlp/metadata.py | 7 +++- dlp/quickstart.py | 8 +++- dlp/redact.py | 10 ++++- dlp/redact_test.py | 5 ++- dlp/risk.py | 77 ++++++++++++++++++++++++++----------- dlp/risk_test.py | 48 +++++++++++++++++++---- dlp/templates.py | 19 ++++++--- dlp/triggers.py | 15 ++++++-- 13 files changed, 211 insertions(+), 69 deletions(-) diff --git a/dlp/deid.py b/dlp/deid.py index 6ca8f86ec1a..b08a341dd82 100644 --- a/dlp/deid.py +++ b/dlp/deid.py @@ -46,7 +46,9 @@ def deidentify_with_mask( parent = dlp.project_path(project) # Construct inspect configuration dictionary - inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]} + inspect_config = { + "info_types": [{"name": info_type} for info_type in info_types] + } # Construct deidentify configuration dictionary deidentify_config = { @@ -131,17 +133,24 @@ def deidentify_with_fpe( # Construct FPE configuration dictionary crypto_replace_ffx_fpe_config = { "crypto_key": { - "kms_wrapped": {"wrapped_key": wrapped_key, "crypto_key_name": key_name,} + "kms_wrapped": { + "wrapped_key": wrapped_key, + "crypto_key_name": key_name, + } }, "common_alphabet": alphabet, } # Add surrogate type if surrogate_type: - crypto_replace_ffx_fpe_config["surrogate_info_type"] = {"name": surrogate_type} + crypto_replace_ffx_fpe_config["surrogate_info_type"] = { + "name": surrogate_type + } # Construct inspect configuration dictionary - inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]} + inspect_config = { + "info_types": [{"name": info_type} for info_type in info_types] + } # Construct deidentify configuration dictionary deidentify_config = { @@ -338,7 +347,11 @@ def map_data(value): try: date = datetime.strptime(value, "%m/%d/%Y") return { - "date_value": {"year": date.year, "month": date.month, "day": date.day,} + "date_value": { + "year": date.year, + "month": date.month, + "day": date.day, + } } except ValueError: return {"string_value": value} @@ -431,7 +444,8 @@ def write_data(data): mask_parser = subparsers.add_parser( "deid_mask", - help="Deidentify sensitive data in a string by masking it with a " "character.", + help="Deidentify sensitive data in a string by masking it with a " + "character.", ) mask_parser.add_argument( "--info_types", @@ -443,7 +457,8 @@ def write_data(data): default=["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"], ) mask_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource.", + "project", + help="The Google Cloud project id to use as a parent resource.", ) mask_parser.add_argument("item", help="The string to deidentify.") mask_parser.add_argument( @@ -476,11 +491,13 @@ def write_data(data): default=["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"], ) fpe_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource.", + "project", + help="The Google Cloud project id to use as a parent resource.", ) fpe_parser.add_argument( "item", - help="The string to deidentify. " "Example: string = 'My SSN is 372819127'", + help="The string to deidentify. " + "Example: string = 'My SSN is 372819127'", ) fpe_parser.add_argument( "key_name", @@ -518,11 +535,13 @@ def write_data(data): "Encryption (FPE).", ) reid_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource.", + "project", + help="The Google Cloud project id to use as a parent resource.", ) reid_parser.add_argument( "item", - help="The string to deidentify. " "Example: string = 'My SSN is 372819127'", + help="The string to deidentify. " + "Example: string = 'My SSN is 372819127'", ) reid_parser.add_argument( "surrogate_type", @@ -558,7 +577,8 @@ def write_data(data): help="Deidentify dates in a CSV file by pseudorandomly shifting them.", ) date_shift_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource.", + "project", + help="The Google Cloud project id to use as a parent resource.", ) date_shift_parser.add_argument( "input_csv_file", diff --git a/dlp/deid_test.py b/dlp/deid_test.py index 9a84a09c616..db14b5758e9 100644 --- a/dlp/deid_test.py +++ b/dlp/deid_test.py @@ -78,7 +78,10 @@ def test_deidentify_with_mask_masking_character_specified(capsys): def test_deidentify_with_mask_masking_number_specified(capsys): deid.deidentify_with_mask( - GCLOUD_PROJECT, HARMFUL_STRING, ["US_SOCIAL_SECURITY_NUMBER"], number_to_mask=7, + GCLOUD_PROJECT, + HARMFUL_STRING, + ["US_SOCIAL_SECURITY_NUMBER"], + number_to_mask=7, ) out, _ = capsys.readouterr() diff --git a/dlp/inspect_content.py b/dlp/inspect_content.py index c2fa2b5af11..336f798128b 100644 --- a/dlp/inspect_content.py +++ b/dlp/inspect_content.py @@ -220,7 +220,9 @@ def inspect_table( headers = [{"name": val} for val in data["header"]] rows = [] for row in data["rows"]: - rows.append({"values": [{"string_value": cell_val} for cell_val in row]}) + rows.append( + {"values": [{"string_value": cell_val} for cell_val in row]} + ) table = {} table["headers"] = headers @@ -978,7 +980,9 @@ def callback(message): ) parser_file = subparsers.add_parser("file", help="Inspect a local file.") - parser_file.add_argument("filename", help="The path to the file to inspect.") + parser_file.add_argument( + "filename", help="The path to the file to inspect." + ) parser_file.add_argument( "--project", help="The Google Cloud project id to use as a parent resource.", @@ -1125,7 +1129,8 @@ def callback(message): help="The Google Cloud project id of the target Datastore.", ) parser_datastore.add_argument( - "kind", help='The kind of the Datastore entity to inspect, e.g. "Person".', + "kind", + help='The kind of the Datastore entity to inspect, e.g. "Person".', ) parser_datastore.add_argument( "topic_id", @@ -1201,7 +1206,8 @@ def callback(message): "bigquery", help="Inspect files on Google BigQuery." ) parser_bigquery.add_argument( - "bigquery_project", help="The Google Cloud project id of the target table.", + "bigquery_project", + help="The Google Cloud project id of the target table.", ) parser_bigquery.add_argument( "dataset_id", help="The ID of the target BigQuery dataset." diff --git a/dlp/inspect_content_test.py b/dlp/inspect_content_test.py index cf58ee83138..e15d6f55a56 100644 --- a/dlp/inspect_content_test.py +++ b/dlp/inspect_content_test.py @@ -94,7 +94,9 @@ def subscription_id(topic_id): # Subscribes to a topic. subscriber = google.cloud.pubsub.SubscriberClient() topic_path = subscriber.topic_path(GCLOUD_PROJECT, topic_id) - subscription_path = subscriber.subscription_path(GCLOUD_PROJECT, SUBSCRIPTION_ID) + subscription_path = subscriber.subscription_path( + GCLOUD_PROJECT, SUBSCRIPTION_ID + ) try: subscriber.create_subscription(subscription_path, topic_path) except google.api_core.exceptions.AlreadyExists: @@ -329,7 +331,9 @@ def test_inspect_gcs_file_with_custom_info_types( @flaky -def test_inspect_gcs_file_no_results(bucket, topic_id, subscription_id, capsys): +def test_inspect_gcs_file_no_results( + bucket, topic_id, subscription_id, capsys +): inspect_content.inspect_gcs_file( GCLOUD_PROJECT, bucket.name, @@ -376,7 +380,9 @@ def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys): @flaky -def test_inspect_datastore(datastore_project, topic_id, subscription_id, capsys): +def test_inspect_datastore( + datastore_project, topic_id, subscription_id, capsys +): @eventually_consistent.call def _(): inspect_content.inspect_datastore( diff --git a/dlp/jobs.py b/dlp/jobs.py index 9bc3b916b23..882d4ad71c3 100644 --- a/dlp/jobs.py +++ b/dlp/jobs.py @@ -122,7 +122,8 @@ def delete_dlp_job(project, job_name): list_parser = subparsers.add_parser( "list", - help="List Data Loss Prevention API jobs corresponding to a given " "filter.", + help="List Data Loss Prevention API jobs corresponding to a given " + "filter.", ) list_parser.add_argument( "project", help="The project id to use as a parent resource." @@ -135,7 +136,11 @@ def delete_dlp_job(project, job_name): list_parser.add_argument( "-t", "--type", - choices=["DLP_JOB_TYPE_UNSPECIFIED", "INSPECT_JOB", "RISK_ANALYSIS_JOB",], + choices=[ + "DLP_JOB_TYPE_UNSPECIFIED", + "INSPECT_JOB", + "RISK_ANALYSIS_JOB", + ], help='The type of job. API defaults to "INSPECT"', ) @@ -147,12 +152,15 @@ def delete_dlp_job(project, job_name): ) delete_parser.add_argument( "job_name", - help="The name of the DlpJob resource to be deleted. " "Example: X-#####", + help="The name of the DlpJob resource to be deleted. " + "Example: X-#####", ) args = parser.parse_args() if args.content == "list": - list_dlp_jobs(args.project, filter_string=args.filter, job_type=args.type) + list_dlp_jobs( + args.project, filter_string=args.filter, job_type=args.type + ) elif args.content == "delete": delete_dlp_job(args.project, args.job_name) diff --git a/dlp/metadata.py b/dlp/metadata.py index 229ffab0a0a..5bf1c0cb9d2 100644 --- a/dlp/metadata.py +++ b/dlp/metadata.py @@ -55,7 +55,8 @@ def list_info_types(language_code=None, result_filter=None): if __name__ == "__main__": parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - "--language_code", help="The BCP-47 language code to use, e.g. 'en-US'.", + "--language_code", + help="The BCP-47 language code to use, e.g. 'en-US'.", ) parser.add_argument( "--filter", @@ -65,4 +66,6 @@ def list_info_types(language_code=None, result_filter=None): args = parser.parse_args() - list_info_types(language_code=args.language_code, result_filter=args.filter) + list_info_types( + language_code=args.language_code, result_filter=args.filter + ) diff --git a/dlp/quickstart.py b/dlp/quickstart.py index 2cc0f144267..1b12a83da1d 100644 --- a/dlp/quickstart.py +++ b/dlp/quickstart.py @@ -74,7 +74,9 @@ def quickstart(project_id): print("Info type: {}".format(finding.info_type.name)) # Convert likelihood value to string respresentation. likelihood = ( - google.cloud.dlp.types.Finding.DESCRIPTOR.fields_by_name["likelihood"] + google.cloud.dlp.types.Finding.DESCRIPTOR.fields_by_name[ + "likelihood" + ] .enum_type.values_by_number[finding.likelihood] .name ) @@ -86,7 +88,9 @@ def quickstart(project_id): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("project_id", help="Enter your GCP project id.", type=str) + parser.add_argument( + "project_id", help="Enter your GCP project id.", type=str + ) args = parser.parse_args() if len(sys.argv) == 1: parser.print_usage() diff --git a/dlp/redact.py b/dlp/redact.py index ceaa09012e9..ad1d866d6d6 100644 --- a/dlp/redact.py +++ b/dlp/redact.py @@ -30,7 +30,12 @@ def redact_image( - project, filename, output_filename, info_types, min_likelihood=None, mime_type=None, + project, + filename, + output_filename, + info_types, + min_likelihood=None, + mime_type=None, ): """Uses the Data Loss Prevention API to redact protected data in an image. Args: @@ -124,7 +129,8 @@ def redact_image( parser.add_argument("filename", help="The path to the file to inspect.") parser.add_argument( - "output_filename", help="The path to which the redacted image will be written.", + "output_filename", + help="The path to which the redacted image will be written.", ) parser.add_argument( "--project", diff --git a/dlp/redact_test.py b/dlp/redact_test.py index a33b655d688..dd9a887d23d 100644 --- a/dlp/redact_test.py +++ b/dlp/redact_test.py @@ -36,7 +36,10 @@ def test_redact_image_file(tempdir, capsys): output_filepath = os.path.join(tempdir, "redacted.png") redact.redact_image( - GCLOUD_PROJECT, test_filepath, output_filepath, ["FIRST_NAME", "EMAIL_ADDRESS"], + GCLOUD_PROJECT, + test_filepath, + output_filepath, + ["FIRST_NAME", "EMAIL_ADDRESS"], ) out, _ = capsys.readouterr() diff --git a/dlp/risk.py b/dlp/risk.py index 05c33a4d467..66f17179321 100644 --- a/dlp/risk.py +++ b/dlp/risk.py @@ -66,7 +66,8 @@ def callback(message): results = job.risk_details.numerical_stats_result print( "Value Range: [{}, {}]".format( - results.min_value.integer_value, results.max_value.integer_value, + results.min_value.integer_value, + results.max_value.integer_value, ) ) prev_value = None @@ -99,7 +100,9 @@ def callback(message): # Configure risk analysis job # Give the name of the numeric column to compute risk metrics for risk_job = { - "privacy_metric": {"numerical_stats_config": {"field": {"name": column_name}}}, + "privacy_metric": { + "numerical_stats_config": {"field": {"name": column_name}} + }, "source_table": source_table, "actions": actions, } @@ -444,7 +447,9 @@ def callback(message): ) ) print( - " Class size: {}".format(value_bucket.equivalence_class_size) + " Class size: {}".format( + value_bucket.equivalence_class_size + ) ) for value in value_bucket.top_sensitive_values: print( @@ -674,7 +679,8 @@ def map_fields(quasi_id, info_type): numerical_parser = subparsers.add_parser("numerical", help="") numerical_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource.", + "project", + help="The Google Cloud project id to use as a parent resource.", ) numerical_parser.add_argument( "table_project_id", @@ -683,9 +689,12 @@ def map_fields(quasi_id, info_type): numerical_parser.add_argument( "dataset_id", help="The id of the dataset to inspect." ) - numerical_parser.add_argument("table_id", help="The id of the table to inspect.") numerical_parser.add_argument( - "column_name", help="The name of the column to compute risk metrics for.", + "table_id", help="The id of the table to inspect." + ) + numerical_parser.add_argument( + "column_name", + help="The name of the column to compute risk metrics for.", ) numerical_parser.add_argument( "topic_id", @@ -704,7 +713,8 @@ def map_fields(quasi_id, info_type): categorical_parser = subparsers.add_parser("categorical", help="") categorical_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource.", + "project", + help="The Google Cloud project id to use as a parent resource.", ) categorical_parser.add_argument( "table_project_id", @@ -713,9 +723,12 @@ def map_fields(quasi_id, info_type): categorical_parser.add_argument( "dataset_id", help="The id of the dataset to inspect." ) - categorical_parser.add_argument("table_id", help="The id of the table to inspect.") categorical_parser.add_argument( - "column_name", help="The name of the column to compute risk metrics for.", + "table_id", help="The id of the table to inspect." + ) + categorical_parser.add_argument( + "column_name", + help="The name of the column to compute risk metrics for.", ) categorical_parser.add_argument( "topic_id", @@ -734,10 +747,12 @@ def map_fields(quasi_id, info_type): k_anonymity_parser = subparsers.add_parser( "k_anonymity", - help="Computes the k-anonymity of a column set in a Google BigQuery" "table.", + help="Computes the k-anonymity of a column set in a Google BigQuery" + "table.", ) k_anonymity_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource.", + "project", + help="The Google Cloud project id to use as a parent resource.", ) k_anonymity_parser.add_argument( "table_project_id", @@ -746,7 +761,9 @@ def map_fields(quasi_id, info_type): k_anonymity_parser.add_argument( "dataset_id", help="The id of the dataset to inspect." ) - k_anonymity_parser.add_argument("table_id", help="The id of the table to inspect.") + k_anonymity_parser.add_argument( + "table_id", help="The id of the table to inspect." + ) k_anonymity_parser.add_argument( "topic_id", help="The name of the Pub/Sub topic to notify once the job completes.", @@ -757,7 +774,9 @@ def map_fields(quasi_id, info_type): "job completion notifications.", ) k_anonymity_parser.add_argument( - "quasi_ids", nargs="+", help="A set of columns that form a composite key.", + "quasi_ids", + nargs="+", + help="A set of columns that form a composite key.", ) k_anonymity_parser.add_argument( "--timeout", @@ -767,10 +786,12 @@ def map_fields(quasi_id, info_type): l_diversity_parser = subparsers.add_parser( "l_diversity", - help="Computes the l-diversity of a column set in a Google BigQuery" "table.", + help="Computes the l-diversity of a column set in a Google BigQuery" + "table.", ) l_diversity_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource.", + "project", + help="The Google Cloud project id to use as a parent resource.", ) l_diversity_parser.add_argument( "table_project_id", @@ -779,7 +800,9 @@ def map_fields(quasi_id, info_type): l_diversity_parser.add_argument( "dataset_id", help="The id of the dataset to inspect." ) - l_diversity_parser.add_argument("table_id", help="The id of the table to inspect.") + l_diversity_parser.add_argument( + "table_id", help="The id of the table to inspect." + ) l_diversity_parser.add_argument( "topic_id", help="The name of the Pub/Sub topic to notify once the job completes.", @@ -790,10 +813,13 @@ def map_fields(quasi_id, info_type): "job completion notifications.", ) l_diversity_parser.add_argument( - "sensitive_attribute", help="The column to measure l-diversity relative to.", + "sensitive_attribute", + help="The column to measure l-diversity relative to.", ) l_diversity_parser.add_argument( - "quasi_ids", nargs="+", help="A set of columns that form a composite key.", + "quasi_ids", + nargs="+", + help="A set of columns that form a composite key.", ) l_diversity_parser.add_argument( "--timeout", @@ -807,14 +833,19 @@ def map_fields(quasi_id, info_type): "BigQuery table.", ) k_map_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource.", + "project", + help="The Google Cloud project id to use as a parent resource.", ) k_map_parser.add_argument( "table_project_id", help="The Google Cloud project id where the BigQuery table is stored.", ) - k_map_parser.add_argument("dataset_id", help="The id of the dataset to inspect.") - k_map_parser.add_argument("table_id", help="The id of the table to inspect.") + k_map_parser.add_argument( + "dataset_id", help="The id of the dataset to inspect." + ) + k_map_parser.add_argument( + "table_id", help="The id of the table to inspect." + ) k_map_parser.add_argument( "topic_id", help="The name of the Pub/Sub topic to notify once the job completes.", @@ -825,7 +856,9 @@ def map_fields(quasi_id, info_type): "job completion notifications.", ) k_map_parser.add_argument( - "quasi_ids", nargs="+", help="A set of columns that form a composite key.", + "quasi_ids", + nargs="+", + help="A set of columns that form a composite key.", ) k_map_parser.add_argument( "-t", diff --git a/dlp/risk_test.py b/dlp/risk_test.py index dbed70ca93a..064e13b3e29 100644 --- a/dlp/risk_test.py +++ b/dlp/risk_test.py @@ -58,7 +58,9 @@ def subscription_id(topic_id): # Subscribes to a topic. subscriber = google.cloud.pubsub.SubscriberClient() topic_path = subscriber.topic_path(GCLOUD_PROJECT, topic_id) - subscription_path = subscriber.subscription_path(GCLOUD_PROJECT, SUBSCRIPTION_ID) + subscription_path = subscriber.subscription_path( + GCLOUD_PROJECT, SUBSCRIPTION_ID + ) try: subscriber.create_subscription(subscription_path, topic_path) except google.api_core.exceptions.AlreadyExists: @@ -117,7 +119,9 @@ def bigquery_project(): harmful_table.schema = ( google.cloud.bigquery.SchemaField("Name", "STRING", "REQUIRED"), - google.cloud.bigquery.SchemaField("TelephoneNumber", "STRING", "REQUIRED"), + google.cloud.bigquery.SchemaField( + "TelephoneNumber", "STRING", "REQUIRED" + ), google.cloud.bigquery.SchemaField("Mystery", "STRING", "REQUIRED"), google.cloud.bigquery.SchemaField("Age", "INTEGER", "REQUIRED"), google.cloud.bigquery.SchemaField("Gender", "STRING"), @@ -136,12 +140,40 @@ def bigquery_project(): rows_to_insert = [(u"Gary Smith", u"My email is gary@example.com")] harmful_rows_to_insert = [ - (u"Gandalf", u"(123) 456-7890", "4231 5555 6781 9876", 27, "Male", "US"), - (u"Dumbledore", u"(313) 337-1337", "6291 8765 1095 7629", 27, "Male", "US"), + ( + u"Gandalf", + u"(123) 456-7890", + "4231 5555 6781 9876", + 27, + "Male", + "US", + ), + ( + u"Dumbledore", + u"(313) 337-1337", + "6291 8765 1095 7629", + 27, + "Male", + "US", + ), (u"Joe", u"(452) 123-1234", "3782 2288 1166 3030", 35, "Male", "US"), (u"James", u"(567) 890-1234", "8291 3627 8250 1234", 19, "Male", "US"), - (u"Marie", u"(452) 123-1234", "8291 3627 8250 1234", 35, "Female", "US"), - (u"Carrie", u"(567) 890-1234", "2253 5218 4251 4526", 35, "Female", "US"), + ( + u"Marie", + u"(452) 123-1234", + "8291 3627 8250 1234", + 35, + "Female", + "US", + ), + ( + u"Carrie", + u"(567) 890-1234", + "2253 5218 4251 4526", + 35, + "Female", + "US", + ), ] bigquery_client.insert_rows(table, rows_to_insert) @@ -152,7 +184,9 @@ def bigquery_project(): @flaky -def test_numerical_risk_analysis(topic_id, subscription_id, bigquery_project, capsys): +def test_numerical_risk_analysis( + topic_id, subscription_id, bigquery_project, capsys +): risk.numerical_risk_analysis( GCLOUD_PROJECT, TABLE_PROJECT, diff --git a/dlp/templates.py b/dlp/templates.py index d632958b427..5f03b596fc3 100644 --- a/dlp/templates.py +++ b/dlp/templates.py @@ -117,12 +117,18 @@ def human_readable_time(timestamp): print("Template {}:".format(template.name)) if template.display_name: print(" Display Name: {}".format(template.display_name)) - print(" Created: {}".format(human_readable_time(template.create_time))) - print(" Updated: {}".format(human_readable_time(template.update_time))) + print( + " Created: {}".format(human_readable_time(template.create_time)) + ) + print( + " Updated: {}".format(human_readable_time(template.update_time)) + ) config = template.inspect_config print( - " InfoTypes: {}".format(", ".join([it.name for it in config.info_types])) + " InfoTypes: {}".format( + ", ".join([it.name for it in config.info_types]) + ) ) print(" Minimum likelihood: {}".format(config.min_likelihood)) print(" Include quotes: {}".format(config.include_quote)) @@ -179,7 +185,8 @@ def delete_inspect_template(project, template_id): parser_create = subparsers.add_parser("create", help="Create a template.") parser_create.add_argument( "--template_id", - help="The id of the template. If omitted, an id will be randomly " "generated", + help="The id of the template. If omitted, an id will be randomly " + "generated", ) parser_create.add_argument( "--display_name", help="The optional display name of the template." @@ -232,7 +239,9 @@ def delete_inspect_template(project, template_id): ) parser_delete = subparsers.add_parser("delete", help="Delete a template.") - parser_delete.add_argument("template_id", help="The id of the template to delete.") + parser_delete.add_argument( + "template_id", help="The id of the template to delete." + ) parser_delete.add_argument( "--project", help="The Google Cloud project id to use as a parent resource.", diff --git a/dlp/triggers.py b/dlp/triggers.py index f8624f5492f..0c2b0bb4e29 100644 --- a/dlp/triggers.py +++ b/dlp/triggers.py @@ -92,7 +92,9 @@ def create_trigger( # Construct the schedule definition: schedule = { - "recurrence_period_duration": {"seconds": scan_period_days * 60 * 60 * 24} + "recurrence_period_duration": { + "seconds": scan_period_days * 60 * 60 * 24 + } } # Construct the trigger definition. @@ -210,7 +212,8 @@ def delete_trigger(project, trigger_id): ) parser_create.add_argument( "--trigger_id", - help="The id of the trigger. If omitted, an id will be randomly " "generated", + help="The id of the trigger. If omitted, an id will be randomly " + "generated", ) parser_create.add_argument( "--display_name", help="The optional display name of the trigger." @@ -251,7 +254,9 @@ def delete_trigger(project, trigger_id): help="The maximum number of findings to report; 0 = no maximum.", ) parser_create.add_argument( - "--auto_populate_timespan", type=bool, help="Limit scan to new content only.", + "--auto_populate_timespan", + type=bool, + help="Limit scan to new content only.", ) parser_list = subparsers.add_parser("list", help="List all triggers.") @@ -262,7 +267,9 @@ def delete_trigger(project, trigger_id): ) parser_delete = subparsers.add_parser("delete", help="Delete a trigger.") - parser_delete.add_argument("trigger_id", help="The id of the trigger to delete.") + parser_delete.add_argument( + "trigger_id", help="The id of the trigger to delete." + ) parser_delete.add_argument( "--project", help="The Google Cloud project id to use as a parent resource.", From ac8b39e5b3c60996cc9fbb63d3d57e1cd72fdce9 Mon Sep 17 00:00:00 2001 From: Leah Cole Date: Mon, 6 Jan 2020 16:05:55 -0800 Subject: [PATCH 07/12] Add utf encoding for python 2 tests --- dlp/metadata.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dlp/metadata.py b/dlp/metadata.py index 5bf1c0cb9d2..7a65941d622 100644 --- a/dlp/metadata.py +++ b/dlp/metadata.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- # Copyright 2017 Google Inc. # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -43,7 +44,7 @@ def list_info_types(language_code=None, result_filter=None): print("Info types:") for info_type in response.info_types: print( - "{name}: {display_name}".format( + u"{name}: {display_name}".format( name=info_type.name, display_name=info_type.display_name ) ) From ef0a54c6893aab1eda826bc943885a482a9b590d Mon Sep 17 00:00:00 2001 From: Leah Cole Date: Tue, 4 Feb 2020 15:09:00 -0800 Subject: [PATCH 08/12] Add skips for now --- dlp/jobs_test.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/dlp/jobs_test.py b/dlp/jobs_test.py index d95eaa42cc6..eed887a6264 100644 --- a/dlp/jobs_test.py +++ b/dlp/jobs_test.py @@ -14,6 +14,8 @@ import os +import google.api_core.exceptions + import pytest import jobs @@ -25,10 +27,9 @@ TEST_TABLE_ID = "bikeshare_trips" -@pytest.fixture(scope="session") +@pytest.fixture(scope="module") def test_job_name(): import google.cloud.dlp - dlp = google.cloud.dlp_v2.DlpServiceClient() parent = dlp.project_path(GCLOUD_PROJECT) @@ -49,22 +50,28 @@ def test_job_name(): full_path = response.name # API expects only job name, not full project path job_name = full_path[full_path.rfind("/") + 1 :] + print("job") + print(job_name) + print(response.state) yield job_name # clean up job if not deleted try: dlp.delete_dlp_job(full_path) - except google.cloud.exceptions.NotFound: + except google.api_core.exceptions.NotFound: print("Issue during teardown, missing job") - -def test_list_dlp_jobs(capsys): +@pytest.mark.skip(reason='investigating possible api bug') +def test_list_dlp_jobs(test_job_name, capsys): + print(test_job_name) jobs.list_dlp_jobs(GCLOUD_PROJECT) out, _ = capsys.readouterr() assert "Job: projects/" in out +@pytest.mark.skip(reason='investigating possible api bug') +def test_list_dlp_jobs(test_job_name, capsys): def test_list_dlp_jobs_with_filter(capsys): jobs.list_dlp_jobs(GCLOUD_PROJECT, filter_string="state=DONE") @@ -72,6 +79,8 @@ def test_list_dlp_jobs_with_filter(capsys): assert "Job: projects/" in out +@pytest.mark.skip(reason='investigating possible api bug') +def test_list_dlp_jobs(test_job_name, capsys): def test_list_dlp_jobs_with_job_type(capsys): jobs.list_dlp_jobs(GCLOUD_PROJECT, job_type="INSPECT_JOB") From cd374afe88705e3474812d11a3e183532b3650c1 Mon Sep 17 00:00:00 2001 From: Leah Cole Date: Tue, 4 Feb 2020 15:21:51 -0800 Subject: [PATCH 09/12] Ran black --- dlp/deid.py | 44 ++++++--------------- dlp/deid_test.py | 5 +-- dlp/inspect_content.py | 14 ++----- dlp/inspect_content_test.py | 12 ++---- dlp/jobs.py | 16 ++------ dlp/jobs_test.py | 10 ++--- dlp/metadata.py | 7 +--- dlp/quickstart.py | 8 +--- dlp/redact.py | 10 +---- dlp/redact_test.py | 5 +-- dlp/risk.py | 77 +++++++++++-------------------------- dlp/risk_test.py | 48 ++++------------------- dlp/templates.py | 19 +++------ dlp/triggers.py | 15 ++------ 14 files changed, 74 insertions(+), 216 deletions(-) diff --git a/dlp/deid.py b/dlp/deid.py index b08a341dd82..6ca8f86ec1a 100644 --- a/dlp/deid.py +++ b/dlp/deid.py @@ -46,9 +46,7 @@ def deidentify_with_mask( parent = dlp.project_path(project) # Construct inspect configuration dictionary - inspect_config = { - "info_types": [{"name": info_type} for info_type in info_types] - } + inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]} # Construct deidentify configuration dictionary deidentify_config = { @@ -133,24 +131,17 @@ def deidentify_with_fpe( # Construct FPE configuration dictionary crypto_replace_ffx_fpe_config = { "crypto_key": { - "kms_wrapped": { - "wrapped_key": wrapped_key, - "crypto_key_name": key_name, - } + "kms_wrapped": {"wrapped_key": wrapped_key, "crypto_key_name": key_name,} }, "common_alphabet": alphabet, } # Add surrogate type if surrogate_type: - crypto_replace_ffx_fpe_config["surrogate_info_type"] = { - "name": surrogate_type - } + crypto_replace_ffx_fpe_config["surrogate_info_type"] = {"name": surrogate_type} # Construct inspect configuration dictionary - inspect_config = { - "info_types": [{"name": info_type} for info_type in info_types] - } + inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]} # Construct deidentify configuration dictionary deidentify_config = { @@ -347,11 +338,7 @@ def map_data(value): try: date = datetime.strptime(value, "%m/%d/%Y") return { - "date_value": { - "year": date.year, - "month": date.month, - "day": date.day, - } + "date_value": {"year": date.year, "month": date.month, "day": date.day,} } except ValueError: return {"string_value": value} @@ -444,8 +431,7 @@ def write_data(data): mask_parser = subparsers.add_parser( "deid_mask", - help="Deidentify sensitive data in a string by masking it with a " - "character.", + help="Deidentify sensitive data in a string by masking it with a " "character.", ) mask_parser.add_argument( "--info_types", @@ -457,8 +443,7 @@ def write_data(data): default=["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"], ) mask_parser.add_argument( - "project", - help="The Google Cloud project id to use as a parent resource.", + "project", help="The Google Cloud project id to use as a parent resource.", ) mask_parser.add_argument("item", help="The string to deidentify.") mask_parser.add_argument( @@ -491,13 +476,11 @@ def write_data(data): default=["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"], ) fpe_parser.add_argument( - "project", - help="The Google Cloud project id to use as a parent resource.", + "project", help="The Google Cloud project id to use as a parent resource.", ) fpe_parser.add_argument( "item", - help="The string to deidentify. " - "Example: string = 'My SSN is 372819127'", + help="The string to deidentify. " "Example: string = 'My SSN is 372819127'", ) fpe_parser.add_argument( "key_name", @@ -535,13 +518,11 @@ def write_data(data): "Encryption (FPE).", ) reid_parser.add_argument( - "project", - help="The Google Cloud project id to use as a parent resource.", + "project", help="The Google Cloud project id to use as a parent resource.", ) reid_parser.add_argument( "item", - help="The string to deidentify. " - "Example: string = 'My SSN is 372819127'", + help="The string to deidentify. " "Example: string = 'My SSN is 372819127'", ) reid_parser.add_argument( "surrogate_type", @@ -577,8 +558,7 @@ def write_data(data): help="Deidentify dates in a CSV file by pseudorandomly shifting them.", ) date_shift_parser.add_argument( - "project", - help="The Google Cloud project id to use as a parent resource.", + "project", help="The Google Cloud project id to use as a parent resource.", ) date_shift_parser.add_argument( "input_csv_file", diff --git a/dlp/deid_test.py b/dlp/deid_test.py index db14b5758e9..9a84a09c616 100644 --- a/dlp/deid_test.py +++ b/dlp/deid_test.py @@ -78,10 +78,7 @@ def test_deidentify_with_mask_masking_character_specified(capsys): def test_deidentify_with_mask_masking_number_specified(capsys): deid.deidentify_with_mask( - GCLOUD_PROJECT, - HARMFUL_STRING, - ["US_SOCIAL_SECURITY_NUMBER"], - number_to_mask=7, + GCLOUD_PROJECT, HARMFUL_STRING, ["US_SOCIAL_SECURITY_NUMBER"], number_to_mask=7, ) out, _ = capsys.readouterr() diff --git a/dlp/inspect_content.py b/dlp/inspect_content.py index 336f798128b..c2fa2b5af11 100644 --- a/dlp/inspect_content.py +++ b/dlp/inspect_content.py @@ -220,9 +220,7 @@ def inspect_table( headers = [{"name": val} for val in data["header"]] rows = [] for row in data["rows"]: - rows.append( - {"values": [{"string_value": cell_val} for cell_val in row]} - ) + rows.append({"values": [{"string_value": cell_val} for cell_val in row]}) table = {} table["headers"] = headers @@ -980,9 +978,7 @@ def callback(message): ) parser_file = subparsers.add_parser("file", help="Inspect a local file.") - parser_file.add_argument( - "filename", help="The path to the file to inspect." - ) + parser_file.add_argument("filename", help="The path to the file to inspect.") parser_file.add_argument( "--project", help="The Google Cloud project id to use as a parent resource.", @@ -1129,8 +1125,7 @@ def callback(message): help="The Google Cloud project id of the target Datastore.", ) parser_datastore.add_argument( - "kind", - help='The kind of the Datastore entity to inspect, e.g. "Person".', + "kind", help='The kind of the Datastore entity to inspect, e.g. "Person".', ) parser_datastore.add_argument( "topic_id", @@ -1206,8 +1201,7 @@ def callback(message): "bigquery", help="Inspect files on Google BigQuery." ) parser_bigquery.add_argument( - "bigquery_project", - help="The Google Cloud project id of the target table.", + "bigquery_project", help="The Google Cloud project id of the target table.", ) parser_bigquery.add_argument( "dataset_id", help="The ID of the target BigQuery dataset." diff --git a/dlp/inspect_content_test.py b/dlp/inspect_content_test.py index e15d6f55a56..cf58ee83138 100644 --- a/dlp/inspect_content_test.py +++ b/dlp/inspect_content_test.py @@ -94,9 +94,7 @@ def subscription_id(topic_id): # Subscribes to a topic. subscriber = google.cloud.pubsub.SubscriberClient() topic_path = subscriber.topic_path(GCLOUD_PROJECT, topic_id) - subscription_path = subscriber.subscription_path( - GCLOUD_PROJECT, SUBSCRIPTION_ID - ) + subscription_path = subscriber.subscription_path(GCLOUD_PROJECT, SUBSCRIPTION_ID) try: subscriber.create_subscription(subscription_path, topic_path) except google.api_core.exceptions.AlreadyExists: @@ -331,9 +329,7 @@ def test_inspect_gcs_file_with_custom_info_types( @flaky -def test_inspect_gcs_file_no_results( - bucket, topic_id, subscription_id, capsys -): +def test_inspect_gcs_file_no_results(bucket, topic_id, subscription_id, capsys): inspect_content.inspect_gcs_file( GCLOUD_PROJECT, bucket.name, @@ -380,9 +376,7 @@ def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys): @flaky -def test_inspect_datastore( - datastore_project, topic_id, subscription_id, capsys -): +def test_inspect_datastore(datastore_project, topic_id, subscription_id, capsys): @eventually_consistent.call def _(): inspect_content.inspect_datastore( diff --git a/dlp/jobs.py b/dlp/jobs.py index 882d4ad71c3..9bc3b916b23 100644 --- a/dlp/jobs.py +++ b/dlp/jobs.py @@ -122,8 +122,7 @@ def delete_dlp_job(project, job_name): list_parser = subparsers.add_parser( "list", - help="List Data Loss Prevention API jobs corresponding to a given " - "filter.", + help="List Data Loss Prevention API jobs corresponding to a given " "filter.", ) list_parser.add_argument( "project", help="The project id to use as a parent resource." @@ -136,11 +135,7 @@ def delete_dlp_job(project, job_name): list_parser.add_argument( "-t", "--type", - choices=[ - "DLP_JOB_TYPE_UNSPECIFIED", - "INSPECT_JOB", - "RISK_ANALYSIS_JOB", - ], + choices=["DLP_JOB_TYPE_UNSPECIFIED", "INSPECT_JOB", "RISK_ANALYSIS_JOB",], help='The type of job. API defaults to "INSPECT"', ) @@ -152,15 +147,12 @@ def delete_dlp_job(project, job_name): ) delete_parser.add_argument( "job_name", - help="The name of the DlpJob resource to be deleted. " - "Example: X-#####", + help="The name of the DlpJob resource to be deleted. " "Example: X-#####", ) args = parser.parse_args() if args.content == "list": - list_dlp_jobs( - args.project, filter_string=args.filter, job_type=args.type - ) + list_dlp_jobs(args.project, filter_string=args.filter, job_type=args.type) elif args.content == "delete": delete_dlp_job(args.project, args.job_name) diff --git a/dlp/jobs_test.py b/dlp/jobs_test.py index eed887a6264..56556337577 100644 --- a/dlp/jobs_test.py +++ b/dlp/jobs_test.py @@ -30,6 +30,7 @@ @pytest.fixture(scope="module") def test_job_name(): import google.cloud.dlp + dlp = google.cloud.dlp_v2.DlpServiceClient() parent = dlp.project_path(GCLOUD_PROJECT) @@ -61,7 +62,8 @@ def test_job_name(): except google.api_core.exceptions.NotFound: print("Issue during teardown, missing job") -@pytest.mark.skip(reason='investigating possible api bug') + +@pytest.mark.skip(reason="investigating possible api bug") def test_list_dlp_jobs(test_job_name, capsys): print(test_job_name) jobs.list_dlp_jobs(GCLOUD_PROJECT) @@ -70,8 +72,7 @@ def test_list_dlp_jobs(test_job_name, capsys): assert "Job: projects/" in out -@pytest.mark.skip(reason='investigating possible api bug') -def test_list_dlp_jobs(test_job_name, capsys): +@pytest.mark.skip(reason="investigating possible api bug") def test_list_dlp_jobs_with_filter(capsys): jobs.list_dlp_jobs(GCLOUD_PROJECT, filter_string="state=DONE") @@ -79,8 +80,7 @@ def test_list_dlp_jobs_with_filter(capsys): assert "Job: projects/" in out -@pytest.mark.skip(reason='investigating possible api bug') -def test_list_dlp_jobs(test_job_name, capsys): +@pytest.mark.skip(reason="investigating possible api bug") def test_list_dlp_jobs_with_job_type(capsys): jobs.list_dlp_jobs(GCLOUD_PROJECT, job_type="INSPECT_JOB") diff --git a/dlp/metadata.py b/dlp/metadata.py index 7a65941d622..f6d46a41246 100644 --- a/dlp/metadata.py +++ b/dlp/metadata.py @@ -56,8 +56,7 @@ def list_info_types(language_code=None, result_filter=None): if __name__ == "__main__": parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - "--language_code", - help="The BCP-47 language code to use, e.g. 'en-US'.", + "--language_code", help="The BCP-47 language code to use, e.g. 'en-US'.", ) parser.add_argument( "--filter", @@ -67,6 +66,4 @@ def list_info_types(language_code=None, result_filter=None): args = parser.parse_args() - list_info_types( - language_code=args.language_code, result_filter=args.filter - ) + list_info_types(language_code=args.language_code, result_filter=args.filter) diff --git a/dlp/quickstart.py b/dlp/quickstart.py index 1b12a83da1d..2cc0f144267 100644 --- a/dlp/quickstart.py +++ b/dlp/quickstart.py @@ -74,9 +74,7 @@ def quickstart(project_id): print("Info type: {}".format(finding.info_type.name)) # Convert likelihood value to string respresentation. likelihood = ( - google.cloud.dlp.types.Finding.DESCRIPTOR.fields_by_name[ - "likelihood" - ] + google.cloud.dlp.types.Finding.DESCRIPTOR.fields_by_name["likelihood"] .enum_type.values_by_number[finding.likelihood] .name ) @@ -88,9 +86,7 @@ def quickstart(project_id): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument( - "project_id", help="Enter your GCP project id.", type=str - ) + parser.add_argument("project_id", help="Enter your GCP project id.", type=str) args = parser.parse_args() if len(sys.argv) == 1: parser.print_usage() diff --git a/dlp/redact.py b/dlp/redact.py index ad1d866d6d6..ceaa09012e9 100644 --- a/dlp/redact.py +++ b/dlp/redact.py @@ -30,12 +30,7 @@ def redact_image( - project, - filename, - output_filename, - info_types, - min_likelihood=None, - mime_type=None, + project, filename, output_filename, info_types, min_likelihood=None, mime_type=None, ): """Uses the Data Loss Prevention API to redact protected data in an image. Args: @@ -129,8 +124,7 @@ def redact_image( parser.add_argument("filename", help="The path to the file to inspect.") parser.add_argument( - "output_filename", - help="The path to which the redacted image will be written.", + "output_filename", help="The path to which the redacted image will be written.", ) parser.add_argument( "--project", diff --git a/dlp/redact_test.py b/dlp/redact_test.py index dd9a887d23d..a33b655d688 100644 --- a/dlp/redact_test.py +++ b/dlp/redact_test.py @@ -36,10 +36,7 @@ def test_redact_image_file(tempdir, capsys): output_filepath = os.path.join(tempdir, "redacted.png") redact.redact_image( - GCLOUD_PROJECT, - test_filepath, - output_filepath, - ["FIRST_NAME", "EMAIL_ADDRESS"], + GCLOUD_PROJECT, test_filepath, output_filepath, ["FIRST_NAME", "EMAIL_ADDRESS"], ) out, _ = capsys.readouterr() diff --git a/dlp/risk.py b/dlp/risk.py index 66f17179321..05c33a4d467 100644 --- a/dlp/risk.py +++ b/dlp/risk.py @@ -66,8 +66,7 @@ def callback(message): results = job.risk_details.numerical_stats_result print( "Value Range: [{}, {}]".format( - results.min_value.integer_value, - results.max_value.integer_value, + results.min_value.integer_value, results.max_value.integer_value, ) ) prev_value = None @@ -100,9 +99,7 @@ def callback(message): # Configure risk analysis job # Give the name of the numeric column to compute risk metrics for risk_job = { - "privacy_metric": { - "numerical_stats_config": {"field": {"name": column_name}} - }, + "privacy_metric": {"numerical_stats_config": {"field": {"name": column_name}}}, "source_table": source_table, "actions": actions, } @@ -447,9 +444,7 @@ def callback(message): ) ) print( - " Class size: {}".format( - value_bucket.equivalence_class_size - ) + " Class size: {}".format(value_bucket.equivalence_class_size) ) for value in value_bucket.top_sensitive_values: print( @@ -679,8 +674,7 @@ def map_fields(quasi_id, info_type): numerical_parser = subparsers.add_parser("numerical", help="") numerical_parser.add_argument( - "project", - help="The Google Cloud project id to use as a parent resource.", + "project", help="The Google Cloud project id to use as a parent resource.", ) numerical_parser.add_argument( "table_project_id", @@ -689,12 +683,9 @@ def map_fields(quasi_id, info_type): numerical_parser.add_argument( "dataset_id", help="The id of the dataset to inspect." ) + numerical_parser.add_argument("table_id", help="The id of the table to inspect.") numerical_parser.add_argument( - "table_id", help="The id of the table to inspect." - ) - numerical_parser.add_argument( - "column_name", - help="The name of the column to compute risk metrics for.", + "column_name", help="The name of the column to compute risk metrics for.", ) numerical_parser.add_argument( "topic_id", @@ -713,8 +704,7 @@ def map_fields(quasi_id, info_type): categorical_parser = subparsers.add_parser("categorical", help="") categorical_parser.add_argument( - "project", - help="The Google Cloud project id to use as a parent resource.", + "project", help="The Google Cloud project id to use as a parent resource.", ) categorical_parser.add_argument( "table_project_id", @@ -723,12 +713,9 @@ def map_fields(quasi_id, info_type): categorical_parser.add_argument( "dataset_id", help="The id of the dataset to inspect." ) + categorical_parser.add_argument("table_id", help="The id of the table to inspect.") categorical_parser.add_argument( - "table_id", help="The id of the table to inspect." - ) - categorical_parser.add_argument( - "column_name", - help="The name of the column to compute risk metrics for.", + "column_name", help="The name of the column to compute risk metrics for.", ) categorical_parser.add_argument( "topic_id", @@ -747,12 +734,10 @@ def map_fields(quasi_id, info_type): k_anonymity_parser = subparsers.add_parser( "k_anonymity", - help="Computes the k-anonymity of a column set in a Google BigQuery" - "table.", + help="Computes the k-anonymity of a column set in a Google BigQuery" "table.", ) k_anonymity_parser.add_argument( - "project", - help="The Google Cloud project id to use as a parent resource.", + "project", help="The Google Cloud project id to use as a parent resource.", ) k_anonymity_parser.add_argument( "table_project_id", @@ -761,9 +746,7 @@ def map_fields(quasi_id, info_type): k_anonymity_parser.add_argument( "dataset_id", help="The id of the dataset to inspect." ) - k_anonymity_parser.add_argument( - "table_id", help="The id of the table to inspect." - ) + k_anonymity_parser.add_argument("table_id", help="The id of the table to inspect.") k_anonymity_parser.add_argument( "topic_id", help="The name of the Pub/Sub topic to notify once the job completes.", @@ -774,9 +757,7 @@ def map_fields(quasi_id, info_type): "job completion notifications.", ) k_anonymity_parser.add_argument( - "quasi_ids", - nargs="+", - help="A set of columns that form a composite key.", + "quasi_ids", nargs="+", help="A set of columns that form a composite key.", ) k_anonymity_parser.add_argument( "--timeout", @@ -786,12 +767,10 @@ def map_fields(quasi_id, info_type): l_diversity_parser = subparsers.add_parser( "l_diversity", - help="Computes the l-diversity of a column set in a Google BigQuery" - "table.", + help="Computes the l-diversity of a column set in a Google BigQuery" "table.", ) l_diversity_parser.add_argument( - "project", - help="The Google Cloud project id to use as a parent resource.", + "project", help="The Google Cloud project id to use as a parent resource.", ) l_diversity_parser.add_argument( "table_project_id", @@ -800,9 +779,7 @@ def map_fields(quasi_id, info_type): l_diversity_parser.add_argument( "dataset_id", help="The id of the dataset to inspect." ) - l_diversity_parser.add_argument( - "table_id", help="The id of the table to inspect." - ) + l_diversity_parser.add_argument("table_id", help="The id of the table to inspect.") l_diversity_parser.add_argument( "topic_id", help="The name of the Pub/Sub topic to notify once the job completes.", @@ -813,13 +790,10 @@ def map_fields(quasi_id, info_type): "job completion notifications.", ) l_diversity_parser.add_argument( - "sensitive_attribute", - help="The column to measure l-diversity relative to.", + "sensitive_attribute", help="The column to measure l-diversity relative to.", ) l_diversity_parser.add_argument( - "quasi_ids", - nargs="+", - help="A set of columns that form a composite key.", + "quasi_ids", nargs="+", help="A set of columns that form a composite key.", ) l_diversity_parser.add_argument( "--timeout", @@ -833,19 +807,14 @@ def map_fields(quasi_id, info_type): "BigQuery table.", ) k_map_parser.add_argument( - "project", - help="The Google Cloud project id to use as a parent resource.", + "project", help="The Google Cloud project id to use as a parent resource.", ) k_map_parser.add_argument( "table_project_id", help="The Google Cloud project id where the BigQuery table is stored.", ) - k_map_parser.add_argument( - "dataset_id", help="The id of the dataset to inspect." - ) - k_map_parser.add_argument( - "table_id", help="The id of the table to inspect." - ) + k_map_parser.add_argument("dataset_id", help="The id of the dataset to inspect.") + k_map_parser.add_argument("table_id", help="The id of the table to inspect.") k_map_parser.add_argument( "topic_id", help="The name of the Pub/Sub topic to notify once the job completes.", @@ -856,9 +825,7 @@ def map_fields(quasi_id, info_type): "job completion notifications.", ) k_map_parser.add_argument( - "quasi_ids", - nargs="+", - help="A set of columns that form a composite key.", + "quasi_ids", nargs="+", help="A set of columns that form a composite key.", ) k_map_parser.add_argument( "-t", diff --git a/dlp/risk_test.py b/dlp/risk_test.py index 064e13b3e29..25a01cb3112 100644 --- a/dlp/risk_test.py +++ b/dlp/risk_test.py @@ -58,9 +58,7 @@ def subscription_id(topic_id): # Subscribes to a topic. subscriber = google.cloud.pubsub.SubscriberClient() topic_path = subscriber.topic_path(GCLOUD_PROJECT, topic_id) - subscription_path = subscriber.subscription_path( - GCLOUD_PROJECT, SUBSCRIPTION_ID - ) + subscription_path = subscriber.subscription_path(GCLOUD_PROJECT, SUBSCRIPTION_ID) try: subscriber.create_subscription(subscription_path, topic_path) except google.api_core.exceptions.AlreadyExists: @@ -119,9 +117,7 @@ def bigquery_project(): harmful_table.schema = ( google.cloud.bigquery.SchemaField("Name", "STRING", "REQUIRED"), - google.cloud.bigquery.SchemaField( - "TelephoneNumber", "STRING", "REQUIRED" - ), + google.cloud.bigquery.SchemaField("TelephoneNumber", "STRING", "REQUIRED"), google.cloud.bigquery.SchemaField("Mystery", "STRING", "REQUIRED"), google.cloud.bigquery.SchemaField("Age", "INTEGER", "REQUIRED"), google.cloud.bigquery.SchemaField("Gender", "STRING"), @@ -140,40 +136,12 @@ def bigquery_project(): rows_to_insert = [(u"Gary Smith", u"My email is gary@example.com")] harmful_rows_to_insert = [ - ( - u"Gandalf", - u"(123) 456-7890", - "4231 5555 6781 9876", - 27, - "Male", - "US", - ), - ( - u"Dumbledore", - u"(313) 337-1337", - "6291 8765 1095 7629", - 27, - "Male", - "US", - ), + (u"Gandalf", u"(123) 456-7890", "4231 5555 6781 9876", 27, "Male", "US",), + (u"Dumbledore", u"(313) 337-1337", "6291 8765 1095 7629", 27, "Male", "US",), (u"Joe", u"(452) 123-1234", "3782 2288 1166 3030", 35, "Male", "US"), (u"James", u"(567) 890-1234", "8291 3627 8250 1234", 19, "Male", "US"), - ( - u"Marie", - u"(452) 123-1234", - "8291 3627 8250 1234", - 35, - "Female", - "US", - ), - ( - u"Carrie", - u"(567) 890-1234", - "2253 5218 4251 4526", - 35, - "Female", - "US", - ), + (u"Marie", u"(452) 123-1234", "8291 3627 8250 1234", 35, "Female", "US",), + (u"Carrie", u"(567) 890-1234", "2253 5218 4251 4526", 35, "Female", "US",), ] bigquery_client.insert_rows(table, rows_to_insert) @@ -184,9 +152,7 @@ def bigquery_project(): @flaky -def test_numerical_risk_analysis( - topic_id, subscription_id, bigquery_project, capsys -): +def test_numerical_risk_analysis(topic_id, subscription_id, bigquery_project, capsys): risk.numerical_risk_analysis( GCLOUD_PROJECT, TABLE_PROJECT, diff --git a/dlp/templates.py b/dlp/templates.py index 5f03b596fc3..d632958b427 100644 --- a/dlp/templates.py +++ b/dlp/templates.py @@ -117,18 +117,12 @@ def human_readable_time(timestamp): print("Template {}:".format(template.name)) if template.display_name: print(" Display Name: {}".format(template.display_name)) - print( - " Created: {}".format(human_readable_time(template.create_time)) - ) - print( - " Updated: {}".format(human_readable_time(template.update_time)) - ) + print(" Created: {}".format(human_readable_time(template.create_time))) + print(" Updated: {}".format(human_readable_time(template.update_time))) config = template.inspect_config print( - " InfoTypes: {}".format( - ", ".join([it.name for it in config.info_types]) - ) + " InfoTypes: {}".format(", ".join([it.name for it in config.info_types])) ) print(" Minimum likelihood: {}".format(config.min_likelihood)) print(" Include quotes: {}".format(config.include_quote)) @@ -185,8 +179,7 @@ def delete_inspect_template(project, template_id): parser_create = subparsers.add_parser("create", help="Create a template.") parser_create.add_argument( "--template_id", - help="The id of the template. If omitted, an id will be randomly " - "generated", + help="The id of the template. If omitted, an id will be randomly " "generated", ) parser_create.add_argument( "--display_name", help="The optional display name of the template." @@ -239,9 +232,7 @@ def delete_inspect_template(project, template_id): ) parser_delete = subparsers.add_parser("delete", help="Delete a template.") - parser_delete.add_argument( - "template_id", help="The id of the template to delete." - ) + parser_delete.add_argument("template_id", help="The id of the template to delete.") parser_delete.add_argument( "--project", help="The Google Cloud project id to use as a parent resource.", diff --git a/dlp/triggers.py b/dlp/triggers.py index 0c2b0bb4e29..f8624f5492f 100644 --- a/dlp/triggers.py +++ b/dlp/triggers.py @@ -92,9 +92,7 @@ def create_trigger( # Construct the schedule definition: schedule = { - "recurrence_period_duration": { - "seconds": scan_period_days * 60 * 60 * 24 - } + "recurrence_period_duration": {"seconds": scan_period_days * 60 * 60 * 24} } # Construct the trigger definition. @@ -212,8 +210,7 @@ def delete_trigger(project, trigger_id): ) parser_create.add_argument( "--trigger_id", - help="The id of the trigger. If omitted, an id will be randomly " - "generated", + help="The id of the trigger. If omitted, an id will be randomly " "generated", ) parser_create.add_argument( "--display_name", help="The optional display name of the trigger." @@ -254,9 +251,7 @@ def delete_trigger(project, trigger_id): help="The maximum number of findings to report; 0 = no maximum.", ) parser_create.add_argument( - "--auto_populate_timespan", - type=bool, - help="Limit scan to new content only.", + "--auto_populate_timespan", type=bool, help="Limit scan to new content only.", ) parser_list = subparsers.add_parser("list", help="List all triggers.") @@ -267,9 +262,7 @@ def delete_trigger(project, trigger_id): ) parser_delete = subparsers.add_parser("delete", help="Delete a trigger.") - parser_delete.add_argument( - "trigger_id", help="The id of the trigger to delete." - ) + parser_delete.add_argument("trigger_id", help="The id of the trigger to delete.") parser_delete.add_argument( "--project", help="The Google Cloud project id to use as a parent resource.", From 61b3522e00f5b499d51d119ad787483e776e4222 Mon Sep 17 00:00:00 2001 From: Leah Cole Date: Tue, 4 Feb 2020 17:16:02 -0800 Subject: [PATCH 10/12] Remove skips, adjust job tests --- dlp/jobs_test.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/dlp/jobs_test.py b/dlp/jobs_test.py index 56556337577..cc613f06841 100644 --- a/dlp/jobs_test.py +++ b/dlp/jobs_test.py @@ -13,6 +13,7 @@ # limitations under the License. import os +from flaky import flaky import google.api_core.exceptions @@ -51,9 +52,6 @@ def test_job_name(): full_path = response.name # API expects only job name, not full project path job_name = full_path[full_path.rfind("/") + 1 :] - print("job") - print(job_name) - print(response.state) yield job_name # clean up job if not deleted @@ -62,30 +60,27 @@ def test_job_name(): except google.api_core.exceptions.NotFound: print("Issue during teardown, missing job") - -@pytest.mark.skip(reason="investigating possible api bug") def test_list_dlp_jobs(test_job_name, capsys): - print(test_job_name) jobs.list_dlp_jobs(GCLOUD_PROJECT) out, _ = capsys.readouterr() - assert "Job: projects/" in out - + assert test_job_name not in out -@pytest.mark.skip(reason="investigating possible api bug") -def test_list_dlp_jobs_with_filter(capsys): - jobs.list_dlp_jobs(GCLOUD_PROJECT, filter_string="state=DONE") +@flaky +def test_list_dlp_jobs_with_filter(test_job_name, capsys): + jobs.list_dlp_jobs(GCLOUD_PROJECT, filter_string="state=RUNNING", job_type="RISK_ANALYSIS_JOB") out, _ = capsys.readouterr() - assert "Job: projects/" in out + assert test_job_name in out -@pytest.mark.skip(reason="investigating possible api bug") -def test_list_dlp_jobs_with_job_type(capsys): + +def test_list_dlp_jobs_with_job_type(test_job_name, capsys): jobs.list_dlp_jobs(GCLOUD_PROJECT, job_type="INSPECT_JOB") out, _ = capsys.readouterr() - assert "Job: projects/" in out + assert test_job_name not in out # job created is a risk analysis job + def test_delete_dlp_job(test_job_name, capsys): From 93cc1a862e2ea573734822746b6d27d6f9bfa0a5 Mon Sep 17 00:00:00 2001 From: Leah Cole Date: Tue, 4 Feb 2020 17:48:50 -0800 Subject: [PATCH 11/12] fix lint and skips --- dlp/deid.py | 44 ++++++++++++++------ dlp/deid_test.py | 5 ++- dlp/inspect_content.py | 14 +++++-- dlp/inspect_content_test.py | 12 ++++-- dlp/jobs.py | 19 ++++++--- dlp/jobs_test.py | 16 ++++---- dlp/metadata.py | 7 +++- dlp/quickstart.py | 8 +++- dlp/redact.py | 10 ++++- dlp/redact_test.py | 5 ++- dlp/risk.py | 81 ++++++++++++++++++++++++++----------- dlp/risk_test.py | 48 ++++++++++++++++++---- dlp/templates.py | 19 ++++++--- dlp/triggers.py | 15 +++++-- 14 files changed, 224 insertions(+), 79 deletions(-) diff --git a/dlp/deid.py b/dlp/deid.py index 6ca8f86ec1a..b08a341dd82 100644 --- a/dlp/deid.py +++ b/dlp/deid.py @@ -46,7 +46,9 @@ def deidentify_with_mask( parent = dlp.project_path(project) # Construct inspect configuration dictionary - inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]} + inspect_config = { + "info_types": [{"name": info_type} for info_type in info_types] + } # Construct deidentify configuration dictionary deidentify_config = { @@ -131,17 +133,24 @@ def deidentify_with_fpe( # Construct FPE configuration dictionary crypto_replace_ffx_fpe_config = { "crypto_key": { - "kms_wrapped": {"wrapped_key": wrapped_key, "crypto_key_name": key_name,} + "kms_wrapped": { + "wrapped_key": wrapped_key, + "crypto_key_name": key_name, + } }, "common_alphabet": alphabet, } # Add surrogate type if surrogate_type: - crypto_replace_ffx_fpe_config["surrogate_info_type"] = {"name": surrogate_type} + crypto_replace_ffx_fpe_config["surrogate_info_type"] = { + "name": surrogate_type + } # Construct inspect configuration dictionary - inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]} + inspect_config = { + "info_types": [{"name": info_type} for info_type in info_types] + } # Construct deidentify configuration dictionary deidentify_config = { @@ -338,7 +347,11 @@ def map_data(value): try: date = datetime.strptime(value, "%m/%d/%Y") return { - "date_value": {"year": date.year, "month": date.month, "day": date.day,} + "date_value": { + "year": date.year, + "month": date.month, + "day": date.day, + } } except ValueError: return {"string_value": value} @@ -431,7 +444,8 @@ def write_data(data): mask_parser = subparsers.add_parser( "deid_mask", - help="Deidentify sensitive data in a string by masking it with a " "character.", + help="Deidentify sensitive data in a string by masking it with a " + "character.", ) mask_parser.add_argument( "--info_types", @@ -443,7 +457,8 @@ def write_data(data): default=["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"], ) mask_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource.", + "project", + help="The Google Cloud project id to use as a parent resource.", ) mask_parser.add_argument("item", help="The string to deidentify.") mask_parser.add_argument( @@ -476,11 +491,13 @@ def write_data(data): default=["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"], ) fpe_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource.", + "project", + help="The Google Cloud project id to use as a parent resource.", ) fpe_parser.add_argument( "item", - help="The string to deidentify. " "Example: string = 'My SSN is 372819127'", + help="The string to deidentify. " + "Example: string = 'My SSN is 372819127'", ) fpe_parser.add_argument( "key_name", @@ -518,11 +535,13 @@ def write_data(data): "Encryption (FPE).", ) reid_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource.", + "project", + help="The Google Cloud project id to use as a parent resource.", ) reid_parser.add_argument( "item", - help="The string to deidentify. " "Example: string = 'My SSN is 372819127'", + help="The string to deidentify. " + "Example: string = 'My SSN is 372819127'", ) reid_parser.add_argument( "surrogate_type", @@ -558,7 +577,8 @@ def write_data(data): help="Deidentify dates in a CSV file by pseudorandomly shifting them.", ) date_shift_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource.", + "project", + help="The Google Cloud project id to use as a parent resource.", ) date_shift_parser.add_argument( "input_csv_file", diff --git a/dlp/deid_test.py b/dlp/deid_test.py index 9a84a09c616..db14b5758e9 100644 --- a/dlp/deid_test.py +++ b/dlp/deid_test.py @@ -78,7 +78,10 @@ def test_deidentify_with_mask_masking_character_specified(capsys): def test_deidentify_with_mask_masking_number_specified(capsys): deid.deidentify_with_mask( - GCLOUD_PROJECT, HARMFUL_STRING, ["US_SOCIAL_SECURITY_NUMBER"], number_to_mask=7, + GCLOUD_PROJECT, + HARMFUL_STRING, + ["US_SOCIAL_SECURITY_NUMBER"], + number_to_mask=7, ) out, _ = capsys.readouterr() diff --git a/dlp/inspect_content.py b/dlp/inspect_content.py index c2fa2b5af11..336f798128b 100644 --- a/dlp/inspect_content.py +++ b/dlp/inspect_content.py @@ -220,7 +220,9 @@ def inspect_table( headers = [{"name": val} for val in data["header"]] rows = [] for row in data["rows"]: - rows.append({"values": [{"string_value": cell_val} for cell_val in row]}) + rows.append( + {"values": [{"string_value": cell_val} for cell_val in row]} + ) table = {} table["headers"] = headers @@ -978,7 +980,9 @@ def callback(message): ) parser_file = subparsers.add_parser("file", help="Inspect a local file.") - parser_file.add_argument("filename", help="The path to the file to inspect.") + parser_file.add_argument( + "filename", help="The path to the file to inspect." + ) parser_file.add_argument( "--project", help="The Google Cloud project id to use as a parent resource.", @@ -1125,7 +1129,8 @@ def callback(message): help="The Google Cloud project id of the target Datastore.", ) parser_datastore.add_argument( - "kind", help='The kind of the Datastore entity to inspect, e.g. "Person".', + "kind", + help='The kind of the Datastore entity to inspect, e.g. "Person".', ) parser_datastore.add_argument( "topic_id", @@ -1201,7 +1206,8 @@ def callback(message): "bigquery", help="Inspect files on Google BigQuery." ) parser_bigquery.add_argument( - "bigquery_project", help="The Google Cloud project id of the target table.", + "bigquery_project", + help="The Google Cloud project id of the target table.", ) parser_bigquery.add_argument( "dataset_id", help="The ID of the target BigQuery dataset." diff --git a/dlp/inspect_content_test.py b/dlp/inspect_content_test.py index cf58ee83138..e15d6f55a56 100644 --- a/dlp/inspect_content_test.py +++ b/dlp/inspect_content_test.py @@ -94,7 +94,9 @@ def subscription_id(topic_id): # Subscribes to a topic. subscriber = google.cloud.pubsub.SubscriberClient() topic_path = subscriber.topic_path(GCLOUD_PROJECT, topic_id) - subscription_path = subscriber.subscription_path(GCLOUD_PROJECT, SUBSCRIPTION_ID) + subscription_path = subscriber.subscription_path( + GCLOUD_PROJECT, SUBSCRIPTION_ID + ) try: subscriber.create_subscription(subscription_path, topic_path) except google.api_core.exceptions.AlreadyExists: @@ -329,7 +331,9 @@ def test_inspect_gcs_file_with_custom_info_types( @flaky -def test_inspect_gcs_file_no_results(bucket, topic_id, subscription_id, capsys): +def test_inspect_gcs_file_no_results( + bucket, topic_id, subscription_id, capsys +): inspect_content.inspect_gcs_file( GCLOUD_PROJECT, bucket.name, @@ -376,7 +380,9 @@ def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys): @flaky -def test_inspect_datastore(datastore_project, topic_id, subscription_id, capsys): +def test_inspect_datastore( + datastore_project, topic_id, subscription_id, capsys +): @eventually_consistent.call def _(): inspect_content.inspect_datastore( diff --git a/dlp/jobs.py b/dlp/jobs.py index 9bc3b916b23..a8ac0b43c5e 100644 --- a/dlp/jobs.py +++ b/dlp/jobs.py @@ -65,7 +65,8 @@ def list_dlp_jobs(project, filter_string=None, job_type=None): # Job type dictionary job_type_to_int = { - "DLP_JOB_TYPE_UNSPECIFIED": google.cloud.dlp.enums.DlpJobType.DLP_JOB_TYPE_UNSPECIFIED, + "DLP_JOB_TYPE_UNSPECIFIED": + google.cloud.dlp.enums.DlpJobType.DLP_JOB_TYPE_UNSPECIFIED, "INSPECT_JOB": google.cloud.dlp.enums.DlpJobType.INSPECT_JOB, "RISK_ANALYSIS_JOB": google.cloud.dlp.enums.DlpJobType.RISK_ANALYSIS_JOB, } @@ -122,7 +123,8 @@ def delete_dlp_job(project, job_name): list_parser = subparsers.add_parser( "list", - help="List Data Loss Prevention API jobs corresponding to a given " "filter.", + help="List Data Loss Prevention API jobs corresponding to a given " + "filter.", ) list_parser.add_argument( "project", help="The project id to use as a parent resource." @@ -135,7 +137,11 @@ def delete_dlp_job(project, job_name): list_parser.add_argument( "-t", "--type", - choices=["DLP_JOB_TYPE_UNSPECIFIED", "INSPECT_JOB", "RISK_ANALYSIS_JOB",], + choices=[ + "DLP_JOB_TYPE_UNSPECIFIED", + "INSPECT_JOB", + "RISK_ANALYSIS_JOB", + ], help='The type of job. API defaults to "INSPECT"', ) @@ -147,12 +153,15 @@ def delete_dlp_job(project, job_name): ) delete_parser.add_argument( "job_name", - help="The name of the DlpJob resource to be deleted. " "Example: X-#####", + help="The name of the DlpJob resource to be deleted. " + "Example: X-#####", ) args = parser.parse_args() if args.content == "list": - list_dlp_jobs(args.project, filter_string=args.filter, job_type=args.type) + list_dlp_jobs( + args.project, filter_string=args.filter, job_type=args.type + ) elif args.content == "delete": delete_dlp_job(args.project, args.job_name) diff --git a/dlp/jobs_test.py b/dlp/jobs_test.py index cc613f06841..98acb7464e3 100644 --- a/dlp/jobs_test.py +++ b/dlp/jobs_test.py @@ -15,8 +15,6 @@ import os from flaky import flaky -import google.api_core.exceptions - import pytest import jobs @@ -51,7 +49,7 @@ def test_job_name(): response = dlp.create_dlp_job(parent, risk_job=risk_job) full_path = response.name # API expects only job name, not full project path - job_name = full_path[full_path.rfind("/") + 1 :] + job_name = full_path[full_path.rfind("/") + 1:] yield job_name # clean up job if not deleted @@ -60,27 +58,31 @@ def test_job_name(): except google.api_core.exceptions.NotFound: print("Issue during teardown, missing job") + def test_list_dlp_jobs(test_job_name, capsys): jobs.list_dlp_jobs(GCLOUD_PROJECT) out, _ = capsys.readouterr() assert test_job_name not in out + @flaky def test_list_dlp_jobs_with_filter(test_job_name, capsys): - jobs.list_dlp_jobs(GCLOUD_PROJECT, filter_string="state=RUNNING", job_type="RISK_ANALYSIS_JOB") + jobs.list_dlp_jobs( + GCLOUD_PROJECT, + filter_string="state=RUNNING", + job_type="RISK_ANALYSIS_JOB", + ) out, _ = capsys.readouterr() assert test_job_name in out - def test_list_dlp_jobs_with_job_type(test_job_name, capsys): jobs.list_dlp_jobs(GCLOUD_PROJECT, job_type="INSPECT_JOB") out, _ = capsys.readouterr() - assert test_job_name not in out # job created is a risk analysis job - + assert test_job_name not in out # job created is a risk analysis job def test_delete_dlp_job(test_job_name, capsys): diff --git a/dlp/metadata.py b/dlp/metadata.py index f6d46a41246..7a65941d622 100644 --- a/dlp/metadata.py +++ b/dlp/metadata.py @@ -56,7 +56,8 @@ def list_info_types(language_code=None, result_filter=None): if __name__ == "__main__": parser = argparse.ArgumentParser(description=__doc__) parser.add_argument( - "--language_code", help="The BCP-47 language code to use, e.g. 'en-US'.", + "--language_code", + help="The BCP-47 language code to use, e.g. 'en-US'.", ) parser.add_argument( "--filter", @@ -66,4 +67,6 @@ def list_info_types(language_code=None, result_filter=None): args = parser.parse_args() - list_info_types(language_code=args.language_code, result_filter=args.filter) + list_info_types( + language_code=args.language_code, result_filter=args.filter + ) diff --git a/dlp/quickstart.py b/dlp/quickstart.py index 2cc0f144267..1b12a83da1d 100644 --- a/dlp/quickstart.py +++ b/dlp/quickstart.py @@ -74,7 +74,9 @@ def quickstart(project_id): print("Info type: {}".format(finding.info_type.name)) # Convert likelihood value to string respresentation. likelihood = ( - google.cloud.dlp.types.Finding.DESCRIPTOR.fields_by_name["likelihood"] + google.cloud.dlp.types.Finding.DESCRIPTOR.fields_by_name[ + "likelihood" + ] .enum_type.values_by_number[finding.likelihood] .name ) @@ -86,7 +88,9 @@ def quickstart(project_id): if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument("project_id", help="Enter your GCP project id.", type=str) + parser.add_argument( + "project_id", help="Enter your GCP project id.", type=str + ) args = parser.parse_args() if len(sys.argv) == 1: parser.print_usage() diff --git a/dlp/redact.py b/dlp/redact.py index ceaa09012e9..ad1d866d6d6 100644 --- a/dlp/redact.py +++ b/dlp/redact.py @@ -30,7 +30,12 @@ def redact_image( - project, filename, output_filename, info_types, min_likelihood=None, mime_type=None, + project, + filename, + output_filename, + info_types, + min_likelihood=None, + mime_type=None, ): """Uses the Data Loss Prevention API to redact protected data in an image. Args: @@ -124,7 +129,8 @@ def redact_image( parser.add_argument("filename", help="The path to the file to inspect.") parser.add_argument( - "output_filename", help="The path to which the redacted image will be written.", + "output_filename", + help="The path to which the redacted image will be written.", ) parser.add_argument( "--project", diff --git a/dlp/redact_test.py b/dlp/redact_test.py index a33b655d688..dd9a887d23d 100644 --- a/dlp/redact_test.py +++ b/dlp/redact_test.py @@ -36,7 +36,10 @@ def test_redact_image_file(tempdir, capsys): output_filepath = os.path.join(tempdir, "redacted.png") redact.redact_image( - GCLOUD_PROJECT, test_filepath, output_filepath, ["FIRST_NAME", "EMAIL_ADDRESS"], + GCLOUD_PROJECT, + test_filepath, + output_filepath, + ["FIRST_NAME", "EMAIL_ADDRESS"], ) out, _ = capsys.readouterr() diff --git a/dlp/risk.py b/dlp/risk.py index 05c33a4d467..386f05c0d73 100644 --- a/dlp/risk.py +++ b/dlp/risk.py @@ -66,7 +66,8 @@ def callback(message): results = job.risk_details.numerical_stats_result print( "Value Range: [{}, {}]".format( - results.min_value.integer_value, results.max_value.integer_value, + results.min_value.integer_value, + results.max_value.integer_value, ) ) prev_value = None @@ -99,7 +100,9 @@ def callback(message): # Configure risk analysis job # Give the name of the numeric column to compute risk metrics for risk_job = { - "privacy_metric": {"numerical_stats_config": {"field": {"name": column_name}}}, + "privacy_metric": { + "numerical_stats_config": {"field": {"name": column_name}} + }, "source_table": source_table, "actions": actions, } @@ -171,7 +174,7 @@ def callback(message): # Now that the job is done, fetch the results and print them. job = dlp.get_dlp_job(operation.name) histogram_buckets = ( - job.risk_details.categorical_stats_result.value_frequency_histogram_buckets + job.risk_details.categorical_stats_result.value_frequency_histogram_buckets # noqa: E501 ) # Print bucket stats for i, bucket in enumerate(histogram_buckets): @@ -426,7 +429,7 @@ def callback(message): # Now that the job is done, fetch the results and print them. job = dlp.get_dlp_job(operation.name) histogram_buckets = ( - job.risk_details.l_diversity_result.sensitive_value_frequency_histogram_buckets + job.risk_details.l_diversity_result.sensitive_value_frequency_histogram_buckets # noqa: E501 ) # Print bucket stats for i, bucket in enumerate(histogram_buckets): @@ -444,7 +447,9 @@ def callback(message): ) ) print( - " Class size: {}".format(value_bucket.equivalence_class_size) + " Class size: {}".format( + value_bucket.equivalence_class_size + ) ) for value in value_bucket.top_sensitive_values: print( @@ -674,7 +679,8 @@ def map_fields(quasi_id, info_type): numerical_parser = subparsers.add_parser("numerical", help="") numerical_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource.", + "project", + help="The Google Cloud project id to use as a parent resource.", ) numerical_parser.add_argument( "table_project_id", @@ -683,9 +689,12 @@ def map_fields(quasi_id, info_type): numerical_parser.add_argument( "dataset_id", help="The id of the dataset to inspect." ) - numerical_parser.add_argument("table_id", help="The id of the table to inspect.") numerical_parser.add_argument( - "column_name", help="The name of the column to compute risk metrics for.", + "table_id", help="The id of the table to inspect." + ) + numerical_parser.add_argument( + "column_name", + help="The name of the column to compute risk metrics for.", ) numerical_parser.add_argument( "topic_id", @@ -704,7 +713,8 @@ def map_fields(quasi_id, info_type): categorical_parser = subparsers.add_parser("categorical", help="") categorical_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource.", + "project", + help="The Google Cloud project id to use as a parent resource.", ) categorical_parser.add_argument( "table_project_id", @@ -713,9 +723,12 @@ def map_fields(quasi_id, info_type): categorical_parser.add_argument( "dataset_id", help="The id of the dataset to inspect." ) - categorical_parser.add_argument("table_id", help="The id of the table to inspect.") categorical_parser.add_argument( - "column_name", help="The name of the column to compute risk metrics for.", + "table_id", help="The id of the table to inspect." + ) + categorical_parser.add_argument( + "column_name", + help="The name of the column to compute risk metrics for.", ) categorical_parser.add_argument( "topic_id", @@ -734,10 +747,12 @@ def map_fields(quasi_id, info_type): k_anonymity_parser = subparsers.add_parser( "k_anonymity", - help="Computes the k-anonymity of a column set in a Google BigQuery" "table.", + help="Computes the k-anonymity of a column set in a Google BigQuery" + "table.", ) k_anonymity_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource.", + "project", + help="The Google Cloud project id to use as a parent resource.", ) k_anonymity_parser.add_argument( "table_project_id", @@ -746,7 +761,9 @@ def map_fields(quasi_id, info_type): k_anonymity_parser.add_argument( "dataset_id", help="The id of the dataset to inspect." ) - k_anonymity_parser.add_argument("table_id", help="The id of the table to inspect.") + k_anonymity_parser.add_argument( + "table_id", help="The id of the table to inspect." + ) k_anonymity_parser.add_argument( "topic_id", help="The name of the Pub/Sub topic to notify once the job completes.", @@ -757,7 +774,9 @@ def map_fields(quasi_id, info_type): "job completion notifications.", ) k_anonymity_parser.add_argument( - "quasi_ids", nargs="+", help="A set of columns that form a composite key.", + "quasi_ids", + nargs="+", + help="A set of columns that form a composite key.", ) k_anonymity_parser.add_argument( "--timeout", @@ -767,10 +786,12 @@ def map_fields(quasi_id, info_type): l_diversity_parser = subparsers.add_parser( "l_diversity", - help="Computes the l-diversity of a column set in a Google BigQuery" "table.", + help="Computes the l-diversity of a column set in a Google BigQuery" + "table.", ) l_diversity_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource.", + "project", + help="The Google Cloud project id to use as a parent resource.", ) l_diversity_parser.add_argument( "table_project_id", @@ -779,7 +800,9 @@ def map_fields(quasi_id, info_type): l_diversity_parser.add_argument( "dataset_id", help="The id of the dataset to inspect." ) - l_diversity_parser.add_argument("table_id", help="The id of the table to inspect.") + l_diversity_parser.add_argument( + "table_id", help="The id of the table to inspect." + ) l_diversity_parser.add_argument( "topic_id", help="The name of the Pub/Sub topic to notify once the job completes.", @@ -790,10 +813,13 @@ def map_fields(quasi_id, info_type): "job completion notifications.", ) l_diversity_parser.add_argument( - "sensitive_attribute", help="The column to measure l-diversity relative to.", + "sensitive_attribute", + help="The column to measure l-diversity relative to.", ) l_diversity_parser.add_argument( - "quasi_ids", nargs="+", help="A set of columns that form a composite key.", + "quasi_ids", + nargs="+", + help="A set of columns that form a composite key.", ) l_diversity_parser.add_argument( "--timeout", @@ -807,14 +833,19 @@ def map_fields(quasi_id, info_type): "BigQuery table.", ) k_map_parser.add_argument( - "project", help="The Google Cloud project id to use as a parent resource.", + "project", + help="The Google Cloud project id to use as a parent resource.", ) k_map_parser.add_argument( "table_project_id", help="The Google Cloud project id where the BigQuery table is stored.", ) - k_map_parser.add_argument("dataset_id", help="The id of the dataset to inspect.") - k_map_parser.add_argument("table_id", help="The id of the table to inspect.") + k_map_parser.add_argument( + "dataset_id", help="The id of the dataset to inspect." + ) + k_map_parser.add_argument( + "table_id", help="The id of the table to inspect." + ) k_map_parser.add_argument( "topic_id", help="The name of the Pub/Sub topic to notify once the job completes.", @@ -825,7 +856,9 @@ def map_fields(quasi_id, info_type): "job completion notifications.", ) k_map_parser.add_argument( - "quasi_ids", nargs="+", help="A set of columns that form a composite key.", + "quasi_ids", + nargs="+", + help="A set of columns that form a composite key.", ) k_map_parser.add_argument( "-t", diff --git a/dlp/risk_test.py b/dlp/risk_test.py index 25a01cb3112..064e13b3e29 100644 --- a/dlp/risk_test.py +++ b/dlp/risk_test.py @@ -58,7 +58,9 @@ def subscription_id(topic_id): # Subscribes to a topic. subscriber = google.cloud.pubsub.SubscriberClient() topic_path = subscriber.topic_path(GCLOUD_PROJECT, topic_id) - subscription_path = subscriber.subscription_path(GCLOUD_PROJECT, SUBSCRIPTION_ID) + subscription_path = subscriber.subscription_path( + GCLOUD_PROJECT, SUBSCRIPTION_ID + ) try: subscriber.create_subscription(subscription_path, topic_path) except google.api_core.exceptions.AlreadyExists: @@ -117,7 +119,9 @@ def bigquery_project(): harmful_table.schema = ( google.cloud.bigquery.SchemaField("Name", "STRING", "REQUIRED"), - google.cloud.bigquery.SchemaField("TelephoneNumber", "STRING", "REQUIRED"), + google.cloud.bigquery.SchemaField( + "TelephoneNumber", "STRING", "REQUIRED" + ), google.cloud.bigquery.SchemaField("Mystery", "STRING", "REQUIRED"), google.cloud.bigquery.SchemaField("Age", "INTEGER", "REQUIRED"), google.cloud.bigquery.SchemaField("Gender", "STRING"), @@ -136,12 +140,40 @@ def bigquery_project(): rows_to_insert = [(u"Gary Smith", u"My email is gary@example.com")] harmful_rows_to_insert = [ - (u"Gandalf", u"(123) 456-7890", "4231 5555 6781 9876", 27, "Male", "US",), - (u"Dumbledore", u"(313) 337-1337", "6291 8765 1095 7629", 27, "Male", "US",), + ( + u"Gandalf", + u"(123) 456-7890", + "4231 5555 6781 9876", + 27, + "Male", + "US", + ), + ( + u"Dumbledore", + u"(313) 337-1337", + "6291 8765 1095 7629", + 27, + "Male", + "US", + ), (u"Joe", u"(452) 123-1234", "3782 2288 1166 3030", 35, "Male", "US"), (u"James", u"(567) 890-1234", "8291 3627 8250 1234", 19, "Male", "US"), - (u"Marie", u"(452) 123-1234", "8291 3627 8250 1234", 35, "Female", "US",), - (u"Carrie", u"(567) 890-1234", "2253 5218 4251 4526", 35, "Female", "US",), + ( + u"Marie", + u"(452) 123-1234", + "8291 3627 8250 1234", + 35, + "Female", + "US", + ), + ( + u"Carrie", + u"(567) 890-1234", + "2253 5218 4251 4526", + 35, + "Female", + "US", + ), ] bigquery_client.insert_rows(table, rows_to_insert) @@ -152,7 +184,9 @@ def bigquery_project(): @flaky -def test_numerical_risk_analysis(topic_id, subscription_id, bigquery_project, capsys): +def test_numerical_risk_analysis( + topic_id, subscription_id, bigquery_project, capsys +): risk.numerical_risk_analysis( GCLOUD_PROJECT, TABLE_PROJECT, diff --git a/dlp/templates.py b/dlp/templates.py index d632958b427..5f03b596fc3 100644 --- a/dlp/templates.py +++ b/dlp/templates.py @@ -117,12 +117,18 @@ def human_readable_time(timestamp): print("Template {}:".format(template.name)) if template.display_name: print(" Display Name: {}".format(template.display_name)) - print(" Created: {}".format(human_readable_time(template.create_time))) - print(" Updated: {}".format(human_readable_time(template.update_time))) + print( + " Created: {}".format(human_readable_time(template.create_time)) + ) + print( + " Updated: {}".format(human_readable_time(template.update_time)) + ) config = template.inspect_config print( - " InfoTypes: {}".format(", ".join([it.name for it in config.info_types])) + " InfoTypes: {}".format( + ", ".join([it.name for it in config.info_types]) + ) ) print(" Minimum likelihood: {}".format(config.min_likelihood)) print(" Include quotes: {}".format(config.include_quote)) @@ -179,7 +185,8 @@ def delete_inspect_template(project, template_id): parser_create = subparsers.add_parser("create", help="Create a template.") parser_create.add_argument( "--template_id", - help="The id of the template. If omitted, an id will be randomly " "generated", + help="The id of the template. If omitted, an id will be randomly " + "generated", ) parser_create.add_argument( "--display_name", help="The optional display name of the template." @@ -232,7 +239,9 @@ def delete_inspect_template(project, template_id): ) parser_delete = subparsers.add_parser("delete", help="Delete a template.") - parser_delete.add_argument("template_id", help="The id of the template to delete.") + parser_delete.add_argument( + "template_id", help="The id of the template to delete." + ) parser_delete.add_argument( "--project", help="The Google Cloud project id to use as a parent resource.", diff --git a/dlp/triggers.py b/dlp/triggers.py index f8624f5492f..0c2b0bb4e29 100644 --- a/dlp/triggers.py +++ b/dlp/triggers.py @@ -92,7 +92,9 @@ def create_trigger( # Construct the schedule definition: schedule = { - "recurrence_period_duration": {"seconds": scan_period_days * 60 * 60 * 24} + "recurrence_period_duration": { + "seconds": scan_period_days * 60 * 60 * 24 + } } # Construct the trigger definition. @@ -210,7 +212,8 @@ def delete_trigger(project, trigger_id): ) parser_create.add_argument( "--trigger_id", - help="The id of the trigger. If omitted, an id will be randomly " "generated", + help="The id of the trigger. If omitted, an id will be randomly " + "generated", ) parser_create.add_argument( "--display_name", help="The optional display name of the trigger." @@ -251,7 +254,9 @@ def delete_trigger(project, trigger_id): help="The maximum number of findings to report; 0 = no maximum.", ) parser_create.add_argument( - "--auto_populate_timespan", type=bool, help="Limit scan to new content only.", + "--auto_populate_timespan", + type=bool, + help="Limit scan to new content only.", ) parser_list = subparsers.add_parser("list", help="List all triggers.") @@ -262,7 +267,9 @@ def delete_trigger(project, trigger_id): ) parser_delete = subparsers.add_parser("delete", help="Delete a trigger.") - parser_delete.add_argument("trigger_id", help="The id of the trigger to delete.") + parser_delete.add_argument( + "trigger_id", help="The id of the trigger to delete." + ) parser_delete.add_argument( "--project", help="The Google Cloud project id to use as a parent resource.", From 7aa86077d139d19363e97c79a074e03e911fc6d2 Mon Sep 17 00:00:00 2001 From: Leah Cole Date: Thu, 6 Feb 2020 14:19:29 -0800 Subject: [PATCH 12/12] Cleanup commented things --- dlp/risk_test.py | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/dlp/risk_test.py b/dlp/risk_test.py index 064e13b3e29..41b514f4da7 100644 --- a/dlp/risk_test.py +++ b/dlp/risk_test.py @@ -71,28 +71,6 @@ def subscription_id(topic_id): subscriber.delete_subscription(subscription_path) -""" -@pytest.fixture(scope="module") -def bigquery_dataset_id(): - # adds bq dataset, yields the id, tears down - bigquery_client = google.cloud.bigquery.Client() - - dataset_ref = bigquery_client.dataset(BIGQUERY_DATASET_ID) - dataset = google.cloud.bigquery.Dataset(dataset_ref) - try: - dataset = bigquery_client.create_dataset(dataset) - except google.api_core.exceptions.Conflict: - dataset = bigquery_client.get_dataset(dataset) - yield BIGQUERY_DATASET_ID - bigquery_client.delete_dataset(dataset_ref, delete_contents=True) - -@pytest.fixture(scope="module") -def bigquery_table_id(bigquery_dataset_id): - bigquery_client = google.cloud.bigquery.Client() - dataset_ref = bigquery_client.dataset(bigquery -""" - - @pytest.fixture(scope="module") def bigquery_project(): # Adds test Bigquery data, yields the project ID and then tears down. @@ -111,7 +89,6 @@ def bigquery_project(): harmful_table_ref = dataset_ref.table(BIGQUERY_HARMFUL_TABLE_ID) harmful_table = google.cloud.bigquery.Table(harmful_table_ref) - # DO NOT SUBMIT: trim this down once we find out what works table.schema = ( google.cloud.bigquery.SchemaField("Name", "STRING"), google.cloud.bigquery.SchemaField("Comment", "STRING"),