Skip to content

Commit d87e01d

Browse files
leahecole and kurtisvg authored
fix: correct dataset name, use env var for project (#2621)
* fix: correct dataset name, use env var for project
* Add uuids to tests
* add uuids and fixtures for bq
* Add logic to delete job
* ran black
* Run black with line length
* Add utf encoding for python 2 tests
* Add skips for now
* Ran black
* Remove skips, adjust job tests
* fix lint and skips
* Cleanup commented things

Co-authored-by: Kurtis Van Gent <[email protected]>
1 parent d26b380 commit d87e01d

16 files changed

+393
-130
lines changed

dlp/deid.py

Lines changed: 38 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,9 @@ def deidentify_with_mask(
4646
parent = dlp.project_path(project)
4747

4848
# Construct inspect configuration dictionary
49-
inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]}
49+
inspect_config = {
50+
"info_types": [{"name": info_type} for info_type in info_types]
51+
}
5052

5153
# Construct deidentify configuration dictionary
5254
deidentify_config = {
@@ -131,17 +133,24 @@ def deidentify_with_fpe(
131133
# Construct FPE configuration dictionary
132134
crypto_replace_ffx_fpe_config = {
133135
"crypto_key": {
134-
"kms_wrapped": {"wrapped_key": wrapped_key, "crypto_key_name": key_name}
136+
"kms_wrapped": {
137+
"wrapped_key": wrapped_key,
138+
"crypto_key_name": key_name,
139+
}
135140
},
136141
"common_alphabet": alphabet,
137142
}
138143

139144
# Add surrogate type
140145
if surrogate_type:
141-
crypto_replace_ffx_fpe_config["surrogate_info_type"] = {"name": surrogate_type}
146+
crypto_replace_ffx_fpe_config["surrogate_info_type"] = {
147+
"name": surrogate_type
148+
}
142149

143150
# Construct inspect configuration dictionary
144-
inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]}
151+
inspect_config = {
152+
"info_types": [{"name": info_type} for info_type in info_types]
153+
}
145154

146155
# Construct deidentify configuration dictionary
147156
deidentify_config = {
@@ -176,7 +185,12 @@ def deidentify_with_fpe(
176185

177186
# [START dlp_reidentify_fpe]
178187
def reidentify_with_fpe(
179-
project, string, alphabet=None, surrogate_type=None, key_name=None, wrapped_key=None
188+
project,
189+
string,
190+
alphabet=None,
191+
surrogate_type=None,
192+
key_name=None,
193+
wrapped_key=None,
180194
):
181195
"""Uses the Data Loss Prevention API to reidentify sensitive data in a
182196
string that was encrypted by Format Preserving Encryption (FPE).
@@ -333,7 +347,11 @@ def map_data(value):
333347
try:
334348
date = datetime.strptime(value, "%m/%d/%Y")
335349
return {
336-
"date_value": {"year": date.year, "month": date.month, "day": date.day}
350+
"date_value": {
351+
"year": date.year,
352+
"month": date.month,
353+
"day": date.day,
354+
}
337355
}
338356
except ValueError:
339357
return {"string_value": value}
@@ -426,7 +444,8 @@ def write_data(data):
426444

427445
mask_parser = subparsers.add_parser(
428446
"deid_mask",
429-
help="Deidentify sensitive data in a string by masking it with a " "character.",
447+
help="Deidentify sensitive data in a string by masking it with a "
448+
"character.",
430449
)
431450
mask_parser.add_argument(
432451
"--info_types",
@@ -438,7 +457,8 @@ def write_data(data):
438457
default=["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"],
439458
)
440459
mask_parser.add_argument(
441-
"project", help="The Google Cloud project id to use as a parent resource."
460+
"project",
461+
help="The Google Cloud project id to use as a parent resource.",
442462
)
443463
mask_parser.add_argument("item", help="The string to deidentify.")
444464
mask_parser.add_argument(
@@ -471,11 +491,13 @@ def write_data(data):
471491
default=["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"],
472492
)
473493
fpe_parser.add_argument(
474-
"project", help="The Google Cloud project id to use as a parent resource."
494+
"project",
495+
help="The Google Cloud project id to use as a parent resource.",
475496
)
476497
fpe_parser.add_argument(
477498
"item",
478-
help="The string to deidentify. " "Example: string = 'My SSN is 372819127'",
499+
help="The string to deidentify. "
500+
"Example: string = 'My SSN is 372819127'",
479501
)
480502
fpe_parser.add_argument(
481503
"key_name",
@@ -513,11 +535,13 @@ def write_data(data):
513535
"Encryption (FPE).",
514536
)
515537
reid_parser.add_argument(
516-
"project", help="The Google Cloud project id to use as a parent resource."
538+
"project",
539+
help="The Google Cloud project id to use as a parent resource.",
517540
)
518541
reid_parser.add_argument(
519542
"item",
520-
help="The string to deidentify. " "Example: string = 'My SSN is 372819127'",
543+
help="The string to deidentify. "
544+
"Example: string = 'My SSN is 372819127'",
521545
)
522546
reid_parser.add_argument(
523547
"surrogate_type",
@@ -553,7 +577,8 @@ def write_data(data):
553577
help="Deidentify dates in a CSV file by pseudorandomly shifting them.",
554578
)
555579
date_shift_parser.add_argument(
556-
"project", help="The Google Cloud project id to use as a parent resource."
580+
"project",
581+
help="The Google Cloud project id to use as a parent resource.",
557582
)
558583
date_shift_parser.add_argument(
559584
"input_csv_file",

dlp/deid_test.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,10 @@ def test_deidentify_with_mask_masking_character_specified(capsys):
7878

7979
def test_deidentify_with_mask_masking_number_specified(capsys):
8080
deid.deidentify_with_mask(
81-
GCLOUD_PROJECT, HARMFUL_STRING, ["US_SOCIAL_SECURITY_NUMBER"], number_to_mask=7
81+
GCLOUD_PROJECT,
82+
HARMFUL_STRING,
83+
["US_SOCIAL_SECURITY_NUMBER"],
84+
number_to_mask=7,
8285
)
8386

8487
out, _ = capsys.readouterr()

dlp/inspect_content.py

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -220,7 +220,9 @@ def inspect_table(
220220
headers = [{"name": val} for val in data["header"]]
221221
rows = []
222222
for row in data["rows"]:
223-
rows.append({"values": [{"string_value": cell_val} for cell_val in row]})
223+
rows.append(
224+
{"values": [{"string_value": cell_val} for cell_val in row]}
225+
)
224226

225227
table = {}
226228
table["headers"] = headers
@@ -978,7 +980,9 @@ def callback(message):
978980
)
979981

980982
parser_file = subparsers.add_parser("file", help="Inspect a local file.")
981-
parser_file.add_argument("filename", help="The path to the file to inspect.")
983+
parser_file.add_argument(
984+
"filename", help="The path to the file to inspect."
985+
)
982986
parser_file.add_argument(
983987
"--project",
984988
help="The Google Cloud project id to use as a parent resource.",
@@ -1121,10 +1125,12 @@ def callback(message):
11211125
"datastore", help="Inspect files on Google Datastore."
11221126
)
11231127
parser_datastore.add_argument(
1124-
"datastore_project", help="The Google Cloud project id of the target Datastore."
1128+
"datastore_project",
1129+
help="The Google Cloud project id of the target Datastore.",
11251130
)
11261131
parser_datastore.add_argument(
1127-
"kind", help='The kind of the Datastore entity to inspect, e.g. "Person".'
1132+
"kind",
1133+
help='The kind of the Datastore entity to inspect, e.g. "Person".',
11281134
)
11291135
parser_datastore.add_argument(
11301136
"topic_id",
@@ -1200,7 +1206,8 @@ def callback(message):
12001206
"bigquery", help="Inspect files on Google BigQuery."
12011207
)
12021208
parser_bigquery.add_argument(
1203-
"bigquery_project", help="The Google Cloud project id of the target table."
1209+
"bigquery_project",
1210+
help="The Google Cloud project id of the target table.",
12041211
)
12051212
parser_bigquery.add_argument(
12061213
"dataset_id", help="The ID of the target BigQuery dataset."

dlp/inspect_content_test.py

Lines changed: 26 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
# limitations under the License.
1414

1515
import os
16+
import uuid
1617

1718
from gcp_devrel.testing import eventually_consistent
1819
from gcp_devrel.testing.flaky import flaky
@@ -26,16 +27,18 @@
2627
import pytest
2728
import inspect_content
2829

30+
UNIQUE_STRING = str(uuid.uuid4()).split("-")[0]
2931

3032
GCLOUD_PROJECT = os.getenv("GCLOUD_PROJECT")
31-
TEST_BUCKET_NAME = GCLOUD_PROJECT + "-dlp-python-client-test"
33+
TEST_BUCKET_NAME = GCLOUD_PROJECT + "-dlp-python-client-test" + UNIQUE_STRING
3234
RESOURCE_DIRECTORY = os.path.join(os.path.dirname(__file__), "resources")
3335
RESOURCE_FILE_NAMES = ["test.txt", "test.png", "harmless.txt", "accounts.txt"]
34-
TOPIC_ID = "dlp-test"
35-
SUBSCRIPTION_ID = "dlp-test-subscription"
36+
TOPIC_ID = "dlp-test" + UNIQUE_STRING
37+
SUBSCRIPTION_ID = "dlp-test-subscription" + UNIQUE_STRING
3638
DATASTORE_KIND = "DLP test kind"
37-
BIGQUERY_DATASET_ID = "dlp_test_dataset"
38-
BIGQUERY_TABLE_ID = "dlp_test_table"
39+
DATASTORE_NAME = "DLP test object" + UNIQUE_STRING
40+
BIGQUERY_DATASET_ID = "dlp_test_dataset" + UNIQUE_STRING
41+
BIGQUERY_TABLE_ID = "dlp_test_table" + UNIQUE_STRING
3942

4043

4144
@pytest.fixture(scope="module")
@@ -91,7 +94,9 @@ def subscription_id(topic_id):
9194
# Subscribes to a topic.
9295
subscriber = google.cloud.pubsub.SubscriberClient()
9396
topic_path = subscriber.topic_path(GCLOUD_PROJECT, topic_id)
94-
subscription_path = subscriber.subscription_path(GCLOUD_PROJECT, SUBSCRIPTION_ID)
97+
subscription_path = subscriber.subscription_path(
98+
GCLOUD_PROJECT, SUBSCRIPTION_ID
99+
)
95100
try:
96101
subscriber.create_subscription(subscription_path, topic_path)
97102
except google.api_core.exceptions.AlreadyExists:
@@ -108,7 +113,7 @@ def datastore_project():
108113
datastore_client = google.cloud.datastore.Client()
109114

110115
kind = DATASTORE_KIND
111-
name = "DLP test object"
116+
name = DATASTORE_NAME
112117
key = datastore_client.key(kind, name)
113118
item = google.cloud.datastore.Entity(key=key)
114119
item["payload"] = "My name is Gary Smith and my email is [email protected]"
@@ -159,7 +164,10 @@ def test_inspect_string(capsys):
159164
test_string = "My name is Gary Smith and my email is [email protected]"
160165

161166
inspect_content.inspect_string(
162-
GCLOUD_PROJECT, test_string, ["FIRST_NAME", "EMAIL_ADDRESS"], include_quote=True
167+
GCLOUD_PROJECT,
168+
test_string,
169+
["FIRST_NAME", "EMAIL_ADDRESS"],
170+
include_quote=True,
163171
)
164172

165173
out, _ = capsys.readouterr()
@@ -211,7 +219,10 @@ def test_inspect_string_no_results(capsys):
211219
test_string = "Nothing to see here"
212220

213221
inspect_content.inspect_string(
214-
GCLOUD_PROJECT, test_string, ["FIRST_NAME", "EMAIL_ADDRESS"], include_quote=True
222+
GCLOUD_PROJECT,
223+
test_string,
224+
["FIRST_NAME", "EMAIL_ADDRESS"],
225+
include_quote=True,
215226
)
216227

217228
out, _ = capsys.readouterr()
@@ -320,7 +331,9 @@ def test_inspect_gcs_file_with_custom_info_types(
320331

321332

322333
@flaky
323-
def test_inspect_gcs_file_no_results(bucket, topic_id, subscription_id, capsys):
334+
def test_inspect_gcs_file_no_results(
335+
bucket, topic_id, subscription_id, capsys
336+
):
324337
inspect_content.inspect_gcs_file(
325338
GCLOUD_PROJECT,
326339
bucket.name,
@@ -367,7 +380,9 @@ def test_inspect_gcs_multiple_files(bucket, topic_id, subscription_id, capsys):
367380

368381

369382
@flaky
370-
def test_inspect_datastore(datastore_project, topic_id, subscription_id, capsys):
383+
def test_inspect_datastore(
384+
datastore_project, topic_id, subscription_id, capsys
385+
):
371386
@eventually_consistent.call
372387
def _():
373388
inspect_content.inspect_datastore(

dlp/jobs.py

Lines changed: 14 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -65,7 +65,8 @@ def list_dlp_jobs(project, filter_string=None, job_type=None):
6565

6666
# Job type dictionary
6767
job_type_to_int = {
68-
"DLP_JOB_TYPE_UNSPECIFIED": google.cloud.dlp.enums.DlpJobType.DLP_JOB_TYPE_UNSPECIFIED,
68+
"DLP_JOB_TYPE_UNSPECIFIED":
69+
google.cloud.dlp.enums.DlpJobType.DLP_JOB_TYPE_UNSPECIFIED,
6970
"INSPECT_JOB": google.cloud.dlp.enums.DlpJobType.INSPECT_JOB,
7071
"RISK_ANALYSIS_JOB": google.cloud.dlp.enums.DlpJobType.RISK_ANALYSIS_JOB,
7172
}
@@ -122,7 +123,8 @@ def delete_dlp_job(project, job_name):
122123

123124
list_parser = subparsers.add_parser(
124125
"list",
125-
help="List Data Loss Prevention API jobs corresponding to a given " "filter.",
126+
help="List Data Loss Prevention API jobs corresponding to a given "
127+
"filter.",
126128
)
127129
list_parser.add_argument(
128130
"project", help="The project id to use as a parent resource."
@@ -135,7 +137,11 @@ def delete_dlp_job(project, job_name):
135137
list_parser.add_argument(
136138
"-t",
137139
"--type",
138-
choices=["DLP_JOB_TYPE_UNSPECIFIED", "INSPECT_JOB", "RISK_ANALYSIS_JOB"],
140+
choices=[
141+
"DLP_JOB_TYPE_UNSPECIFIED",
142+
"INSPECT_JOB",
143+
"RISK_ANALYSIS_JOB",
144+
],
139145
help='The type of job. API defaults to "INSPECT"',
140146
)
141147

@@ -147,12 +153,15 @@ def delete_dlp_job(project, job_name):
147153
)
148154
delete_parser.add_argument(
149155
"job_name",
150-
help="The name of the DlpJob resource to be deleted. " "Example: X-#####",
156+
help="The name of the DlpJob resource to be deleted. "
157+
"Example: X-#####",
151158
)
152159

153160
args = parser.parse_args()
154161

155162
if args.content == "list":
156-
list_dlp_jobs(args.project, filter_string=args.filter, job_type=args.type)
163+
list_dlp_jobs(
164+
args.project, filter_string=args.filter, job_type=args.type
165+
)
157166
elif args.content == "delete":
158167
delete_dlp_job(args.project, args.job_name)

0 commit comments

Comments
 (0)