Skip to content

Commit 88bb66c

Browse files
feat!: migrate to use microgen (#34)
* feat!: migrate to use microgen * Update UPGRADING.md Co-authored-by: Bu Sun Kim <[email protected]> Co-authored-by: Bu Sun Kim <[email protected]>
1 parent 500d18e commit 88bb66c

18 files changed

+377
-521
lines changed

dlp/snippets/custom_infotype.py

Lines changed: 49 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,7 @@
2020

2121
# [START dlp_omit_name_if_also_email]
2222
def omit_name_if_also_email(
23-
project,
24-
content_string,
23+
project, content_string,
2524
):
2625
"""Matches PERSON_NAME and EMAIL_ADDRESS, but not both.
2726
@@ -51,33 +50,34 @@ def omit_name_if_also_email(
5150
# the total number of findings when there is a large overlap between different
5251
# infoTypes.
5352
inspect_config = {
54-
"info_types":
55-
info_types_to_locate,
56-
"rule_set": [{
57-
"info_types": [{
58-
"name": "PERSON_NAME"
59-
}],
60-
"rules": [{
61-
"exclusion_rule": {
62-
"exclude_info_types": {
63-
"info_types": [{
64-
"name": "EMAIL_ADDRESS"
65-
}]
66-
},
67-
"matching_type": "MATCHING_TYPE_PARTIAL_MATCH"
68-
}
69-
}]
70-
}]
53+
"info_types": info_types_to_locate,
54+
"rule_set": [
55+
{
56+
"info_types": [{"name": "PERSON_NAME"}],
57+
"rules": [
58+
{
59+
"exclusion_rule": {
60+
"exclude_info_types": {
61+
"info_types": [{"name": "EMAIL_ADDRESS"}]
62+
},
63+
"matching_type": google.cloud.dlp_v2.MatchingType.MATCHING_TYPE_PARTIAL_MATCH,
64+
}
65+
}
66+
],
67+
}
68+
],
7169
}
7270

7371
# Construct the `item`.
7472
item = {"value": content_string}
7573

7674
# Convert the project id into a full resource id.
77-
parent = dlp.project_path(project)
75+
parent = f"projects/{project}"
7876

7977
# Call the API.
80-
response = dlp.inspect_content(parent, inspect_config, item)
78+
response = dlp.inspect_content(
79+
request={"parent": parent, "inspect_config": inspect_config, "item": item}
80+
)
8181

8282
return [f.info_type.name for f in response.result.findings]
8383

@@ -87,9 +87,7 @@ def omit_name_if_also_email(
8787

8888
# [START inspect_with_person_name_w_custom_hotword]
8989
def inspect_with_person_name_w_custom_hotword(
90-
project,
91-
content_string,
92-
custom_hotword="patient"
90+
project, content_string, custom_hotword="patient"
9391
):
9492
"""Uses the Data Loss Prevention API to increase likelihood for matches on
9593
PERSON_NAME if the user specified custom hotword is present. Only
@@ -114,7 +112,9 @@ def inspect_with_person_name_w_custom_hotword(
114112
# window preceding the PII finding.
115113
hotword_rule = {
116114
"hotword_regex": {"pattern": custom_hotword},
117-
"likelihood_adjustment": {"fixed_likelihood": "VERY_LIKELY"},
115+
"likelihood_adjustment": {
116+
"fixed_likelihood": google.cloud.dlp_v2.Likelihood.VERY_LIKELY
117+
},
118118
"proximity": {"window_before": 50},
119119
}
120120

@@ -128,17 +128,19 @@ def inspect_with_person_name_w_custom_hotword(
128128
# Construct the configuration dictionary with the custom regex info type.
129129
inspect_config = {
130130
"rule_set": rule_set,
131-
"min_likelihood": "VERY_LIKELY",
131+
"min_likelihood": google.cloud.dlp_v2.Likelihood.VERY_LIKELY,
132132
}
133133

134134
# Construct the `item`.
135135
item = {"value": content_string}
136136

137137
# Convert the project id into a full resource id.
138-
parent = dlp.project_path(project)
138+
parent = f"projects/{project}"
139139

140140
# Call the API.
141-
response = dlp.inspect_content(parent, inspect_config, item)
141+
response = dlp.inspect_content(
142+
request={"parent": parent, "inspect_config": inspect_config, "item": item}
143+
)
142144

143145
# Print out the results.
144146
if response.result.findings:
@@ -153,13 +155,13 @@ def inspect_with_person_name_w_custom_hotword(
153155
else:
154156
print("No findings.")
155157

158+
156159
# [END inspect_with_person_name_w_custom_hotword]
157160

158161

159162
# [START dlp_inspect_with_medical_record_number_custom_regex_detector]
160163
def inspect_with_medical_record_number_custom_regex_detector(
161-
project,
162-
content_string,
164+
project, content_string,
163165
):
164166
"""Uses the Data Loss Prevention API to analyze string with medical record
165167
number custom regex detector
@@ -183,7 +185,7 @@ def inspect_with_medical_record_number_custom_regex_detector(
183185
{
184186
"info_type": {"name": "C_MRN"},
185187
"regex": {"pattern": "[1-9]{3}-[1-9]{1}-[1-9]{5}"},
186-
"likelihood": "POSSIBLE",
188+
"likelihood": google.cloud.dlp_v2.Likelihood.POSSIBLE,
187189
}
188190
]
189191

@@ -196,10 +198,12 @@ def inspect_with_medical_record_number_custom_regex_detector(
196198
item = {"value": content_string}
197199

198200
# Convert the project id into a full resource id.
199-
parent = dlp.project_path(project)
201+
parent = f"projects/{project}"
200202

201203
# Call the API.
202-
response = dlp.inspect_content(parent, inspect_config, item)
204+
response = dlp.inspect_content(
205+
request={"parent": parent, "inspect_config": inspect_config, "item": item}
206+
)
203207

204208
# Print out the results.
205209
if response.result.findings:
@@ -214,13 +218,13 @@ def inspect_with_medical_record_number_custom_regex_detector(
214218
else:
215219
print("No findings.")
216220

221+
217222
# [END dlp_inspect_with_medical_record_number_custom_regex_detector]
218223

219224

220225
# [START dlp_inspect_with_medical_record_number_w_custom_hotwords]
221226
def inspect_with_medical_record_number_w_custom_hotwords(
222-
project,
223-
content_string,
227+
project, content_string,
224228
):
225229
"""Uses the Data Loss Prevention API to analyze string with medical record
226230
number custom regex detector, with custom hotwords rules to boost finding
@@ -245,30 +249,23 @@ def inspect_with_medical_record_number_w_custom_hotwords(
245249
{
246250
"info_type": {"name": "C_MRN"},
247251
"regex": {"pattern": "[1-9]{3}-[1-9]{1}-[1-9]{5}"},
248-
"likelihood": "POSSIBLE",
252+
"likelihood": google.cloud.dlp_v2.Likelihood.POSSIBLE,
249253
}
250254
]
251255

252256
# Construct a rule set with hotwords "mrn" and "medical", with a likelihood
253257
# boost to VERY_LIKELY when hotwords are present within the 10 character-
254258
# window preceding the PII finding.
255259
hotword_rule = {
256-
"hotword_regex": {
257-
"pattern": "(?i)(mrn|medical)(?-i)"
258-
},
260+
"hotword_regex": {"pattern": "(?i)(mrn|medical)(?-i)"},
259261
"likelihood_adjustment": {
260-
"fixed_likelihood": "VERY_LIKELY"
262+
"fixed_likelihood": google.cloud.dlp_v2.Likelihood.VERY_LIKELY
261263
},
262-
"proximity": {
263-
"window_before": 10
264-
}
264+
"proximity": {"window_before": 10},
265265
}
266266

267267
rule_set = [
268-
{
269-
"info_types": [{"name": "C_MRN"}],
270-
"rules": [{"hotword_rule": hotword_rule}],
271-
}
268+
{"info_types": [{"name": "C_MRN"}], "rules": [{"hotword_rule": hotword_rule}]}
272269
]
273270

274271
# Construct the configuration dictionary with the custom regex info type.
@@ -281,10 +278,12 @@ def inspect_with_medical_record_number_w_custom_hotwords(
281278
item = {"value": content_string}
282279

283280
# Convert the project id into a full resource id.
284-
parent = dlp.project_path(project)
281+
parent = f"projects/{project}"
285282

286283
# Call the API.
287-
response = dlp.inspect_content(parent, inspect_config, item)
284+
response = dlp.inspect_content(
285+
request={"parent": parent, "inspect_config": inspect_config, "item": item}
286+
)
288287

289288
# Print out the results.
290289
if response.result.findings:
@@ -299,4 +298,5 @@ def inspect_with_medical_record_number_w_custom_hotwords(
299298
else:
300299
print("No findings.")
301300

301+
302302
# [END dlp_inspect_with_medical_record_number_w_custom_hotwords]

dlp/snippets/custom_infotype_test.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@
2121

2222
def test_omit_name_if_also_email(capsys):
2323
info_types = custom_infotype.omit_name_if_also_email(
24-
GCLOUD_PROJECT, "[email protected]")
24+
GCLOUD_PROJECT, "[email protected]"
25+
)
2526

2627
# Ensure we found only EMAIL_ADDRESS, and not PERSON_NAME.
2728
assert len(info_types) == 1
@@ -30,7 +31,8 @@ def test_omit_name_if_also_email(capsys):
3031

3132
def test_inspect_with_person_name_w_custom_hotword(capsys):
3233
custom_infotype.inspect_with_person_name_w_custom_hotword(
33-
GCLOUD_PROJECT, "patient's name is John Doe.", "patient")
34+
GCLOUD_PROJECT, "patient's name is John Doe.", "patient"
35+
)
3436

3537
out, _ = capsys.readouterr()
3638
assert "Info type: PERSON_NAME" in out
@@ -39,26 +41,27 @@ def test_inspect_with_person_name_w_custom_hotword(capsys):
3941

4042
def test_inspect_with_medical_record_number_custom_regex_detector(capsys):
4143
custom_infotype.inspect_with_medical_record_number_custom_regex_detector(
42-
GCLOUD_PROJECT, "Patients MRN 444-5-22222")
44+
GCLOUD_PROJECT, "Patients MRN 444-5-22222"
45+
)
4346

4447
out, _ = capsys.readouterr()
4548
assert "Info type: C_MRN" in out
4649

4750

48-
def test_inspect_with_medical_record_number_w_custom_hotwords_no_hotwords(
49-
capsys):
51+
def test_inspect_with_medical_record_number_w_custom_hotwords_no_hotwords(capsys):
5052
custom_infotype.inspect_with_medical_record_number_w_custom_hotwords(
51-
GCLOUD_PROJECT, "just a number 444-5-22222")
53+
GCLOUD_PROJECT, "just a number 444-5-22222"
54+
)
5255

5356
out, _ = capsys.readouterr()
5457
assert "Info type: C_MRN" in out
5558
assert "Likelihood: 3" in out
5659

5760

58-
def test_inspect_with_medical_record_number_w_custom_hotwords_has_hotwords(
59-
capsys):
61+
def test_inspect_with_medical_record_number_w_custom_hotwords_has_hotwords(capsys):
6062
custom_infotype.inspect_with_medical_record_number_w_custom_hotwords(
61-
GCLOUD_PROJECT, "Patients MRN 444-5-22222")
63+
GCLOUD_PROJECT, "Patients MRN 444-5-22222"
64+
)
6265

6366
out, _ = capsys.readouterr()
6467
assert "Info type: C_MRN" in out

0 commit comments

Comments
 (0)