Skip to content

Commit b7bd227

Browse files
add code sample and test for medical number custom detector with hotwords (#4071)
Co-authored-by: Kurtis Van Gent <[email protected]>
1 parent 88ca655 commit b7bd227

File tree

2 files changed

+105
-0
lines changed

2 files changed

+105
-0
lines changed

dlp/custom_infotype.py

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,3 +215,88 @@ def inspect_with_medical_record_number_custom_regex_detector(
215215
print("No findings.")
216216

217217
# [END dlp_inspect_with_medical_record_number_custom_regex_detector]
218+
219+
220+
# [START dlp_inspect_with_medical_record_number_w_custom_hotwords]
221+
def inspect_with_medical_record_number_w_custom_hotwords(
    project,
    content_string,
):
    """Inspect a string for medical record numbers, boosted by hotwords.

    Sends `content_string` to the Data Loss Prevention API with a custom
    regex detector named "C_MRN" and a hotword rule that raises the
    likelihood of a finding when "mrn" or "medical" appears shortly
    before it.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        content_string: The string to inspect.

    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Import the client library.
    import google.cloud.dlp

    # Instantiate a client.
    client = google.cloud.dlp_v2.DlpServiceClient()

    # Custom regex detector "C_MRN" for the ###-#-##### pattern, where each
    # # is a digit from 1 to 9. Matches start at a likelihood of POSSIBLE.
    mrn_detector = {
        "info_type": {"name": "C_MRN"},
        "regex": {"pattern": "[1-9]{3}-[1-9]{1}-[1-9]{5}"},
        "likelihood": "POSSIBLE",
    }

    # Hotword rule: when "mrn" or "medical" (case-insensitive) occurs within
    # the 10-character window preceding a finding, set the finding's
    # likelihood to VERY_LIKELY.
    mrn_hotword_rule = {
        "hotword_regex": {"pattern": "(?i)(mrn|medical)(?-i)"},
        "likelihood_adjustment": {"fixed_likelihood": "VERY_LIKELY"},
        "proximity": {"window_before": 10},
    }

    # Build the inspection configuration: the custom detector plus a rule
    # set that applies the hotword rule to C_MRN findings only.
    inspect_config = {
        "custom_info_types": [mrn_detector],
        "rule_set": [
            {
                "info_types": [{"name": "C_MRN"}],
                "rules": [{"hotword_rule": mrn_hotword_rule}],
            }
        ],
    }

    # Construct the `item` to inspect.
    item = {"value": content_string}

    # Convert the project id into a full resource id.
    parent = client.project_path(project)

    # Call the API.
    response = client.inspect_content(parent, inspect_config, item)

    # Print out the results.
    findings = response.result.findings
    if not findings:
        print("No findings.")
        return

    for finding in findings:
        # The quote is optional; tolerate findings that do not expose it.
        try:
            quote = finding.quote
        except AttributeError:
            quote = None
        if quote:
            print(f"Quote: {quote}")
        print(f"Info type: {finding.info_type.name}")
        print(f"Likelihood: {finding.likelihood}")
301+
302+
# [END dlp_inspect_with_medical_record_number_w_custom_hotwords]

dlp/custom_infotype_test.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,3 +43,23 @@ def test_inspect_with_medical_record_number_custom_regex_detector(capsys):
4343

4444
out, _ = capsys.readouterr()
4545
assert "Info type: C_MRN" in out
46+
47+
48+
def test_inspect_with_medical_record_number_w_custom_hotwords_no_hotwords(
    capsys,
):
    """Without a hotword, the C_MRN finding is printed with likelihood 3."""
    text = "just a number 444-5-22222"
    custom_infotype.inspect_with_medical_record_number_w_custom_hotwords(
        GCLOUD_PROJECT, text
    )

    # Only stdout is checked; stderr is ignored.
    printed = capsys.readouterr().out
    assert "Info type: C_MRN" in printed
    assert "Likelihood: 3" in printed
56+
57+
58+
def test_inspect_with_medical_record_number_w_custom_hotwords_has_hotwords(
    capsys,
):
    """With the "MRN" hotword nearby, the finding is printed with likelihood 5."""
    text = "Patients MRN 444-5-22222"
    custom_infotype.inspect_with_medical_record_number_w_custom_hotwords(
        GCLOUD_PROJECT, text
    )

    # Only stdout is checked; stderr is ignored.
    printed = capsys.readouterr().out
    assert "Info type: C_MRN" in printed
    assert "Likelihood: 5" in printed

0 commit comments

Comments
 (0)