Skip to content

Commit 0ff2d31

Browse files
authored
dlp: add inspect string sample, person_name w/ custom hotword certainty boosting (#4081)
1 parent 2efc5fc commit 0ff2d31

File tree

2 files changed

+81
-0
lines changed

2 files changed

+81
-0
lines changed

dlp/custom_infotype.py

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,78 @@ def omit_name_if_also_email(
8484

8585
# [END dlp_omit_name_if_also_email]
8686

87+
88+
# [START inspect_with_person_name_w_custom_hotword]
89+
def inspect_with_person_name_w_custom_hotword(
    project,
    content_string,
    custom_hotword="patient"
):
    """Inspect a string for PERSON_NAME findings boosted by a custom hotword.

    Uses the Data Loss Prevention API to increase the likelihood of
    PERSON_NAME matches when the caller-specified hotword is present. Only
    findings with the increased likelihood are reported, because the minimum
    likelihood threshold is set to VERY_LIKELY.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        content_string: The string to inspect.
        custom_hotword: The custom hotword used for likelihood boosting. It
            is sent to the API unchanged as the `hotword_regex` pattern.

    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Import the client library.
    import google.cloud.dlp

    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Construct a hotword rule from the caller-provided hotword: the
    # likelihood is fixed to VERY_LIKELY when the hotword is present within
    # the 50-character window preceding the PII finding.
    hotword_rule = {
        "hotword_regex": {"pattern": custom_hotword},
        "likelihood_adjustment": {"fixed_likelihood": "VERY_LIKELY"},
        "proximity": {"window_before": 50},
    }

    # Apply the hotword rule to PERSON_NAME findings only.
    rule_set = [
        {
            "info_types": [{"name": "PERSON_NAME"}],
            "rules": [{"hotword_rule": hotword_rule}],
        }
    ]

    # Construct the configuration dictionary with the hotword rule set.
    # `include_quote` is requested so the quote printed below is actually
    # populated in the response; without it the quote branch never fires.
    inspect_config = {
        "rule_set": rule_set,
        "min_likelihood": "VERY_LIKELY",
        "include_quote": True,
    }

    # Construct the `item`.
    item = {"value": content_string}

    # Convert the project id into a full resource id.
    parent = dlp.project_path(project)

    # Call the API.
    response = dlp.inspect_content(parent, inspect_config, item)

    # Print out the results.
    findings = response.result.findings
    if not findings:
        print("No findings.")
        return

    for finding in findings:
        # Tolerate a finding object without a `quote` attribute instead of
        # swallowing AttributeError around the whole print.
        quote = getattr(finding, "quote", None)
        if quote:
            print(f"Quote: {quote}")
        print(f"Info type: {finding.info_type.name}")
        print(f"Likelihood: {finding.likelihood}")
155+
156+
# [END inspect_with_person_name_w_custom_hotword]
157+
158+
87159
# [START dlp_inspect_with_medical_record_number_custom_regex_detector]
88160
def inspect_with_medical_record_number_custom_regex_detector(
89161
project,

dlp/custom_infotype_test.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,15 @@ def test_omit_name_if_also_email(capsys):
2828
assert info_types[0] == "EMAIL_ADDRESS"
2929

3030

31+
def test_inspect_with_person_name_w_custom_hotword(capsys):
    """Run the hotword sample and check the printed PERSON_NAME finding.

    The sample is invoked with a sentence that contains the hotword
    "patient" ahead of a name; the captured stdout must report the
    PERSON_NAME info type with a likelihood value of 5.
    """
    sample_text = "patient's name is John Doe."
    custom_infotype.inspect_with_person_name_w_custom_hotword(
        GCLOUD_PROJECT, sample_text, "patient"
    )

    # Only stdout matters here; stderr is ignored.
    stdout = capsys.readouterr().out
    for expected in ("Info type: PERSON_NAME", "Likelihood: 5"):
        assert expected in stdout
38+
39+
3140
def test_inspect_with_medical_record_number_custom_regex_detector(capsys):
3241
custom_infotype.inspect_with_medical_record_number_custom_regex_detector(
3342
GCLOUD_PROJECT, "Patients MRN 444-5-22222")

0 commit comments

Comments
 (0)