Skip to content

Commit f8aeba9

Browse files
lxhfirenkingbusunkim96gcf-merge-on-green[bot]
authored
Add DLP sample code for inspecting with custom regex detector (#4031)
* code sample and test for medical record number custom regex detector * fix linter error * Using f-strings instead of string.format Co-authored-by: Bu Sun Kim <[email protected]> Co-authored-by: Bu Sun Kim <[email protected]> Co-authored-by: gcf-merge-on-green[bot] <60162190+gcf-merge-on-green[bot]@users.noreply.github.com>
1 parent 29da228 commit f8aeba9

File tree

2 files changed

+68
-0
lines changed

2 files changed

+68
-0
lines changed

dlp/custom_infotype.py

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,63 @@ def omit_name_if_also_email(
8383

8484

8585
# [END dlp_omit_name_if_also_email]
86+
87+
# [START dlp_inspect_with_medical_record_number_custom_regex_detector]
88+
def inspect_with_medical_record_number_custom_regex_detector(
    project: str,
    content_string: str,
) -> None:
    """Inspect a string for medical record numbers with a custom regex.

    Sends ``content_string`` to the Data Loss Prevention API using a single
    custom info type named "C_MRN" and prints each finding to the terminal.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        content_string: The string to inspect.

    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Import the client library. The versioned module is imported explicitly
    # because it is the name used below; relying on `import google.cloud.dlp`
    # to make `google.cloud.dlp_v2` resolvable depends on an import side
    # effect.
    import google.cloud.dlp_v2

    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Construct a custom regex detector info type called "C_MRN",
    # with ###-#-##### pattern, where each # represents a digit from 1 to 9
    # (the character class deliberately excludes 0).
    # The detector has a detection likelihood of POSSIBLE.
    custom_info_types = [
        {
            "info_type": {"name": "C_MRN"},
            "regex": {"pattern": "[1-9]{3}-[1-9]{1}-[1-9]{5}"},
            "likelihood": "POSSIBLE",
        }
    ]

    # Construct the configuration dictionary with the custom regex info type.
    # `include_quote` asks the API to return the matched text; without it
    # `finding.quote` comes back empty and the "Quote:" line below can never
    # be printed.
    inspect_config = {
        "custom_info_types": custom_info_types,
        "include_quote": True,
    }

    # Construct the `item`.
    item = {"value": content_string}

    # Convert the project id into a full resource id.
    # NOTE(review): `project_path` and the positional `inspect_content` call
    # appear to target the pre-2.0 client surface - confirm against the
    # pinned google-cloud-dlp version before upgrading.
    parent = dlp.project_path(project)

    # Call the API.
    response = dlp.inspect_content(parent, inspect_config, item)

    # Print out the results.
    if response.result.findings:
        for finding in response.result.findings:
            # A missing or empty quote is skipped rather than treated as an
            # error, matching the original best-effort behaviour.
            quote = getattr(finding, "quote", None)
            if quote:
                print(f"Quote: {quote}")
            print(f"Info type: {finding.info_type.name}")
            print(f"Likelihood: {finding.likelihood}")
    else:
        print("No findings.")
144+
145+
# [END dlp_inspect_with_medical_record_number_custom_regex_detector]

dlp/custom_infotype_test.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,11 @@ def test_omit_name_if_also_email(capsys):
2626
# Ensure we found only EMAIL_ADDRESS, and not PERSON_NAME.
2727
assert len(info_types) == 1
2828
assert info_types[0] == "EMAIL_ADDRESS"
29+
30+
31+
def test_inspect_with_medical_record_number_custom_regex_detector(capsys):
    """Check that a string containing an MRN-shaped token yields a C_MRN finding."""
    sample_text = "Patients MRN 444-5-22222"
    custom_infotype.inspect_with_medical_record_number_custom_regex_detector(
        GCLOUD_PROJECT, sample_text
    )

    # The sample prints its findings, so the assertion is made on stdout.
    captured = capsys.readouterr()
    assert "Info type: C_MRN" in captured.out

0 commit comments

Comments
 (0)