@@ -215,3 +215,88 @@ def inspect_with_medical_record_number_custom_regex_detector(
215
215
print ("No findings." )
216
216
217
217
# [END dlp_inspect_with_medical_record_number_custom_regex_detector]
218
+
219
+
220
+ # [START dlp_inspect_with_medical_record_number_w_custom_hotwords]
221
def inspect_with_medical_record_number_w_custom_hotwords(
    project,
    content_string,
):
    """Inspects a string for medical record numbers, boosted by hotwords.

    A custom regex info type named "C_MRN" is sent to the Data Loss
    Prevention API together with a hotword rule, so that a match appearing
    shortly after "mrn" or "medical" is reported as VERY_LIKELY instead of
    the detector's base likelihood of POSSIBLE.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        content_string: The string to inspect.

    Returns:
        None; the findings (or "No findings.") are printed to the terminal.
    """

    # Import the client library.
    import google.cloud.dlp

    # Create the DLP service client used for the request.
    client = google.cloud.dlp_v2.DlpServiceClient()

    # Assemble the whole inspection configuration in one literal:
    #  * "custom_info_types" declares the "C_MRN" regex detector, matching
    #    ###-#-##### where every # is a digit from 1 to 9, reported with a
    #    likelihood of POSSIBLE.
    #  * "rule_set" attaches a hotword rule to "C_MRN": when "mrn" or
    #    "medical" (case-insensitive) occurs within the 10-character window
    #    preceding a finding, its likelihood is fixed to VERY_LIKELY.
    inspect_config = {
        "custom_info_types": [
            {
                "info_type": {"name": "C_MRN"},
                "regex": {"pattern": "[1-9]{3}-[1-9]{1}-[1-9]{5}"},
                "likelihood": "POSSIBLE",
            }
        ],
        "rule_set": [
            {
                "info_types": [{"name": "C_MRN"}],
                "rules": [
                    {
                        "hotword_rule": {
                            "hotword_regex": {
                                "pattern": "(?i)(mrn|medical)(?-i)"
                            },
                            "likelihood_adjustment": {
                                "fixed_likelihood": "VERY_LIKELY"
                            },
                            "proximity": {"window_before": 10},
                        }
                    }
                ],
            }
        ],
    }

    # Wrap the text to inspect in a content item.
    item = {"value": content_string}

    # Expand the project id into the full parent resource name.
    parent = client.project_path(project)

    # Send the inspection request.
    response = client.inspect_content(parent, inspect_config, item)

    # Report the outcome; bail out early when nothing was detected.
    findings = response.result.findings
    if not findings:
        print("No findings.")
        return

    for finding in findings:
        # The quote is optional output: skip it when it is empty or when the
        # finding does not expose the attribute at all.
        try:
            if finding.quote:
                print(f"Quote: {finding.quote} ")
        except AttributeError:
            pass
        print(f"Info type: {finding.info_type.name} ")
        print(f"Likelihood: {finding.likelihood} ")
301
+
302
+ # [END dlp_inspect_with_medical_record_number_w_custom_hotwords]
0 commit comments