Skip to content

Commit c8cc8ba

Browse files
authored
feat: crypto_deterministic_config (#108) (#119)
Example of Crypto Deterministic Config using https://cloud.google.com/dlp/docs/pseudonymization#supported-methods to resolve googleapis/python-dlp#108
1 parent ab0c1de commit c8cc8ba

File tree

2 files changed

+201
-0
lines changed

2 files changed

+201
-0
lines changed

dlp/snippets/deid.py

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,92 @@ def deidentify_with_fpe(
288288

289289
# [END dlp_deidentify_fpe]
290290

291+
# [START dlp_deidentify_deterministic]
292+
def deidentify_with_deterministic(
    project,
    input_str,
    info_types,
    surrogate_type=None,
    key_name=None,
    wrapped_key=None,
):
    """Deidentifies sensitive data in a string using deterministic encryption.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        input_str: The string to deidentify (will be treated as text).
        info_types: An iterable of info type names to look for, e.g.
            ['US_SOCIAL_SECURITY_NUMBER']. Each name is sent to the API as
            an entry of the inspect configuration.
        surrogate_type: The name of the surrogate custom info type to use. Only
            necessary if you want to reverse the deidentification process. Can
            be essentially any arbitrary string, as long as it doesn't appear
            in your dataset otherwise.
        key_name: The name of the Cloud KMS key used to encrypt ('wrap') the
            AES-256 key. Example:
            key_name = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/
            keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'
        wrapped_key: The base64-encoded, encrypted ('wrapped') AES-256 key to
            use. This key should be encrypted using the Cloud KMS key
            specified by key_name.

    Returns:
        None; the response from the API is printed to the terminal.

    Raises:
        ValueError: If key_name or wrapped_key is missing or empty.
    """
    # Both values are required by the kms_wrapped crypto key even though the
    # signature defaults them to None. Fail fast with a clear message instead
    # of creating a client and then failing inside base64.b64decode.
    if not key_name or not wrapped_key:
        raise ValueError(
            "key_name and wrapped_key are required for deterministic encryption."
        )

    import base64

    # Import the client library
    import google.cloud.dlp

    # Instantiate a client
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Convert the project id into a full resource id.
    parent = f"projects/{project}"

    # The wrapped key is base64-encoded, but the library expects a binary
    # string, so decode it here.
    wrapped_key = base64.b64decode(wrapped_key)

    # Construct Deterministic encryption configuration dictionary
    crypto_replace_deterministic_config = {
        "crypto_key": {
            "kms_wrapped": {"wrapped_key": wrapped_key, "crypto_key_name": key_name}
        },
    }

    # Add surrogate type
    if surrogate_type:
        crypto_replace_deterministic_config["surrogate_info_type"] = {
            "name": surrogate_type
        }

    # Construct inspect configuration dictionary
    inspect_config = {"info_types": [{"name": info_type} for info_type in info_types]}

    # Construct deidentify configuration dictionary
    deidentify_config = {
        "info_type_transformations": {
            "transformations": [
                {
                    "primitive_transformation": {
                        "crypto_deterministic_config": crypto_replace_deterministic_config
                    }
                }
            ]
        }
    }

    # Convert string to item
    item = {"value": input_str}

    # Call the API
    response = dlp.deidentify_content(
        request={
            "parent": parent,
            "deidentify_config": deidentify_config,
            "inspect_config": inspect_config,
            "item": item,
        }
    )

    # Print results
    print(response.item.value)
374+
375+
# [END dlp_deidentify_deterministic]
376+
291377

292378
# [START dlp_reidentify_fpe]
293379
def reidentify_with_fpe(
@@ -380,6 +466,90 @@ def reidentify_with_fpe(
380466
# [END dlp_reidentify_fpe]
381467

382468

469+
# [START dlp_reidentify_deterministic]
470+
def reidentify_with_deterministic(
    project,
    input_str,
    surrogate_type=None,
    key_name=None,
    wrapped_key=None,
):
    """Reidentifies content that was deidentified with deterministic encryption.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        input_str: The string to reidentify (will be treated as text).
        surrogate_type: The name of the surrogate custom info type used
            during the encryption process.
        key_name: The name of the Cloud KMS key used to encrypt ('wrap') the
            AES-256 key. Example:
            key_name = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/
            keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'
        wrapped_key: The base64-encoded, encrypted ('wrapped') AES-256 key to
            use. This key should be encrypted using the Cloud KMS key
            specified by key_name.

    Returns:
        None; the response from the API is printed to the terminal.

    Raises:
        ValueError: If surrogate_type, key_name or wrapped_key is missing or
            empty.
    """
    # All three values are used unconditionally below even though the
    # signature defaults them to None. Fail fast with a clear message rather
    # than sending {"name": None} or failing inside base64.b64decode.
    if not surrogate_type or not key_name or not wrapped_key:
        raise ValueError(
            "surrogate_type, key_name and wrapped_key are required for "
            "deterministic reidentification."
        )

    import base64

    # Import the client library
    import google.cloud.dlp

    # Instantiate a client
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Convert the project id into a full resource id.
    parent = f"projects/{project}"

    # The wrapped key is base64-encoded, but the library expects a binary
    # string, so decode it here.
    wrapped_key = base64.b64decode(wrapped_key)

    # Construct reidentify Configuration
    reidentify_config = {
        "info_type_transformations": {
            "transformations": [
                {
                    "primitive_transformation": {
                        "crypto_deterministic_config": {
                            "crypto_key": {
                                "kms_wrapped": {
                                    "wrapped_key": wrapped_key,
                                    "crypto_key_name": key_name,
                                }
                            },
                            "surrogate_info_type": {"name": surrogate_type},
                        }
                    }
                }
            ]
        }
    }

    # Declare the surrogate as a custom info type so the API inspects the
    # input for it.
    inspect_config = {
        "custom_info_types": [
            {"info_type": {"name": surrogate_type}, "surrogate_type": {}}
        ]
    }

    # Convert string to item
    item = {"value": input_str}

    # Call the API
    response = dlp.reidentify_content(
        request={
            "parent": parent,
            "reidentify_config": reidentify_config,
            "inspect_config": inspect_config,
            "item": item,
        }
    )

    # Print results
    print(response.item.value)
549+
550+
# [END dlp_reidentify_deterministic]
551+
552+
383553
# [START dlp_deidentify_free_text_with_fpe_using_surrogate]
384554
def deidentify_free_text_with_fpe_using_surrogate(
385555
project,

dlp/snippets/deid_test.py

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,21 @@ def test_deidentify_with_fpe(capsys):
123123
assert "372819127" not in out
124124

125125

126+
def test_deidentify_with_deterministic(capsys):
    """Deterministic deidentification keeps the surrounding text and hides the SSN."""
    requested_info_types = ["US_SOCIAL_SECURITY_NUMBER"]

    deid.deidentify_with_deterministic(
        GCLOUD_PROJECT,
        HARMFUL_STRING,
        requested_info_types,
        surrogate_type=SURROGATE_TYPE,
        key_name=KEY_NAME,
        wrapped_key=WRAPPED_KEY,
    )

    # The sample prints its result, so inspect the captured stdout.
    printed = capsys.readouterr()[0]
    assert "My SSN is" in printed
    assert "372819127" not in printed
139+
140+
126141
def test_deidentify_with_fpe_uses_surrogate_info_types(capsys):
127142
deid.deidentify_with_fpe(
128143
GCLOUD_PROJECT,
@@ -207,6 +222,22 @@ def test_reidentify_with_fpe(capsys):
207222
assert "731997681" not in out
208223

209224

225+
def test_reidentify_with_deterministic(capsys):
    """Deterministic reidentification removes the surrogate token from the output."""
    tokenized_text = "My SSN is SSN_TOKEN(36):ATeRUd3WWnAHHFtjtl1bv+CT09FZ7hyqNas="

    deid.reidentify_with_deterministic(
        GCLOUD_PROJECT,
        tokenized_text,
        surrogate_type=SURROGATE_TYPE,
        key_name=KEY_NAME,
        wrapped_key=WRAPPED_KEY,
    )

    # The sample prints its result, so inspect the captured stdout.
    printed = capsys.readouterr()[0]

    assert "SSN_TOKEN(" not in printed
239+
240+
210241
def test_deidentify_free_text_with_fpe_using_surrogate(capsys):
211242
labeled_fpe_string = "My phone number is 4359916732"
212243

0 commit comments

Comments
 (0)