Skip to content
This repository was archived by the owner on Dec 10, 2023. It is now read-only.

Commit 4546538

Browse files
authored
dlp: Add sample for reid w/ fpe using surrogate type and unwrapped security key [(#4051)](GoogleCloudPlatform/python-docs-samples#4051)
* add code sample and test for reid w/ fpe using surrogate type and unwrapped security key * refactor reidentify_config
1 parent fa8478a commit 4546538

File tree

2 files changed

+99
-0
lines changed

2 files changed

+99
-0
lines changed

samples/snippets/deid.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,6 +394,86 @@ def reidentify_with_fpe(
394394

395395
# [END dlp_reidentify_fpe]
396396

397+
# [START dlp_reidentify_free_text_with_fpe_using_surrogate]
398+
def reidentify_free_text_with_fpe_using_surrogate(
    project,
    input_str,
    alphabet="NUMERIC",
    surrogate_type="PHONE_TOKEN",
    unwrapped_key="YWJjZGVmZ2hpamtsbW5vcA==",
):
    """Uses the Data Loss Prevention API to reidentify sensitive data in a
    string that was encrypted by Format Preserving Encryption (FPE) with
    surrogate type. The encryption is performed with an unwrapped key.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        input_str: The string to reidentify (will be treated as text).
        alphabet: The set of characters the sensitive values were replaced
            with during de-identification. For more information, see
            https://cloud.google.com/dlp/docs/reference/rest/v2/projects.deidentifyTemplates#ffxcommonnativealphabet
        surrogate_type: The name of the surrogate custom info type that was
            used during the encryption process.
        unwrapped_key: The base64-encoded AES key to use. The default value
            decodes to 16 bytes, i.e. a 128-bit key.

    Returns:
        None; the response from the API is printed to the terminal.
    """
    # Import the standard library and the client library
    import base64

    import google.cloud.dlp

    # Instantiate a client
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Convert the project id into a full resource id.
    parent = dlp.project_path(project)

    # The unwrapped key is base64-encoded, but the library expects a binary
    # string, so decode it here. A separate name keeps the str -> bytes
    # conversion visible instead of silently rebinding the parameter.
    key_bytes = base64.b64decode(unwrapped_key)

    # Construct the reidentify config: the same FPE transformation that was
    # used to de-identify the text, keyed on the surrogate info type.
    transformation = {
        "primitive_transformation": {
            "crypto_replace_ffx_fpe_config": {
                "crypto_key": {"unwrapped": {"key": key_bytes}},
                "common_alphabet": alphabet,
                "surrogate_info_type": {"name": surrogate_type},
            }
        }
    }

    reidentify_config = {
        "info_type_transformations": {"transformations": [transformation]}
    }

    # Declare the surrogate as a custom info type in the inspect config that
    # is sent along with the reidentify request.
    inspect_config = {
        "custom_info_types": [
            {"info_type": {"name": surrogate_type}, "surrogate_type": {}}
        ]
    }

    # Convert string to item
    item = {"value": input_str}

    # Call the API
    response = dlp.reidentify_content(
        parent,
        inspect_config=inspect_config,
        reidentify_config=reidentify_config,
        item=item,
    )

    # Print results
    print(response.item.value)
473+
474+
475+
# [END dlp_reidentify_free_text_with_fpe_using_surrogate]
476+
397477

398478
# [START dlp_deidentify_date_shift]
399479
def deidentify_with_date_shift(

samples/snippets/deid_test.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
HARMFUL_STRING = "My SSN is 372819127"
2424
HARMLESS_STRING = "My favorite color is blue"
2525
GCLOUD_PROJECT = os.getenv("GOOGLE_CLOUD_PROJECT")
26+
UNWRAPPED_KEY = "YWJjZGVmZ2hpamtsbW5vcA=="
2627
WRAPPED_KEY = (
2728
"CiQAz0hX4+go8fJwn80Fr8pVImwx+tmZdqU7JL+7TN/S5JxBU9gSSQDhFHpFVy"
2829
"uzJps0YH9ls480mU+JLG7jI/0lL04i6XJRWqmI6gUSZRUtECYcLH5gXK4SXHlL"
@@ -205,6 +206,24 @@ def test_reidentify_with_fpe(capsys):
205206
assert "731997681" not in out
206207

207208

209+
def test_reidentify_free_text_with_fpe_using_surrogate(capsys):
    """Check that surrogate-labelled FPE text is reidentified in the output.

    The sample prints the API response; the printed text must keep the
    untouched prefix while containing neither the surrogate label nor the
    encrypted digits.
    """
    tokenized_text = "My phone number is PHONE_TOKEN(10):9617256398"

    deid.reidentify_free_text_with_fpe_using_surrogate(
        GCLOUD_PROJECT,
        tokenized_text,
        alphabet="NUMERIC",
        surrogate_type="PHONE_TOKEN",
        unwrapped_key=UNWRAPPED_KEY,
    )

    # Only stdout matters here; stderr is ignored.
    printed = capsys.readouterr().out

    # Neither the surrogate marker nor the ciphertext may survive.
    for leftover in ("PHONE_TOKEN", "9617256398"):
        assert leftover not in printed

    # The non-sensitive part of the sentence passes through unchanged.
    assert "My phone number is" in printed
226+
208227
def test_deidentify_with_replace_infotype(capsys):
209228
url_to_redact = "https://cloud.google.com"
210229
deid.deidentify_with_replace_infotype(

0 commit comments

Comments
 (0)