Skip to content

Commit b63f932

Browse files
committed
Add text redaction sample using DLP
1 parent a4277b1 commit b63f932

File tree

3 files changed

+105
-4
lines changed

3 files changed

+105
-4
lines changed

dlp/README.rst

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -339,13 +339,12 @@ To run this sample:
339339
.. code-block:: bash
340340
341341
$ python deid.py
342-
343-
usage: deid.py [-h] {deid_mask,deid_fpe,reid_fpe,deid_date_shift} ...
342+
usage: deid.py [-h] {deid_mask,deid_fpe,reid_fpe,deid_date_shift,redact} ...
344343
345344
Uses of the Data Loss Prevention API for deidentifying sensitive data.
346345
347346
positional arguments:
348-
{deid_mask,deid_fpe,reid_fpe,deid_date_shift}
347+
{deid_mask,deid_fpe,reid_fpe,deid_date_shift,redact}
349348
Select how to submit content to the API.
350349
deid_mask Deidentify sensitive data in a string by masking it
351350
with a character.
@@ -355,6 +354,8 @@ To run this sample:
355354
Preserving Encryption (FPE).
356355
deid_date_shift Deidentify dates in a CSV file by pseudorandomly
357356
shifting them.
357+
redact Redact sensitive data in a string by replacing it with
358+
the info type of the data.
358359
359360
optional arguments:
360361
-h, --help show this help message and exit
@@ -378,4 +379,4 @@ to `browse the source`_ and `report issues`_.
378379
https://github.com/GoogleCloudPlatform/google-cloud-python/issues
379380
380381
381-
.. _Google Cloud SDK: https://cloud.google.com/sdk/
382+
.. _Google Cloud SDK: https://cloud.google.com/sdk/

dlp/deid.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,62 @@ def write_data(data):
435435
# [END dlp_deidentify_date_shift]
436436

437437

438+
# [START dlp_redact_sensitive_data]
439+
def redact_sensitive_data(project, string, info_types):
    """Uses the Data Loss Prevention API to redact sensitive data in a
    string by replacing each finding with the name of its info type.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        string: The string to redact (will be treated as text).
        info_types: A list of strings representing the info types to look
            for, e.g. "FIRST_NAME", "LAST_NAME" or "EMAIL_ADDRESS".
    Returns:
        None; the response from the API is printed to the terminal.
    """

    # Import the client library. The versioned module is imported explicitly
    # because that is the module the client class is looked up on below.
    import google.cloud.dlp_v2

    # Instantiate a client
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Convert the project id into a full resource id.
    parent = dlp.project_path(project)

    # Construct inspect configuration dictionary
    inspect_config = {
        "info_types": [{"name": info_type} for info_type in info_types]
    }

    # Construct deidentify configuration dictionary. The empty
    # replace_with_info_type_config selects the "replace with info type"
    # transformation; it takes no options of its own here.
    deidentify_config = {
        "info_type_transformations": {
            "transformations": [
                {
                    "primitive_transformation": {
                        "replace_with_info_type_config": {}
                    }
                }
            ]
        }
    }

    # Construct item
    item = {"value": string}

    # Call the API
    response = dlp.deidentify_content(
        parent,
        inspect_config=inspect_config,
        deidentify_config=deidentify_config,
        item=item,
    )

    # Print out the results.
    print(response.item.value)
489+
490+
491+
# [END dlp_redact_sensitive_data]
492+
493+
438494
if __name__ == "__main__":
439495
parser = argparse.ArgumentParser(description=__doc__)
440496
subparsers = parser.add_subparsers(
@@ -626,6 +682,30 @@ def write_data(data):
626682
"key_name.",
627683
)
628684

685+
# Sub-command "redact": replace each finding with the name of its info type.
redact_parser = subparsers.add_parser(
    "redact",
    help="Redact sensitive data in a string by replacing it with the "
    "info type of the data.",
)
# NOTE(review): with action="append" argparse appends command-line values to
# a non-empty default list instead of replacing it, so any --info_types given
# by the user are added on top of the three defaults below. Replacing the
# defaults would need default=None plus a fallback after parse_args().
redact_parser.add_argument(
    "--info_types",
    action="append",
    help="Strings representing info types to look for. A full list of "
    "info categories and types is available from the API. Examples "
    'include "FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS". '
    "These three are always included; types given here are added to them.",
    default=["FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS"],
)
redact_parser.add_argument(
    "project",
    help="The Google Cloud project id to use as a parent resource.",
)
# The two adjacent literals are concatenated into a single help string, so
# the first one needs a trailing space to keep the sentences apart.
redact_parser.add_argument(
    "item",
    help="The string to redact. "
    "Example: 'My credit card is 4242 4242 4242 4242'",
)
708+
629709
args = parser.parse_args()
630710

631711
if args.content == "deid_mask":
@@ -667,3 +747,9 @@ def write_data(data):
667747
wrapped_key=args.wrapped_key,
668748
key_name=args.key_name,
669749
)
750+
elif args.content == "redact":
751+
redact_sensitive_data(
752+
args.project,
753+
string=args.item,
754+
info_types=args.info_types,
755+
)

dlp/deid_test.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,3 +185,17 @@ def test_reidentify_with_fpe(capsys):
185185
out, _ = capsys.readouterr()
186186

187187
assert "731997681" not in out
188+
189+
190+
def test_redact_sensitive_data(capsys):
    """Check that a URL in the input is replaced by its info type name.

    Calls deid.redact_sensitive_data with the "URL" info type and inspects
    what the sample printed to stdout.
    """
    sensitive_url = "https://cloud.google.com"
    text = "My favorite site is " + sensitive_url

    deid.redact_sensitive_data(GCLOUD_PROJECT, text, ["URL"])

    captured, _ = capsys.readouterr()

    # The raw URL must be gone and the info type placeholder present.
    assert sensitive_url not in captured
    assert "My favorite site is [URL]" in captured

0 commit comments

Comments
 (0)