Skip to content

Commit 403f54a

Browse files
authored
Update DLP samples for release [(#1415)](GoogleCloudPlatform/python-docs-samples#1415)
1 parent 289f38d commit 403f54a

20 files changed

+3552
-349
lines changed

samples/snippets/deid.py

Lines changed: 549 additions & 0 deletions
Large diffs are not rendered by default.

samples/snippets/deid_test.py

Lines changed: 163 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,163 @@
1+
# Copyright 2017 Google Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the 'License');
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an 'AS IS' BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import os
16+
import shutil
17+
import tempfile
18+
19+
import pytest
20+
21+
import deid
22+
23+
# Sample inputs: one string carrying an SSN-like number, one without any.
HARMFUL_STRING = 'My SSN is 372819127'
HARMLESS_STRING = 'My favorite color is blue'

# Value of the GCLOUD_PROJECT environment variable (None when it is unset);
# passed as the first argument to every sample call in this module.
GCLOUD_PROJECT = os.environ.get('GCLOUD_PROJECT')

# Values handed to the samples through their `wrapped_key`, `key_name` and
# `surrogate_type` keyword arguments.
WRAPPED_KEY = (
    'CiQAz0hX4+go8fJwn80Fr8pVImwx+tmZdqU7JL+7TN/S5JxBU9gSSQDhFHpFVy'
    'uzJps0YH9ls480mU+JLG7jI/0lL04i6XJRWqmI6gUSZRUtECYcLH5gXK4SXHlL'
    'rotx7Chxz/4z7SIpXFOBY61z0/U='
)
KEY_NAME = (
    'projects/python-docs-samples-tests/locations/global/keyRings/'
    'dlp-test/cryptoKeys/dlp-test'
)
SURROGATE_TYPE = 'SSN_TOKEN'

# Inputs for the date-shift tests; the CSV path is resolved relative to this
# test module's directory.
CSV_FILE = os.path.join(os.path.dirname(__file__), 'resources/dates.csv')
DATE_SHIFTED_AMOUNT = 30
DATE_FIELDS = ['birth_date', 'register_date']
CSV_CONTEXT_FIELD = 'name'
36+
37+
38+
@pytest.fixture(scope='module')
def tempdir():
    """Yield the path of a temporary directory for this module's tests.

    The directory is created with ``tempfile.mkdtemp`` before the yield and
    removed with ``shutil.rmtree`` after it.
    """
    scratch_dir = tempfile.mkdtemp()
    yield scratch_dir
    shutil.rmtree(scratch_dir)
43+
44+
45+
def test_deidentify_with_mask(capsys):
    """Default masking should print the sample with the number starred out."""
    deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING)

    # Only stdout matters here; stderr (index 1) is ignored.
    stdout = capsys.readouterr()[0]
    expected = 'My SSN is *********'
    assert expected in stdout
50+
51+
52+
def test_deidentify_with_mask_ignore_insensitive_data(capsys):
    """A string with nothing sensitive should be printed back unchanged."""
    deid.deidentify_with_mask(GCLOUD_PROJECT, HARMLESS_STRING)

    stdout, _stderr = capsys.readouterr()
    assert HARMLESS_STRING in stdout
57+
58+
59+
def test_deidentify_with_mask_masking_character_specified(capsys):
    """Passing ``masking_character='#'`` should mask the number with '#'."""
    deid.deidentify_with_mask(
        GCLOUD_PROJECT, HARMFUL_STRING, masking_character='#')

    stdout = capsys.readouterr()[0]
    expected = 'My SSN is #########'
    assert expected in stdout
67+
68+
69+
def test_deidentify_with_mask_masking_number_specified(capsys):
    """With ``number_to_mask=7`` the last two digits should stay visible."""
    deid.deidentify_with_mask(
        GCLOUD_PROJECT,
        HARMFUL_STRING,
        number_to_mask=7,
    )

    stdout, _stderr = capsys.readouterr()
    expected = 'My SSN is *******27'
    assert expected in stdout
74+
75+
76+
def test_deidentify_with_fpe(capsys):
    """The FPE sample should keep the sentence prefix but not the number."""
    fpe_options = {
        'alphabet': 'NUMERIC',
        'wrapped_key': WRAPPED_KEY,
        'key_name': KEY_NAME,
    }
    deid.deidentify_with_fpe(GCLOUD_PROJECT, HARMFUL_STRING, **fpe_options)

    stdout = capsys.readouterr()[0]
    # The surrounding text survives; the original digits must not.
    assert 'My SSN is' in stdout
    assert '372819127' not in stdout
87+
88+
89+
def test_deidentify_with_fpe_uses_surrogate_info_types(capsys):
    """With a surrogate type, the output should carry the SSN_TOKEN label."""
    fpe_options = {
        'alphabet': 'NUMERIC',
        'wrapped_key': WRAPPED_KEY,
        'key_name': KEY_NAME,
        'surrogate_type': SURROGATE_TYPE,
    }
    deid.deidentify_with_fpe(GCLOUD_PROJECT, HARMFUL_STRING, **fpe_options)

    stdout, _stderr = capsys.readouterr()
    # The label follows the prefix, and the original digits are gone.
    assert 'My SSN is SSN_TOKEN' in stdout
    assert '372819127' not in stdout
101+
102+
103+
def test_deidentify_with_fpe_ignores_insensitive_data(capsys):
    """The FPE sample should print a harmless string back unchanged."""
    fpe_options = {
        'alphabet': 'NUMERIC',
        'wrapped_key': WRAPPED_KEY,
        'key_name': KEY_NAME,
    }
    deid.deidentify_with_fpe(GCLOUD_PROJECT, HARMLESS_STRING, **fpe_options)

    stdout = capsys.readouterr()[0]
    assert HARMLESS_STRING in stdout
113+
114+
115+
def test_deidentify_with_date_shift(tempdir, capsys):
    """The date-shift sample should report success for the sample CSV.

    Lower and upper bounds are both DATE_SHIFTED_AMOUNT, and the output
    path points inside the ``tempdir`` fixture directory.
    """
    shifted_csv = os.path.join(tempdir, 'dates-shifted.csv')
    shift_options = {
        'input_csv_file': CSV_FILE,
        'output_csv_file': shifted_csv,
        'lower_bound_days': DATE_SHIFTED_AMOUNT,
        'upper_bound_days': DATE_SHIFTED_AMOUNT,
        'date_fields': DATE_FIELDS,
    }

    deid.deidentify_with_date_shift(GCLOUD_PROJECT, **shift_options)

    stdout = capsys.readouterr()[0]
    assert 'Successful' in stdout
129+
130+
131+
def test_deidentify_with_date_shift_using_context_field(tempdir, capsys):
    """The date-shift sample should report success when given a context field.

    Same inputs as the plain date-shift test, plus ``context_field_id``,
    ``wrapped_key`` and ``key_name``.
    """
    # `tempdir` is module-scoped, so the directory is shared with the plain
    # date-shift test. Use a distinct file name so this test never targets
    # that test's output file.
    output_filepath = os.path.join(tempdir, 'dates-shifted-context.csv')

    deid.deidentify_with_date_shift(
        GCLOUD_PROJECT,
        input_csv_file=CSV_FILE,
        output_csv_file=output_filepath,
        lower_bound_days=DATE_SHIFTED_AMOUNT,
        upper_bound_days=DATE_SHIFTED_AMOUNT,
        date_fields=DATE_FIELDS,
        context_field_id=CSV_CONTEXT_FIELD,
        wrapped_key=WRAPPED_KEY,
        key_name=KEY_NAME)

    out, _ = capsys.readouterr()

    assert 'Successful' in out
148+
149+
150+
def test_reidentify_with_fpe(capsys):
    """Re-identifying a labelled token should not print the token digits."""
    labeled_fpe_string = 'My SSN is SSN_TOKEN(9):731997681'
    reid_options = {
        'surrogate_type': SURROGATE_TYPE,
        'wrapped_key': WRAPPED_KEY,
        'key_name': KEY_NAME,
        'alphabet': 'NUMERIC',
    }

    deid.reidentify_with_fpe(
        GCLOUD_PROJECT, labeled_fpe_string, **reid_options)

    stdout, _stderr = capsys.readouterr()
    # The digits from the surrogate must be absent from the printed output.
    assert '731997681' not in stdout

0 commit comments

Comments
 (0)