Skip to content

Commit c38f4e4

Browse files
maximus12793 authored and kurtisvg committed
Update DLP samples to use dlp_v2 client. [(#2580)](GoogleCloudPlatform/python-docs-samples#2580)
1 parent 09dccf7 commit c38f4e4

18 files changed

+2029 -1562 lines changed

samples/snippets/deid.py

Lines changed: 299 additions & 239 deletions
Large diffs are not rendered by default.

samples/snippets/deid_test.py

Lines changed: 60 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -20,39 +20,45 @@
2020

2121
import deid
2222

23-
HARMFUL_STRING = 'My SSN is 372819127'
24-
HARMLESS_STRING = 'My favorite color is blue'
25-
GCLOUD_PROJECT = os.getenv('GCLOUD_PROJECT')
26-
WRAPPED_KEY = ('CiQAz0hX4+go8fJwn80Fr8pVImwx+tmZdqU7JL+7TN/S5JxBU9gSSQDhFHpFVy'
27-
'uzJps0YH9ls480mU+JLG7jI/0lL04i6XJRWqmI6gUSZRUtECYcLH5gXK4SXHlL'
28-
'rotx7Chxz/4z7SIpXFOBY61z0/U=')
29-
KEY_NAME = ('projects/python-docs-samples-tests/locations/global/keyRings/'
30-
'dlp-test/cryptoKeys/dlp-test')
31-
SURROGATE_TYPE = 'SSN_TOKEN'
32-
CSV_FILE = os.path.join(os.path.dirname(__file__), 'resources/dates.csv')
23+
HARMFUL_STRING = "My SSN is 372819127"
24+
HARMLESS_STRING = "My favorite color is blue"
25+
GCLOUD_PROJECT = os.getenv("GCLOUD_PROJECT")
26+
WRAPPED_KEY = (
27+
"CiQAz0hX4+go8fJwn80Fr8pVImwx+tmZdqU7JL+7TN/S5JxBU9gSSQDhFHpFVy"
28+
"uzJps0YH9ls480mU+JLG7jI/0lL04i6XJRWqmI6gUSZRUtECYcLH5gXK4SXHlL"
29+
"rotx7Chxz/4z7SIpXFOBY61z0/U="
30+
)
31+
KEY_NAME = (
32+
"projects/python-docs-samples-tests/locations/global/keyRings/"
33+
"dlp-test/cryptoKeys/dlp-test"
34+
)
35+
SURROGATE_TYPE = "SSN_TOKEN"
36+
CSV_FILE = os.path.join(os.path.dirname(__file__), "resources/dates.csv")
3337
DATE_SHIFTED_AMOUNT = 30
34-
DATE_FIELDS = ['birth_date', 'register_date']
35-
CSV_CONTEXT_FIELD = 'name'
38+
DATE_FIELDS = ["birth_date", "register_date"]
39+
CSV_CONTEXT_FIELD = "name"
3640

3741

38-
@pytest.fixture(scope='module')
42+
@pytest.fixture(scope="module")
3943
def tempdir():
4044
tempdir = tempfile.mkdtemp()
4145
yield tempdir
4246
shutil.rmtree(tempdir)
4347

4448

4549
def test_deidentify_with_mask(capsys):
46-
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING,
47-
['US_SOCIAL_SECURITY_NUMBER'])
50+
deid.deidentify_with_mask(
51+
GCLOUD_PROJECT, HARMFUL_STRING, ["US_SOCIAL_SECURITY_NUMBER"]
52+
)
4853

4954
out, _ = capsys.readouterr()
50-
assert 'My SSN is *********' in out
55+
assert "My SSN is *********" in out
5156

5257

5358
def test_deidentify_with_mask_ignore_insensitive_data(capsys):
54-
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMLESS_STRING,
55-
['US_SOCIAL_SECURITY_NUMBER'])
59+
deid.deidentify_with_mask(
60+
GCLOUD_PROJECT, HARMLESS_STRING, ["US_SOCIAL_SECURITY_NUMBER"]
61+
)
5662

5763
out, _ = capsys.readouterr()
5864
assert HARMLESS_STRING in out
@@ -62,82 +68,87 @@ def test_deidentify_with_mask_masking_character_specified(capsys):
6268
deid.deidentify_with_mask(
6369
GCLOUD_PROJECT,
6470
HARMFUL_STRING,
65-
['US_SOCIAL_SECURITY_NUMBER'],
66-
masking_character='#')
71+
["US_SOCIAL_SECURITY_NUMBER"],
72+
masking_character="#",
73+
)
6774

6875
out, _ = capsys.readouterr()
69-
assert 'My SSN is #########' in out
76+
assert "My SSN is #########" in out
7077

7178

7279
def test_deidentify_with_mask_masking_number_specified(capsys):
73-
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING,
74-
['US_SOCIAL_SECURITY_NUMBER'],
75-
number_to_mask=7)
80+
deid.deidentify_with_mask(
81+
GCLOUD_PROJECT, HARMFUL_STRING, ["US_SOCIAL_SECURITY_NUMBER"], number_to_mask=7
82+
)
7683

7784
out, _ = capsys.readouterr()
78-
assert 'My SSN is *******27' in out
85+
assert "My SSN is *******27" in out
7986

8087

8188
def test_deidentify_with_fpe(capsys):
8289
deid.deidentify_with_fpe(
8390
GCLOUD_PROJECT,
8491
HARMFUL_STRING,
85-
['US_SOCIAL_SECURITY_NUMBER'],
86-
alphabet='NUMERIC',
92+
["US_SOCIAL_SECURITY_NUMBER"],
93+
alphabet="NUMERIC",
8794
wrapped_key=WRAPPED_KEY,
88-
key_name=KEY_NAME)
95+
key_name=KEY_NAME,
96+
)
8997

9098
out, _ = capsys.readouterr()
91-
assert 'My SSN is' in out
92-
assert '372819127' not in out
99+
assert "My SSN is" in out
100+
assert "372819127" not in out
93101

94102

95103
def test_deidentify_with_fpe_uses_surrogate_info_types(capsys):
96104
deid.deidentify_with_fpe(
97105
GCLOUD_PROJECT,
98106
HARMFUL_STRING,
99-
['US_SOCIAL_SECURITY_NUMBER'],
100-
alphabet='NUMERIC',
107+
["US_SOCIAL_SECURITY_NUMBER"],
108+
alphabet="NUMERIC",
101109
wrapped_key=WRAPPED_KEY,
102110
key_name=KEY_NAME,
103-
surrogate_type=SURROGATE_TYPE)
111+
surrogate_type=SURROGATE_TYPE,
112+
)
104113

105114
out, _ = capsys.readouterr()
106-
assert 'My SSN is SSN_TOKEN' in out
107-
assert '372819127' not in out
115+
assert "My SSN is SSN_TOKEN" in out
116+
assert "372819127" not in out
108117

109118

110119
def test_deidentify_with_fpe_ignores_insensitive_data(capsys):
111120
deid.deidentify_with_fpe(
112121
GCLOUD_PROJECT,
113122
HARMLESS_STRING,
114-
['US_SOCIAL_SECURITY_NUMBER'],
115-
alphabet='NUMERIC',
123+
["US_SOCIAL_SECURITY_NUMBER"],
124+
alphabet="NUMERIC",
116125
wrapped_key=WRAPPED_KEY,
117-
key_name=KEY_NAME)
126+
key_name=KEY_NAME,
127+
)
118128

119129
out, _ = capsys.readouterr()
120130
assert HARMLESS_STRING in out
121131

122132

123133
def test_deidentify_with_date_shift(tempdir, capsys):
124-
output_filepath = os.path.join(tempdir, 'dates-shifted.csv')
134+
output_filepath = os.path.join(tempdir, "dates-shifted.csv")
125135

126136
deid.deidentify_with_date_shift(
127137
GCLOUD_PROJECT,
128138
input_csv_file=CSV_FILE,
129139
output_csv_file=output_filepath,
130140
lower_bound_days=DATE_SHIFTED_AMOUNT,
131141
upper_bound_days=DATE_SHIFTED_AMOUNT,
132-
date_fields=DATE_FIELDS)
142+
date_fields=DATE_FIELDS,
143+
)
133144

134145
out, _ = capsys.readouterr()
135146

136-
assert 'Successful' in out
147+
assert "Successful" in out
137148

138149

139150
def test_deidentify_with_date_shift_using_context_field(tempdir, capsys):
140-
output_filepath = os.path.join(tempdir, 'dates-shifted.csv')
151+
output_filepath = os.path.join(tempdir, "dates-shifted.csv")
141152

142153
deid.deidentify_with_date_shift(
143154
GCLOUD_PROJECT,
@@ -148,24 +159,26 @@ def test_deidentify_with_date_shift_using_context_field(tempdir, capsys):
148159
date_fields=DATE_FIELDS,
149160
context_field_id=CSV_CONTEXT_FIELD,
150161
wrapped_key=WRAPPED_KEY,
151-
key_name=KEY_NAME)
162+
key_name=KEY_NAME,
163+
)
152164

153165
out, _ = capsys.readouterr()
154166

155-
assert 'Successful' in out
167+
assert "Successful" in out
156168

157169

158170
def test_reidentify_with_fpe(capsys):
159-
labeled_fpe_string = 'My SSN is SSN_TOKEN(9):731997681'
171+
labeled_fpe_string = "My SSN is SSN_TOKEN(9):731997681"
160172

161173
deid.reidentify_with_fpe(
162174
GCLOUD_PROJECT,
163175
labeled_fpe_string,
164176
surrogate_type=SURROGATE_TYPE,
165177
wrapped_key=WRAPPED_KEY,
166178
key_name=KEY_NAME,
167-
alphabet='NUMERIC')
179+
alphabet="NUMERIC",
180+
)
168181

169182
out, _ = capsys.readouterr()
170183

171-
assert '731997681' not in out
184+
assert "731997681" not in out

0 commit comments

Comments (0)