Skip to content

Commit a2474f6

Browse files
DLP: Added sample for deidentify using simple word list (#3099)
* Added sample for deidentify_simple_word_list Added unit test cases * Updated 'de-identify' to be consistent everywhere --------- Co-authored-by: Karl Weinmeister <[email protected]>
1 parent 7d4705c commit a2474f6

File tree

2 files changed

+145
-0
lines changed

2 files changed

+145
-0
lines changed

dlp/deIdentifyWithSimpleWordList.js

+109
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
// Copyright 2023 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
'use strict';
16+
17+
// sample-metadata:
18+
// title: De-identify sensitive data with a simple word list
19+
// description: Uses the Data Loss Prevention API to de-identify sensitive data in a
20+
// string using a custom simple word list.
21+
// usage: node deIdentifyWithSimpleWordList.js my-project text-to-inspect word-list custom-info-type-name
22+
23+
/**
 * De-identifies a string with the Data Loss Prevention API, replacing every
 * match of a custom word-list dictionary with the custom info type's name.
 *
 * The request is started without being awaited, so failures surface as an
 * unhandled promise rejection rather than as an exception from this function.
 *
 * @param {string} projectId Google Cloud project ID used to build the request parent.
 * @param {string} textToInspect The string to de-identify.
 * @param {string} words Comma-separated list of dictionary words, e.g. 'RM-GREEN,RM-YELLOW'.
 * @param {string} customInfoTypeName Name of the custom info type that replaces each match.
 */
function main(projectId, textToInspect, words, customInfoTypeName) {
  // The CLI passes the word list as a single comma-separated argument.
  // Tolerate a missing argument (instead of throwing a TypeError before any
  // request is made), and drop surrounding whitespace and empty entries so
  // input such as "A, B," does not produce blank dictionary phrases.
  words = (words || '')
    .split(',')
    .map(word => word.trim())
    .filter(word => word.length > 0);
  // [START dlp_deidentify_simple_word_list]
  // Imports the Google Cloud Data Loss Prevention library
  const DLP = require('@google-cloud/dlp');

  // TODO(developer): Replace these variables before running the sample.
  // const projectId = "your-project-id";

  // The string to de-identify
  // const textToInspect = 'Patient was seen in RM-YELLOW then transferred to rm green.';

  // Words to look for during inspection
  // const words = ['RM-GREEN', 'RM-YELLOW', 'RM-ORANGE'];

  // Name of the custom info type
  // const customInfoTypeName = 'CUSTOM_ROOM_ID';

  async function deIdentifyWithSimpleWordList() {
    // Initialize client that will be used to send requests. This client only needs to be created
    // once, and can be reused for multiple requests. After completing all of your requests, call
    // the "close" method on the client to safely clean up any remaining background resources.
    const dlp = new DLP.DlpServiceClient();

    // Construct the word list to be detected
    const wordList = {
      words: words,
    };

    // Specify the word list custom info type the inspection will look for.
    const infoType = {
      name: customInfoTypeName,
    };
    const customInfoType = {
      infoType,
      dictionary: {
        wordList,
      },
    };

    // Construct de-identify configuration: each finding is replaced using
    // the "replace with info type" primitive transformation.
    const deidentifyConfig = {
      infoTypeTransformations: {
        transformations: [
          {
            primitiveTransformation: {
              replaceWithInfoTypeConfig: {},
            },
          },
        ],
      },
    };

    // Construct inspect configuration
    const inspectConfig = {
      customInfoTypes: [customInfoType],
    };

    // Construct Item
    const item = {
      value: textToInspect,
    };

    // Combine configurations into a request for the service.
    const request = {
      parent: `projects/${projectId}/locations/global`,
      item,
      deidentifyConfig,
      inspectConfig,
    };

    // Send the request and receive response from the service
    const [response] = await dlp.deidentifyContent(request);

    // Print the results
    console.log(
      `Text after replace with infotype config: ${response.item.value}`
    );
  }

  deIdentifyWithSimpleWordList();
  // [END dlp_deidentify_simple_word_list]
}
104+
105+
// Forward the command-line arguments (everything after `node <script>`) to
// the sample as positional parameters.
const cliArgs = process.argv.slice(2);
main(...cliArgs);

// Report a rejected promise that nothing else handled: print its message and
// mark the process as failed via a non-zero exit code.
function reportUnhandledRejection(err) {
  console.error(err.message);
  process.exitCode = 1;
}

process.on('unhandledRejection', reportUnhandledRejection);

dlp/system-test/deid.test.js

+36
Original file line numberDiff line numberDiff line change
@@ -106,4 +106,40 @@ describe('deid', () => {
106106
}
107107
assert.include(output, 'INVALID_ARGUMENT');
108108
});
109+
110+
// dlp_deidentify_simple_word_list
111+
it('should deidentify using the word list provided', () => {
  // Inputs handed to the sample on its command line.
  const infoTypeName = 'CUSTOM_ROOM_ID';
  const wordList = 'RM-GREEN,RM-YELLOW,RM-ORANGE';
  const inputText =
    'Patient was seen in RM-YELLOW then transferred to rm green.';
  const command = `node deIdentifyWithSimpleWordList.js ${projectId} "${inputText}" "${wordList}" "${infoTypeName}"`;

  // Run the sample; if the command throws, use the error message as the
  // output so the assertion below is evaluated against it.
  const runSample = () => {
    try {
      return execSync(command);
    } catch (err) {
      return err.message;
    }
  };

  // Both room references are expected to be replaced by the info type name.
  const expectedText =
    'Patient was seen in [CUSTOM_ROOM_ID] then transferred to [CUSTOM_ROOM_ID].';
  assert.include(runSample(), expectedText);
});
129+
130+
it('should handle deidentification errors', () => {
  // Same inputs as the success case, but with a deliberately bad project ID;
  // the test expects INVALID_ARGUMENT to appear in the resulting output.
  const infoTypeName = 'CUSTOM_ROOM_ID';
  const wordList = 'RM-GREEN,RM-YELLOW,RM-ORANGE';
  const inputText =
    'Patient was seen in RM-YELLOW then transferred to rm green.';
  const command = `node deIdentifyWithSimpleWordList.js 'BAD_PROJECT_ID' "${inputText}" "${wordList}" "${infoTypeName}"`;

  // Run the sample; if the command throws, use the error message as the
  // output so the assertion below is evaluated against it.
  const runSample = () => {
    try {
      return execSync(command);
    } catch (err) {
      return err.message;
    }
  };

  assert.include(runSample(), 'INVALID_ARGUMENT');
});
109145
});

0 commit comments

Comments
 (0)