Skip to content

Commit 023fff0

Browse files
[DLP] Implemented deidentifying and reidentifying of table using fpe (#10234)
* Implemented deid table with fpe with test cases. * Implemented deid and reid of table field using FPE * Refactored as per the review comments * Unique employee ids * removed unused imports * Correct calling parser as per the parameter changes in the sample code * Passing bytes as input to the sample deid_table_fpe * Passing bytes as input to the sample reid_table_fpe * Created separate file for deid and reid table fpe samples . Removed these samples from deid.py to adhere to cognitive complexity * Created separate test file for deid and reid table fpe samples . Removed these tests from deid_test.py * Removed unused imports
1 parent 859efd7 commit 023fff0

File tree

4 files changed

+402
-1
lines changed

4 files changed

+402
-1
lines changed

dlp/snippets/deid.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2588,6 +2588,7 @@ def deidentify_table_with_multiple_crypto_hash(
25882588
"deid_fields_2",
25892589
help="List of column names in table to de-identify using transient_key_name_2.",
25902590
)
2591+
25912592
args = parser.parse_args()
25922593

25932594
if args.content == "deid_mask":

dlp/snippets/deid_table.py

Lines changed: 319 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,319 @@
1+
# Copyright 2023 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Uses of the Data Loss Prevention API for de-identifying sensitive data
16+
contained in table."""
17+
18+
from __future__ import annotations
19+
20+
import argparse
21+
import base64
22+
23+
# [START dlp_deidentify_table_fpe]
24+
from typing import List # noqa: F811, E402, I100
25+
26+
import google.cloud.dlp # noqa: F811, E402
27+
28+
29+
def deidentify_table_with_fpe(
    project: str,
    table_header: List[str],
    table_rows: List[List[str]],
    deid_field_names: List[str],
    key_name: str = None,
    wrapped_key: bytes = None,
    alphabet: str = None,
) -> None:
    """Uses the Data Loss Prevention API to de-identify sensitive data in a
    table while maintaining format.

    The selected columns are transformed with Format Preserving Encryption
    (FPE) and the resulting table is printed to stdout.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        table_header: List of strings representing table field names.
        table_rows: List of rows representing table data.
        deid_field_names: A list of fields in table to de-identify.
        key_name: The name of the Cloud KMS key used to encrypt ('wrap') the
            AES-256 key. Example:
            key_name = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/
            keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'
        wrapped_key: The encrypted ('wrapped', in bytes) AES-256 key to use.
            This key should be encrypted using the Cloud KMS key specified by
            key_name.
        alphabet: The set of characters to replace sensitive ones with. For
            more information, see https://cloud.google.com/dlp/docs/reference/
            rest/v2/projects.deidentifyTemplates#ffxcommonnativealphabet
    """

    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Construct the `table`. For more details on the table schema, please see
    # https://cloud.google.com/dlp/docs/reference/rest/v2/ContentItem#Table
    headers = [{"name": val} for val in table_header]
    rows = [
        {"values": [{"string_value": cell_val} for cell_val in row]}
        for row in table_rows
    ]
    table = {"headers": headers, "rows": rows}

    # Construct the `item` for table.
    item = {"table": table}

    # Specify fields to be de-identified. A separate local is used so the
    # `deid_field_names` parameter keeps its documented List[str] type.
    deid_fields = [{"name": field_name} for field_name in deid_field_names]

    # Construct FPE configuration dictionary
    crypto_replace_ffx_fpe_config = {
        "crypto_key": {
            "kms_wrapped": {"wrapped_key": wrapped_key, "crypto_key_name": key_name},
        },
        "common_alphabet": alphabet,
    }

    # Construct deidentify configuration dictionary
    deidentify_config = {
        "record_transformations": {
            "field_transformations": [
                {
                    "primitive_transformation": {
                        "crypto_replace_ffx_fpe_config": crypto_replace_ffx_fpe_config
                    },
                    "fields": deid_fields,
                }
            ]
        }
    }

    # Convert the project id into a full resource id.
    parent = f"projects/{project}"

    # Call the API.
    response = dlp.deidentify_content(
        request={
            "parent": parent,
            "deidentify_config": deidentify_config,
            "item": item,
        }
    )

    # Print out results.
    print(f"Table after de-identification: {response.item.table}")
110+
111+
112+
# [END dlp_deidentify_table_fpe]
113+
114+
115+
# [START dlp_reidentify_table_fpe]
116+
from typing import List # noqa: F811, E402, I100
117+
118+
import google.cloud.dlp # noqa: F811, E402
119+
120+
121+
def reidentify_table_with_fpe(
    project: str,
    table_header: List[str],
    table_rows: List[List[str]],
    reid_field_names: List[str],
    key_name: str = None,
    wrapped_key: bytes = None,
    alphabet: str = None,
) -> None:
    """Uses the Data Loss Prevention API to re-identify sensitive data in a
    table that was encrypted by Format Preserving Encryption (FPE).

    The selected columns are sent through the reverse FPE transformation and
    the resulting table is printed to stdout.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        table_header: List of strings representing table field names.
        table_rows: List of rows representing table data.
        reid_field_names: A list of fields in table to re-identify.
        key_name: The name of the Cloud KMS key used to encrypt ('wrap') the
            AES-256 key. Example:
            key_name = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/
            keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'
        wrapped_key: The encrypted ('wrapped', in bytes) AES-256 key to use.
            This key should be encrypted using the Cloud KMS key specified by
            key_name.
        alphabet: The set of characters that was used when the data was
            encrypted. For more information, see
            https://cloud.google.com/dlp/docs/reference/
            rest/v2/projects.deidentifyTemplates#ffxcommonnativealphabet
    """

    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Construct the `table`. For more details on the table schema, please see
    # https://cloud.google.com/dlp/docs/reference/rest/v2/ContentItem#Table
    headers = [{"name": val} for val in table_header]
    rows = [
        {"values": [{"string_value": cell_val} for cell_val in row]}
        for row in table_rows
    ]
    table = {"headers": headers, "rows": rows}

    # Convert table to `item`
    item = {"table": table}

    # Specify fields to be re-identified/decrypted. A separate local is used
    # so the `reid_field_names` parameter keeps its documented List[str] type.
    reid_fields = [{"name": field_name} for field_name in reid_field_names]

    # Construct FPE configuration dictionary
    crypto_replace_ffx_fpe_config = {
        "crypto_key": {
            "kms_wrapped": {"wrapped_key": wrapped_key, "crypto_key_name": key_name}
        },
        "common_alphabet": alphabet,
    }

    # Construct reidentify configuration dictionary
    reidentify_config = {
        "record_transformations": {
            "field_transformations": [
                {
                    "primitive_transformation": {
                        "crypto_replace_ffx_fpe_config": crypto_replace_ffx_fpe_config,
                    },
                    "fields": reid_fields,
                }
            ]
        }
    }

    # Convert the project id into a full resource id.
    parent = f"projects/{project}"

    # Call the API.
    response = dlp.reidentify_content(
        request={
            "parent": parent,
            "reidentify_config": reidentify_config,
            "item": item,
        }
    )

    # Print out results.
    print(f"Table after re-identification: {response.item.table}")
201+
202+
203+
# [END dlp_reidentify_table_fpe]
204+
205+
if __name__ == "__main__":
    # Only the command-line entry point needs JSON parsing, so the import is
    # kept local to this block.
    import json

    def _json_list(value: str) -> list:
        """Converts a JSON-encoded command-line argument into a list.

        argparse hands positional values over as plain strings; the sample
        functions iterate over their table arguments, so a raw string would
        be split into single characters. Parsing here gives them real lists.
        """
        try:
            parsed = json.loads(value)
        except ValueError as err:
            raise argparse.ArgumentTypeError(
                f"expected a JSON list, got {value!r}"
            ) from err
        if not isinstance(parsed, list):
            raise argparse.ArgumentTypeError(f"expected a JSON list, got {value!r}")
        return parsed

    def _add_table_fpe_arguments(
        subparser: argparse.ArgumentParser,
        field_names_arg: str,
        field_names_help: str,
    ) -> None:
        """Registers the arguments shared by the deid and reid sub-commands.

        The two sub-commands differ only in the name and help text of the
        argument that lists the fields to transform.
        """
        subparser.add_argument(
            "project",
            help="The Google Cloud project id to use as a parent resource.",
        )
        subparser.add_argument(
            "table_header",
            type=_json_list,
            help="List of strings representing table field names, encoded as "
            'a JSON list. Example: \'["employee_id", "date"]\'',
        )
        subparser.add_argument(
            "table_rows",
            type=_json_list,
            help="List of rows representing table data, encoded as a JSON "
            'list of lists. Example: \'[["11111", "2015"], ["22222", "2016"]]\'',
        )
        subparser.add_argument(
            field_names_arg,
            type=_json_list,
            help=field_names_help,
        )
        subparser.add_argument(
            "key_name",
            help="The name of the Cloud KMS key used to encrypt ('wrap') the "
            "AES-256 key. Example: "
            "key_name = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/"
            "keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'",
        )
        subparser.add_argument(
            "wrapped_key",
            help="The encrypted ('wrapped') AES-256 key to use. This key should "
            "be encrypted using the Cloud KMS key specified by key_name.",
        )
        subparser.add_argument(
            "-a",
            "--alphabet",
            default="ALPHA_NUMERIC",
            help="The set of characters to replace sensitive ones with. Commonly "
            'used subsets of the alphabet include "NUMERIC", "HEXADECIMAL", '
            '"UPPER_CASE_ALPHA_NUMERIC", "ALPHA_NUMERIC", '
            '"FFX_COMMON_NATIVE_ALPHABET_UNSPECIFIED"',
        )

    parser = argparse.ArgumentParser(description=__doc__)
    subparsers = parser.add_subparsers(
        dest="content", help="Select how to submit content to the API."
    )
    subparsers.required = True

    table_fpe_parser = subparsers.add_parser(
        "deid_table_fpe",
        help="Deidentify sensitive data in a table using Format Preserving "
        "Encryption (FPE).",
    )
    _add_table_fpe_arguments(
        table_fpe_parser,
        "deid_field_names",
        "A list of fields in table to de-identify, encoded as a JSON list.",
    )

    reid_table_fpe_parser = subparsers.add_parser(
        "reid_table_fpe",
        help="Re-identify sensitive data in a table using Format Preserving "
        "Encryption (FPE).",
    )
    _add_table_fpe_arguments(
        reid_table_fpe_parser,
        "reid_field_names",
        "A list of fields in table to re-identify, encoded as a JSON list.",
    )

    args = parser.parse_args()

    # The wrapped key is supplied base64-encoded on the command line, while
    # the sample functions take it as raw bytes.
    if args.content == "deid_table_fpe":
        deidentify_table_with_fpe(
            args.project,
            args.table_header,
            args.table_rows,
            args.deid_field_names,
            wrapped_key=base64.b64decode(args.wrapped_key),
            key_name=args.key_name,
            alphabet=args.alphabet,
        )
    elif args.content == "reid_table_fpe":
        reidentify_table_with_fpe(
            args.project,
            args.table_header,
            args.table_rows,
            args.reid_field_names,
            wrapped_key=base64.b64decode(args.wrapped_key),
            key_name=args.key_name,
            alphabet=args.alphabet,
        )

0 commit comments

Comments
 (0)