|
| 1 | +# Copyright 2023 Google LLC |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, software |
| 10 | +# distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +"""Uses of the Data Loss Prevention API for de-identifying and re-identifying |
| 16 | +sensitive data contained in a table using Format Preserving Encryption (FPE).""" |
| 17 | + |
| 18 | +from __future__ import annotations |
| 19 | + |
| 20 | +import argparse |
| 21 | +import base64 |
| 22 | + |
| 23 | +# [START dlp_deidentify_table_fpe] |
| 24 | +from typing import List # noqa: F811, E402, I100 |
| 25 | + |
| 26 | +import google.cloud.dlp # noqa: F811, E402 |
| 27 | + |
| 28 | + |
def deidentify_table_with_fpe(
    project: str,
    table_header: List[str],
    table_rows: List[List[str]],
    deid_field_names: List[str],
    key_name: str = None,
    wrapped_key: bytes = None,
    alphabet: str = None,
) -> None:
    """Uses the Data Loss Prevention API to de-identify sensitive data in a
    table while maintaining format, then prints the resulting table.

    The selected fields are transformed with Format Preserving Encryption
    (FPE) using a KMS-wrapped key.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        table_header: List of strings representing table field names.
        table_rows: List of rows representing table data. Every cell is sent
            to the API as a string value.
        deid_field_names: A list of fields in table to de-identify.
        key_name: The name of the Cloud KMS key used to encrypt ('wrap') the
            AES-256 key. Example:
            key_name = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/
            keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'
        wrapped_key: The encrypted ('wrapped', in bytes) AES-256 key to use.
            This key should be encrypted using the Cloud KMS key specified by
            key_name.
        alphabet: The set of characters to replace sensitive ones with. For
            more information, see https://cloud.google.com/dlp/docs/reference/
            rest/v2/projects.deidentifyTemplates#ffxcommonnativealphabet
    """

    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Construct the `table`. For more details on the table schema, please see
    # https://cloud.google.com/dlp/docs/reference/rest/v2/ContentItem#Table
    headers = [{"name": name} for name in table_header]
    rows = [
        {"values": [{"string_value": cell} for cell in row]}
        for row in table_rows
    ]

    # Construct the `item` for table.
    item = {"table": {"headers": headers, "rows": rows}}

    # Specify fields to be de-identified. A separate local is used so the
    # `deid_field_names` parameter keeps the type its annotation promises.
    fields = [{"name": name} for name in deid_field_names]

    # Construct FPE configuration dictionary
    crypto_replace_ffx_fpe_config = {
        "crypto_key": {
            "kms_wrapped": {"wrapped_key": wrapped_key, "crypto_key_name": key_name},
        },
        "common_alphabet": alphabet,
    }

    # Construct deidentify configuration dictionary
    deidentify_config = {
        "record_transformations": {
            "field_transformations": [
                {
                    "primitive_transformation": {
                        "crypto_replace_ffx_fpe_config": crypto_replace_ffx_fpe_config
                    },
                    "fields": fields,
                }
            ]
        }
    }

    # Convert the project id into a full resource id.
    parent = f"projects/{project}"

    # Call the API.
    response = dlp.deidentify_content(
        request={
            "parent": parent,
            "deidentify_config": deidentify_config,
            "item": item,
        }
    )

    # Print out results.
    print(f"Table after de-identification: {response.item.table}")
| 110 | + |
| 111 | + |
| 112 | +# [END dlp_deidentify_table_fpe] |
| 113 | + |
| 114 | + |
| 115 | +# [START dlp_reidentify_table_fpe] |
| 116 | +from typing import List # noqa: F811, E402, I100 |
| 117 | + |
| 118 | +import google.cloud.dlp # noqa: F811, E402 |
| 119 | + |
| 120 | + |
def reidentify_table_with_fpe(
    project: str,
    table_header: List[str],
    table_rows: List[List[str]],
    reid_field_names: List[str],
    key_name: str = None,
    wrapped_key: bytes = None,
    alphabet: str = None,
) -> None:
    """Uses the Data Loss Prevention API to re-identify sensitive data in a
    table that was encrypted by Format Preserving Encryption (FPE), then
    prints the resulting table.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        table_header: List of strings representing table field names.
        table_rows: List of rows representing table data. Every cell is sent
            to the API as a string value.
        reid_field_names: A list of fields in table to re-identify.
        key_name: The name of the Cloud KMS key used to encrypt ('wrap') the
            AES-256 key. Example:
            key_name = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/
            keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'
        wrapped_key: The encrypted ('wrapped', in bytes) AES-256 key to use.
            This key should be encrypted using the Cloud KMS key specified by
            key_name.
        alphabet: The set of characters to replace sensitive ones with. For
            more information, see https://cloud.google.com/dlp/docs/reference/
            rest/v2/projects.deidentifyTemplates#ffxcommonnativealphabet
    """

    # Instantiate a client.
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Construct the `table`. For more details on the table schema, please see
    # https://cloud.google.com/dlp/docs/reference/rest/v2/ContentItem#Table
    headers = [{"name": name} for name in table_header]
    rows = [
        {"values": [{"string_value": cell} for cell in row]}
        for row in table_rows
    ]

    # Convert table to `item`
    item = {"table": {"headers": headers, "rows": rows}}

    # Specify fields to be re-identified/decrypted. A separate local is used
    # so the `reid_field_names` parameter keeps the type its annotation
    # promises.
    fields = [{"name": name} for name in reid_field_names]

    # Construct FPE configuration dictionary
    crypto_replace_ffx_fpe_config = {
        "crypto_key": {
            "kms_wrapped": {"wrapped_key": wrapped_key, "crypto_key_name": key_name}
        },
        "common_alphabet": alphabet,
    }

    # Construct reidentify configuration dictionary
    reidentify_config = {
        "record_transformations": {
            "field_transformations": [
                {
                    "primitive_transformation": {
                        "crypto_replace_ffx_fpe_config": crypto_replace_ffx_fpe_config,
                    },
                    "fields": fields,
                }
            ]
        }
    }

    # Convert the project id into a full resource id.
    parent = f"projects/{project}"

    # Call the API.
    response = dlp.reidentify_content(
        request={
            "parent": parent,
            "reidentify_config": reidentify_config,
            "item": item,
        }
    )

    # Print out results.
    print(f"Table after re-identification: {response.item.table}")
| 201 | + |
| 202 | + |
| 203 | +# [END dlp_reidentify_table_fpe] |
| 204 | + |
if __name__ == "__main__":
    # Imported locally: only the command-line entry point needs JSON parsing.
    import json

    def _json_list(raw: str) -> list:
        """Parse a command-line value that must be a JSON array.

        The de-/re-identify functions iterate over lists; passing the raw
        argument string through would make them iterate over characters.
        """
        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError as err:
            raise argparse.ArgumentTypeError(
                f"expected a JSON array, got {raw!r} ({err})"
            ) from err
        if not isinstance(parsed, list):
            raise argparse.ArgumentTypeError(f"expected a JSON array, got {raw!r}")
        return parsed

    def _add_table_fpe_arguments(sub_parser, fields_arg, fields_help):
        """Register the arguments shared by both table FPE sub-commands."""
        sub_parser.add_argument(
            "project",
            help="The Google Cloud project id to use as a parent resource.",
        )
        sub_parser.add_argument(
            "table_header",
            type=_json_list,
            help="List of strings representing table field names, given as a "
            "JSON array, e.g. '[\"name\", \"ssn\"]'.",
        )
        sub_parser.add_argument(
            "table_rows",
            type=_json_list,
            help="List of rows representing table data, given as a JSON array "
            "of arrays of strings, e.g. '[[\"Ann\", \"1234\"]]'.",
        )
        sub_parser.add_argument(
            fields_arg,
            type=_json_list,
            help=fields_help,
        )
        sub_parser.add_argument(
            "key_name",
            help="The name of the Cloud KMS key used to encrypt ('wrap') the "
            "AES-256 key. Example: "
            "key_name = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/"
            "keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'",
        )
        sub_parser.add_argument(
            "wrapped_key",
            help="The encrypted ('wrapped') AES-256 key to use. This key should "
            "be encrypted using the Cloud KMS key specified by key_name.",
        )
        sub_parser.add_argument(
            "-a",
            "--alphabet",
            default="ALPHA_NUMERIC",
            help="The set of characters to replace sensitive ones with. Commonly "
            'used subsets of the alphabet include "NUMERIC", "HEXADECIMAL", '
            '"UPPER_CASE_ALPHA_NUMERIC", "ALPHA_NUMERIC", '
            '"FFX_COMMON_NATIVE_ALPHABET_UNSPECIFIED"',
        )

    parser = argparse.ArgumentParser(description=__doc__)
    subparsers = parser.add_subparsers(
        dest="content", help="Select how to submit content to the API."
    )
    subparsers.required = True

    table_fpe_parser = subparsers.add_parser(
        "deid_table_fpe",
        help="Deidentify sensitive data in a table using Format Preserving "
        "Encryption (FPE).",
    )
    _add_table_fpe_arguments(
        table_fpe_parser,
        "deid_field_names",
        "A list of fields in table to de-identify, given as a JSON array.",
    )

    reid_table_fpe_parser = subparsers.add_parser(
        "reid_table_fpe",
        help="Re-identify sensitive data in a table using Format Preserving "
        "Encryption (FPE).",
    )
    _add_table_fpe_arguments(
        reid_table_fpe_parser,
        "reid_field_names",
        "A list of fields in table to re-identify, given as a JSON array.",
    )

    args = parser.parse_args()

    # The wrapped key arrives base64-encoded on the command line; the API
    # functions take it as raw bytes.
    if args.content == "deid_table_fpe":
        deidentify_table_with_fpe(
            args.project,
            args.table_header,
            args.table_rows,
            args.deid_field_names,
            wrapped_key=base64.b64decode(args.wrapped_key),
            key_name=args.key_name,
            alphabet=args.alphabet,
        )
    elif args.content == "reid_table_fpe":
        reidentify_table_with_fpe(
            args.project,
            args.table_header,
            args.table_rows,
            args.reid_field_names,
            wrapped_key=base64.b64decode(args.wrapped_key),
            key_name=args.key_name,
            alphabet=args.alphabet,
        )
0 commit comments