2121
2222# [START dlp_deidentify_masking]
2323def deidentify_with_mask (
24- project , string , info_types , masking_character = None , number_to_mask = 0
24+ project , input_str , info_types , masking_character = None , number_to_mask = 0
2525):
2626 """Uses the Data Loss Prevention API to deidentify sensitive data in a
2727 string by masking it with a character.
2828 Args:
2929 project: The Google Cloud project id to use as a parent resource.
30- item : The string to deidentify (will be treated as text).
30+ input_str : The string to deidentify (will be treated as text).
3131 masking_character: The character to mask matching sensitive data with.
3232 number_to_mask: The maximum number of sensitive characters to mask in
3333 a match. If omitted or set to zero, the API will default to no
@@ -67,7 +67,7 @@ def deidentify_with_mask(
6767 }
6868
6969 # Construct item
70- item = {"value" : string }
70+ item = {"value" : input_str }
7171
7272 # Call the API
7373 response = dlp .deidentify_content (
@@ -83,11 +83,76 @@ def deidentify_with_mask(
8383
8484# [END dlp_deidentify_masking]
8585
86+ # [START dlp_deidentify_replace]
def deidentify_with_replace(
    project,
    input_str,
    info_types,
    replacement_str="REPLACEMENT_STR",
):
    """Uses the Data Loss Prevention API to deidentify sensitive data in a
    string by replacing matched input values with a value you specify.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        input_str: The string to deidentify (will be treated as text).
        info_types: A list of strings representing info types to look for.
        replacement_str: The string that replaces every value matching one
            of the given info types. Defaults to "REPLACEMENT_STR".

    Returns:
        None; the response from the API is printed to the terminal.
    """
    # Import the versioned module that is actually referenced below, rather
    # than relying on `google.cloud.dlp` pulling in `dlp_v2` as a side effect.
    import google.cloud.dlp_v2

    # Instantiate a client
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Convert the project id into a full resource id.
    parent = dlp.project_path(project)

    # Construct inspect configuration dictionary: one {"name": ...} entry per
    # requested info type.
    inspect_config = {
        "info_types": [{"name": info_type} for info_type in info_types]
    }

    # Construct deidentify configuration dictionary: a single transformation
    # whose replace_config substitutes `replacement_str` for each finding.
    deidentify_config = {
        "info_type_transformations": {
            "transformations": [
                {
                    "primitive_transformation": {
                        "replace_config": {
                            "new_value": {
                                "string_value": replacement_str,
                            }
                        }
                    }
                }
            ]
        }
    }

    # Construct item
    item = {"value": input_str}

    # Call the API
    response = dlp.deidentify_content(
        parent,
        inspect_config=inspect_config,
        deidentify_config=deidentify_config,
        item=item,
    )

    # Print out the results.
    print(response.item.value)
147+
148+ # [END dlp_deidentify_replace]
86149
87150# [START dlp_deidentify_fpe]
151+
152+
88153def deidentify_with_fpe (
89154 project ,
90- string ,
155+ input_str ,
91156 info_types ,
92157 alphabet = None ,
93158 surrogate_type = None ,
@@ -98,7 +163,7 @@ def deidentify_with_fpe(
98163 string using Format Preserving Encryption (FPE).
99164 Args:
100165 project: The Google Cloud project id to use as a parent resource.
101- item : The string to deidentify (will be treated as text).
166+ input_str : The string to deidentify (will be treated as text).
102167 alphabet: The set of characters to replace sensitive ones with. For
103168 more information, see https://cloud.google.com/dlp/docs/reference/
104169 rest/v2beta2/organizations.deidentifyTemplates#ffxcommonnativealphabet
@@ -166,7 +231,7 @@ def deidentify_with_fpe(
166231 }
167232
168233 # Convert string to item
169- item = {"value" : string }
234+ item = {"value" : input_str }
170235
171236 # Call the API
172237 response = dlp .deidentify_content (
@@ -186,7 +251,7 @@ def deidentify_with_fpe(
186251# [START dlp_reidentify_fpe]
187252def reidentify_with_fpe (
188253 project ,
189- string ,
254+ input_str ,
190255 alphabet = None ,
191256 surrogate_type = None ,
192257 key_name = None ,
@@ -196,7 +261,7 @@ def reidentify_with_fpe(
196261 string that was encrypted by Format Preserving Encryption (FPE).
197262 Args:
198263 project: The Google Cloud project id to use as a parent resource.
199- item : The string to deidentify (will be treated as text).
264+ input_str : The string to deidentify (will be treated as text).
200265 alphabet: The set of characters to replace sensitive ones with. For
201266 more information, see https://cloud.google.com/dlp/docs/reference/
202267 rest/v2beta2/organizations.deidentifyTemplates#ffxcommonnativealphabet
@@ -255,7 +320,7 @@ def reidentify_with_fpe(
255320 }
256321
257322 # Convert string to item
258- item = {"value" : string }
323+ item = {"value" : input_str }
259324
260325 # Call the API
261326 response = dlp .reidentify_content (
@@ -531,6 +596,28 @@ def redact_sensitive_data(project, item, info_types):
531596 help = "The character to mask matching sensitive data with." ,
532597 )
533598
599+ replace_parser = subparsers .add_parser (
600+ "deid_replace" ,
601+ help = "Deidentify sensitive data in a string by replacing it with "
602+ "another string." ,
603+ )
604+ replace_parser .add_argument (
605+ "--info_types" ,
606+ nargs = "+" ,
607+ help = "Strings representing info types to look for. A full list of "
608+ "info categories and types is available from the API. Examples "
609+ 'include "FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS". '
610+ "If unspecified, the three above examples will be used." ,
611+ default = ["FIRST_NAME" , "LAST_NAME" , "EMAIL_ADDRESS" ],
612+ )
613+ replace_parser .add_argument (
614+ "project" ,
615+ help = "The Google Cloud project id to use as a parent resource." ,
616+ )
617+ replace_parser .add_argument ("item" , help = "The string to deidentify." )
618+ replace_parser .add_argument ("replacement_str" , help = "The string to "
619+ "replace all matched values with." )
620+
534621 fpe_parser = subparsers .add_parser (
535622 "deid_fpe" ,
536623 help = "Deidentify sensitive data in a string using Format Preserving "
@@ -715,6 +802,13 @@ def redact_sensitive_data(project, item, info_types):
715802 masking_character = args .masking_character ,
716803 number_to_mask = args .number_to_mask ,
717804 )
805+ elif args .content == "deid_replace" :
806+ deidentify_with_replace (
807+ args .project ,
808+ args .item ,
809+ args .info_types ,
810+ replacement_str = args .replacement_str ,
811+ )
718812 elif args .content == "deid_fpe" :
719813 deidentify_with_fpe (
720814 args .project ,
0 commit comments