@@ -146,7 +146,7 @@ def __init__(self, config: AlbertConfig, **kwargs):
         self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="LayerNorm")
         self.dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)

-    def build(self, input_shape=None):
+    def build(self, input_shape: tf.TensorShape):
         with tf.name_scope("word_embeddings"):
             self.weight = self.add_weight(
                 name="weight",
@@ -168,12 +168,7 @@ def build(self, input_shape=None):
                 initializer=get_initializer(self.initializer_range),
             )

-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "LayerNorm", None) is not None:
-            with tf.name_scope(self.LayerNorm.name):
-                self.LayerNorm.build([None, None, self.config.embedding_size])
+        super().build(input_shape)

     # Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings.call
     def call(
@@ -251,7 +246,6 @@ def __init__(self, config: AlbertConfig, **kwargs):
         # Two different dropout probabilities; see https://github.com/google-research/albert/blob/master/modeling.py#L971-L993
         self.attention_dropout = tf.keras.layers.Dropout(rate=config.attention_probs_dropout_prob)
         self.output_dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)
-        self.config = config

     def transpose_for_scores(self, tensor: tf.Tensor, batch_size: int) -> tf.Tensor:
         # Reshape from [batch_size, seq_length, all_head_size] to [batch_size, seq_length, num_attention_heads, attention_head_size]
@@ -313,26 +307,6 @@ def call(

         return outputs

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "query", None) is not None:
-            with tf.name_scope(self.query.name):
-                self.query.build(self.config.hidden_size)
-        if getattr(self, "key", None) is not None:
-            with tf.name_scope(self.key.name):
-                self.key.build(self.config.hidden_size)
-        if getattr(self, "value", None) is not None:
-            with tf.name_scope(self.value.name):
-                self.value.build(self.config.hidden_size)
-        if getattr(self, "dense", None) is not None:
-            with tf.name_scope(self.dense.name):
-                self.dense.build(self.config.hidden_size)
-        if getattr(self, "LayerNorm", None) is not None:
-            with tf.name_scope(self.LayerNorm.name):
-                self.LayerNorm.build([None, None, self.config.hidden_size])
-

 class TFAlbertLayer(tf.keras.layers.Layer):
     def __init__(self, config: AlbertConfig, **kwargs):
@@ -355,7 +329,6 @@ def __init__(self, config: AlbertConfig, **kwargs):
             epsilon=config.layer_norm_eps, name="full_layer_layer_norm"
         )
         self.dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)
-        self.config = config

     def call(
         self,
@@ -383,23 +356,6 @@ def call(

         return outputs

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "attention", None) is not None:
-            with tf.name_scope(self.attention.name):
-                self.attention.build(None)
-        if getattr(self, "ffn", None) is not None:
-            with tf.name_scope(self.ffn.name):
-                self.ffn.build(self.config.hidden_size)
-        if getattr(self, "ffn_output", None) is not None:
-            with tf.name_scope(self.ffn_output.name):
-                self.ffn_output.build(self.config.intermediate_size)
-        if getattr(self, "full_layer_layer_norm", None) is not None:
-            with tf.name_scope(self.full_layer_layer_norm.name):
-                self.full_layer_layer_norm.build([None, None, self.config.hidden_size])
-

 class TFAlbertLayerGroup(tf.keras.layers.Layer):
     def __init__(self, config: AlbertConfig, **kwargs):
@@ -443,15 +399,6 @@ def call(

         return tuple(v for v in [hidden_states, layer_hidden_states, layer_attentions] if v is not None)

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert_layers", None) is not None:
-            for layer in self.albert_layers:
-                with tf.name_scope(layer.name):
-                    layer.build(None)
-

 class TFAlbertTransformer(tf.keras.layers.Layer):
     def __init__(self, config: AlbertConfig, **kwargs):
@@ -469,7 +416,6 @@ def __init__(self, config: AlbertConfig, **kwargs):
         self.albert_layer_groups = [
             TFAlbertLayerGroup(config, name=f"albert_layer_groups_._{i}") for i in range(config.num_hidden_groups)
         ]
-        self.config = config

     def call(
         self,
@@ -511,18 +457,6 @@ def call(
             last_hidden_state=hidden_states, hidden_states=all_hidden_states, attentions=all_attentions
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "embedding_hidden_mapping_in", None) is not None:
-            with tf.name_scope(self.embedding_hidden_mapping_in.name):
-                self.embedding_hidden_mapping_in.build(self.config.embedding_size)
-        if getattr(self, "albert_layer_groups", None) is not None:
-            for layer in self.albert_layer_groups:
-                with tf.name_scope(layer.name):
-                    layer.build(None)
-

 class TFAlbertPreTrainedModel(TFPreTrainedModel):
     """
@@ -554,21 +488,13 @@ def __init__(self, config: AlbertConfig, input_embeddings: tf.keras.layers.Layer
         # an output-only bias for each token.
         self.decoder = input_embeddings

-    def build(self, input_shape=None):
+    def build(self, input_shape: tf.TensorShape):
         self.bias = self.add_weight(shape=(self.config.vocab_size,), initializer="zeros", trainable=True, name="bias")
         self.decoder_bias = self.add_weight(
             shape=(self.config.vocab_size,), initializer="zeros", trainable=True, name="decoder/bias"
         )

-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "dense", None) is not None:
-            with tf.name_scope(self.dense.name):
-                self.dense.build(self.config.hidden_size)
-        if getattr(self, "LayerNorm", None) is not None:
-            with tf.name_scope(self.LayerNorm.name):
-                self.LayerNorm.build([None, None, self.config.embedding_size])
+        super().build(input_shape)

     def get_output_embeddings(self) -> tf.keras.layers.Layer:
         return self.decoder
@@ -724,20 +650,6 @@ def call(
             attentions=encoder_outputs.attentions,
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "embeddings", None) is not None:
-            with tf.name_scope(self.embeddings.name):
-                self.embeddings.build(None)
-        if getattr(self, "encoder", None) is not None:
-            with tf.name_scope(self.encoder.name):
-                self.encoder.build(None)
-        if getattr(self, "pooler", None) is not None:
-            with tf.name_scope(self.pooler.name):
-                self.pooler.build(None)  # TODO Matt might be wrong
-

 @dataclass
 class TFAlbertForPreTrainingOutput(ModelOutput):
@@ -913,14 +825,6 @@ def call(

         return outputs

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-

 @add_start_docstrings(
     """
@@ -1017,20 +921,6 @@ def call(
             attentions=outputs.attentions,
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "predictions", None) is not None:
-            with tf.name_scope(self.predictions.name):
-                self.predictions.build(None)
-        if getattr(self, "sop_classifier", None) is not None:
-            with tf.name_scope(self.sop_classifier.name):
-                self.sop_classifier.build(None)
-

 class TFAlbertSOPHead(tf.keras.layers.Layer):
     def __init__(self, config: AlbertConfig, **kwargs):
@@ -1042,22 +932,13 @@ def __init__(self, config: AlbertConfig, **kwargs):
             kernel_initializer=get_initializer(config.initializer_range),
             name="classifier",
         )
-        self.config = config

     def call(self, pooled_output: tf.Tensor, training: bool) -> tf.Tensor:
         dropout_pooled_output = self.dropout(inputs=pooled_output, training=training)
         logits = self.classifier(inputs=dropout_pooled_output)

         return logits

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "classifier", None) is not None:
-            with tf.name_scope(self.classifier.name):
-                self.classifier.build(self.config.hidden_size)
-

 @add_start_docstrings("""Albert Model with a `language modeling` head on top.""", ALBERT_START_DOCSTRING)
 class TFAlbertForMaskedLM(TFAlbertPreTrainedModel, TFMaskedLanguageModelingLoss):
@@ -1154,17 +1035,6 @@ def call(
             attentions=outputs.attentions,
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "predictions", None) is not None:
-            with tf.name_scope(self.predictions.name):
-                self.predictions.build(None)
-

 @add_start_docstrings(
     """
@@ -1188,7 +1058,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
         self.classifier = tf.keras.layers.Dense(
             units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )
-        self.config = config

     @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -1248,17 +1117,6 @@ def call(
             attentions=outputs.attentions,
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "classifier", None) is not None:
-            with tf.name_scope(self.classifier.name):
-                self.classifier.build(self.config.hidden_size)
-

 @add_start_docstrings(
     """
@@ -1287,7 +1145,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
         self.classifier = tf.keras.layers.Dense(
             units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )
-        self.config = config

     @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -1343,17 +1200,6 @@ def call(
             attentions=outputs.attentions,
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "classifier", None) is not None:
-            with tf.name_scope(self.classifier.name):
-                self.classifier.build(self.config.hidden_size)
-

 @add_start_docstrings(
     """
@@ -1375,7 +1221,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
         self.qa_outputs = tf.keras.layers.Dense(
             units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
         )
-        self.config = config

     @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -1450,17 +1295,6 @@ def call(
             attentions=outputs.attentions,
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "qa_outputs", None) is not None:
-            with tf.name_scope(self.qa_outputs.name):
-                self.qa_outputs.build(self.config.hidden_size)
-

 @add_start_docstrings(
     """
@@ -1482,7 +1316,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
         self.classifier = tf.keras.layers.Dense(
             units=1, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )
-        self.config = config

     @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
@@ -1561,14 +1394,3 @@ def call(
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
         )
-
-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "classifier", None) is not None:
-            with tf.name_scope(self.classifier.name):
-                self.classifier.build(self.config.hidden_size)
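For context on the pattern this patch reverts: every deleted `build` override follows one recipe (guard on `self.built`, then build each sublayer under its own `tf.name_scope` with dimensions taken from the config), while the restored code defers to Keras' default lazy building. The toy layer below is a minimal sketch of the two styles under TF 2.x with `tf.keras`; `ToyBlock`, `eager_build`, and the shapes are hypothetical, not part of this patch or the transformers API.

import tensorflow as tf


class ToyBlock(tf.keras.layers.Layer):
    """Stand-in for an ALBERT sublayer; illustrative only."""

    def __init__(self, hidden_size: int, **kwargs):
        super().__init__(**kwargs)
        self.hidden_size = hidden_size
        self.dense = tf.keras.layers.Dense(hidden_size, name="dense")

    # Style this patch restores: defer to Keras, which builds each sublayer
    # lazily the first time it sees real inputs.
    def build(self, input_shape: tf.TensorShape):
        super().build(input_shape)

    # Style this patch removes (sketch): create sublayer weights eagerly,
    # under the sublayer's own name scope, from config-known dimensions.
    def eager_build(self):
        if self.built:
            return
        self.built = True
        with tf.name_scope(self.dense.name):
            self.dense.build([None, None, self.hidden_size])

    def call(self, hidden_states: tf.Tensor) -> tf.Tensor:
        return self.dense(hidden_states)


# The lazy path materializes weights on the first forward pass; eager_build()
# would materialize them without any input data.
block = ToyBlock(hidden_size=64)
_ = block(tf.zeros([2, 10, 64]))  # dense is built here, with input dim 64

The trade-off: the eager style can create all weights without a dummy forward pass, but each layer must then duplicate shape knowledge from the config, which is why the `self.config = config` assignments are removed alongside the `build` overrides.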