@@ -146,7 +146,7 @@ def __init__(self, config: AlbertConfig, **kwargs):
146
146
self .LayerNorm = tf .keras .layers .LayerNormalization (epsilon = config .layer_norm_eps , name = "LayerNorm" )
147
147
self .dropout = tf .keras .layers .Dropout (rate = config .hidden_dropout_prob )
148
148
149
- def build (self , input_shape = None ):
149
+ def build (self , input_shape : tf . TensorShape ):
150
150
with tf .name_scope ("word_embeddings" ):
151
151
self .weight = self .add_weight (
152
152
name = "weight" ,
@@ -168,14 +168,7 @@ def build(self, input_shape=None):
168
168
initializer = get_initializer (self .initializer_range ),
169
169
)
170
170
171
-
172
- if self .built :
173
- return
174
- self .built = True
175
- if getattr (self , "LayerNorm" , None ) is not None :
176
- with tf .name_scope (self .LayerNorm .name ):
177
- self .LayerNorm .build ([None , None , self .config .embedding_size ])
178
-
171
+ super ().build (input_shape )
179
172
180
173
# Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings.call
181
174
def call (
@@ -253,7 +246,6 @@ def __init__(self, config: AlbertConfig, **kwargs):
253
246
# Two different dropout probabilities; see https://github.com/google-research/albert/blob/master/modeling.py#L971-L993
254
247
self .attention_dropout = tf .keras .layers .Dropout (rate = config .attention_probs_dropout_prob )
255
248
self .output_dropout = tf .keras .layers .Dropout (rate = config .hidden_dropout_prob )
256
- self .config = config
257
249
258
250
def transpose_for_scores (self , tensor : tf .Tensor , batch_size : int ) -> tf .Tensor :
259
251
# Reshape from [batch_size, seq_length, all_head_size] to [batch_size, seq_length, num_attention_heads, attention_head_size]
@@ -314,25 +306,6 @@ def call(
314
306
outputs = (attention_output ,) + self_outputs [1 :]
315
307
316
308
return outputs
317
- def build (self , input_shape = None ):
318
- if self .built :
319
- return
320
- self .built = True
321
- if getattr (self , "query" , None ) is not None :
322
- with tf .name_scope (self .query .name ):
323
- self .query .build (self .config .hidden_size )
324
- if getattr (self , "key" , None ) is not None :
325
- with tf .name_scope (self .key .name ):
326
- self .key .build (self .config .hidden_size )
327
- if getattr (self , "value" , None ) is not None :
328
- with tf .name_scope (self .value .name ):
329
- self .value .build (self .config .hidden_size )
330
- if getattr (self , "dense" , None ) is not None :
331
- with tf .name_scope (self .dense .name ):
332
- self .dense .build (self .config .hidden_size )
333
- if getattr (self , "LayerNorm" , None ) is not None :
334
- with tf .name_scope (self .LayerNorm .name ):
335
- self .LayerNorm .build ([None , None , self .config .hidden_size ])
336
309
337
310
338
311
class TFAlbertLayer (tf .keras .layers .Layer ):
@@ -356,7 +329,6 @@ def __init__(self, config: AlbertConfig, **kwargs):
356
329
epsilon = config .layer_norm_eps , name = "full_layer_layer_norm"
357
330
)
358
331
self .dropout = tf .keras .layers .Dropout (rate = config .hidden_dropout_prob )
359
- self .config = config
360
332
361
333
def call (
362
334
self ,
@@ -383,22 +355,6 @@ def call(
383
355
outputs = (hidden_states ,) + attention_outputs [1 :]
384
356
385
357
return outputs
386
- def build (self , input_shape = None ):
387
- if self .built :
388
- return
389
- self .built = True
390
- if getattr (self , "attention" , None ) is not None :
391
- with tf .name_scope (self .attention .name ):
392
- self .attention .build (None )
393
- if getattr (self , "ffn" , None ) is not None :
394
- with tf .name_scope (self .ffn .name ):
395
- self .ffn .build (self .config .hidden_size )
396
- if getattr (self , "ffn_output" , None ) is not None :
397
- with tf .name_scope (self .ffn_output .name ):
398
- self .ffn_output .build (self .config .intermediate_size )
399
- if getattr (self , "full_layer_layer_norm" , None ) is not None :
400
- with tf .name_scope (self .full_layer_layer_norm .name ):
401
- self .full_layer_layer_norm .build ([None , None , self .config .hidden_size ])
402
358
403
359
404
360
class TFAlbertLayerGroup (tf .keras .layers .Layer ):
@@ -442,14 +398,6 @@ def call(
442
398
layer_hidden_states = layer_hidden_states + (hidden_states ,)
443
399
444
400
return tuple (v for v in [hidden_states , layer_hidden_states , layer_attentions ] if v is not None )
445
- def build (self , input_shape = None ):
446
- if self .built :
447
- return
448
- self .built = True
449
- if getattr (self , "albert_layers" , None ) is not None :
450
- for layer in self .albert_layers :
451
- with tf .name_scope (layer .name ):
452
- layer .build (None )
453
401
454
402
455
403
class TFAlbertTransformer (tf .keras .layers .Layer ):
@@ -468,7 +416,6 @@ def __init__(self, config: AlbertConfig, **kwargs):
468
416
self .albert_layer_groups = [
469
417
TFAlbertLayerGroup (config , name = f"albert_layer_groups_._{ i } " ) for i in range (config .num_hidden_groups )
470
418
]
471
- self .config = config
472
419
473
420
def call (
474
421
self ,
@@ -509,17 +456,6 @@ def call(
509
456
return TFBaseModelOutput (
510
457
last_hidden_state = hidden_states , hidden_states = all_hidden_states , attentions = all_attentions
511
458
)
512
- def build (self , input_shape = None ):
513
- if self .built :
514
- return
515
- self .built = True
516
- if getattr (self , "embedding_hidden_mapping_in" , None ) is not None :
517
- with tf .name_scope (self .embedding_hidden_mapping_in .name ):
518
- self .embedding_hidden_mapping_in .build (self .config .embedding_size )
519
- if getattr (self , "albert_layer_groups" , None ) is not None :
520
- for layer in self .albert_layer_groups :
521
- with tf .name_scope (layer .name ):
522
- layer .build (None )
523
459
524
460
525
461
class TFAlbertPreTrainedModel (TFPreTrainedModel ):
@@ -552,23 +488,13 @@ def __init__(self, config: AlbertConfig, input_embeddings: tf.keras.layers.Layer
552
488
# an output-only bias for each token.
553
489
self .decoder = input_embeddings
554
490
555
- def build (self , input_shape = None ):
491
+ def build (self , input_shape : tf . TensorShape ):
556
492
self .bias = self .add_weight (shape = (self .config .vocab_size ,), initializer = "zeros" , trainable = True , name = "bias" )
557
493
self .decoder_bias = self .add_weight (
558
494
shape = (self .config .vocab_size ,), initializer = "zeros" , trainable = True , name = "decoder/bias"
559
495
)
560
496
561
-
562
- if self .built :
563
- return
564
- self .built = True
565
- if getattr (self , "dense" , None ) is not None :
566
- with tf .name_scope (self .dense .name ):
567
- self .dense .build (self .config .hidden_size )
568
- if getattr (self , "LayerNorm" , None ) is not None :
569
- with tf .name_scope (self .LayerNorm .name ):
570
- self .LayerNorm .build ([None , None , self .config .embedding_size ])
571
-
497
+ super ().build (input_shape )
572
498
573
499
def get_output_embeddings (self ) -> tf .keras .layers .Layer :
574
500
return self .decoder
@@ -723,19 +649,6 @@ def call(
723
649
hidden_states = encoder_outputs .hidden_states ,
724
650
attentions = encoder_outputs .attentions ,
725
651
)
726
- def build (self , input_shape = None ):
727
- if self .built :
728
- return
729
- self .built = True
730
- if getattr (self , "embeddings" , None ) is not None :
731
- with tf .name_scope (self .embeddings .name ):
732
- self .embeddings .build (None )
733
- if getattr (self , "encoder" , None ) is not None :
734
- with tf .name_scope (self .encoder .name ):
735
- self .encoder .build (None )
736
- if getattr (self , "pooler" , None ) is not None :
737
- with tf .name_scope (self .pooler .name ):
738
- self .pooler .build (None ) # TODO Matt might be wrong
739
652
740
653
741
654
@dataclass
@@ -911,13 +824,6 @@ def call(
911
824
)
912
825
913
826
return outputs
914
- def build (self , input_shape = None ):
915
- if self .built :
916
- return
917
- self .built = True
918
- if getattr (self , "albert" , None ) is not None :
919
- with tf .name_scope (self .albert .name ):
920
- self .albert .build (None )
921
827
922
828
923
829
@add_start_docstrings (
@@ -1014,19 +920,6 @@ def call(
1014
920
hidden_states = outputs .hidden_states ,
1015
921
attentions = outputs .attentions ,
1016
922
)
1017
- def build (self , input_shape = None ):
1018
- if self .built :
1019
- return
1020
- self .built = True
1021
- if getattr (self , "albert" , None ) is not None :
1022
- with tf .name_scope (self .albert .name ):
1023
- self .albert .build (None )
1024
- if getattr (self , "predictions" , None ) is not None :
1025
- with tf .name_scope (self .predictions .name ):
1026
- self .predictions .build (None )
1027
- if getattr (self , "sop_classifier" , None ) is not None :
1028
- with tf .name_scope (self .sop_classifier .name ):
1029
- self .sop_classifier .build (None )
1030
923
1031
924
1032
925
class TFAlbertSOPHead (tf .keras .layers .Layer ):
@@ -1039,20 +932,12 @@ def __init__(self, config: AlbertConfig, **kwargs):
1039
932
kernel_initializer = get_initializer (config .initializer_range ),
1040
933
name = "classifier" ,
1041
934
)
1042
- self .config = config
1043
935
1044
936
def call (self , pooled_output : tf .Tensor , training : bool ) -> tf .Tensor :
1045
937
dropout_pooled_output = self .dropout (inputs = pooled_output , training = training )
1046
938
logits = self .classifier (inputs = dropout_pooled_output )
1047
939
1048
940
return logits
1049
- def build (self , input_shape = None ):
1050
- if self .built :
1051
- return
1052
- self .built = True
1053
- if getattr (self , "classifier" , None ) is not None :
1054
- with tf .name_scope (self .classifier .name ):
1055
- self .classifier .build (self .config .hidden_size )
1056
941
1057
942
1058
943
@add_start_docstrings ("""Albert Model with a `language modeling` head on top.""" , ALBERT_START_DOCSTRING )
@@ -1149,16 +1034,6 @@ def call(
1149
1034
hidden_states = outputs .hidden_states ,
1150
1035
attentions = outputs .attentions ,
1151
1036
)
1152
- def build (self , input_shape = None ):
1153
- if self .built :
1154
- return
1155
- self .built = True
1156
- if getattr (self , "albert" , None ) is not None :
1157
- with tf .name_scope (self .albert .name ):
1158
- self .albert .build (None )
1159
- if getattr (self , "predictions" , None ) is not None :
1160
- with tf .name_scope (self .predictions .name ):
1161
- self .predictions .build (None )
1162
1037
1163
1038
1164
1039
@add_start_docstrings (
@@ -1183,7 +1058,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
1183
1058
self .classifier = tf .keras .layers .Dense (
1184
1059
units = config .num_labels , kernel_initializer = get_initializer (config .initializer_range ), name = "classifier"
1185
1060
)
1186
- self .config = config
1187
1061
1188
1062
@unpack_inputs
1189
1063
@add_start_docstrings_to_model_forward (ALBERT_INPUTS_DOCSTRING .format ("batch_size, sequence_length" ))
@@ -1242,16 +1116,6 @@ def call(
1242
1116
hidden_states = outputs .hidden_states ,
1243
1117
attentions = outputs .attentions ,
1244
1118
)
1245
- def build (self , input_shape = None ):
1246
- if self .built :
1247
- return
1248
- self .built = True
1249
- if getattr (self , "albert" , None ) is not None :
1250
- with tf .name_scope (self .albert .name ):
1251
- self .albert .build (None )
1252
- if getattr (self , "classifier" , None ) is not None :
1253
- with tf .name_scope (self .classifier .name ):
1254
- self .classifier .build (self .config .hidden_size )
1255
1119
1256
1120
1257
1121
@add_start_docstrings (
@@ -1281,7 +1145,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
1281
1145
self .classifier = tf .keras .layers .Dense (
1282
1146
units = config .num_labels , kernel_initializer = get_initializer (config .initializer_range ), name = "classifier"
1283
1147
)
1284
- self .config = config
1285
1148
1286
1149
@unpack_inputs
1287
1150
@add_start_docstrings_to_model_forward (ALBERT_INPUTS_DOCSTRING .format ("batch_size, sequence_length" ))
@@ -1336,16 +1199,6 @@ def call(
1336
1199
hidden_states = outputs .hidden_states ,
1337
1200
attentions = outputs .attentions ,
1338
1201
)
1339
- def build (self , input_shape = None ):
1340
- if self .built :
1341
- return
1342
- self .built = True
1343
- if getattr (self , "albert" , None ) is not None :
1344
- with tf .name_scope (self .albert .name ):
1345
- self .albert .build (None )
1346
- if getattr (self , "classifier" , None ) is not None :
1347
- with tf .name_scope (self .classifier .name ):
1348
- self .classifier .build (self .config .hidden_size )
1349
1202
1350
1203
1351
1204
@add_start_docstrings (
@@ -1368,7 +1221,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
1368
1221
self .qa_outputs = tf .keras .layers .Dense (
1369
1222
units = config .num_labels , kernel_initializer = get_initializer (config .initializer_range ), name = "qa_outputs"
1370
1223
)
1371
- self .config = config
1372
1224
1373
1225
@unpack_inputs
1374
1226
@add_start_docstrings_to_model_forward (ALBERT_INPUTS_DOCSTRING .format ("batch_size, sequence_length" ))
@@ -1442,16 +1294,6 @@ def call(
1442
1294
hidden_states = outputs .hidden_states ,
1443
1295
attentions = outputs .attentions ,
1444
1296
)
1445
- def build (self , input_shape = None ):
1446
- if self .built :
1447
- return
1448
- self .built = True
1449
- if getattr (self , "albert" , None ) is not None :
1450
- with tf .name_scope (self .albert .name ):
1451
- self .albert .build (None )
1452
- if getattr (self , "qa_outputs" , None ) is not None :
1453
- with tf .name_scope (self .qa_outputs .name ):
1454
- self .qa_outputs .build (self .config .hidden_size )
1455
1297
1456
1298
1457
1299
@add_start_docstrings (
@@ -1474,7 +1316,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
1474
1316
self .classifier = tf .keras .layers .Dense (
1475
1317
units = 1 , kernel_initializer = get_initializer (config .initializer_range ), name = "classifier"
1476
1318
)
1477
- self .config = config
1478
1319
1479
1320
@unpack_inputs
1480
1321
@add_start_docstrings_to_model_forward (ALBERT_INPUTS_DOCSTRING .format ("batch_size, num_choices, sequence_length" ))
@@ -1553,13 +1394,3 @@ def call(
1553
1394
hidden_states = outputs .hidden_states ,
1554
1395
attentions = outputs .attentions ,
1555
1396
)
1556
- def build (self , input_shape = None ):
1557
- if self .built :
1558
- return
1559
- self .built = True
1560
- if getattr (self , "albert" , None ) is not None :
1561
- with tf .name_scope (self .albert .name ):
1562
- self .albert .build (None )
1563
- if getattr (self , "classifier" , None ) is not None :
1564
- with tf .name_scope (self .classifier .name ):
1565
- self .classifier .build (self .config .hidden_size )
0 commit comments