@@ -146,7 +146,7 @@ def __init__(self, config: AlbertConfig, **kwargs):
146
146
self .LayerNorm = tf .keras .layers .LayerNormalization (epsilon = config .layer_norm_eps , name = "LayerNorm" )
147
147
self .dropout = tf .keras .layers .Dropout (rate = config .hidden_dropout_prob )
148
148
149
- def build (self , input_shape = None ):
149
+ def build (self , input_shape : tf . TensorShape ):
150
150
with tf .name_scope ("word_embeddings" ):
151
151
self .weight = self .add_weight (
152
152
name = "weight" ,
@@ -168,12 +168,7 @@ def build(self, input_shape=None):
168
168
initializer = get_initializer (self .initializer_range ),
169
169
)
170
170
171
- if self .built :
172
- return
173
- self .built = True
174
- if getattr (self , "LayerNorm" , None ) is not None :
175
- with tf .name_scope (self .LayerNorm .name ):
176
- self .LayerNorm .build ([None , None , self .config .embedding_size ])
171
+ super ().build (input_shape )
177
172
178
173
# Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings.call
179
174
def call (
@@ -251,7 +246,6 @@ def __init__(self, config: AlbertConfig, **kwargs):
251
246
# Two different dropout probabilities; see https://github.com/google-research/albert/blob/master/modeling.py#L971-L993
252
247
self .attention_dropout = tf .keras .layers .Dropout (rate = config .attention_probs_dropout_prob )
253
248
self .output_dropout = tf .keras .layers .Dropout (rate = config .hidden_dropout_prob )
254
- self .config = config
255
249
256
250
def transpose_for_scores (self , tensor : tf .Tensor , batch_size : int ) -> tf .Tensor :
257
251
# Reshape from [batch_size, seq_length, all_head_size] to [batch_size, seq_length, num_attention_heads, attention_head_size]
@@ -313,26 +307,6 @@ def call(
313
307
314
308
return outputs
315
309
316
- def build (self , input_shape = None ):
317
- if self .built :
318
- return
319
- self .built = True
320
- if getattr (self , "query" , None ) is not None :
321
- with tf .name_scope (self .query .name ):
322
- self .query .build (self .config .hidden_size )
323
- if getattr (self , "key" , None ) is not None :
324
- with tf .name_scope (self .key .name ):
325
- self .key .build (self .config .hidden_size )
326
- if getattr (self , "value" , None ) is not None :
327
- with tf .name_scope (self .value .name ):
328
- self .value .build (self .config .hidden_size )
329
- if getattr (self , "dense" , None ) is not None :
330
- with tf .name_scope (self .dense .name ):
331
- self .dense .build (self .config .hidden_size )
332
- if getattr (self , "LayerNorm" , None ) is not None :
333
- with tf .name_scope (self .LayerNorm .name ):
334
- self .LayerNorm .build ([None , None , self .config .hidden_size ])
335
-
336
310
337
311
class TFAlbertLayer (tf .keras .layers .Layer ):
338
312
def __init__ (self , config : AlbertConfig , ** kwargs ):
@@ -355,7 +329,6 @@ def __init__(self, config: AlbertConfig, **kwargs):
355
329
epsilon = config .layer_norm_eps , name = "full_layer_layer_norm"
356
330
)
357
331
self .dropout = tf .keras .layers .Dropout (rate = config .hidden_dropout_prob )
358
- self .config = config
359
332
360
333
def call (
361
334
self ,
@@ -383,23 +356,6 @@ def call(
383
356
384
357
return outputs
385
358
386
- def build (self , input_shape = None ):
387
- if self .built :
388
- return
389
- self .built = True
390
- if getattr (self , "attention" , None ) is not None :
391
- with tf .name_scope (self .attention .name ):
392
- self .attention .build (None )
393
- if getattr (self , "ffn" , None ) is not None :
394
- with tf .name_scope (self .ffn .name ):
395
- self .ffn .build (self .config .hidden_size )
396
- if getattr (self , "ffn_output" , None ) is not None :
397
- with tf .name_scope (self .ffn_output .name ):
398
- self .ffn_output .build (self .config .intermediate_size )
399
- if getattr (self , "full_layer_layer_norm" , None ) is not None :
400
- with tf .name_scope (self .full_layer_layer_norm .name ):
401
- self .full_layer_layer_norm .build ([None , None , self .config .hidden_size ])
402
-
403
359
404
360
class TFAlbertLayerGroup (tf .keras .layers .Layer ):
405
361
def __init__ (self , config : AlbertConfig , ** kwargs ):
@@ -443,15 +399,6 @@ def call(
443
399
444
400
return tuple (v for v in [hidden_states , layer_hidden_states , layer_attentions ] if v is not None )
445
401
446
- def build (self , input_shape = None ):
447
- if self .built :
448
- return
449
- self .built = True
450
- if getattr (self , "albert_layers" , None ) is not None :
451
- for layer in self .albert_layers :
452
- with tf .name_scope (layer .name ):
453
- layer .build (None )
454
-
455
402
456
403
class TFAlbertTransformer (tf .keras .layers .Layer ):
457
404
def __init__ (self , config : AlbertConfig , ** kwargs ):
@@ -469,7 +416,6 @@ def __init__(self, config: AlbertConfig, **kwargs):
469
416
self .albert_layer_groups = [
470
417
TFAlbertLayerGroup (config , name = f"albert_layer_groups_._{ i } " ) for i in range (config .num_hidden_groups )
471
418
]
472
- self .config = config
473
419
474
420
def call (
475
421
self ,
@@ -511,18 +457,6 @@ def call(
511
457
last_hidden_state = hidden_states , hidden_states = all_hidden_states , attentions = all_attentions
512
458
)
513
459
514
- def build (self , input_shape = None ):
515
- if self .built :
516
- return
517
- self .built = True
518
- if getattr (self , "embedding_hidden_mapping_in" , None ) is not None :
519
- with tf .name_scope (self .embedding_hidden_mapping_in .name ):
520
- self .embedding_hidden_mapping_in .build (self .config .embedding_size )
521
- if getattr (self , "albert_layer_groups" , None ) is not None :
522
- for layer in self .albert_layer_groups :
523
- with tf .name_scope (layer .name ):
524
- layer .build (None )
525
-
526
460
527
461
class TFAlbertPreTrainedModel (TFPreTrainedModel ):
528
462
"""
@@ -554,21 +488,13 @@ def __init__(self, config: AlbertConfig, input_embeddings: tf.keras.layers.Layer
554
488
# an output-only bias for each token.
555
489
self .decoder = input_embeddings
556
490
557
- def build (self , input_shape = None ):
491
+ def build (self , input_shape : tf . TensorShape ):
558
492
self .bias = self .add_weight (shape = (self .config .vocab_size ,), initializer = "zeros" , trainable = True , name = "bias" )
559
493
self .decoder_bias = self .add_weight (
560
494
shape = (self .config .vocab_size ,), initializer = "zeros" , trainable = True , name = "decoder/bias"
561
495
)
562
496
563
- if self .built :
564
- return
565
- self .built = True
566
- if getattr (self , "dense" , None ) is not None :
567
- with tf .name_scope (self .dense .name ):
568
- self .dense .build (self .config .hidden_size )
569
- if getattr (self , "LayerNorm" , None ) is not None :
570
- with tf .name_scope (self .LayerNorm .name ):
571
- self .LayerNorm .build ([None , None , self .config .embedding_size ])
497
+ super ().build (input_shape )
572
498
573
499
def get_output_embeddings (self ) -> tf .keras .layers .Layer :
574
500
return self .decoder
@@ -724,20 +650,6 @@ def call(
724
650
attentions = encoder_outputs .attentions ,
725
651
)
726
652
727
- def build (self , input_shape = None ):
728
- if self .built :
729
- return
730
- self .built = True
731
- if getattr (self , "embeddings" , None ) is not None :
732
- with tf .name_scope (self .embeddings .name ):
733
- self .embeddings .build (None )
734
- if getattr (self , "encoder" , None ) is not None :
735
- with tf .name_scope (self .encoder .name ):
736
- self .encoder .build (None )
737
- if getattr (self , "pooler" , None ) is not None :
738
- with tf .name_scope (self .pooler .name ):
739
- self .pooler .build (None ) # TODO Matt might be wrong
740
-
741
653
742
654
@dataclass
743
655
class TFAlbertForPreTrainingOutput (ModelOutput ):
@@ -913,14 +825,6 @@ def call(
913
825
914
826
return outputs
915
827
916
- def build (self , input_shape = None ):
917
- if self .built :
918
- return
919
- self .built = True
920
- if getattr (self , "albert" , None ) is not None :
921
- with tf .name_scope (self .albert .name ):
922
- self .albert .build (None )
923
-
924
828
925
829
@add_start_docstrings (
926
830
"""
@@ -1017,20 +921,6 @@ def call(
1017
921
attentions = outputs .attentions ,
1018
922
)
1019
923
1020
- def build (self , input_shape = None ):
1021
- if self .built :
1022
- return
1023
- self .built = True
1024
- if getattr (self , "albert" , None ) is not None :
1025
- with tf .name_scope (self .albert .name ):
1026
- self .albert .build (None )
1027
- if getattr (self , "predictions" , None ) is not None :
1028
- with tf .name_scope (self .predictions .name ):
1029
- self .predictions .build (None )
1030
- if getattr (self , "sop_classifier" , None ) is not None :
1031
- with tf .name_scope (self .sop_classifier .name ):
1032
- self .sop_classifier .build (None )
1033
-
1034
924
1035
925
class TFAlbertSOPHead (tf .keras .layers .Layer ):
1036
926
def __init__ (self , config : AlbertConfig , ** kwargs ):
@@ -1042,22 +932,13 @@ def __init__(self, config: AlbertConfig, **kwargs):
1042
932
kernel_initializer = get_initializer (config .initializer_range ),
1043
933
name = "classifier" ,
1044
934
)
1045
- self .config = config
1046
935
1047
936
def call (self , pooled_output : tf .Tensor , training : bool ) -> tf .Tensor :
1048
937
dropout_pooled_output = self .dropout (inputs = pooled_output , training = training )
1049
938
logits = self .classifier (inputs = dropout_pooled_output )
1050
939
1051
940
return logits
1052
941
1053
- def build (self , input_shape = None ):
1054
- if self .built :
1055
- return
1056
- self .built = True
1057
- if getattr (self , "classifier" , None ) is not None :
1058
- with tf .name_scope (self .classifier .name ):
1059
- self .classifier .build (self .config .hidden_size )
1060
-
1061
942
1062
943
@add_start_docstrings ("""Albert Model with a `language modeling` head on top.""" , ALBERT_START_DOCSTRING )
1063
944
class TFAlbertForMaskedLM (TFAlbertPreTrainedModel , TFMaskedLanguageModelingLoss ):
@@ -1154,17 +1035,6 @@ def call(
1154
1035
attentions = outputs .attentions ,
1155
1036
)
1156
1037
1157
- def build (self , input_shape = None ):
1158
- if self .built :
1159
- return
1160
- self .built = True
1161
- if getattr (self , "albert" , None ) is not None :
1162
- with tf .name_scope (self .albert .name ):
1163
- self .albert .build (None )
1164
- if getattr (self , "predictions" , None ) is not None :
1165
- with tf .name_scope (self .predictions .name ):
1166
- self .predictions .build (None )
1167
-
1168
1038
1169
1039
@add_start_docstrings (
1170
1040
"""
@@ -1188,7 +1058,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
1188
1058
self .classifier = tf .keras .layers .Dense (
1189
1059
units = config .num_labels , kernel_initializer = get_initializer (config .initializer_range ), name = "classifier"
1190
1060
)
1191
- self .config = config
1192
1061
1193
1062
@unpack_inputs
1194
1063
@add_start_docstrings_to_model_forward (ALBERT_INPUTS_DOCSTRING .format ("batch_size, sequence_length" ))
@@ -1248,17 +1117,6 @@ def call(
1248
1117
attentions = outputs .attentions ,
1249
1118
)
1250
1119
1251
- def build (self , input_shape = None ):
1252
- if self .built :
1253
- return
1254
- self .built = True
1255
- if getattr (self , "albert" , None ) is not None :
1256
- with tf .name_scope (self .albert .name ):
1257
- self .albert .build (None )
1258
- if getattr (self , "classifier" , None ) is not None :
1259
- with tf .name_scope (self .classifier .name ):
1260
- self .classifier .build (self .config .hidden_size )
1261
-
1262
1120
1263
1121
@add_start_docstrings (
1264
1122
"""
@@ -1287,7 +1145,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
1287
1145
self .classifier = tf .keras .layers .Dense (
1288
1146
units = config .num_labels , kernel_initializer = get_initializer (config .initializer_range ), name = "classifier"
1289
1147
)
1290
- self .config = config
1291
1148
1292
1149
@unpack_inputs
1293
1150
@add_start_docstrings_to_model_forward (ALBERT_INPUTS_DOCSTRING .format ("batch_size, sequence_length" ))
@@ -1343,17 +1200,6 @@ def call(
1343
1200
attentions = outputs .attentions ,
1344
1201
)
1345
1202
1346
- def build (self , input_shape = None ):
1347
- if self .built :
1348
- return
1349
- self .built = True
1350
- if getattr (self , "albert" , None ) is not None :
1351
- with tf .name_scope (self .albert .name ):
1352
- self .albert .build (None )
1353
- if getattr (self , "classifier" , None ) is not None :
1354
- with tf .name_scope (self .classifier .name ):
1355
- self .classifier .build (self .config .hidden_size )
1356
-
1357
1203
1358
1204
@add_start_docstrings (
1359
1205
"""
@@ -1375,7 +1221,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
1375
1221
self .qa_outputs = tf .keras .layers .Dense (
1376
1222
units = config .num_labels , kernel_initializer = get_initializer (config .initializer_range ), name = "qa_outputs"
1377
1223
)
1378
- self .config = config
1379
1224
1380
1225
@unpack_inputs
1381
1226
@add_start_docstrings_to_model_forward (ALBERT_INPUTS_DOCSTRING .format ("batch_size, sequence_length" ))
@@ -1450,17 +1295,6 @@ def call(
1450
1295
attentions = outputs .attentions ,
1451
1296
)
1452
1297
1453
- def build (self , input_shape = None ):
1454
- if self .built :
1455
- return
1456
- self .built = True
1457
- if getattr (self , "albert" , None ) is not None :
1458
- with tf .name_scope (self .albert .name ):
1459
- self .albert .build (None )
1460
- if getattr (self , "qa_outputs" , None ) is not None :
1461
- with tf .name_scope (self .qa_outputs .name ):
1462
- self .qa_outputs .build (self .config .hidden_size )
1463
-
1464
1298
1465
1299
@add_start_docstrings (
1466
1300
"""
@@ -1482,7 +1316,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
1482
1316
self .classifier = tf .keras .layers .Dense (
1483
1317
units = 1 , kernel_initializer = get_initializer (config .initializer_range ), name = "classifier"
1484
1318
)
1485
- self .config = config
1486
1319
1487
1320
@unpack_inputs
1488
1321
@add_start_docstrings_to_model_forward (ALBERT_INPUTS_DOCSTRING .format ("batch_size, num_choices, sequence_length" ))
@@ -1561,14 +1394,3 @@ def call(
1561
1394
hidden_states = outputs .hidden_states ,
1562
1395
attentions = outputs .attentions ,
1563
1396
)
1564
-
1565
- def build (self , input_shape = None ):
1566
- if self .built :
1567
- return
1568
- self .built = True
1569
- if getattr (self , "albert" , None ) is not None :
1570
- with tf .name_scope (self .albert .name ):
1571
- self .albert .build (None )
1572
- if getattr (self , "classifier" , None ) is not None :
1573
- with tf .name_scope (self .classifier .name ):
1574
- self .classifier .build (self .config .hidden_size )
0 commit comments