
Commit b85e78e

Revert "Attempt #3"
This reverts commit b9df7a0.
1 parent f226cb8 commit b85e78e

File tree

66 files changed: +178, -10434 lines


src/transformers/models/albert/modeling_tf_albert.py

+4, -191
@@ -146,7 +146,7 @@ def __init__(self, config: AlbertConfig, **kwargs):
         self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="LayerNorm")
         self.dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)

-    def build(self, input_shape=None):
+    def build(self, input_shape: tf.TensorShape):
         with tf.name_scope("word_embeddings"):
             self.weight = self.add_weight(
                 name="weight",
@@ -168,12 +168,7 @@ def build(self, input_shape=None):
                 initializer=get_initializer(self.initializer_range),
             )

-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "LayerNorm", None) is not None:
-            with tf.name_scope(self.LayerNorm.name):
-                self.LayerNorm.build([None, None, self.config.embedding_size])
+        super().build(input_shape)

     # Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings.call
     def call(
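The change these hunks revert is the same throughout the file: "Attempt #3" gave each layer an explicit build() that constructed every sublayer eagerly under a tf.name_scope, while the restored code creates only the layer's own weights and then defers to super().build(), leaving sublayers to build themselves on their first call. A minimal, runnable sketch of the two patterns (the class and attribute names here are illustrative, not the repository's code):

import tensorflow as tf


class EagerBuildBlock(tf.keras.layers.Layer):
    """Pattern removed by the revert: build each sublayer by hand."""

    def __init__(self, hidden_size=64, **kwargs):
        super().__init__(**kwargs)
        self.hidden_size = hidden_size
        self.dense = tf.keras.layers.Dense(hidden_size, name="dense")

    def build(self, input_shape=None):
        if self.built:
            return
        self.built = True
        if getattr(self, "dense", None) is not None:
            # Build the sublayer under its own name scope so its weights get
            # deterministic names even when call() is never traced.
            with tf.name_scope(self.dense.name):
                self.dense.build([None, self.hidden_size])

    def call(self, inputs):
        return self.dense(inputs)


class LazyBuildBlock(tf.keras.layers.Layer):
    """Pattern restored by the revert: defer to the default Keras build."""

    def __init__(self, hidden_size=64, **kwargs):
        super().__init__(**kwargs)
        self.dense = tf.keras.layers.Dense(hidden_size, name="dense")

    def build(self, input_shape: tf.TensorShape):
        # Weights owned directly by this layer would be created here; the
        # Dense sublayer is built lazily on its first call instead.
        super().build(input_shape)

    def call(self, inputs):
        return self.dense(inputs)


x = tf.zeros([2, 64])
print(LazyBuildBlock()(x).shape)  # sublayer built on this first call: (2, 64)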
@@ -251,8 +246,6 @@ def __init__(self, config: AlbertConfig, **kwargs):
         # Two different dropout probabilities; see https://github.com/google-research/albert/blob/master/modeling.py#L971-L993
         self.attention_dropout = tf.keras.layers.Dropout(rate=config.attention_probs_dropout_prob)
         self.output_dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)
-        self.hidden_size = config.hidden_size
-        self.config = config

     def transpose_for_scores(self, tensor: tf.Tensor, batch_size: int) -> tf.Tensor:
         # Reshape from [batch_size, seq_length, all_head_size] to [batch_size, seq_length, num_attention_heads, attention_head_size]
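The transpose_for_scores comment above describes the standard multi-head attention reshape; for reference, a hypothetical standalone version (the function and argument names are assumed, not taken from the file):

import tensorflow as tf


def transpose_for_scores(tensor, batch_size, num_attention_heads, attention_head_size):
    # [batch_size, seq_length, all_head_size]
    #   -> [batch_size, seq_length, num_attention_heads, attention_head_size]
    tensor = tf.reshape(tensor, (batch_size, -1, num_attention_heads, attention_head_size))
    # -> [batch_size, num_attention_heads, seq_length, attention_head_size]
    return tf.transpose(tensor, perm=[0, 2, 1, 3])


scores = transpose_for_scores(tf.zeros([2, 5, 12]), batch_size=2, num_attention_heads=3, attention_head_size=4)
print(scores.shape)  # (2, 3, 5, 4)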
@@ -314,26 +307,6 @@ def call(

         return outputs

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "query", None) is not None:
-            with tf.name_scope(self.query.name):
-                self.query.build(self.config.hidden_size)
-        if getattr(self, "key", None) is not None:
-            with tf.name_scope(self.key.name):
-                self.key.build(self.config.hidden_size)
-        if getattr(self, "value", None) is not None:
-            with tf.name_scope(self.value.name):
-                self.value.build(self.config.hidden_size)
-        if getattr(self, "dense", None) is not None:
-            with tf.name_scope(self.dense.name):
-                self.dense.build(self.config.hidden_size)
-        if getattr(self, "LayerNorm", None) is not None:
-            with tf.name_scope(self.LayerNorm.name):
-                self.LayerNorm.build([None, None, self.config.hidden_size])
-

 class TFAlbertLayer(tf.keras.layers.Layer):
     def __init__(self, config: AlbertConfig, **kwargs):
@@ -356,9 +329,6 @@ def __init__(self, config: AlbertConfig, **kwargs):
             epsilon=config.layer_norm_eps, name="full_layer_layer_norm"
         )
         self.dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)
-        self.intermediate_size = config.intermediate_size
-        self.hidden_size = config.hidden_size
-        self.config = config

     def call(
         self,
@@ -386,23 +356,6 @@ def call(

         return outputs

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "attention", None) is not None:
-            with tf.name_scope(self.attention.name):
-                self.attention.build(None)
-        if getattr(self, "ffn", None) is not None:
-            with tf.name_scope(self.ffn.name):
-                self.ffn.build(self.config.hidden_size)
-        if getattr(self, "ffn_output", None) is not None:
-            with tf.name_scope(self.ffn_output.name):
-                self.ffn_output.build(self.config.intermediate_size)
-        if getattr(self, "full_layer_layer_norm", None) is not None:
-            with tf.name_scope(self.full_layer_layer_norm.name):
-                self.full_layer_layer_norm.build([None, None, self.config.hidden_size])
-

 class TFAlbertLayerGroup(tf.keras.layers.Layer):
     def __init__(self, config: AlbertConfig, **kwargs):
@@ -446,15 +399,6 @@ def call(

         return tuple(v for v in [hidden_states, layer_hidden_states, layer_attentions] if v is not None)

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert_layers", None) is not None:
-            for layer in self.albert_layers:
-                with tf.name_scope(layer.name):
-                    layer.build(None)
-

 class TFAlbertTransformer(tf.keras.layers.Layer):
     def __init__(self, config: AlbertConfig, **kwargs):
@@ -472,7 +416,6 @@ def __init__(self, config: AlbertConfig, **kwargs):
         self.albert_layer_groups = [
             TFAlbertLayerGroup(config, name=f"albert_layer_groups_._{i}") for i in range(config.num_hidden_groups)
         ]
-        self.config = config

     def call(
         self,
@@ -514,18 +457,6 @@ def call(
             last_hidden_state=hidden_states, hidden_states=all_hidden_states, attentions=all_attentions
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "embedding_hidden_mapping_in", None) is not None:
-            with tf.name_scope(self.embedding_hidden_mapping_in.name):
-                self.embedding_hidden_mapping_in.build(self.config.embedding_size)
-        if getattr(self, "albert_layer_groups", None) is not None:
-            for layer in self.albert_layer_groups:
-                with tf.name_scope(layer.name):
-                    layer.build(None)
-

 class TFAlbertPreTrainedModel(TFPreTrainedModel):
     """
@@ -556,23 +487,14 @@ def __init__(self, config: AlbertConfig, input_embeddings: tf.keras.layers.Layer
         # The output weights are the same as the input embeddings, but there is
         # an output-only bias for each token.
         self.decoder = input_embeddings
-        self.hidden_size = config.hidden_size

-    def build(self, input_shape=None):
+    def build(self, input_shape: tf.TensorShape):
         self.bias = self.add_weight(shape=(self.config.vocab_size,), initializer="zeros", trainable=True, name="bias")
         self.decoder_bias = self.add_weight(
             shape=(self.config.vocab_size,), initializer="zeros", trainable=True, name="decoder/bias"
         )

-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "dense", None) is not None:
-            with tf.name_scope(self.dense.name):
-                self.dense.build(self.config.hidden_size)
-        if getattr(self, "LayerNorm", None) is not None:
-            with tf.name_scope(self.LayerNorm.name):
-                self.LayerNorm.build([None, None, self.config.embedding_size])
+        super().build(input_shape)

     def get_output_embeddings(self) -> tf.keras.layers.Layer:
         return self.decoder
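The context lines in this hunk describe embedding weight tying: the decoder reuses the input embedding matrix, and only per-token output biases are learned. A minimal sketch of that idea under assumed names (not the repository's exact head):

import tensorflow as tf


class WordEmbeddings(tf.keras.layers.Layer):
    """Stand-in embedding layer exposing a weight matrix of shape [vocab_size, embedding_size]."""

    def __init__(self, vocab_size, embedding_size, **kwargs):
        super().__init__(**kwargs)
        self.weight = self.add_weight(name="weight", shape=[vocab_size, embedding_size])


class TiedLMHead(tf.keras.layers.Layer):
    """Project hidden states onto the vocabulary with the shared embedding matrix."""

    def __init__(self, input_embeddings, vocab_size, **kwargs):
        super().__init__(**kwargs)
        self.decoder = input_embeddings  # shared: no separate output projection is learned
        self.vocab_size = vocab_size

    def build(self, input_shape: tf.TensorShape):
        # The per-token output bias is the only new parameter in this head.
        self.bias = self.add_weight(shape=(self.vocab_size,), initializer="zeros", trainable=True, name="bias")
        super().build(input_shape)

    def call(self, hidden_states: tf.Tensor) -> tf.Tensor:
        # [batch, seq, embedding_size] @ [embedding_size, vocab_size]; tf.matmul broadcasts in TF 2
        logits = tf.matmul(hidden_states, self.decoder.weight, transpose_b=True)
        return tf.nn.bias_add(logits, self.bias)


head = TiedLMHead(WordEmbeddings(vocab_size=100, embedding_size=16), vocab_size=100)
print(head(tf.zeros([2, 7, 16])).shape)  # (2, 7, 100)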
@@ -728,20 +650,6 @@ def call(
             attentions=encoder_outputs.attentions,
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "embeddings", None) is not None:
-            with tf.name_scope(self.embeddings.name):
-                self.embeddings.build(None)
-        if getattr(self, "encoder", None) is not None:
-            with tf.name_scope(self.encoder.name):
-                self.encoder.build(None)
-        if getattr(self, "pooler", None) is not None:
-            with tf.name_scope(self.pooler.name):
-                self.pooler.build(None)  # TODO Matt might be wrong
-

 @dataclass
 class TFAlbertForPreTrainingOutput(ModelOutput):
@@ -917,14 +825,6 @@ def call(

         return outputs

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-

 @add_start_docstrings(
     """
@@ -1021,20 +921,6 @@ def call(
             attentions=outputs.attentions,
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "predictions", None) is not None:
-            with tf.name_scope(self.predictions.name):
-                self.predictions.build(None)
-        if getattr(self, "sop_classifier", None) is not None:
-            with tf.name_scope(self.sop_classifier.name):
-                self.sop_classifier.build(None)
-

 class TFAlbertSOPHead(tf.keras.layers.Layer):
     def __init__(self, config: AlbertConfig, **kwargs):
@@ -1046,23 +932,13 @@ def __init__(self, config: AlbertConfig, **kwargs):
             kernel_initializer=get_initializer(config.initializer_range),
             name="classifier",
         )
-        self.hidden_size = config.hidden_size
-        self.config = config

     def call(self, pooled_output: tf.Tensor, training: bool) -> tf.Tensor:
         dropout_pooled_output = self.dropout(inputs=pooled_output, training=training)
         logits = self.classifier(inputs=dropout_pooled_output)

         return logits

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "classifier", None) is not None:
-            with tf.name_scope(self.classifier.name):
-                self.classifier.build(self.config.hidden_size)
-

 @add_start_docstrings("""Albert Model with a `language modeling` head on top.""", ALBERT_START_DOCSTRING)
 class TFAlbertForMaskedLM(TFAlbertPreTrainedModel, TFMaskedLanguageModelingLoss):
@@ -1159,17 +1035,6 @@ def call(
             attentions=outputs.attentions,
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "predictions", None) is not None:
-            with tf.name_scope(self.predictions.name):
-                self.predictions.build(None)
-

 @add_start_docstrings(
     """
@@ -1193,8 +1058,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
         self.classifier = tf.keras.layers.Dense(
             units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )
-        self.hidden_size = config.hidden_size
-        self.config = config

     @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -1254,17 +1117,6 @@ def call(
             attentions=outputs.attentions,
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "classifier", None) is not None:
-            with tf.name_scope(self.classifier.name):
-                self.classifier.build(self.config.hidden_size)
-

 @add_start_docstrings(
     """
@@ -1293,8 +1145,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
         self.classifier = tf.keras.layers.Dense(
             units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )
-        self.hidden_size = config.hidden_size
-        self.config = config

     @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -1350,17 +1200,6 @@ def call(
             attentions=outputs.attentions,
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "classifier", None) is not None:
-            with tf.name_scope(self.classifier.name):
-                self.classifier.build(self.config.hidden_size)
-

 @add_start_docstrings(
     """
@@ -1382,8 +1221,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
         self.qa_outputs = tf.keras.layers.Dense(
             units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
         )
-        self.hidden_size = config.hidden_size
-        self.config = config

     @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -1458,17 +1295,6 @@ def call(
             attentions=outputs.attentions,
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "qa_outputs", None) is not None:
-            with tf.name_scope(self.qa_outputs.name):
-                self.qa_outputs.build(self.config.hidden_size)
-

 @add_start_docstrings(
     """
@@ -1490,8 +1316,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
         self.classifier = tf.keras.layers.Dense(
             units=1, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )
-        self.hidden_size = config.hidden_size
-        self.config = config

     @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
@@ -1570,14 +1394,3 @@ def call(
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
         )
-
-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "classifier", None) is not None:
-            with tf.name_scope(self.classifier.name):
-                self.classifier.build(self.config.hidden_size)
