
Commit 8e0cb06

Revert "Attempt seven - snag list is steadily decreasing"
This reverts commit 46fbd97.
1 parent 46fbd97 commit 8e0cb06

64 files changed: +152 −10170 lines


src/transformers/models/albert/modeling_tf_albert.py (+4 −182)
@@ -146,7 +146,7 @@ def __init__(self, config: AlbertConfig, **kwargs):
         self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="LayerNorm")
         self.dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)

-    def build(self, input_shape=None):
+    def build(self, input_shape: tf.TensorShape):
         with tf.name_scope("word_embeddings"):
             self.weight = self.add_weight(
                 name="weight",
@@ -168,12 +168,7 @@ def build(self, input_shape=None):
                 initializer=get_initializer(self.initializer_range),
             )

-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "LayerNorm", None) is not None:
-            with tf.name_scope(self.LayerNorm.name):
-                self.LayerNorm.build([None, None, self.config.embedding_size])
+        super().build(input_shape)

     # Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings.call
     def call(
@@ -251,7 +246,6 @@ def __init__(self, config: AlbertConfig, **kwargs):
         # Two different dropout probabilities; see https://github.com/google-research/albert/blob/master/modeling.py#L971-L993
         self.attention_dropout = tf.keras.layers.Dropout(rate=config.attention_probs_dropout_prob)
         self.output_dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)
-        self.config = config

     def transpose_for_scores(self, tensor: tf.Tensor, batch_size: int) -> tf.Tensor:
         # Reshape from [batch_size, seq_length, all_head_size] to [batch_size, seq_length, num_attention_heads, attention_head_size]
@@ -313,26 +307,6 @@ def call(

         return outputs

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "query", None) is not None:
-            with tf.name_scope(self.query.name):
-                self.query.build(self.config.hidden_size)
-        if getattr(self, "key", None) is not None:
-            with tf.name_scope(self.key.name):
-                self.key.build(self.config.hidden_size)
-        if getattr(self, "value", None) is not None:
-            with tf.name_scope(self.value.name):
-                self.value.build(self.config.hidden_size)
-        if getattr(self, "dense", None) is not None:
-            with tf.name_scope(self.dense.name):
-                self.dense.build(self.config.hidden_size)
-        if getattr(self, "LayerNorm", None) is not None:
-            with tf.name_scope(self.LayerNorm.name):
-                self.LayerNorm.build([None, None, self.config.hidden_size])
-

 class TFAlbertLayer(tf.keras.layers.Layer):
     def __init__(self, config: AlbertConfig, **kwargs):
@@ -355,7 +329,6 @@ def __init__(self, config: AlbertConfig, **kwargs):
             epsilon=config.layer_norm_eps, name="full_layer_layer_norm"
         )
         self.dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)
-        self.config = config

     def call(
         self,
@@ -383,23 +356,6 @@ def call(

         return outputs

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "attention", None) is not None:
-            with tf.name_scope(self.attention.name):
-                self.attention.build(None)
-        if getattr(self, "ffn", None) is not None:
-            with tf.name_scope(self.ffn.name):
-                self.ffn.build(self.config.hidden_size)
-        if getattr(self, "ffn_output", None) is not None:
-            with tf.name_scope(self.ffn_output.name):
-                self.ffn_output.build(self.config.intermediate_size)
-        if getattr(self, "full_layer_layer_norm", None) is not None:
-            with tf.name_scope(self.full_layer_layer_norm.name):
-                self.full_layer_layer_norm.build([None, None, self.config.hidden_size])
-

 class TFAlbertLayerGroup(tf.keras.layers.Layer):
     def __init__(self, config: AlbertConfig, **kwargs):
@@ -443,15 +399,6 @@ def call(

         return tuple(v for v in [hidden_states, layer_hidden_states, layer_attentions] if v is not None)

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert_layers", None) is not None:
-            for layer in self.albert_layers:
-                with tf.name_scope(layer.name):
-                    layer.build(None)
-

 class TFAlbertTransformer(tf.keras.layers.Layer):
     def __init__(self, config: AlbertConfig, **kwargs):
@@ -469,7 +416,6 @@ def __init__(self, config: AlbertConfig, **kwargs):
         self.albert_layer_groups = [
             TFAlbertLayerGroup(config, name=f"albert_layer_groups_._{i}") for i in range(config.num_hidden_groups)
         ]
-        self.config = config

     def call(
         self,
@@ -511,18 +457,6 @@ def call(
             last_hidden_state=hidden_states, hidden_states=all_hidden_states, attentions=all_attentions
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "embedding_hidden_mapping_in", None) is not None:
-            with tf.name_scope(self.embedding_hidden_mapping_in.name):
-                self.embedding_hidden_mapping_in.build(self.config.embedding_size)
-        if getattr(self, "albert_layer_groups", None) is not None:
-            for layer in self.albert_layer_groups:
-                with tf.name_scope(layer.name):
-                    layer.build(None)
-

 class TFAlbertPreTrainedModel(TFPreTrainedModel):
     """
@@ -554,21 +488,13 @@ def __init__(self, config: AlbertConfig, input_embeddings: tf.keras.layers.Layer
         # an output-only bias for each token.
         self.decoder = input_embeddings

-    def build(self, input_shape=None):
+    def build(self, input_shape: tf.TensorShape):
         self.bias = self.add_weight(shape=(self.config.vocab_size,), initializer="zeros", trainable=True, name="bias")
         self.decoder_bias = self.add_weight(
             shape=(self.config.vocab_size,), initializer="zeros", trainable=True, name="decoder/bias"
         )

-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "dense", None) is not None:
-            with tf.name_scope(self.dense.name):
-                self.dense.build(self.config.hidden_size)
-        if getattr(self, "LayerNorm", None) is not None:
-            with tf.name_scope(self.LayerNorm.name):
-                self.LayerNorm.build([None, None, self.config.embedding_size])
+        super().build(input_shape)

     def get_output_embeddings(self) -> tf.keras.layers.Layer:
         return self.decoder
@@ -724,20 +650,6 @@ def call(
             attentions=encoder_outputs.attentions,
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "embeddings", None) is not None:
-            with tf.name_scope(self.embeddings.name):
-                self.embeddings.build(None)
-        if getattr(self, "encoder", None) is not None:
-            with tf.name_scope(self.encoder.name):
-                self.encoder.build(None)
-        if getattr(self, "pooler", None) is not None:
-            with tf.name_scope(self.pooler.name):
-                self.pooler.build(None)  # TODO Matt might be wrong
-

 @dataclass
 class TFAlbertForPreTrainingOutput(ModelOutput):
@@ -913,14 +825,6 @@ def call(

         return outputs

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-

 @add_start_docstrings(
     """
@@ -1017,20 +921,6 @@ def call(
             attentions=outputs.attentions,
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "predictions", None) is not None:
-            with tf.name_scope(self.predictions.name):
-                self.predictions.build(None)
-        if getattr(self, "sop_classifier", None) is not None:
-            with tf.name_scope(self.sop_classifier.name):
-                self.sop_classifier.build(None)
-

 class TFAlbertSOPHead(tf.keras.layers.Layer):
     def __init__(self, config: AlbertConfig, **kwargs):
@@ -1042,22 +932,13 @@ def __init__(self, config: AlbertConfig, **kwargs):
             kernel_initializer=get_initializer(config.initializer_range),
             name="classifier",
         )
-        self.config = config

     def call(self, pooled_output: tf.Tensor, training: bool) -> tf.Tensor:
         dropout_pooled_output = self.dropout(inputs=pooled_output, training=training)
         logits = self.classifier(inputs=dropout_pooled_output)

         return logits

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "classifier", None) is not None:
-            with tf.name_scope(self.classifier.name):
-                self.classifier.build(self.config.hidden_size)
-

 @add_start_docstrings("""Albert Model with a `language modeling` head on top.""", ALBERT_START_DOCSTRING)
 class TFAlbertForMaskedLM(TFAlbertPreTrainedModel, TFMaskedLanguageModelingLoss):
@@ -1154,17 +1035,6 @@ def call(
             attentions=outputs.attentions,
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "predictions", None) is not None:
-            with tf.name_scope(self.predictions.name):
-                self.predictions.build(None)
-

 @add_start_docstrings(
     """
@@ -1188,7 +1058,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
         self.classifier = tf.keras.layers.Dense(
             units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )
-        self.config = config

     @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -1248,17 +1117,6 @@ def call(
             attentions=outputs.attentions,
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "classifier", None) is not None:
-            with tf.name_scope(self.classifier.name):
-                self.classifier.build(self.config.hidden_size)
-

 @add_start_docstrings(
     """
@@ -1287,7 +1145,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
         self.classifier = tf.keras.layers.Dense(
             units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )
-        self.config = config

     @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -1343,17 +1200,6 @@ def call(
             attentions=outputs.attentions,
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "classifier", None) is not None:
-            with tf.name_scope(self.classifier.name):
-                self.classifier.build(self.config.hidden_size)
-

 @add_start_docstrings(
     """
@@ -1375,7 +1221,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
         self.qa_outputs = tf.keras.layers.Dense(
             units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
         )
-        self.config = config

     @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -1450,17 +1295,6 @@ def call(
             attentions=outputs.attentions,
         )

-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "qa_outputs", None) is not None:
-            with tf.name_scope(self.qa_outputs.name):
-                self.qa_outputs.build(self.config.hidden_size)
-

 @add_start_docstrings(
     """
@@ -1482,7 +1316,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
         self.classifier = tf.keras.layers.Dense(
             units=1, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )
-        self.config = config

     @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
@@ -1561,14 +1394,3 @@ def call(
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
         )
-
-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "classifier", None) is not None:
-            with tf.name_scope(self.classifier.name):
-                self.classifier.build(self.config.hidden_size)
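
As context for the diff above: the reverted commit had replaced Keras's default lazy building (a build() that simply calls super().build(input_shape), with sublayers built the first time they see inputs) with explicit build() overrides that construct each sublayer eagerly under its own name scope with a stated input size. A minimal sketch contrasting the two styles, using a hypothetical ExampleLayer that is not part of this file, might look like:

# Illustrative sketch only: ExampleLayer is hypothetical and not part of the
# diff above; it just contrasts the two build() styles that appear in it.
import tensorflow as tf


class ExampleLayer(tf.keras.layers.Layer):
    def __init__(self, hidden_size: int, **kwargs):
        super().__init__(**kwargs)
        self.hidden_size = hidden_size
        self.dense = tf.keras.layers.Dense(hidden_size, name="dense")

    # Style restored by the revert: defer to Keras, so self.dense is built
    # lazily the first time call() receives real inputs.
    def build(self, input_shape: tf.TensorShape):
        super().build(input_shape)

    # Style being reverted (rename to build to try it): build each sublayer
    # eagerly, under its own name scope, with an explicitly known input size.
    def build_eagerly(self, input_shape=None):
        if self.built:
            return
        self.built = True
        if getattr(self, "dense", None) is not None:
            with tf.name_scope(self.dense.name):
                self.dense.build([None, None, self.hidden_size])

    def call(self, hidden_states: tf.Tensor) -> tf.Tensor:
        return self.dense(hidden_states)

The eager style creates weights without running a forward pass, which appears to be what the reverted "attempt" was exploring; this revert restores the deferred behaviour.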
