
Commit d54b581

Revert "Try again with the missing attribute!"
This reverts commit 760c6f3.
1 parent 760c6f3 commit d54b581
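
Note on the change being reverted: the hunks below restore the typed `build(self, input_shape: tf.TensorShape)` signatures that finish with `super().build(input_shape)`, and drop the explicit per-sublayer `build()` overrides (and the `self.config = config` attributes they relied on) introduced by 760c6f3. The following is a minimal, hypothetical sketch contrasting the two styles on a toy layer; it is not part of this commit, and `LazyBuildBlock`, `ExplicitBuildBlock`, and `HIDDEN_SIZE` are illustrative names only.

import tensorflow as tf

HIDDEN_SIZE = 64  # illustrative size, not taken from the ALBERT config


class LazyBuildBlock(tf.keras.layers.Layer):
    """Style restored by the revert: defer sublayer building to Keras."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.dense = tf.keras.layers.Dense(HIDDEN_SIZE, name="dense")

    def build(self, input_shape: tf.TensorShape):
        # No explicit sublayer building; weights are created lazily on the first call.
        super().build(input_shape)

    def call(self, hidden_states: tf.Tensor) -> tf.Tensor:
        return self.dense(hidden_states)


class ExplicitBuildBlock(tf.keras.layers.Layer):
    """Style removed by the revert: build each sublayer inside its name scope."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.dense = tf.keras.layers.Dense(HIDDEN_SIZE, name="dense")

    def build(self, input_shape=None):
        if self.built:
            return
        self.built = True
        if getattr(self, "dense", None) is not None:
            with tf.name_scope(self.dense.name):
                # Dense.build only needs a shape whose last dimension is the input size.
                self.dense.build((None, HIDDEN_SIZE))

    def call(self, hidden_states: tf.Tensor) -> tf.Tensor:
        return self.dense(hidden_states)


x = tf.random.uniform((2, 5, HIDDEN_SIZE))
print(LazyBuildBlock(name="lazy")(x).shape)          # (2, 5, 64)
print(ExplicitBuildBlock(name="explicit")(x).shape)  # (2, 5, 64)

Both layers produce the same output shapes; the two styles differ only in when and where the sublayer weights are created.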

64 files changed, +148 −9279 lines changed


src/transformers/models/albert/modeling_tf_albert.py (+4, −173)
@@ -146,7 +146,7 @@ def __init__(self, config: AlbertConfig, **kwargs):
         self.LayerNorm = tf.keras.layers.LayerNormalization(epsilon=config.layer_norm_eps, name="LayerNorm")
         self.dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)

-    def build(self, input_shape=None):
+    def build(self, input_shape: tf.TensorShape):
         with tf.name_scope("word_embeddings"):
             self.weight = self.add_weight(
                 name="weight",
@@ -168,14 +168,7 @@ def build(self, input_shape=None):
                 initializer=get_initializer(self.initializer_range),
             )

-
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "LayerNorm", None) is not None:
-            with tf.name_scope(self.LayerNorm.name):
-                self.LayerNorm.build([None, None, self.config.embedding_size])
-
+        super().build(input_shape)

     # Copied from transformers.models.bert.modeling_tf_bert.TFBertEmbeddings.call
     def call(
@@ -253,7 +246,6 @@ def __init__(self, config: AlbertConfig, **kwargs):
         # Two different dropout probabilities; see https://github.com/google-research/albert/blob/master/modeling.py#L971-L993
         self.attention_dropout = tf.keras.layers.Dropout(rate=config.attention_probs_dropout_prob)
         self.output_dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)
-        self.config = config

     def transpose_for_scores(self, tensor: tf.Tensor, batch_size: int) -> tf.Tensor:
         # Reshape from [batch_size, seq_length, all_head_size] to [batch_size, seq_length, num_attention_heads, attention_head_size]
@@ -314,25 +306,6 @@ def call(
         outputs = (attention_output,) + self_outputs[1:]

         return outputs
-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "query", None) is not None:
-            with tf.name_scope(self.query.name):
-                self.query.build(self.config.hidden_size)
-        if getattr(self, "key", None) is not None:
-            with tf.name_scope(self.key.name):
-                self.key.build(self.config.hidden_size)
-        if getattr(self, "value", None) is not None:
-            with tf.name_scope(self.value.name):
-                self.value.build(self.config.hidden_size)
-        if getattr(self, "dense", None) is not None:
-            with tf.name_scope(self.dense.name):
-                self.dense.build(self.config.hidden_size)
-        if getattr(self, "LayerNorm", None) is not None:
-            with tf.name_scope(self.LayerNorm.name):
-                self.LayerNorm.build([None, None, self.config.hidden_size])


 class TFAlbertLayer(tf.keras.layers.Layer):
@@ -356,7 +329,6 @@ def __init__(self, config: AlbertConfig, **kwargs):
             epsilon=config.layer_norm_eps, name="full_layer_layer_norm"
         )
         self.dropout = tf.keras.layers.Dropout(rate=config.hidden_dropout_prob)
-        self.config = config

     def call(
         self,
@@ -383,22 +355,6 @@ def call(
         outputs = (hidden_states,) + attention_outputs[1:]

         return outputs
-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "attention", None) is not None:
-            with tf.name_scope(self.attention.name):
-                self.attention.build(None)
-        if getattr(self, "ffn", None) is not None:
-            with tf.name_scope(self.ffn.name):
-                self.ffn.build(self.config.hidden_size)
-        if getattr(self, "ffn_output", None) is not None:
-            with tf.name_scope(self.ffn_output.name):
-                self.ffn_output.build(self.config.intermediate_size)
-        if getattr(self, "full_layer_layer_norm", None) is not None:
-            with tf.name_scope(self.full_layer_layer_norm.name):
-                self.full_layer_layer_norm.build([None, None, self.config.hidden_size])


 class TFAlbertLayerGroup(tf.keras.layers.Layer):
@@ -442,14 +398,6 @@ def call(
             layer_hidden_states = layer_hidden_states + (hidden_states,)

         return tuple(v for v in [hidden_states, layer_hidden_states, layer_attentions] if v is not None)
-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert_layers", None) is not None:
-            for layer in self.albert_layers:
-                with tf.name_scope(layer.name):
-                    layer.build(None)


 class TFAlbertTransformer(tf.keras.layers.Layer):
@@ -468,7 +416,6 @@ def __init__(self, config: AlbertConfig, **kwargs):
         self.albert_layer_groups = [
             TFAlbertLayerGroup(config, name=f"albert_layer_groups_._{i}") for i in range(config.num_hidden_groups)
         ]
-        self.config = config

     def call(
         self,
@@ -509,17 +456,6 @@ def call(
         return TFBaseModelOutput(
             last_hidden_state=hidden_states, hidden_states=all_hidden_states, attentions=all_attentions
         )
-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "embedding_hidden_mapping_in", None) is not None:
-            with tf.name_scope(self.embedding_hidden_mapping_in.name):
-                self.embedding_hidden_mapping_in.build(self.config.embedding_size)
-        if getattr(self, "albert_layer_groups", None) is not None:
-            for layer in self.albert_layer_groups:
-                with tf.name_scope(layer.name):
-                    layer.build(None)


 class TFAlbertPreTrainedModel(TFPreTrainedModel):
@@ -552,23 +488,13 @@ def __init__(self, config: AlbertConfig, input_embeddings: tf.keras.layers.Layer
         # an output-only bias for each token.
         self.decoder = input_embeddings

-    def build(self, input_shape=None):
+    def build(self, input_shape: tf.TensorShape):
         self.bias = self.add_weight(shape=(self.config.vocab_size,), initializer="zeros", trainable=True, name="bias")
         self.decoder_bias = self.add_weight(
             shape=(self.config.vocab_size,), initializer="zeros", trainable=True, name="decoder/bias"
         )

-
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "dense", None) is not None:
-            with tf.name_scope(self.dense.name):
-                self.dense.build(self.config.hidden_size)
-        if getattr(self, "LayerNorm", None) is not None:
-            with tf.name_scope(self.LayerNorm.name):
-                self.LayerNorm.build([None, None, self.config.embedding_size])
-
+        super().build(input_shape)

     def get_output_embeddings(self) -> tf.keras.layers.Layer:
         return self.decoder
@@ -723,19 +649,6 @@ def call(
             hidden_states=encoder_outputs.hidden_states,
             attentions=encoder_outputs.attentions,
         )
-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "embeddings", None) is not None:
-            with tf.name_scope(self.embeddings.name):
-                self.embeddings.build(None)
-        if getattr(self, "encoder", None) is not None:
-            with tf.name_scope(self.encoder.name):
-                self.encoder.build(None)
-        if getattr(self, "pooler", None) is not None:
-            with tf.name_scope(self.pooler.name):
-                self.pooler.build(None)  # TODO Matt might be wrong


 @dataclass
@@ -911,13 +824,6 @@ def call(
         )

         return outputs
-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)


 @add_start_docstrings(
@@ -1014,19 +920,6 @@ def call(
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
         )
-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "predictions", None) is not None:
-            with tf.name_scope(self.predictions.name):
-                self.predictions.build(None)
-        if getattr(self, "sop_classifier", None) is not None:
-            with tf.name_scope(self.sop_classifier.name):
-                self.sop_classifier.build(None)


 class TFAlbertSOPHead(tf.keras.layers.Layer):
@@ -1039,20 +932,12 @@ def __init__(self, config: AlbertConfig, **kwargs):
             kernel_initializer=get_initializer(config.initializer_range),
             name="classifier",
         )
-        self.config = config

     def call(self, pooled_output: tf.Tensor, training: bool) -> tf.Tensor:
         dropout_pooled_output = self.dropout(inputs=pooled_output, training=training)
         logits = self.classifier(inputs=dropout_pooled_output)

         return logits
-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "classifier", None) is not None:
-            with tf.name_scope(self.classifier.name):
-                self.classifier.build(self.config.hidden_size)


 @add_start_docstrings("""Albert Model with a `language modeling` head on top.""", ALBERT_START_DOCSTRING)
@@ -1149,16 +1034,6 @@ def call(
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
         )
-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "predictions", None) is not None:
-            with tf.name_scope(self.predictions.name):
-                self.predictions.build(None)


 @add_start_docstrings(
@@ -1183,7 +1058,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
         self.classifier = tf.keras.layers.Dense(
             units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )
-        self.config = config

     @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -1242,16 +1116,6 @@ def call(
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
         )
-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "classifier", None) is not None:
-            with tf.name_scope(self.classifier.name):
-                self.classifier.build(self.config.hidden_size)


 @add_start_docstrings(
@@ -1281,7 +1145,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
         self.classifier = tf.keras.layers.Dense(
             units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
         )
-        self.config = config

     @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -1336,16 +1199,6 @@ def call(
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
         )
-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "classifier", None) is not None:
-            with tf.name_scope(self.classifier.name):
-                self.classifier.build(self.config.hidden_size)


 @add_start_docstrings(
@@ -1368,7 +1221,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
         self.qa_outputs = tf.keras.layers.Dense(
             units=config.num_labels, kernel_initializer=get_initializer(config.initializer_range), name="qa_outputs"
         )
-        self.config = config

     @unpack_inputs
     @add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, sequence_length"))
@@ -1442,16 +1294,6 @@ def call(
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
         )
-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "qa_outputs", None) is not None:
-            with tf.name_scope(self.qa_outputs.name):
-                self.qa_outputs.build(self.config.hidden_size)


 @add_start_docstrings(
@add_start_docstrings(
@@ -1474,7 +1316,6 @@ def __init__(self, config: AlbertConfig, *inputs, **kwargs):
14741316
self.classifier = tf.keras.layers.Dense(
14751317
units=1, kernel_initializer=get_initializer(config.initializer_range), name="classifier"
14761318
)
1477-
self.config = config
14781319

14791320
@unpack_inputs
14801321
@add_start_docstrings_to_model_forward(ALBERT_INPUTS_DOCSTRING.format("batch_size, num_choices, sequence_length"))
@@ -1553,13 +1394,3 @@ def call(
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
         )
-    def build(self, input_shape=None):
-        if self.built:
-            return
-        self.built = True
-        if getattr(self, "albert", None) is not None:
-            with tf.name_scope(self.albert.name):
-                self.albert.build(None)
-        if getattr(self, "classifier", None) is not None:
-            with tf.name_scope(self.classifier.name):
-                self.classifier.build(self.config.hidden_size)
