@@ -668,7 +668,8 @@ class GPT2DoubleHeadsModelOutput(ModelOutput):
668
668
GPT2_START_DOCSTRING ,
669
669
)
670
670
class GPT2Model (GPT2PreTrainedModel ):
671
- _keys_to_ignore_on_load_missing = ["attn.masked_bias" ]
671
+ _keys_to_ignore_on_load_unexpected = [r"h\.\d+\.attn\.bias" , r"h\.\d+\.attn\.masked_bias" ]
672
+ _keys_to_ignore_on_load_missing = [r"attn.masked_bias" , r"h\.\d+\.attn\.masked_bias" , r"h\.\d+\.attn\.bias" ]
672
673
673
674
def __init__ (self , config ):
674
675
super ().__init__ (config )
@@ -1149,6 +1150,7 @@ def _reorder_cache(
1149
1150
GPT2_START_DOCSTRING ,
1150
1151
)
1151
1152
class GPT2DoubleHeadsModel (GPT2PreTrainedModel ):
1153
+ _keys_to_ignore_on_load_unexpected = [r"h\.\d+\.attn\.bias" , r"h\.\d+\.attn\.masked_bias" ]
1152
1154
_keys_to_ignore_on_load_missing = [r"attn.masked_bias" , r"attn.bias" , r"lm_head.weight" ]
1153
1155
1154
1156
def __init__ (self , config ):
@@ -1377,6 +1379,7 @@ def _reorder_cache(
1377
1379
GPT2_START_DOCSTRING ,
1378
1380
)
1379
1381
class GPT2ForSequenceClassification (GPT2PreTrainedModel ):
1382
+ _keys_to_ignore_on_load_unexpected = [r"h\.\d+\.attn\.bias" , r"h\.\d+\.attn\.masked_bias" ]
1380
1383
_keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias" , r"lm_head.weight" ]
1381
1384
1382
1385
def __init__ (self , config ):
@@ -1600,6 +1603,7 @@ def forward(
1600
1603
GPT2_START_DOCSTRING ,
1601
1604
)
1602
1605
class GPT2ForQuestionAnswering (GPT2PreTrainedModel ):
1606
+ _keys_to_ignore_on_load_unexpected = [r"h\.\d+\.attn\.bias" , r"h\.\d+\.attn\.masked_bias" ]
1603
1607
_keys_to_ignore_on_load_missing = [r"h\.\d+\.attn\.masked_bias" , r"h\.\d+\.attn\.bias" , r"lm_head.weight" ]
1604
1608
1605
1609
def __init__ (self , config ):
0 commit comments