src/transformers/models/gemma: 1 file changed, +4 -4 lines changed

@@ -74,9 +74,9 @@ class GemmaConfig(PretrainedConfig):
             relevant if `config.is_decoder=True`.
         pad_token_id (`int`, *optional*):
             Padding token id.
-        bos_token_id (`int`, *optional*, defaults to 1):
+        bos_token_id (`int`, *optional*, defaults to 2):
             Beginning of stream token id.
-        eos_token_id (`int`, *optional*, defaults to 2):
+        eos_token_id (`int`, *optional*, defaults to 1):
             End of stream token id.
         pretraining_tp (`int`, *optional*, defaults to 1):
             Experimental feature. Tensor parallelism rank used during pretraining. Please refer to [this
@@ -131,8 +131,8 @@ def __init__(
         rms_norm_eps=1e-6,
         use_cache=True,
         pad_token_id=None,
-        bos_token_id=1,
-        eos_token_id=2,
+        bos_token_id=2,
+        eos_token_id=1,
         tie_word_embeddings=True,
         rope_theta=10000.0,
         rope_scaling=None,
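For context, a minimal sketch of how the new defaults surface (a hypothetical check, assuming a `transformers` install that includes this patch). The swap brings the config defaults in line with the Gemma tokenizer, where `<bos>` is token id 2 and `<eos>` is token id 1:

```python
from transformers import GemmaConfig

# Instantiate with defaults; this patch swaps the two special-token ids.
config = GemmaConfig()
print(config.bos_token_id)  # 2 (was 1 before this change)
print(config.eos_token_id)  # 1 (was 2 before this change)
```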