@@ -86,8 +86,8 @@ def single_decode_with_kv_cache(
         ``NONE``/``ROPE_LLAMA`` (LLAMA style rotary embedding) /``ALIBI``.
         Defaults to ``NONE``.
     logits_cap : bool
-        Whether to apply logits cap to attention scores.
-        If ``True``, the attention scores will be capped according to formula (proposed in
+        Whether to apply logits cap to pre-attention logits.
+        If ``True``, the logits will be capped according to the formula (proposed in
         Grok-1): :math:`30 \times \mathrm{tanh}(x / 30)`, where :math:`x` is the input logits.
         Defaults to ``False``.
     q_scale : Optional[float]
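For reference, the capping formula in the new wording, :math:`30 \times \mathrm{tanh}(x / 30)`, is a smooth saturation of the pre-attention logits. A minimal PyTorch sketch (the standalone helper below is illustrative only, not part of this diff or of the library's API):

```python
import torch

def soft_cap_logits(x: torch.Tensor, cap: float = 30.0) -> torch.Tensor:
    # Grok-1 style logits cap: approximately the identity for |x| << cap,
    # smoothly saturating for large-magnitude logits, so every capped
    # value lies in the open interval (-cap, cap).
    return cap * torch.tanh(x / cap)
```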
@@ -199,8 +199,8 @@ def batch_decode_with_padded_kv_cache(
         ``NONE``/``ROPE_LLAMA`` (LLAMA style rotary embedding) /``ALIBI``.
         Defaults to ``NONE``.
     logits_cap : bool
-        Whether to apply logits cap to attention scores.
-        If ``True``, the attention scores will be capped according to formula (proposed in
+        Whether to apply logits cap to pre-attention logits.
+        If ``True``, the logits will be capped according to the formula (proposed in
         Grok-1): :math:`30 \times \mathrm{tanh}(x / 30)`, where :math:`x` is the input logits.
         Defaults to ``False``.
     q_scale : Optional[float]
@@ -312,8 +312,8 @@ def batch_decode_with_padded_kv_cache_return_lse(
         ``NONE``/``ROPE_LLAMA`` (LLAMA style rotary embedding) /``ALIBI``.
         Defaults to ``NONE``.
     logits_cap : bool
-        Whether to apply logits cap to attention scores.
-        If ``True``, the attention scores will be capped according to formula (proposed in
+        Whether to apply logits cap to pre-attention logits.
+        If ``True``, the logits will be capped according to the formula (proposed in
         Grok-1): :math:`30 \times \mathrm{tanh}(x / 30)`, where :math:`x` is the input logits.
         Defaults to ``False``.
     q_scale : Optional[float]
@@ -592,8 +592,8 @@ def begin_forward(
         ``NONE``/``ROPE_LLAMA`` (LLAMA style rotary embedding) /``ALIBI``.
         Defaults to ``NONE``.
     logits_cap: bool
-        Whether to apply logits cap to attention scores.
-        If ``True``, the attention scores will be capped according to formula (proposed in
+        Whether to apply logits cap to pre-attention logits.
+        If ``True``, the logits will be capped according to the formula (proposed in
         Grok-1): :math:`30 \times \mathrm{tanh}(x / 30)`, where :math:`x` is the input logits.
         Defaults to ``False``.
     data_type : Union[str, torch.dtype]
@@ -704,8 +704,8 @@ def forward(
         ``NONE``/``ROPE_LLAMA`` (LLAMA style rotary embedding) /``ALIBI``.
         Defaults to ``NONE``.
     logits_cap: bool
-        Whether to apply logits cap to attention scores.
-        If ``True``, the attention scores will be capped according to formula (proposed in
+        Whether to apply logits cap to pre-attention logits.
+        If ``True``, the logits will be capped according to the formula (proposed in
         Grok-1): :math:`30 \times \mathrm{tanh}(x / 30)`, where :math:`x` is the input logits.
         Defaults to ``False``.
     q_scale : Optional[float]
@@ -789,8 +789,8 @@ def forward_return_lse(
         ``NONE``/``ROPE_LLAMA`` (LLAMA style rotary embedding) /``ALIBI``.
         Defaults to ``NONE``.
     logits_cap: bool
-        Whether to apply logits cap to attention scores.
-        If ``True``, the attention scores will be capped according to formula (proposed in
+        Whether to apply logits cap to pre-attention logits.
+        If ``True``, the logits will be capped according to the formula (proposed in
         Grok-1): :math:`30 \times \mathrm{tanh}(x / 30)`, where :math:`x` is the input logits.
         Defaults to ``False``.
     q_scale : Optional[float]