@@ -4242,6 +4242,22 @@ static inline int ggml_up(int n, int m) {
 #define ggml_assert_aligned(ptr) \
     GGML_ASSERT(((uintptr_t) (ptr))%GGML_MEM_ALIGN == 0)
 
+float get_theta_scale(int n_dims,int n_past,int n_ctx)
+{
+    if(n_ctx<=2048) //normie mode
+    {
+        return powf(10000.0, -2.0f/n_dims);
+    }
+    else
+    {
+        //using scaled NTK aware ctx
+        float a = (n_ctx<=4096?4.0:8.0);
+        float m = powf(a, n_dims / (n_dims - 2.0));
+        float s = powf(10000.0 * m, -2.0f/n_dims);
+        return s;
+    }
+}
+
 ////////////////////////////////////////////////////////////////////////////////
 
 struct ggml_context * ggml_init(struct ggml_init_params params) {
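The new helper above is the core of the change: for contexts up to 2048 it returns the stock RoPE scale 10000^(-2/n_dims), and for longer contexts it enlarges the frequency base by a^(n_dims/(n_dims-2)) with a = 4 (up to 4096 ctx) or 8 (beyond), the NTK-aware trick. Note that n_past is accepted but never read. A minimal standalone sketch (not part of the patch; names and the 128-dim head are illustrative) that mirrors the helper and prints the resulting scale:

/*
 * Standalone sketch, not part of ggml.c. theta_scale_sketch() mirrors the
 * get_theta_scale() added above (n_past is dropped because the helper never
 * reads it). Build with: cc sketch.c -lm
 */
#include <math.h>
#include <stdio.h>

static float theta_scale_sketch(int n_dims, int n_ctx) {
    if (n_ctx <= 2048) {
        // stock RoPE: base 10000, scale = 10000^(-2/n_dims)
        return powf(10000.0f, -2.0f / n_dims);
    }
    // NTK-aware: enlarge the base by a^(n_dims/(n_dims-2)), a = 4 or 8
    float a = (n_ctx <= 4096) ? 4.0f : 8.0f;
    float m = powf(a, n_dims / (n_dims - 2.0f));
    return powf(10000.0f * m, -2.0f / n_dims);
}

int main(void) {
    const int n_dims = 128;                    // typical LLaMA head dimension
    const int ctxs[] = { 2048, 4096, 8192 };
    for (int i = 0; i < 3; i++) {
        printf("n_ctx = %5d -> theta_scale = %.6f\n",
               ctxs[i], theta_scale_sketch(n_dims, ctxs[i]));
    }
    return 0;
}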
@@ -12531,7 +12547,7 @@ static void ggml_compute_forward_rope_f32(
     // row index used to determine which thread to use
     int ir = 0;
 
-    const float theta_scale = powf(10000.0, -2.0f/n_dims);
+    const float theta_scale = get_theta_scale(n_dims, n_past, n_ctx);
 
     const bool is_neox = mode & 2;
     const bool is_glm = mode & 4;
@@ -12571,9 +12587,7 @@ static void ggml_compute_forward_rope_f32(
                         dst_data[n_dims/2*3] = x2*sin_block_theta + x3*cos_block_theta;
                     }
                 } else if (!is_neox) {
-                    if (n_ctx > GGML_TRAINING_CTX) {
-                        theta = theta * GGML_TRAINING_CTX / n_ctx;
-                    }
+
                     for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
                         const float cos_theta = cosf(theta);
                         const float sin_theta = sinf(theta);
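The three deleted lines here (and the identical deletions in the f16 and rope_back hunks below) were the old linear scaling path: once n_ctx exceeded GGML_TRAINING_CTX, every position was compressed back into the trained range. With the NTK-aware base change the positions are left untouched, so the block is simply dropped. A rough hypothetical comparison of the removed behaviour (TRAIN_CTX is a stand-in for GGML_TRAINING_CTX, with 2048 an assumed value):

/*
 * Hypothetical comparison, not part of the patch. TRAIN_CTX stands in for
 * GGML_TRAINING_CTX and 2048 is an assumed value.
 */
#include <stdio.h>

#define TRAIN_CTX 2048

// Removed approach: compress the position itself once the requested context
// exceeds the training context (linear position interpolation).
static float theta_linear(int p, int n_ctx) {
    float theta = (float) p;
    if (n_ctx > TRAIN_CTX) {
        theta = theta * TRAIN_CTX / n_ctx;
    }
    return theta;
}

int main(void) {
    // At n_ctx = 8192 the old path squashes position 4095 down to 1023.75;
    // the NTK-aware path keeps the position and changes theta_scale instead.
    printf("linear-scaled position: %.2f\n", theta_linear(4095, 8192));
    return 0;
}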
@@ -12674,7 +12688,7 @@ static void ggml_compute_forward_rope_f16(
     // row index used to determine which thread to use
     int ir = 0;
 
-    const float theta_scale = powf(10000.0, -2.0f/n_dims);
+    const float theta_scale = get_theta_scale(n_dims, n_past, n_ctx);
 
     const bool is_neox = mode & 2;
     const bool is_glm = mode & 4;
@@ -12714,9 +12728,6 @@ static void ggml_compute_forward_rope_f16(
                         dst_data[n_dims/2*3] = GGML_FP32_TO_FP16(x2*sin_block_theta + x3*cos_block_theta);
                     }
                 } if (!is_neox) {
-                    if (n_ctx > GGML_TRAINING_CTX) {
-                        theta = theta * GGML_TRAINING_CTX / n_ctx;
-                    }
                     for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
                         const float cos_theta = cosf(theta);
                         const float sin_theta = sinf(theta);
@@ -12842,7 +12853,7 @@ static void ggml_compute_forward_rope_back_f32(
     // row index used to determine which thread to use
     int ir = 0;
 
-    const float theta_scale = powf(10000.0, -2.0f/n_dims);
+    const float theta_scale = get_theta_scale(n_dims, n_past, n_ctx);
 
     const bool is_neox = mode & 2;
 
@@ -12856,9 +12867,6 @@ static void ggml_compute_forward_rope_back_f32(
                 float theta = (float)p;
 
                 if (!is_neox) {
-                    if (n_ctx > GGML_TRAINING_CTX) {
-                        theta = theta * GGML_TRAINING_CTX / n_ctx;
-                    }
                     for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
                         const float cos_theta = cosf(theta);
                         const float sin_theta = sinf(theta);
@@ -12959,7 +12967,7 @@ static void ggml_compute_forward_rope_back_f16(
     // row index used to determine which thread to use
    int ir = 0;
 
-    const float theta_scale = powf(10000.0, -2.0f/n_dims);
+    const float theta_scale = get_theta_scale(n_dims, n_past, n_ctx);
 
     const bool is_neox = mode & 2;
 
@@ -12973,9 +12981,6 @@ static void ggml_compute_forward_rope_back_f16(
                 float theta = (float)p;
 
                 if (!is_neox) {
-                    if (n_ctx > GGML_TRAINING_CTX) {
-                        theta = theta * GGML_TRAINING_CTX / n_ctx;
-                    }
                     for (int64_t i0 = 0; i0 < ne0; i0 += 2) {
                         const float cos_theta = cosf(theta);
                         const float sin_theta = sinf(theta);