@@ -443,8 +443,8 @@ struct ggml_tensor * forward(
         // wk shape   [n_embd, n_embd, 1, 1]
         // Qcur shape [n_embd/n_head, n_head, N, 1]
         // Kcur shape [n_embd/n_head, n_head, N, 1]
-        struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
-        struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
+        struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0, 0);
+        struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0, 0);

         // store key and value to memory
         {
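Every hunk in this diff makes the same mechanical change: each ggml_rope_inplace call gains one trailing integer argument, passed as 0. A minimal sketch of the declaration this implies, assuming the new parameter is ggml's n_ctx (the parameter name is a guess based on the rope API of this era; the diff itself only shows the extra argument):

        // assumed post-change declaration; n_ctx is an assumed name for the
        // new trailing parameter, which every call site in this diff passes as 0
        struct ggml_tensor * ggml_rope_inplace(
                struct ggml_context * ctx,
                struct ggml_tensor  * a,
                int                   n_past,
                int                   n_dims,   // n_rot at the call sites
                int                   mode,
                int                   n_ctx);   // newly added parameter

The remaining hunks apply the identical edit to the other forward variants.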
@@ -700,8 +700,8 @@ struct ggml_tensor * forward_batch(
         // wk shape   [n_embd, n_embd, 1, 1]
         // Qcur shape [n_embd/n_head, n_head, N, n_batch]
         // Kcur shape [n_embd/n_head, n_head, N, n_batch]
-        struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0);
-        struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0);
+        struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0, 0);
+        struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0, 0);
         assert_shape_4d(Qcur, n_embd/n_head, n_head, N, n_batch);
         assert_shape_4d(Kcur, n_embd/n_head, n_head, N, n_batch);

@@ -985,8 +985,8 @@ struct ggml_tensor * forward_batch_wo_cache(
         // wk shape   [n_embd, n_embd, 1, 1]
         // Qcur shape [n_embd/n_head, n_head, N, n_batch]
         // Kcur shape [n_embd/n_head, n_head, N, n_batch]
-        struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0);
-        struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0);
+        struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0, 0);
+        struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0, 0);
         assert_shape_4d(Qcur, n_embd/n_head, n_head, N, n_batch);
         assert_shape_4d(Kcur, n_embd/n_head, n_head, N, n_batch);

@@ -1207,8 +1207,8 @@ struct ggml_tensor * forward_batch_wo_cache_flash_attn(
         // compute Q and K and RoPE them
         // wq shape [n_embd, n_embd, 1, 1]
         // wk shape [n_embd, n_embd, 1, 1]
-        struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0);
-        struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0);
+        struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wq, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0, 0);
+        struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_4d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wk, cur), n_embd/n_head, n_head, N, n_batch), n_past, n_rot, 0, 0);
         assert_shape_4d(Qcur, n_embd/n_head, n_head, N, n_batch);
         assert_shape_4d(Kcur, n_embd/n_head, n_head, N, n_batch);

@@ -1607,10 +1607,10 @@ struct ggml_tensor * forward_batch_wo_cache_flash_attn_train(
         use_buf(-1); struct ggml_tensor * t04 = expand(gf, ggml_mul(ctx0, t02, t03)); assert_shape_2d(t04, n_embd, N*n_batch);
         use_buf(-1); struct ggml_tensor * t05 = expand(gf, ggml_mul_mat(ctx0, layer.wq, t04)); assert_shape_2d(t05, n_embd, N*n_batch);
         use_buf(-1); struct ggml_tensor * t06 = expand(gf, ggml_reshape_4d(ctx0, t05, n_embd/n_head, n_head, N, n_batch)); assert_shape_4d(t06, n_embd/n_head, n_head, N, n_batch);
-        use_buf(-1); struct ggml_tensor * t07 = expand(gf, ggml_rope_inplace(ctx0, t06, n_past, n_rot, rope_mode)); assert_shape_4d(t07, n_embd/n_head, n_head, N, n_batch);
+        use_buf(-1); struct ggml_tensor * t07 = expand(gf, ggml_rope_inplace(ctx0, t06, n_past, n_rot, rope_mode, 0)); assert_shape_4d(t07, n_embd/n_head, n_head, N, n_batch);
         use_buf(-1); struct ggml_tensor * t08 = expand(gf, ggml_mul_mat(ctx0, layer.wk, t04)); assert_shape_2d(t08, n_embd, N*n_batch);
         use_buf(-1); struct ggml_tensor * t09 = expand(gf, ggml_reshape_4d(ctx0, t08, n_embd/n_head, n_head, N, n_batch)); assert_shape_4d(t09, n_embd/n_head, n_head, N, n_batch);
-        use_buf(-1); struct ggml_tensor * t10 = expand(gf, ggml_rope_inplace(ctx0, t09, n_past, n_rot, rope_mode)); assert_shape_4d(t10, n_embd/n_head, n_head, N, n_batch);
+        use_buf(-1); struct ggml_tensor * t10 = expand(gf, ggml_rope_inplace(ctx0, t09, n_past, n_rot, rope_mode, 0)); assert_shape_4d(t10, n_embd/n_head, n_head, N, n_batch);
         use_buf(-1); struct ggml_tensor * t11 = expand(gf, ggml_mul_mat(ctx0, t04, layer.wv)); assert_shape_2d(t11, N*n_batch, n_embd);
         use_buf(-1); struct ggml_tensor * t12 = expand(gf, ggml_reshape_4d(ctx0, t11, N, n_batch, n_embd/n_head, n_head)); assert_shape_4d(t12, N, n_batch, n_embd/n_head, n_head);
         use_buf(-1); struct ggml_tensor * t13 = expand(gf, ggml_permute(ctx0, t07, 0, 2, 1, 3)); assert_shape_4d(t13, n_embd/n_head, N, n_head, n_batch);