Skip to content

Commit bc34dd4

Browse files
ggerganov and xaedes
authored
train : fix KQ_pos allocation (ggml-org#3392)
* train : fix KQ_pos allocation * make sure KQ_pos is not reallocated in finetune --------- Co-authored-by: xaedes <[email protected]>
1 parent 2777a84 commit bc34dd4

File tree

2 files changed

+6
-2
lines changed

2 files changed

+6
-2
lines changed

examples/finetune/finetune.cpp

+4-1
Original file line numberDiff line numberDiff line change
@@ -626,7 +626,8 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs(
626626

627627
// KQ_pos - contains the positions
628628
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, N);
629-
{
629+
ggml_allocr_alloc(alloc, KQ_pos);
630+
if (!ggml_allocr_is_measure(alloc)) {
630631
int * data = (int *) KQ_pos->data;
631632
for (int i = 0; i < N; ++i) {
632633
data[i] = n_past + i;
@@ -786,6 +787,8 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs(
786787
ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t36->grad, one));
787788
GGML_ASSERT(t36->grad->data == NULL && t36->grad->view_src == NULL);
788789
ggml_allocr_alloc(alloc, t36->grad);
790+
// KQ_pos
791+
ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, KQ_pos, one));
789792

790793
// make sure base model tensors data cannot be used in viewable operations
791794
ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, model->tok_embeddings, one));

examples/train-text-from-scratch/train-text-from-scratch.cpp

+2-1
Original file line numberDiff line numberDiff line change
@@ -334,7 +334,8 @@ static struct ggml_tensor * llama_build_train_graphs(
334334

335335
// KQ_pos - contains the positions
336336
struct ggml_tensor * KQ_pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, N);
337-
{
337+
ggml_allocr_alloc(alloc, KQ_pos);
338+
if (!ggml_allocr_is_measure(alloc)) {
338339
int * data = (int *) KQ_pos->data;
339340
for (int i = 0; i < N; ++i) {
340341
data[i] = n_past + i;

0 commit comments

Comments (0)