Skip to content

Commit 5bf2a27

Browse files
ggml : remove src0 and src1 from ggml_tensor and rename opt to src (ggml-org#2178)
* Add ggml changes
* Update train-text-from-scratch for change
* mpi : adapt to new ggml_tensor->src

---------

Co-authored-by: Georgi Gerganov <[email protected]>
1 parent c9c74b4 commit 5bf2a27

File tree

6 files changed

+371
-421
lines changed

6 files changed

+371
-421
lines changed

examples/train-text-from-scratch/train-text-from-scratch.cpp

Lines changed: 3 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1354,17 +1354,9 @@ struct ggml_tensor * expand(struct ggml_cgraph * g, struct ggml_tensor * t) {
13541354
}
13551355
}
13561356

1357-
if (t->src0) {
1358-
expand(g, t->src0);
1359-
}
1360-
1361-
if (t->src1) {
1362-
expand(g, t->src1);
1363-
}
1364-
1365-
for (int i = 0; i < GGML_MAX_OPT; ++i) {
1366-
if (t->opt[i]) {
1367-
expand(g, t->opt[i]);
1357+
for (int i = 0; i < GGML_MAX_SRC; ++i) {
1358+
if (t->src[i]) {
1359+
expand(g, t->src[i]);
13681360
}
13691361
}
13701362

ggml-cuda.cu

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -3200,36 +3200,36 @@ void ggml_cuda_assign_buffers_impl(struct ggml_tensor * tensor, bool scratch, bo
32003200
}
32013201

32023202
// recursively assign CUDA buffers until a compute tensor is found
3203-
if (tensor->src0 != nullptr && tensor->src0->backend == GGML_BACKEND_CPU) {
3204-
const ggml_op src0_op = tensor->src0->op;
3203+
if (tensor->src[0] != nullptr && tensor->src[0]->backend == GGML_BACKEND_CPU) {
3204+
const ggml_op src0_op = tensor->src[0]->op;
32053205
if (src0_op == GGML_OP_RESHAPE || src0_op == GGML_OP_TRANSPOSE || src0_op == GGML_OP_VIEW) {
3206-
ggml_cuda_assign_buffers_impl(tensor->src0, scratch, force_inplace);
3206+
ggml_cuda_assign_buffers_impl(tensor->src[0], scratch, force_inplace);
32073207
}
32083208
}
3209-
if (tensor->op == GGML_OP_CPY && tensor->src1->backend == GGML_BACKEND_CPU) {
3210-
ggml_cuda_assign_buffers_impl(tensor->src1, scratch, force_inplace);
3209+
if (tensor->op == GGML_OP_CPY && tensor->src[1]->backend == GGML_BACKEND_CPU) {
3210+
ggml_cuda_assign_buffers_impl(tensor->src[1], scratch, force_inplace);
32113211
}
32123212

32133213
tensor->backend = GGML_BACKEND_GPU;
32143214
struct ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu;
32153215
memset(extra, 0, sizeof(*extra));
32163216

3217-
const bool inplace = (tensor->src0 != nullptr && tensor->src0->data == tensor->data) ||
3217+
const bool inplace = (tensor->src[0] != nullptr && tensor->src[0]->data == tensor->data) ||
32183218
tensor->op == GGML_OP_VIEW ||
32193219
force_inplace;
32203220
const size_t size = ggml_nbytes(tensor);
32213221

32223222
CUDA_CHECK(cudaSetDevice(g_main_device));
3223-
if (inplace && (tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT)) {
3224-
struct ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu * ) tensor->src0->extra;
3223+
if (inplace && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT)) {
3224+
struct ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu * ) tensor->src[0]->extra;
32253225
char * src0_ddc = (char *) src0_extra->data_device[g_main_device];
32263226
size_t offset = 0;
32273227
if (tensor->op == GGML_OP_VIEW) {
3228-
memcpy(&offset, tensor->opt[0]->data, sizeof(size_t));
3228+
memcpy(&offset, tensor->src[2]->data, sizeof(size_t));
32293229
}
32303230
extra->data_device[g_main_device] = src0_ddc + offset;
32313231
} else if (tensor->op == GGML_OP_CPY) {
3232-
struct ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu * ) tensor->src1->extra;
3232+
struct ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu * ) tensor->src[1]->extra;
32333233
void * src1_ddv = src1_extra->data_device[g_main_device];
32343234
extra->data_device[g_main_device] = src1_ddv;
32353235
} else if (scratch) {
@@ -3300,8 +3300,8 @@ void ggml_cuda_free_scratch() {
33003300
bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor){
33013301
ggml_cuda_func_t func;
33023302
const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
3303-
|| (tensor->src0 != nullptr && (tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT))
3304-
|| (tensor->src1 != nullptr && tensor->src1->backend == GGML_BACKEND_GPU);
3303+
|| (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
3304+
|| (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_GPU);
33053305

33063306
switch (tensor->op) {
33073307
case GGML_OP_ADD:
@@ -3329,7 +3329,7 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_
33293329
func = ggml_cuda_rms_norm;
33303330
break;
33313331
case GGML_OP_MUL_MAT:
3332-
if (!any_on_device && !ggml_cuda_can_mul_mat(tensor->src0, tensor->src1, tensor)) {
3332+
if (!any_on_device && !ggml_cuda_can_mul_mat(tensor->src[0], tensor->src[1], tensor)) {
33333333
return false;
33343334
}
33353335
func = ggml_cuda_mul_mat;
@@ -3383,6 +3383,6 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_
33833383
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
33843384
return true;
33853385
}
3386-
func(tensor->src0, tensor->src1, tensor);
3386+
func(tensor->src[0], tensor->src[1], tensor);
33873387
return true;
33883388
}

ggml-metal.m

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -393,8 +393,8 @@ void ggml_metal_graph_compute(
393393
for (int i = node_start; i < node_end; ++i) {
394394
metal_printf("%s: encoding node %3d, op = %8s\n", __func__, i, ggml_op_name(gf->nodes[i]->op));
395395

396-
struct ggml_tensor * src0 = gf->nodes[i]->src0;
397-
struct ggml_tensor * src1 = gf->nodes[i]->src1;
396+
struct ggml_tensor * src0 = gf->nodes[i]->src[0];
397+
struct ggml_tensor * src1 = gf->nodes[i]->src[1];
398398
struct ggml_tensor * dst = gf->nodes[i];
399399

400400
const int64_t ne00 = src0 ? src0->ne[0] : 0;

ggml-mpi.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -175,11 +175,11 @@ void ggml_mpi_graph_compute_pre(
175175
// attach the input data to all nodes that need it
176176
// TODO: not great - should be able to do this without modifying the compute graph (see next TODO below)
177177
for (int i = idx_l0; i < idx_l1; i++) {
178-
if (gf->nodes[i]->src0 == gf->nodes[idx_l0]) {
179-
gf->nodes[i]->src0 = inp0;
178+
if (gf->nodes[i]->src[0] == gf->nodes[idx_l0]) {
179+
gf->nodes[i]->src[0] = inp0;
180180
}
181-
if (gf->nodes[i]->src1 == gf->nodes[idx_l0]) {
182-
gf->nodes[i]->src1 = inp0;
181+
if (gf->nodes[i]->src[1] == gf->nodes[idx_l0]) {
182+
gf->nodes[i]->src[1] = inp0;
183183
}
184184
}
185185

0 commit comments

Comments
 (0)