@@ -3200,36 +3200,36 @@ void ggml_cuda_assign_buffers_impl(struct ggml_tensor * tensor, bool scratch, bool force_inplace) {
     }
 
     // recursively assign CUDA buffers until a compute tensor is found
-    if (tensor->src0 != nullptr && tensor->src0->backend == GGML_BACKEND_CPU) {
-        const ggml_op src0_op = tensor->src0->op;
+    if (tensor->src[0] != nullptr && tensor->src[0]->backend == GGML_BACKEND_CPU) {
+        const ggml_op src0_op = tensor->src[0]->op;
         if (src0_op == GGML_OP_RESHAPE || src0_op == GGML_OP_TRANSPOSE || src0_op == GGML_OP_VIEW) {
-            ggml_cuda_assign_buffers_impl(tensor->src0, scratch, force_inplace);
+            ggml_cuda_assign_buffers_impl(tensor->src[0], scratch, force_inplace);
         }
     }
-    if (tensor->op == GGML_OP_CPY && tensor->src1->backend == GGML_BACKEND_CPU) {
-        ggml_cuda_assign_buffers_impl(tensor->src1, scratch, force_inplace);
+    if (tensor->op == GGML_OP_CPY && tensor->src[1]->backend == GGML_BACKEND_CPU) {
+        ggml_cuda_assign_buffers_impl(tensor->src[1], scratch, force_inplace);
     }
 
     tensor->backend = GGML_BACKEND_GPU;
     struct ggml_tensor_extra_gpu * extra = new ggml_tensor_extra_gpu;
     memset(extra, 0, sizeof(*extra));
 
-    const bool inplace = (tensor->src0 != nullptr && tensor->src0->data == tensor->data) ||
+    const bool inplace = (tensor->src[0] != nullptr && tensor->src[0]->data == tensor->data) ||
         tensor->op == GGML_OP_VIEW ||
         force_inplace;
     const size_t size = ggml_nbytes(tensor);
 
     CUDA_CHECK(cudaSetDevice(g_main_device));
-    if (inplace && (tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT)) {
-        struct ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) tensor->src0->extra;
+    if (inplace && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT)) {
+        struct ggml_tensor_extra_gpu * src0_extra = (ggml_tensor_extra_gpu *) tensor->src[0]->extra;
         char * src0_ddc = (char *) src0_extra->data_device[g_main_device];
         size_t offset = 0;
         if (tensor->op == GGML_OP_VIEW) {
-            memcpy(&offset, tensor->opt[0]->data, sizeof(size_t));
+            memcpy(&offset, tensor->src[2]->data, sizeof(size_t));
         }
         extra->data_device[g_main_device] = src0_ddc + offset;
     } else if (tensor->op == GGML_OP_CPY) {
-        struct ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu *) tensor->src1->extra;
+        struct ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu *) tensor->src[1]->extra;
         void * src1_ddv = src1_extra->data_device[g_main_device];
         extra->data_device[g_main_device] = src1_ddv;
     } else if (scratch) {
@@ -3300,8 +3300,8 @@ void ggml_cuda_free_scratch() {
 bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor){
     ggml_cuda_func_t func;
     const bool any_on_device = tensor->backend == GGML_BACKEND_GPU
-        || (tensor->src0 != nullptr && (tensor->src0->backend == GGML_BACKEND_GPU || tensor->src0->backend == GGML_BACKEND_GPU_SPLIT))
-        || (tensor->src1 != nullptr && tensor->src1->backend == GGML_BACKEND_GPU);
+        || (tensor->src[0] != nullptr && (tensor->src[0]->backend == GGML_BACKEND_GPU || tensor->src[0]->backend == GGML_BACKEND_GPU_SPLIT))
+        || (tensor->src[1] != nullptr && tensor->src[1]->backend == GGML_BACKEND_GPU);
 
     switch (tensor->op) {
         case GGML_OP_ADD:
@@ -3329,7 +3329,7 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor){
             func = ggml_cuda_rms_norm;
             break;
         case GGML_OP_MUL_MAT:
-            if (!any_on_device && !ggml_cuda_can_mul_mat(tensor->src0, tensor->src1, tensor)) {
+            if (!any_on_device && !ggml_cuda_can_mul_mat(tensor->src[0], tensor->src[1], tensor)) {
                 return false;
             }
             func = ggml_cuda_mul_mat;
@@ -3383,6 +3383,6 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor){
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return true;
     }
-    func(tensor->src0, tensor->src1, tensor);
+    func(tensor->src[0], tensor->src[1], tensor);
     return true;
 }
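
For context on the accessor change above: this commit follows the ggml refactor that replaced the separate src0, src1, and opt[] operand pointers of struct ggml_tensor with a single src[] array, which is also why the view byte offset is now read from src[2] rather than opt[0]. Below is a minimal sketch of that layout change, with placeholder struct names and array sizes (the real definitions and sizes live in ggml.h and are not taken from this diff):

// Sketch only -- not the upstream ggml.h definition. Field names follow the
// pattern used in this diff; the struct names and array sizes are assumptions.
#define SKETCH_MAX_OPT 4    // assumed size of the old opt[] array
#define SKETCH_MAX_SRC 6    // assumed size of the new src[] array

struct tensor_old {                            // before the refactor
    struct tensor_old * src0;                  // first operand
    struct tensor_old * src1;                  // second operand
    struct tensor_old * opt[SKETCH_MAX_OPT];   // extra operands, e.g. the view offset tensor in opt[0]
};

struct tensor_new {                            // after the refactor
    struct tensor_new * src[SKETCH_MAX_SRC];   // src[0] = old src0, src[1] = old src1,
                                               // src[2] = old opt[0], and so on
};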