Commit d626b55

wip : avoid inplace ops
1 parent 5ee92c3 commit d626b55

File tree: 2 files changed, +10 -7 lines


ggml-metal.m

Lines changed: 8 additions & 5 deletions
@@ -20,6 +20,8 @@
     id<MTLBuffer> buffer;
 };
 
+static void * g_ptr_base = (void *)0x1234;
+
 struct ggml_metal_context {
     int n_cb;
 
@@ -222,8 +224,8 @@ void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb) {
         default: {}
     }
 
-    *offs = (size_t) tensor->data;
-    printf("%s: offs = %zu\n", __func__, *offs);
+    *offs = (size_t) tensor->data - (size_t) g_ptr_base;
+    printf("%s: offs = %zu, %p\n", __func__, *offs, tensor->extra);
     return ((struct ggml_metal_buffer_wrapper *) tensor->extra)->buffer;
 }

@@ -917,7 +919,7 @@ static void ggml_backend_metal_free_data(struct ggml_backend_buffer * alloc) {
 
     printf("XXXXXXXXXXXXXXX ALOC: %p %p %p size = %zu\n", (void * )wrapper, (void *)&wrapper->buffer, (void *)[wrapper->buffer contents], size);
 
-    struct ggml_backend_buffer * buffer = ggml_allocator_simple_init(nil, size, TENSOR_ALIGNMENT);
+    struct ggml_backend_buffer * buffer = ggml_allocator_simple_init(g_ptr_base, size, TENSOR_ALIGNMENT);
     buffer->interface.init_tensor = ggml_backend_metal_init_tensor;
     buffer->interface.free_data = ggml_backend_metal_free_data;
     buffer->backend_data = wrapper;
@@ -932,7 +934,7 @@ static void ggml_backend_metal_set_tensor_async(struct ggml_backend * backend, s
     struct ggml_metal_buffer_wrapper * wrapper = (struct ggml_metal_buffer_wrapper *)tensor->extra;
     char * contents = (char *)[wrapper->buffer contents];
 
-    const size_t t_data = (size_t) tensor->data;
+    const size_t t_data = (size_t) tensor->data - (size_t) g_ptr_base;
 
     printf("XXXXXXXXXXXXXXX SET : %p %p %p offset = %zu\n", (void *)(tensor->data), (void *)&wrapper->buffer, (void *)contents, offset);

@@ -945,12 +947,13 @@ static void ggml_backend_metal_set_tensor_async(struct ggml_backend * backend, s
 
 static void ggml_backend_metal_get_tensor_async(struct ggml_backend * backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
     GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
+    printf("XXXXXXXXXXXXXXX GET : %p %p\n", (void *)(tensor->data), (void *)tensor->extra);
     GGML_ASSERT(tensor->extra != nil && "tensor not allocated");
 
     struct ggml_metal_buffer_wrapper * wrapper = (struct ggml_metal_buffer_wrapper *)tensor->extra;
     char * contents = (char *)[wrapper->buffer contents];
 
-    const size_t t_data = (size_t) tensor->data;
+    const size_t t_data = (size_t) tensor->data - (size_t) g_ptr_base;
 
     printf("XXXXXXXXXXXXXXX GET : %p %p %p offset = %zu\n", (void *)(tensor->data), (void *)&wrapper->buffer, (void *)contents, offset);
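For context, my reading of the ggml-metal.m change: ggml_allocator_simple_init is now seeded with the dummy base address g_ptr_base instead of nil, so a tensor's data field ends up holding g_ptr_base plus a byte offset rather than a real host pointer, and the Metal code recovers the offset into the backing MTLBuffer by subtracting g_ptr_base. A minimal, standalone C sketch of that arithmetic; main() and the tensor_data variable are hypothetical, for illustration only:

#include <stddef.h>
#include <stdio.h>

// same dummy, non-NULL base address as in the patch
static void * g_ptr_base = (void *)0x1234;

int main(void) {
    // hypothetical: the allocator placed a tensor 4096 bytes into the buffer,
    // so its data field is g_ptr_base + 4096 rather than a real host pointer
    void * tensor_data = (char *)g_ptr_base + 4096;

    // mirrors "*offs = (size_t) tensor->data - (size_t) g_ptr_base" from the patch
    size_t offs = (size_t)tensor_data - (size_t)g_ptr_base;

    printf("offset into the MTLBuffer: %zu\n", offs); // prints 4096
    return 0;
}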

llama.cpp

Lines changed: 2 additions & 2 deletions
@@ -1370,10 +1370,10 @@ static ggml_graph_splits llama_build_graph(
         struct ggml_tensor * tmpv = ggml_mul_mat(ctx_l, model.layers[il].wv, cur);
         ggml_set_name(tmpv, "tmpv");
 
-        struct ggml_tensor * Kcur = ggml_rope_custom_inplace(ctx_l, ggml_reshape_3d(ctx_l, tmpk, n_embd/n_head, n_head, N), n_past, n_rot, 0, freq_base, freq_scale, 0);
+        struct ggml_tensor * Kcur = ggml_rope(ctx_l, ggml_reshape_3d(ctx_l, tmpk, n_embd/n_head, n_head, N), n_past, n_rot, 0, 0);
         ggml_set_name(Kcur, "Kcur");
 
-        struct ggml_tensor * Qcur = ggml_rope_custom_inplace(ctx_l, ggml_reshape_3d(ctx_l, tmpq, n_embd/n_head, n_head, N), n_past, n_rot, 0, freq_base, freq_scale, 0);
+        struct ggml_tensor * Qcur = ggml_rope(ctx_l, ggml_reshape_3d(ctx_l, tmpq, n_embd/n_head, n_head, N), n_past, n_rot, 0, 0);
         ggml_set_name(Qcur, "Qcur");
 
         struct ggml_tensor * Vcur = ggml_transpose(ctx_l, ggml_reshape_2d(ctx_l, tmpv, n_embd, N));
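A side note on the rope change, as I interpret the "avoid inplace ops" message: the _inplace variants make the result reuse its input's storage, whereas the non-inplace ggml_rope produces a separate tensor, which fits an allocator that expects every tensor's data field to be a distinct base-plus-offset it handed out. (The wip call also drops the freq_base/freq_scale arguments that ggml_rope_custom_inplace took.) Below is a toy, self-contained C sketch of that aliasing difference; toy_tensor, rope_inplace, and rope are made-up names, not ggml API:

#include <stdio.h>

// made-up stand-in for a graph tensor; only the field needed for the example
struct toy_tensor {
    void * data; // storage location assigned by the allocator
};

// "inplace" style: the result aliases its input's storage
static struct toy_tensor rope_inplace(struct toy_tensor src) {
    return src;
}

// non-inplace style: the result gets its own storage from the allocator
static struct toy_tensor rope(struct toy_tensor src, void * fresh) {
    (void)src;
    struct toy_tensor dst = { fresh };
    return dst;
}

int main(void) {
    char pool[8192]; // pretend allocator pool
    struct toy_tensor tmpk = { &pool[0] };

    struct toy_tensor k_inplace = rope_inplace(tmpk);
    struct toy_tensor k_owned   = rope(tmpk, &pool[4096]);

    printf("inplace result aliases input:     %d\n", k_inplace.data == tmpk.data); // 1
    printf("non-inplace result aliases input: %d\n", k_owned.data == tmpk.data);   // 0
    return 0;
}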
