Skip to content

Commit 30042ea

Browse files
ggerganov authored and arthw committed
ggml : alloc ggml_contexts on the heap (whisper/2525)
1 parent b5ec3ef commit 30042ea

File tree

2 files changed: 21 additions (+21) and 49 deletions (-49).

ggml/include/ggml.h

+4 -3
Original file line number | Diff line number | Diff line change
@@ -217,7 +217,6 @@
217217

218218
#define GGML_MAX_DIMS 4
219219
#define GGML_MAX_PARAMS 2048
220-
#define GGML_MAX_CONTEXTS 64
221220
#define GGML_MAX_SRC 10
222221
#define GGML_MAX_N_THREADS 512
223222
#define GGML_MAX_OP_PARAMS 64
@@ -657,6 +656,7 @@ extern "C" {
657656
};
658657

659658
// scratch buffer
659+
// TODO: deprecate and remove
660660
struct ggml_scratch {
661661
size_t offs;
662662
size_t size;
@@ -760,8 +760,9 @@ extern "C" {
760760

761761
// main
762762

763-
GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
764-
GGML_API void ggml_free(struct ggml_context * ctx);
763+
GGML_API struct ggml_context * ggml_init (struct ggml_init_params params);
764+
GGML_API void ggml_reset(struct ggml_context * ctx);
765+
GGML_API void ggml_free (struct ggml_context * ctx);
765766

766767
GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
767768

ggml/src/ggml.c

+17 -46
Original file line number | Diff line number | Diff line change
@@ -306,6 +306,7 @@ void ggml_abort(const char * file, int line, const char * fmt, ...) {
306306
}
307307

308308
#define GGML_DEBUG 0
309+
309310
#define GGML_GELU_FP16
310311
#define GGML_GELU_QUICK_FP16
311312

@@ -2014,7 +2015,7 @@ static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
20142015

20152016
struct ggml_context {
20162017
size_t mem_size;
2017-
void* mem_buffer;
2018+
void * mem_buffer;
20182019
bool mem_buffer_owned;
20192020
bool no_alloc;
20202021
bool no_alloc_save; // this is used to save the no_alloc state when using scratch buffers
@@ -3263,7 +3264,6 @@ struct ggml_numa_nodes {
32633264
//
32643265

32653266
struct ggml_state {
3266-
struct ggml_context_container contexts[GGML_MAX_CONTEXTS];
32673267
struct ggml_numa_nodes numa;
32683268
};
32693269

@@ -3845,7 +3845,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
38453845
const uint64_t t_start = ggml_time_us(); UNUSED(t_start);
38463846

38473847
g_state = (struct ggml_state) {
3848-
/*.contexts =*/ { { 0 } },
38493848
/*.numa =*/ {
38503849
.n_nodes = 0,
38513850
.total_cpus = 0,
@@ -3864,26 +3863,9 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
38643863
is_first_call = false;
38653864
}
38663865

3867-
// find non-used context in g_state
3868-
struct ggml_context * ctx = NULL;
3869-
3870-
for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
3871-
if (!g_state.contexts[i].used) {
3872-
g_state.contexts[i].used = true;
3873-
ctx = &g_state.contexts[i].context;
3874-
3875-
GGML_PRINT_DEBUG("%s: found unused context %d\n", __func__, i);
3876-
break;
3877-
}
3878-
}
3879-
3880-
if (ctx == NULL) {
3881-
GGML_PRINT_DEBUG("%s: no unused context found\n", __func__);
3882-
3883-
ggml_critical_section_end();
3866+
ggml_critical_section_end();
38843867

3885-
return NULL;
3886-
}
3868+
struct ggml_context * ctx = GGML_MALLOC(sizeof(struct ggml_context));
38873869

38883870
// allow to call ggml_init with 0 size
38893871
if (params.mem_size == 0) {
@@ -3911,42 +3893,31 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
39113893

39123894
GGML_PRINT_DEBUG("%s: context initialized\n", __func__);
39133895

3914-
ggml_critical_section_end();
3915-
39163896
return ctx;
39173897
}
39183898

3919-
void ggml_free(struct ggml_context * ctx) {
3899+
void ggml_reset(struct ggml_context * ctx) {
39203900
if (ctx == NULL) {
39213901
return;
39223902
}
39233903

3924-
// make this function thread safe
3925-
ggml_critical_section_start();
3926-
3927-
bool found = false;
3928-
3929-
for (int i = 0; i < GGML_MAX_CONTEXTS; i++) {
3930-
if (&g_state.contexts[i].context == ctx) {
3931-
g_state.contexts[i].used = false;
3932-
3933-
GGML_PRINT_DEBUG("%s: context %d has been freed. memory used = %zu\n",
3934-
__func__, i, ggml_used_mem(ctx));
3935-
3936-
if (ctx->mem_buffer_owned) {
3937-
ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
3938-
}
3904+
ctx->n_objects = 0;
3905+
ctx->objects_begin = NULL;
3906+
ctx->objects_end = NULL;
3907+
ctx->scratch = (struct ggml_scratch) { 0, 0, NULL, };
3908+
ctx->scratch_save = (struct ggml_scratch) { 0, 0, NULL, };
3909+
}
39393910

3940-
found = true;
3941-
break;
3942-
}
3911+
void ggml_free(struct ggml_context * ctx) {
3912+
if (ctx == NULL) {
3913+
return;
39433914
}
39443915

3945-
if (!found) {
3946-
GGML_PRINT_DEBUG("%s: context not found\n", __func__);
3916+
if (ctx->mem_buffer_owned) {
3917+
ggml_aligned_free(ctx->mem_buffer, ctx->mem_size);
39473918
}
39483919

3949-
ggml_critical_section_end();
3920+
GGML_FREE(ctx);
39503921
}
39513922

39523923
size_t ggml_used_mem(const struct ggml_context * ctx) {

0 commit comments

Comments
 (0)