Skip to content

Commit c6328bc

Browse files
threadpool: further API cleanup and prep for future refactoring
All threadpool-related functions and structs use the ggml_threadpool prefix.
1 parent e3c2202 commit c6328bc

File tree

8 files changed

+56
-56
lines changed

8 files changed

+56
-56
lines changed

examples/llama-bench/llama-bench.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -1531,7 +1531,7 @@ int main(int argc, char ** argv) {
15311531
tpp.poll = t.poll;
15321532
tpp.prio = params.prio;
15331533

1534-
struct ggml_compute_threadpool* threadpool = ggml_create_threadpool(&tpp);
1534+
struct ggml_threadpool* threadpool = ggml_threadpool_create(&tpp);
15351535
if (!threadpool) {
15361536
LOG_TEE("%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads);
15371537
exit(1);
@@ -1578,7 +1578,7 @@ int main(int argc, char ** argv) {
15781578

15791579
llama_free(ctx);
15801580

1581-
ggml_release_threadpool(threadpool);
1581+
ggml_threadpool_release(threadpool);
15821582
}
15831583

15841584
llama_free_model(lmodel);

examples/main/main.cpp

+5-5
Original file line numberDiff line numberDiff line change
@@ -232,9 +232,9 @@ int main(int argc, char ** argv) {
232232

233233
set_process_priority(params.cpuparams.priority);
234234

235-
struct ggml_compute_threadpool * threadpool_batch = NULL;
235+
struct ggml_threadpool * threadpool_batch = NULL;
236236
if (!ggml_threadpool_params_match(&tpp, &tpp_batch)) {
237-
threadpool_batch = ggml_create_threadpool(&tpp_batch);
237+
threadpool_batch = ggml_threadpool_create(&tpp_batch);
238238
if (!threadpool_batch) {
239239
LOG_TEE("%s: batch threadpool create failed : n_threads %d\n", __func__, tpp_batch.n_threads);
240240
exit(1);
@@ -244,7 +244,7 @@ int main(int argc, char ** argv) {
244244
tpp.paused = true;
245245
}
246246

247-
struct ggml_compute_threadpool * threadpool = ggml_create_threadpool(&tpp);
247+
struct ggml_threadpool * threadpool = ggml_threadpool_create(&tpp);
248248
if (!threadpool) {
249249
LOG_TEE("%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads);
250250
exit(1);
@@ -1023,8 +1023,8 @@ int main(int argc, char ** argv) {
10231023
llama_sampling_free(ctx_sampling);
10241024
llama_backend_free();
10251025

1026-
ggml_release_threadpool(threadpool);
1027-
ggml_release_threadpool(threadpool_batch);
1026+
ggml_threadpool_release(threadpool);
1027+
ggml_threadpool_release(threadpool_batch);
10281028

10291029
#ifndef LOG_DISABLE_LOGS
10301030
LOG_TEE("Log end\n");

ggml/include/ggml-backend.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ extern "C" {
102102

103103
GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
104104
GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
105-
GGML_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_compute_threadpool_t threadpool);
105+
GGML_API void ggml_backend_cpu_set_threadpool (ggml_backend_t backend_cpu, ggml_threadpool_t threadpool);
106106
GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
107107

108108
// Create a backend buffer from an existing pointer

ggml/include/ggml.h

+9-9
Original file line numberDiff line numberDiff line change
@@ -645,9 +645,9 @@ extern "C" {
645645
bool paused; // start in paused state
646646
};
647647

648-
struct ggml_compute_threadpool; // forward declaration, see ggml.c
648+
struct ggml_threadpool; // forward declaration, see ggml.c
649649

650-
typedef struct ggml_compute_threadpool * ggml_compute_threadpool_t;
650+
typedef struct ggml_threadpool * ggml_threadpool_t;
651651

652652
// the compute plan that needs to be prepared for ggml_graph_compute()
653653
// since https://github.com/ggerganov/ggml/issues/287
@@ -656,7 +656,7 @@ extern "C" {
656656
uint8_t * work_data; // work buffer, to be allocated by caller before calling to `ggml_graph_compute()`
657657

658658
int n_threads;
659-
struct ggml_compute_threadpool * threadpool;
659+
struct ggml_threadpool * threadpool;
660660

661661
// abort ggml_graph_compute when true
662662
ggml_abort_callback abort_callback;
@@ -2039,18 +2039,18 @@ extern "C" {
20392039
GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
20402040
GGML_API void ggml_threadpool_params_init(struct ggml_threadpool_params *p, int n_threads);
20412041
GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params *p0, const struct ggml_threadpool_params *p1);
2042-
GGML_API struct ggml_compute_threadpool* ggml_create_threadpool (struct ggml_threadpool_params * params);
2043-
GGML_API void ggml_release_threadpool (struct ggml_compute_threadpool * threadpool);
2044-
GGML_API int ggml_threadpool_get_n_threads(struct ggml_compute_threadpool * threadpool);
2045-
GGML_API void ggml_pause_threadpool (struct ggml_compute_threadpool * threadpool);
2046-
GGML_API void ggml_resume_threadpool (struct ggml_compute_threadpool * threadpool);
2042+
GGML_API struct ggml_threadpool* ggml_threadpool_create (struct ggml_threadpool_params * params);
2043+
GGML_API void ggml_threadpool_release (struct ggml_threadpool * threadpool);
2044+
GGML_API int ggml_threadpool_get_n_threads(struct ggml_threadpool * threadpool);
2045+
GGML_API void ggml_threadpool_pause (struct ggml_threadpool * threadpool);
2046+
GGML_API void ggml_threadpool_resume (struct ggml_threadpool * threadpool);
20472047

20482048
// ggml_graph_plan() has to be called before ggml_graph_compute()
20492049
// when plan.work_size > 0, caller must allocate memory for plan.work_data
20502050
GGML_API struct ggml_cplan ggml_graph_plan(
20512051
const struct ggml_cgraph * cgraph,
20522052
int n_threads, /* = GGML_DEFAULT_N_THREADS */
2053-
struct ggml_compute_threadpool * threadpool /* = NULL */ );
2053+
struct ggml_threadpool * threadpool /* = NULL */ );
20542054
GGML_API enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan);
20552055

20562056
// same as ggml_graph_compute() but the work data is allocated as a part of the context

ggml/src/ggml-backend.c

+3-3
Original file line numberDiff line numberDiff line change
@@ -723,7 +723,7 @@ ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void) {
723723

724724
struct ggml_backend_cpu_context {
725725
int n_threads;
726-
ggml_compute_threadpool_t threadpool;
726+
ggml_threadpool_t threadpool;
727727

728728
void * work_data;
729729
size_t work_size;
@@ -906,14 +906,14 @@ void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) {
906906
ctx->n_threads = n_threads;
907907
}
908908

909-
void ggml_backend_cpu_set_threadpool(ggml_backend_t backend_cpu, ggml_compute_threadpool_t threadpool) {
909+
void ggml_backend_cpu_set_threadpool(ggml_backend_t backend_cpu, ggml_threadpool_t threadpool) {
910910
GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
911911

912912
struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
913913

914914
if (ctx->threadpool && ctx->threadpool != threadpool) {
915915
// already had a different threadpool, pause/suspend it before switching
916-
ggml_pause_threadpool(ctx->threadpool);
916+
ggml_threadpool_pause(ctx->threadpool);
917917
}
918918
ctx->threadpool = threadpool;
919919
}

ggml/src/ggml.c

+27-27
Original file line numberDiff line numberDiff line change
@@ -1955,7 +1955,7 @@ typedef pthread_mutex_t ggml_mutex_t;
19551955
#endif
19561956

19571957
// Threadpool def
1958-
struct ggml_compute_threadpool {
1958+
struct ggml_threadpool {
19591959
ggml_mutex_t mutex; // mutex for cond.var
19601960
ggml_cond_t cond; // cond.var for waiting for new work
19611961

@@ -1990,7 +1990,7 @@ struct ggml_compute_state {
19901990
int last_graph;
19911991
bool pending;
19921992
#endif
1993-
struct ggml_compute_threadpool * threadpool;
1993+
struct ggml_threadpool * threadpool;
19941994
int ith;
19951995
};
19961996

@@ -2002,7 +2002,7 @@ struct ggml_compute_params {
20022002
size_t wsize;
20032003
void * wdata;
20042004

2005-
struct ggml_compute_threadpool * threadpool;
2005+
struct ggml_threadpool * threadpool;
20062006
};
20072007

20082008
//
@@ -3110,15 +3110,15 @@ inline static void ggml_critical_section_start(void) {
31103110
}
31113111

31123112
#ifdef GGML_USE_OPENMP
3113-
static void ggml_barrier(struct ggml_compute_threadpool * threadpool) {
3113+
static void ggml_barrier(struct ggml_threadpool * threadpool) {
31143114
if (threadpool->n_threads_cur == 1) {
31153115
return;
31163116
}
31173117

31183118
#pragma omp barrier
31193119
}
31203120
#else
3121-
static void ggml_barrier(struct ggml_compute_threadpool * threadpool) {
3121+
static void ggml_barrier(struct ggml_threadpool * threadpool) {
31223122
if (threadpool->n_threads_cur == 1) {
31233123
return;
31243124
}
@@ -18837,7 +18837,7 @@ static void ggml_thread_cpumask_next(const bool * global_mask, bool * local_mask
1883718837
}
1883818838
}
1883918839

18840-
void ggml_release_threadpool(struct ggml_compute_threadpool* threadpool) {
18840+
void ggml_threadpool_release(struct ggml_threadpool* threadpool) {
1884118841
if (!threadpool) return;
1884218842

1884318843
#ifndef GGML_USE_OPENMP
@@ -18868,36 +18868,36 @@ void ggml_release_threadpool(struct ggml_compute_threadpool* threadpool) {
1886818868

1886918869
#ifndef GGML_USE_OPENMP
1887018870
// pause/resume must be called under mutex
18871-
static void ggml_pause_threadpool_locked(struct ggml_compute_threadpool * threadpool) {
18871+
static void ggml_threadpool_pause_locked(struct ggml_threadpool * threadpool) {
1887218872
GGML_PRINT_DEBUG("Pausing threadpool\n");
1887318873
threadpool->pause = true;
1887418874
ggml_cond_broadcast(&threadpool->cond);
1887518875
}
1887618876

18877-
static void ggml_resume_threadpool_locked(struct ggml_compute_threadpool * threadpool) {
18877+
static void ggml_threadpool_resume_locked(struct ggml_threadpool * threadpool) {
1887818878
GGML_PRINT_DEBUG("Resuming threadpool\n");
1887918879
threadpool->pause = false;
1888018880
ggml_cond_broadcast(&threadpool->cond);
1888118881
}
1888218882
#endif
1888318883

18884-
void ggml_pause_threadpool(struct ggml_compute_threadpool * threadpool) {
18884+
void ggml_threadpool_pause(struct ggml_threadpool * threadpool) {
1888518885
#ifndef GGML_USE_OPENMP
1888618886
ggml_mutex_lock(&threadpool->mutex);
1888718887
if (!threadpool->pause) {
18888-
ggml_pause_threadpool_locked(threadpool);
18888+
ggml_threadpool_pause_locked(threadpool);
1888918889
}
1889018890
ggml_mutex_unlock(&threadpool->mutex);
1889118891
#else
1889218892
UNUSED(threadpool);
1889318893
#endif
1889418894
}
1889518895

18896-
void ggml_resume_threadpool(struct ggml_compute_threadpool * threadpool) {
18896+
void ggml_threadpool_resume(struct ggml_threadpool * threadpool) {
1889718897
#ifndef GGML_USE_OPENMP
1889818898
ggml_mutex_lock(&threadpool->mutex);
1889918899
if (threadpool->pause) {
18900-
ggml_resume_threadpool_locked(threadpool);
18900+
ggml_threadpool_resume_locked(threadpool);
1890118901
}
1890218902
ggml_mutex_unlock(&threadpool->mutex);
1890318903
#else
@@ -18908,7 +18908,7 @@ void ggml_resume_threadpool(struct ggml_compute_threadpool * threadpool) {
1890818908
struct ggml_cplan ggml_graph_plan(
1890918909
const struct ggml_cgraph * cgraph,
1891018910
int n_threads,
18911-
struct ggml_compute_threadpool * threadpool) {
18911+
struct ggml_threadpool * threadpool) {
1891218912

1891318913
if (threadpool == NULL) {
1891418914
GGML_PRINT_DEBUG("Threadpool is not specified. Will create a disposable threadpool : n_threads %d\n", n_threads);
@@ -19119,7 +19119,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
1911919119
#ifndef GGML_USE_OPENMP
1912019120

1912119121
static inline bool ggml_graph_compute_ready(struct ggml_compute_state * state) {
19122-
struct ggml_compute_threadpool * threadpool = state->threadpool;
19122+
struct ggml_threadpool * threadpool = state->threadpool;
1912319123

1912419124
if (state->pending || threadpool->stop || threadpool->pause) { return true; }
1912519125

@@ -19134,7 +19134,7 @@ static inline bool ggml_graph_compute_ready(struct ggml_compute_state * state) {
1913419134
}
1913519135

1913619136
static inline bool ggml_graph_compute_poll_for_work(struct ggml_compute_state * state) {
19137-
struct ggml_compute_threadpool * threadpool = state->threadpool;
19137+
struct ggml_threadpool * threadpool = state->threadpool;
1913819138

1913919139
// This seems to make 0 ... 100 a decent range for polling level across modern processors.
1914019140
// Perhaps, we can adjust it dynamically based on load and things.
@@ -19149,7 +19149,7 @@ static inline bool ggml_graph_compute_poll_for_work(struct ggml_compute_state *
1914919149
}
1915019150

1915119151
static inline bool ggml_graph_compute_check_for_work(struct ggml_compute_state * state) {
19152-
struct ggml_compute_threadpool * threadpool = state->threadpool;
19152+
struct ggml_threadpool * threadpool = state->threadpool;
1915319153

1915419154
if (ggml_graph_compute_poll_for_work(state)) {
1915519155
return state->pending;
@@ -19168,7 +19168,7 @@ static inline bool ggml_graph_compute_check_for_work(struct ggml_compute_state *
1916819168

1916919169
static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
1917019170
struct ggml_compute_state * state = (struct ggml_compute_state *) data;
19171-
struct ggml_compute_threadpool * threadpool = state->threadpool;
19171+
struct ggml_threadpool * threadpool = state->threadpool;
1917219172

1917319173
ggml_thread_apply_priority(threadpool->prio);
1917419174
if (ggml_thread_cpumask_is_valid(state->cpumask)) {
@@ -19205,7 +19205,7 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
1920519205
}
1920619206

1920719207
// Start processing new graph
19208-
static void ggml_graph_compute_kickoff(struct ggml_compute_threadpool * threadpool)
19208+
static void ggml_graph_compute_kickoff(struct ggml_threadpool * threadpool)
1920919209
{
1921019210
// always take the mutex here because the worker threads are doing hybrid poll/wait
1921119211

@@ -19221,7 +19221,7 @@ static void ggml_graph_compute_kickoff(struct ggml_compute_threadpool * threadpo
1922119221
}
1922219222

1922319223
// resume does cond broadcast
19224-
ggml_resume_threadpool_locked(threadpool);
19224+
ggml_threadpool_resume_locked(threadpool);
1922519225
} else {
1922619226
ggml_cond_broadcast(&threadpool->cond);
1922719227
}
@@ -19254,13 +19254,13 @@ bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, cons
1925419254
return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
1925519255
}
1925619256

19257-
static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
19257+
static struct ggml_threadpool * ggml_threadpool_create_impl(
1925819258
struct ggml_threadpool_params * tpp,
1925919259
struct ggml_cgraph * cgraph,
1926019260
struct ggml_cplan * cplan) {
1926119261

19262-
struct ggml_compute_threadpool * threadpool =
19263-
GGML_ALIGNED_MALLOC(sizeof(struct ggml_compute_threadpool));
19262+
struct ggml_threadpool * threadpool =
19263+
GGML_ALIGNED_MALLOC(sizeof(struct ggml_threadpool));
1926419264
{
1926519265
threadpool->cgraph = cgraph;
1926619266
threadpool->cplan = cplan;
@@ -19320,8 +19320,8 @@ static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
1932019320
return threadpool;
1932119321
}
1932219322

19323-
struct ggml_compute_threadpool * ggml_create_threadpool(struct ggml_threadpool_params * tpp) {
19324-
return ggml_create_threadpool_impl(tpp, NULL, NULL);
19323+
struct ggml_threadpool * ggml_threadpool_create(struct ggml_threadpool_params * tpp) {
19324+
return ggml_threadpool_create_impl(tpp, NULL, NULL);
1932519325
}
1932619326

1932719327
enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
@@ -19330,7 +19330,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
1933019330
GGML_ASSERT(cplan->work_size == 0 || cplan->work_data != NULL);
1933119331

1933219332
int n_threads = cplan->n_threads;
19333-
struct ggml_compute_threadpool * threadpool = cplan->threadpool;
19333+
struct ggml_threadpool * threadpool = cplan->threadpool;
1933419334

1933519335
bool disposable_threadpool = false;
1933619336

@@ -19339,7 +19339,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
1933919339
disposable_threadpool = true;
1934019340

1934119341
struct ggml_threadpool_params ttp = ggml_threadpool_params_default(n_threads);
19342-
threadpool = ggml_create_threadpool_impl(&ttp, cgraph, cplan);
19342+
threadpool = ggml_threadpool_create_impl(&ttp, cgraph, cplan);
1934319343
} else {
1934419344
// Reset some of the parameters that need resetting
1934519345
// No worker threads should be accessing the parameters below at this stage
@@ -19384,7 +19384,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
1938419384
enum ggml_status ret = threadpool->ec;
1938519385

1938619386
if (disposable_threadpool) {
19387-
ggml_release_threadpool(threadpool);
19387+
ggml_threadpool_release(threadpool);
1938819388
}
1938919389

1939019390
return ret;

include/llama.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -431,8 +431,8 @@ extern "C" {
431431
// Optional: an auto threadpool gets created in ggml if not passed explicitly
432432
LLAMA_API void llama_attach_threadpool(
433433
struct llama_context * ctx,
434-
ggml_compute_threadpool_t threadpool,
435-
ggml_compute_threadpool_t threadpool_batch);
434+
ggml_threadpool_t threadpool,
435+
ggml_threadpool_t threadpool_batch);
436436
LLAMA_API void llama_detach_threadpool(struct llama_context * ctx);
437437

438438
// Call once at the end of the program - currently only used for MPI

0 commit comments

Comments
 (0)