
Commit 8ecdd36

max-krasnyansky authored and fmz committed
bench: create fresh threadpool for each test
For benchmarking it's better to start a fresh pool for each test, with the exact number of threads needed for that test. Having larger pools is suboptimal (causes more load, etc.).
1 parent b32512a commit 8ecdd36
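For reference, a condensed sketch of the per-test lifecycle this commit moves to, pieced together from the llama-bench.cpp changes below (error handling and the timed runs are elided; this is not the exact code):

    for (const auto & inst : params_instances) {
        test t(inst, lmodel, ctx);

        // build a pool sized to exactly this test's thread count
        struct ggml_threadpool_params tpp;
        tpp.n_threads = t.n_threads;
        tpp.mask_specified = params.cpuparams.mask_valid;
        tpp.strict_cpu = params.cpuparams.strict_cpu;
        tpp.prio = params.cpuparams.priority;
        tpp.poll = params.cpuparams.poll;
        std::memcpy(&tpp.cpumask[0], &params.cpuparams.cpumask[0], GGML_MAX_N_THREADS);

        struct ggml_compute_threadpool* threadpool = ggml_create_threadpool(&tpp);
        if (!threadpool) { /* log the failure and exit */ }

        llama_attach_threadpool(ctx, threadpool);

        // ... warmup run and the timed runs for this test ...

        ggml_release_threadpool(threadpool); // fresh pool is torn down before the next test
    }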

File tree: 1 file changed (+18 −24 lines)


examples/llama-bench/llama-bench.cpp

@@ -291,7 +291,6 @@ static void print_usage(int /* argc */, char ** argv) {
     printf(" -fa, --flash-attn <0|1> (default: %s)\n", join(cmd_params_defaults.flash_attn, ",").c_str());
     printf(" -mmp, --mmap <0|1> (default: %s)\n", join(cmd_params_defaults.use_mmap, ",").c_str());
     printf(" --numa <distribute|isolate|numactl> (default: disabled)\n");
-    printf(" -mt, --max-threads <n> (default: %d)\n", cmd_params_defaults.cpuparams.n_threads);
     printf(" -C, --cpu-mask <hex> (default: 0x0)\n");
     printf(" --cpu-strict <0|1> (default: %d)\n", cmd_params_defaults.cpuparams.strict_cpu);
     printf(" --priority <0|1|2|3> (default: %d)\n", cmd_params_defaults.cpuparams.priority);
@@ -499,12 +498,6 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
                 else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; }
                 else { invalid_param = true; break; }
             }
-        } else if (arg == "-mt" || arg == "--max-threads") {
-            if (++i >= argc) {
-                invalid_param = true;
-                break;
-            }
-            params.cpuparams.n_threads = std::stoi(argv[i]);
         } else if (arg == "-C" || arg == "--cpu-mask") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -1435,21 +1428,6 @@ int main(int argc, char ** argv) {
 
     postprocess_cpu_params(params.cpuparams);
 
-    struct ggml_threadpool_params tpp;
-    tpp.n_threads = params.cpuparams.n_threads;
-    tpp.mask_specified = params.cpuparams.mask_valid;
-    tpp.strict_cpu = params.cpuparams.strict_cpu;
-    tpp.prio = params.cpuparams.priority;
-    tpp.poll = params.cpuparams.poll;
-
-    std::memcpy(&tpp.cpumask[0], &params.cpuparams.cpumask[0], GGML_MAX_N_THREADS);
-
-    struct ggml_compute_threadpool* threadpool = ggml_create_threadpool(&tpp);
-    if (!threadpool) {
-        LOG_TEE("%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads);
-        exit(1);
-    }
-
     for (const auto & inst : params_instances) {
         // keep the same model between tests when possible
         if (!lmodel || !prev_inst || !inst.equal_mparams(*prev_inst)) {
@@ -1475,6 +1453,22 @@ int main(int argc, char ** argv) {
         test t(inst, lmodel, ctx);
 
         llama_kv_cache_clear(ctx);
+
+        struct ggml_threadpool_params tpp;
+        tpp.n_threads = t.n_threads;
+        tpp.mask_specified = params.cpuparams.mask_valid;
+        tpp.strict_cpu = params.cpuparams.strict_cpu;
+        tpp.prio = params.cpuparams.priority;
+        tpp.poll = params.cpuparams.poll;
+
+        std::memcpy(&tpp.cpumask[0], &params.cpuparams.cpumask[0], GGML_MAX_N_THREADS);
+
+        struct ggml_compute_threadpool* threadpool = ggml_create_threadpool(&tpp);
+        if (!threadpool) {
+            LOG_TEE("%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads);
+            exit(1);
+        }
+
         llama_attach_threadpool(ctx, threadpool);
 
         // warmup run
@@ -1515,9 +1509,9 @@ int main(int argc, char ** argv) {
         llama_print_timings(ctx);
 
         llama_free(ctx);
-    }
 
-    ggml_release_threadpool(threadpool);
+        ggml_release_threadpool(threadpool);
+    }
 
     llama_free_model(lmodel);
 