@@ -291,7 +291,6 @@ static void print_usage(int /* argc */, char ** argv) {
291
291
printf (" -fa, --flash-attn <0|1> (default: %s)\n " , join (cmd_params_defaults.flash_attn , " ," ).c_str ());
292
292
printf (" -mmp, --mmap <0|1> (default: %s)\n " , join (cmd_params_defaults.use_mmap , " ," ).c_str ());
293
293
printf (" --numa <distribute|isolate|numactl> (default: disabled)\n " );
294
- printf (" -mt, --max-threads <n> (default: %d)\n " , cmd_params_defaults.cpuparams .n_threads );
295
294
printf (" -C, --cpu-mask <hex> (default: 0x0)\n " );
296
295
printf (" --cpu-strict <0|1> (default: %d)\n " , cmd_params_defaults.cpuparams .strict_cpu );
297
296
printf (" --priority <0|1|2|3> (default: %d)\n " , cmd_params_defaults.cpuparams .priority );
@@ -499,12 +498,6 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
499
498
else if (value == " numactl" ) { params.numa = GGML_NUMA_STRATEGY_NUMACTL; }
500
499
else { invalid_param = true ; break ; }
501
500
}
502
- } else if (arg == " -mt" || arg == " --max-threads" ) {
503
- if (++i >= argc) {
504
- invalid_param = true ;
505
- break ;
506
- }
507
- params.cpuparams .n_threads = std::stoi (argv[i]);
508
501
} else if (arg == " -C" || arg == " --cpu-mask" ) {
509
502
if (++i >= argc) {
510
503
invalid_param = true ;
@@ -1435,21 +1428,6 @@ int main(int argc, char ** argv) {
1435
1428
1436
1429
postprocess_cpu_params (params.cpuparams );
1437
1430
1438
- struct ggml_threadpool_params tpp;
1439
- tpp.n_threads = params.cpuparams .n_threads ;
1440
- tpp.mask_specified = params.cpuparams .mask_valid ;
1441
- tpp.strict_cpu = params.cpuparams .strict_cpu ;
1442
- tpp.prio = params.cpuparams .priority ;
1443
- tpp.poll = params.cpuparams .poll ;
1444
-
1445
- std::memcpy (&tpp.cpumask [0 ], &params.cpuparams .cpumask [0 ], GGML_MAX_N_THREADS);
1446
-
1447
- struct ggml_compute_threadpool * threadpool = ggml_create_threadpool (&tpp);
1448
- if (!threadpool) {
1449
- LOG_TEE (" %s: threadpool create failed : n_threads %d\n " , __func__, tpp.n_threads );
1450
- exit (1 );
1451
- }
1452
-
1453
1431
for (const auto & inst : params_instances) {
1454
1432
// keep the same model between tests when possible
1455
1433
if (!lmodel || !prev_inst || !inst.equal_mparams (*prev_inst)) {
@@ -1475,6 +1453,22 @@ int main(int argc, char ** argv) {
1475
1453
test t (inst, lmodel, ctx);
1476
1454
1477
1455
llama_kv_cache_clear (ctx);
1456
+
1457
+ struct ggml_threadpool_params tpp;
1458
+ tpp.n_threads = t.n_threads ;
1459
+ tpp.mask_specified = params.cpuparams .mask_valid ;
1460
+ tpp.strict_cpu = params.cpuparams .strict_cpu ;
1461
+ tpp.prio = params.cpuparams .priority ;
1462
+ tpp.poll = params.cpuparams .poll ;
1463
+
1464
+ std::memcpy (&tpp.cpumask [0 ], &params.cpuparams .cpumask [0 ], GGML_MAX_N_THREADS);
1465
+
1466
+ struct ggml_compute_threadpool * threadpool = ggml_create_threadpool (&tpp);
1467
+ if (!threadpool) {
1468
+ LOG_TEE (" %s: threadpool create failed : n_threads %d\n " , __func__, tpp.n_threads );
1469
+ exit (1 );
1470
+ }
1471
+
1478
1472
llama_attach_threadpool (ctx, threadpool);
1479
1473
1480
1474
// warmup run
@@ -1515,9 +1509,9 @@ int main(int argc, char ** argv) {
1515
1509
llama_print_timings (ctx);
1516
1510
1517
1511
llama_free (ctx);
1518
- }
1519
1512
1520
- ggml_release_threadpool (threadpool);
1513
+ ggml_release_threadpool (threadpool);
1514
+ }
1521
1515
1522
1516
llama_free_model (lmodel);
1523
1517
0 commit comments