Skip to content

Commit 1f31478

Browse files
author
Michael Coppola
committed
server.cpp now accepts parameter -tb N, --threads-batch N
1 parent 019ba1d commit 1f31478

File tree

1 file changed

+19
-9
lines changed

1 file changed

+19
-9
lines changed

examples/server/server.cpp

+19 -9
Original file line number | Diff line number | Diff line change
@@ -774,15 +774,16 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
774774
printf("usage: %s [options]\n", argv0);
775775
printf("\n");
776776
printf("options:\n");
777-
printf(" -h, --help show this help message and exit\n");
778-
printf(" -v, --verbose verbose output (default: %s)\n", server_verbose ? "enabled" : "disabled");
779-
printf(" -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
780-
printf(" -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx);
781-
printf(" --rope-freq-base N RoPE base frequency (default: loaded from model)\n");
782-
printf(" --rope-freq-scale N RoPE frequency scaling factor (default: loaded from model)\n");
783-
printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
784-
printf(" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
785-
printf(" not recommended: doubles context memory required and no measurable increase in quality\n");
777+
printf(" -h, --help show this help message and exit\n");
778+
printf(" -v, --verbose verbose output (default: %s)\n", server_verbose ? "enabled" : "disabled");
779+
printf(" -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
780+
printf(" -tb N, --threads-batch N number of threads to use during batch and prompt processing (default: same as --threads)\n");
781+
printf(" -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx);
782+
printf(" --rope-freq-base N RoPE base frequency (default: loaded from model)\n");
783+
printf(" --rope-freq-scale N RoPE frequency scaling factor (default: loaded from model)\n");
784+
printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
785+
printf(" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
786+
printf(" not recommended: doubles context memory required and no measurable increase in quality\n");
786787
if (llama_mlock_supported())
787788
{
788789
printf(" --mlock force system to keep model in RAM rather than swapping or compressing\n");
@@ -927,6 +928,15 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
927928
}
928929
params.n_threads = std::stoi(argv[i]);
929930
}
931+
else if (arg == "--threads-batch" || arg == "-tb")
932+
{
933+
if (++i >= argc)
934+
{
935+
invalid_param = true;
936+
break;
937+
}
938+
params.n_threads_batch = std::stoi(argv[i]);
939+
}
930940
else if (arg == "-b" || arg == "--batch-size")
931941
{
932942
if (++i >= argc)

0 commit comments

Comments
 (0)