Skip to content

Commit f643120

Browse files
rare-magma authored
docker: add perplexity and bench commands to full image (#11438)
Signed-off-by: rare-magma <[email protected]>
1 parent 6e84b0a commit f643120

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

.devops/tools.sh

+9-1
Original file line number | Diff line number | Diff line change
@@ -13,9 +13,13 @@ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
1313
exec ./llama-quantize "$@"
1414
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
1515
exec ./llama-cli "$@"
16+
elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
17+
exec ./llama-bench "$@"
18+
elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
19+
exec ./llama-perplexity "$@"
1620
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
1721
echo "Converting PTH to GGML..."
18-
for i in `ls $1/$2/ggml-model-f16.bin*`; do
22+
for i in $(ls $1/$2/ggml-model-f16.bin*); do
1923
if [ -f "${i/f16/q4_0}" ]; then
2024
echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
2125
else
@@ -30,6 +34,10 @@ else
3034
echo "Available commands: "
3135
echo " --run (-r): Run a model previously converted into ggml"
3236
echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
37+
echo " --bench (-b): Benchmark the performance of the inference for various parameters."
38+
echo " ex: -m model.gguf"
39+
echo " --perplexity (-p): Measure the perplexity of a model over a given text."
40+
echo " ex: -m model.gguf -f file.txt"
3341
echo " --convert (-c): Convert a llama model into ggml"
3442
echo " ex: --outtype f16 \"/models/7B/\" "
3543
echo " --quantize (-q): Optimize with quantization process ggml"

0 commit comments

Comments (0)