Skip to content

Commit 5d500e8

Browse files
authored
ci : add 7B CUDA tests (ggml-org#2319)
* ci : add 7B CUDA tests ggml-ci * ci : add Q2_K to the tests * ci : bump CUDA ppl chunks ggml-ci * ci : increase CUDA TG len + add --ignore-eos * ci : reduce CUDA ppl cunks down to 4 to save time
1 parent 7d5f184 commit 5d500e8

File tree

2 files changed

+168
-16
lines changed

2 files changed

+168
-16
lines changed

ci/README.md

+5
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,10 @@ It is a good practice, before publishing changes to execute the full CI locally
1616

1717
```bash
1818
mkdir tmp
19+
20+
# CPU-only build
1921
bash ./ci/run.sh ./tmp/results ./tmp/mnt
22+
23+
# with CUDA support
24+
GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
2025
```

ci/run.sh

+163-16
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,15 @@
11
#/bin/bash
2+
#
3+
# sample usage:
4+
#
5+
# mkdir tmp
6+
#
7+
# # CPU-only build
8+
# bash ./ci/run.sh ./tmp/results ./tmp/mnt
9+
#
10+
# # with CUDA support
11+
# GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
12+
#
213

314
if [ -z "$2" ]; then
415
echo "usage: $0 <output-dir> <mnt-dir>"
@@ -101,7 +112,7 @@ function gg_run_ctest_release {
101112
(time cmake -DCMAKE_BUILD_TYPE=Release .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
102113
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
103114

104-
if [ -z $GG_BUILD_LOW_PERF ]; then
115+
if [ -z ${GG_BUILD_LOW_PERF} ]; then
105116
(time ctest --output-on-failure ) 2>&1 | tee -a $OUT/${ci}-ctest.log
106117
else
107118
(time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
@@ -154,6 +165,7 @@ function gg_run_open_llama_3b_v2 {
154165
model_q4_1="${path_models}/ggml-model-q4_1.bin"
155166
model_q5_0="${path_models}/ggml-model-q5_0.bin"
156167
model_q5_1="${path_models}/ggml-model-q5_1.bin"
168+
model_q2_k="${path_models}/ggml-model-q2_k.bin"
157169
model_q3_k="${path_models}/ggml-model-q3_k.bin"
158170
model_q4_k="${path_models}/ggml-model-q4_k.bin"
159171
model_q5_k="${path_models}/ggml-model-q5_k.bin"
@@ -166,28 +178,31 @@ function gg_run_open_llama_3b_v2 {
166178
./bin/quantize ${model_f16} ${model_q4_1} q4_1
167179
./bin/quantize ${model_f16} ${model_q5_0} q5_0
168180
./bin/quantize ${model_f16} ${model_q5_1} q5_1
181+
./bin/quantize ${model_f16} ${model_q2_k} q2_k
169182
./bin/quantize ${model_f16} ${model_q3_k} q3_k
170183
./bin/quantize ${model_f16} ${model_q4_k} q4_k
171184
./bin/quantize ${model_f16} ${model_q5_k} q5_k
172185
./bin/quantize ${model_f16} ${model_q6_k} q6_k
173186

174-
(time ./bin/main --model ${model_f16} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
175-
(time ./bin/main --model ${model_q8_0} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
176-
(time ./bin/main --model ${model_q4_0} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
177-
(time ./bin/main --model ${model_q4_1} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
178-
(time ./bin/main --model ${model_q5_0} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
179-
(time ./bin/main --model ${model_q5_1} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
180-
(time ./bin/main --model ${model_q3_k} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
181-
(time ./bin/main --model ${model_q4_k} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
182-
(time ./bin/main --model ${model_q5_k} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
183-
(time ./bin/main --model ${model_q6_k} -s 1234 -n 64 -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
187+
(time ./bin/main --model ${model_f16} -s 1234 -n 64 -p --ignore-eos "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
188+
(time ./bin/main --model ${model_q8_0} -s 1234 -n 64 -p --ignore-eos "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
189+
(time ./bin/main --model ${model_q4_0} -s 1234 -n 64 -p --ignore-eos "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
190+
(time ./bin/main --model ${model_q4_1} -s 1234 -n 64 -p --ignore-eos "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
191+
(time ./bin/main --model ${model_q5_0} -s 1234 -n 64 -p --ignore-eos "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
192+
(time ./bin/main --model ${model_q5_1} -s 1234 -n 64 -p --ignore-eos "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
193+
(time ./bin/main --model ${model_q2_k} -s 1234 -n 64 -p --ignore-eos "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
194+
(time ./bin/main --model ${model_q3_k} -s 1234 -n 64 -p --ignore-eos "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
195+
(time ./bin/main --model ${model_q4_k} -s 1234 -n 64 -p --ignore-eos "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
196+
(time ./bin/main --model ${model_q5_k} -s 1234 -n 64 -p --ignore-eos "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
197+
(time ./bin/main --model ${model_q6_k} -s 1234 -n 64 -p --ignore-eos "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
184198

185199
(time ./bin/perplexity --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
186200
(time ./bin/perplexity --model ${model_q8_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
187201
(time ./bin/perplexity --model ${model_q4_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
188202
(time ./bin/perplexity --model ${model_q4_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
189203
(time ./bin/perplexity --model ${model_q5_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
190204
(time ./bin/perplexity --model ${model_q5_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
205+
(time ./bin/perplexity --model ${model_q2_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
191206
(time ./bin/perplexity --model ${model_q3_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
192207
(time ./bin/perplexity --model ${model_q4_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
193208
(time ./bin/perplexity --model ${model_q5_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
@@ -212,6 +227,7 @@ function gg_run_open_llama_3b_v2 {
212227
check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
213228
check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
214229
check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
230+
check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
215231
check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
216232
check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
217233
check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
@@ -232,6 +248,133 @@ function gg_sum_open_llama_3b_v2 {
232248
gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)"
233249
gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)"
234250
gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)"
251+
gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)"
252+
gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)"
253+
gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)"
254+
gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)"
255+
gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)"
256+
}
257+
258+
# open_llama_7b_v2
259+
# requires: GG_BUILD_CUDA
260+
261+
function gg_run_open_llama_7b_v2 {
262+
cd ${SRC}
263+
264+
gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/config.json
265+
gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/tokenizer.model
266+
gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/tokenizer_config.json
267+
gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/special_tokens_map.json
268+
gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/pytorch_model.bin.index.json
269+
gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00001-of-00002.bin
270+
gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00002-of-00002.bin
271+
gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/generation_config.json
272+
273+
gg_wget models-mnt/wikitext/ https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
274+
unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
275+
276+
path_models="../models-mnt/open-llama/7B-v2"
277+
path_wiki="../models-mnt/wikitext/wikitext-2-raw"
278+
279+
rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
280+
281+
set -e
282+
283+
(time cmake -DCMAKE_BUILD_TYPE=Release -DLLAMA_CUBLAS=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
284+
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
285+
286+
python3 ../convert.py ${path_models}
287+
288+
model_f16="${path_models}/ggml-model-f16.bin"
289+
model_q8_0="${path_models}/ggml-model-q8_0.bin"
290+
model_q4_0="${path_models}/ggml-model-q4_0.bin"
291+
model_q4_1="${path_models}/ggml-model-q4_1.bin"
292+
model_q5_0="${path_models}/ggml-model-q5_0.bin"
293+
model_q5_1="${path_models}/ggml-model-q5_1.bin"
294+
model_q2_k="${path_models}/ggml-model-q2_k.bin"
295+
model_q3_k="${path_models}/ggml-model-q3_k.bin"
296+
model_q4_k="${path_models}/ggml-model-q4_k.bin"
297+
model_q5_k="${path_models}/ggml-model-q5_k.bin"
298+
model_q6_k="${path_models}/ggml-model-q6_k.bin"
299+
300+
wiki_test="${path_wiki}/wiki.test.raw"
301+
302+
./bin/quantize ${model_f16} ${model_q8_0} q8_0
303+
./bin/quantize ${model_f16} ${model_q4_0} q4_0
304+
./bin/quantize ${model_f16} ${model_q4_1} q4_1
305+
./bin/quantize ${model_f16} ${model_q5_0} q5_0
306+
./bin/quantize ${model_f16} ${model_q5_1} q5_1
307+
./bin/quantize ${model_f16} ${model_q2_k} q2_k
308+
./bin/quantize ${model_f16} ${model_q3_k} q3_k
309+
./bin/quantize ${model_f16} ${model_q4_k} q4_k
310+
./bin/quantize ${model_f16} ${model_q5_k} q5_k
311+
./bin/quantize ${model_f16} ${model_q6_k} q6_k
312+
313+
(time ./bin/main --model ${model_f16} -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
314+
(time ./bin/main --model ${model_q8_0} -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
315+
(time ./bin/main --model ${model_q4_0} -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
316+
(time ./bin/main --model ${model_q4_1} -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
317+
(time ./bin/main --model ${model_q5_0} -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
318+
(time ./bin/main --model ${model_q5_1} -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
319+
(time ./bin/main --model ${model_q2_k} -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
320+
(time ./bin/main --model ${model_q3_k} -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
321+
(time ./bin/main --model ${model_q4_k} -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
322+
(time ./bin/main --model ${model_q5_k} -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
323+
(time ./bin/main --model ${model_q6_k} -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
324+
325+
(time ./bin/perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
326+
(time ./bin/perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
327+
(time ./bin/perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
328+
(time ./bin/perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
329+
(time ./bin/perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
330+
(time ./bin/perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
331+
(time ./bin/perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
332+
(time ./bin/perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
333+
(time ./bin/perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
334+
(time ./bin/perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
335+
(time ./bin/perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
336+
337+
function check_ppl {
338+
qnt="$1"
339+
ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
340+
341+
if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then
342+
printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl"
343+
return 20
344+
fi
345+
346+
printf ' - %s @ %s OK\n' "$qnt" "$ppl"
347+
return 0
348+
}
349+
350+
check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
351+
check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
352+
check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
353+
check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
354+
check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
355+
check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
356+
check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
357+
check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
358+
check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
359+
check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
360+
check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log
361+
362+
set +e
363+
}
364+
365+
function gg_sum_open_llama_7b_v2 {
366+
gg_printf '### %s\n\n' "${ci}"
367+
368+
gg_printf 'OpenLLaMA 7B-v2:\n'
369+
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
370+
gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)"
371+
gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)"
372+
gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
373+
gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)"
374+
gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)"
375+
gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)"
376+
gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)"
377+
gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)"
235378
gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)"
236379
gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)"
237380
gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)"
@@ -240,7 +383,7 @@ function gg_sum_open_llama_3b_v2 {
240383

241384
## main
242385

243-
if [ -z $GG_BUILD_LOW_PERF ]; then
386+
if [ -z ${GG_BUILD_LOW_PERF} ]; then
244387
rm -rf ${SRC}/models-mnt
245388

246389
mnt_models=${MNT}/models
@@ -252,11 +395,15 @@ fi
252395

253396
ret=0
254397

255-
#test $ret -eq 0 && gg_run ctest_debug
256-
#test $ret -eq 0 && gg_run ctest_release
398+
test $ret -eq 0 && gg_run ctest_debug
399+
test $ret -eq 0 && gg_run ctest_release
257400

258-
if [ -z $GG_BUILD_LOW_PERF ]; then
259-
test $ret -eq 0 && gg_run open_llama_3b_v2
401+
if [ -z ${GG_BUILD_LOW_PERF} ]; then
402+
if [ -z ${GG_BUILD_CUDA} ]; then
403+
test $ret -eq 0 && gg_run open_llama_3b_v2
404+
else
405+
test $ret -eq 0 && gg_run open_llama_7b_v2
406+
fi
260407
fi
261408

262409
exit $ret

0 commit comments

Comments
 (0)