1
1
#!/bin/bash
2
+ #
3
+ # sample usage:
4
+ #
5
+ # mkdir tmp
6
+ #
7
+ # # CPU-only build
8
+ # bash ./ci/run.sh ./tmp/results ./tmp/mnt
9
+ #
10
+ # # with CUDA support
11
+ # GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
12
+ #
2
13
3
14
if [ -z " $2 " ]; then
4
15
echo " usage: $0 <output-dir> <mnt-dir>"
@@ -101,7 +112,7 @@ function gg_run_ctest_release {
101
112
(time cmake -DCMAKE_BUILD_TYPE=Release .. ) 2>&1 | tee -a $OUT /${ci} -cmake.log
102
113
(time make -j ) 2>&1 | tee -a $OUT /${ci} -make.log
103
114
104
- if [ -z $GG_BUILD_LOW_PERF ]; then
115
+ if [ -z ${ GG_BUILD_LOW_PERF} ]; then
105
116
(time ctest --output-on-failure ) 2>&1 | tee -a $OUT /${ci} -ctest.log
106
117
else
107
118
(time ctest --output-on-failure -E test-opt ) 2>&1 | tee -a $OUT /${ci} -ctest.log
@@ -154,6 +165,7 @@ function gg_run_open_llama_3b_v2 {
154
165
model_q4_1=" ${path_models} /ggml-model-q4_1.bin"
155
166
model_q5_0=" ${path_models} /ggml-model-q5_0.bin"
156
167
model_q5_1=" ${path_models} /ggml-model-q5_1.bin"
168
+ model_q2_k=" ${path_models} /ggml-model-q2_k.bin"
157
169
model_q3_k=" ${path_models} /ggml-model-q3_k.bin"
158
170
model_q4_k=" ${path_models} /ggml-model-q4_k.bin"
159
171
model_q5_k=" ${path_models} /ggml-model-q5_k.bin"
@@ -166,28 +178,31 @@ function gg_run_open_llama_3b_v2 {
166
178
./bin/quantize ${model_f16} ${model_q4_1} q4_1
167
179
./bin/quantize ${model_f16} ${model_q5_0} q5_0
168
180
./bin/quantize ${model_f16} ${model_q5_1} q5_1
181
+ ./bin/quantize ${model_f16} ${model_q2_k} q2_k
169
182
./bin/quantize ${model_f16} ${model_q3_k} q3_k
170
183
./bin/quantize ${model_f16} ${model_q4_k} q4_k
171
184
./bin/quantize ${model_f16} ${model_q5_k} q5_k
172
185
./bin/quantize ${model_f16} ${model_q6_k} q6_k
173
186
174
- (time ./bin/main --model ${model_f16} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-f16.log
175
- (time ./bin/main --model ${model_q8_0} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q8_0.log
176
- (time ./bin/main --model ${model_q4_0} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_0.log
177
- (time ./bin/main --model ${model_q4_1} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_1.log
178
- (time ./bin/main --model ${model_q5_0} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_0.log
179
- (time ./bin/main --model ${model_q5_1} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_1.log
180
- (time ./bin/main --model ${model_q3_k} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q3_k.log
181
- (time ./bin/main --model ${model_q4_k} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_k.log
182
- (time ./bin/main --model ${model_q5_k} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_k.log
183
- (time ./bin/main --model ${model_q6_k} -s 1234 -n 64 -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q6_k.log
187
+ (time ./bin/main --model ${model_f16} -s 1234 -n 64 -p --ignore-eos " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-f16.log
188
+ (time ./bin/main --model ${model_q8_0} -s 1234 -n 64 -p --ignore-eos " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q8_0.log
189
+ (time ./bin/main --model ${model_q4_0} -s 1234 -n 64 -p --ignore-eos " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_0.log
190
+ (time ./bin/main --model ${model_q4_1} -s 1234 -n 64 -p --ignore-eos " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_1.log
191
+ (time ./bin/main --model ${model_q5_0} -s 1234 -n 64 -p --ignore-eos " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_0.log
192
+ (time ./bin/main --model ${model_q5_1} -s 1234 -n 64 -p --ignore-eos " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_1.log
193
+ (time ./bin/main --model ${model_q2_k} -s 1234 -n 64 -p --ignore-eos " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q2_k.log
194
+ (time ./bin/main --model ${model_q3_k} -s 1234 -n 64 -p --ignore-eos " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q3_k.log
195
+ (time ./bin/main --model ${model_q4_k} -s 1234 -n 64 -p --ignore-eos " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_k.log
196
+ (time ./bin/main --model ${model_q5_k} -s 1234 -n 64 -p --ignore-eos " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_k.log
197
+ (time ./bin/main --model ${model_q6_k} -s 1234 -n 64 -p --ignore-eos " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q6_k.log
184
198
185
199
(time ./bin/perplexity --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT /${ci} -tg-f16.log
186
200
(time ./bin/perplexity --model ${model_q8_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT /${ci} -tg-q8_0.log
187
201
(time ./bin/perplexity --model ${model_q4_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT /${ci} -tg-q4_0.log
188
202
(time ./bin/perplexity --model ${model_q4_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT /${ci} -tg-q4_1.log
189
203
(time ./bin/perplexity --model ${model_q5_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT /${ci} -tg-q5_0.log
190
204
(time ./bin/perplexity --model ${model_q5_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT /${ci} -tg-q5_1.log
205
+ (time ./bin/perplexity --model ${model_q2_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT /${ci} -tg-q2_k.log
191
206
(time ./bin/perplexity --model ${model_q3_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT /${ci} -tg-q3_k.log
192
207
(time ./bin/perplexity --model ${model_q4_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT /${ci} -tg-q4_k.log
193
208
(time ./bin/perplexity --model ${model_q5_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 3 ) 2>&1 | tee -a $OUT /${ci} -tg-q5_k.log
@@ -212,6 +227,7 @@ function gg_run_open_llama_3b_v2 {
212
227
check_ppl " q4_1" " $( cat $OUT /${ci} -tg-q4_1.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
213
228
check_ppl " q5_0" " $( cat $OUT /${ci} -tg-q5_0.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
214
229
check_ppl " q5_1" " $( cat $OUT /${ci} -tg-q5_1.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
230
+ check_ppl " q2_k" " $( cat $OUT /${ci} -tg-q2_k.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
215
231
check_ppl " q3_k" " $( cat $OUT /${ci} -tg-q3_k.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
216
232
check_ppl " q4_k" " $( cat $OUT /${ci} -tg-q4_k.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
217
233
check_ppl " q5_k" " $( cat $OUT /${ci} -tg-q5_k.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
@@ -232,6 +248,133 @@ function gg_sum_open_llama_3b_v2 {
232
248
gg_printf ' - q4_1:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q4_1.log) "
233
249
gg_printf ' - q5_0:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q5_0.log) "
234
250
gg_printf ' - q5_1:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q5_1.log) "
251
+ gg_printf ' - q2_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q2_k.log) "
252
+ gg_printf ' - q3_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q3_k.log) "
253
+ gg_printf ' - q4_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q4_k.log) "
254
+ gg_printf ' - q5_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q5_k.log) "
255
+ gg_printf ' - q6_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q6_k.log) "
256
+ }
257
+
258
+ # open_llama_7b_v2
259
+ # requires: GG_BUILD_CUDA
260
+
261
+ function gg_run_open_llama_7b_v2 {
262
+ cd ${SRC}
263
+
264
+ gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/config.json
265
+ gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/tokenizer.model
266
+ gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/tokenizer_config.json
267
+ gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/special_tokens_map.json
268
+ gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/pytorch_model.bin.index.json
269
+ gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00001-of-00002.bin
270
+ gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00002-of-00002.bin
271
+ gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/generation_config.json
272
+
273
+ gg_wget models-mnt/wikitext/ https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip
274
+ unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/
275
+
276
+ path_models=" ../models-mnt/open-llama/7B-v2"
277
+ path_wiki=" ../models-mnt/wikitext/wikitext-2-raw"
278
+
279
+ rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
280
+
281
+ set -e
282
+
283
+ (time cmake -DCMAKE_BUILD_TYPE=Release -DLLAMA_CUBLAS=1 .. ) 2>&1 | tee -a $OUT /${ci} -cmake.log
284
+ (time make -j ) 2>&1 | tee -a $OUT /${ci} -make.log
285
+
286
+ python3 ../convert.py ${path_models}
287
+
288
+ model_f16=" ${path_models} /ggml-model-f16.bin"
289
+ model_q8_0=" ${path_models} /ggml-model-q8_0.bin"
290
+ model_q4_0=" ${path_models} /ggml-model-q4_0.bin"
291
+ model_q4_1=" ${path_models} /ggml-model-q4_1.bin"
292
+ model_q5_0=" ${path_models} /ggml-model-q5_0.bin"
293
+ model_q5_1=" ${path_models} /ggml-model-q5_1.bin"
294
+ model_q2_k=" ${path_models} /ggml-model-q2_k.bin"
295
+ model_q3_k=" ${path_models} /ggml-model-q3_k.bin"
296
+ model_q4_k=" ${path_models} /ggml-model-q4_k.bin"
297
+ model_q5_k=" ${path_models} /ggml-model-q5_k.bin"
298
+ model_q6_k=" ${path_models} /ggml-model-q6_k.bin"
299
+
300
+ wiki_test=" ${path_wiki} /wiki.test.raw"
301
+
302
+ ./bin/quantize ${model_f16} ${model_q8_0} q8_0
303
+ ./bin/quantize ${model_f16} ${model_q4_0} q4_0
304
+ ./bin/quantize ${model_f16} ${model_q4_1} q4_1
305
+ ./bin/quantize ${model_f16} ${model_q5_0} q5_0
306
+ ./bin/quantize ${model_f16} ${model_q5_1} q5_1
307
+ ./bin/quantize ${model_f16} ${model_q2_k} q2_k
308
+ ./bin/quantize ${model_f16} ${model_q3_k} q3_k
309
+ ./bin/quantize ${model_f16} ${model_q4_k} q4_k
310
+ ./bin/quantize ${model_f16} ${model_q5_k} q5_k
311
+ ./bin/quantize ${model_f16} ${model_q6_k} q6_k
312
+
313
+ (time ./bin/main --model ${model_f16} -ngl 999 -s 1234 -n 256 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-f16.log
314
+ (time ./bin/main --model ${model_q8_0} -ngl 999 -s 1234 -n 256 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q8_0.log
315
+ (time ./bin/main --model ${model_q4_0} -ngl 999 -s 1234 -n 256 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_0.log
316
+ (time ./bin/main --model ${model_q4_1} -ngl 999 -s 1234 -n 256 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_1.log
317
+ (time ./bin/main --model ${model_q5_0} -ngl 999 -s 1234 -n 256 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_0.log
318
+ (time ./bin/main --model ${model_q5_1} -ngl 999 -s 1234 -n 256 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_1.log
319
+ (time ./bin/main --model ${model_q2_k} -ngl 999 -s 1234 -n 256 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q2_k.log
320
+ (time ./bin/main --model ${model_q3_k} -ngl 999 -s 1234 -n 256 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q3_k.log
321
+ (time ./bin/main --model ${model_q4_k} -ngl 999 -s 1234 -n 256 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q4_k.log
322
+ (time ./bin/main --model ${model_q5_k} -ngl 999 -s 1234 -n 256 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q5_k.log
323
+ (time ./bin/main --model ${model_q6_k} -ngl 999 -s 1234 -n 256 --ignore-eos -p " I believe the meaning of life is" ) 2>&1 | tee -a $OUT /${ci} -tg-q6_k.log
324
+
325
+ (time ./bin/perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT /${ci} -tg-f16.log
326
+ (time ./bin/perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT /${ci} -tg-q8_0.log
327
+ (time ./bin/perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT /${ci} -tg-q4_0.log
328
+ (time ./bin/perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT /${ci} -tg-q4_1.log
329
+ (time ./bin/perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT /${ci} -tg-q5_0.log
330
+ (time ./bin/perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT /${ci} -tg-q5_1.log
331
+ (time ./bin/perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT /${ci} -tg-q2_k.log
332
+ (time ./bin/perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT /${ci} -tg-q3_k.log
333
+ (time ./bin/perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT /${ci} -tg-q4_k.log
334
+ (time ./bin/perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT /${ci} -tg-q5_k.log
335
+ (time ./bin/perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT /${ci} -tg-q6_k.log
336
+
337
+ function check_ppl {
338
+ qnt=" $1 "
339
+ ppl=$( echo " $2 " | grep -oE " [0-9]+\.[0-9]+" | tail -n 1)
340
+
341
+ if [ $( echo " $ppl > 20.0" | bc) -eq 1 ]; then
342
+ printf ' - %s @ %s (FAIL: ppl > 20.0)\n' " $qnt " " $ppl "
343
+ return 20
344
+ fi
345
+
346
+ printf ' - %s @ %s OK\n' " $qnt " " $ppl "
347
+ return 0
348
+ }
349
+
350
+ check_ppl " f16" " $( cat $OUT /${ci} -tg-f16.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
351
+ check_ppl " q8_0" " $( cat $OUT /${ci} -tg-q8_0.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
352
+ check_ppl " q4_0" " $( cat $OUT /${ci} -tg-q4_0.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
353
+ check_ppl " q4_1" " $( cat $OUT /${ci} -tg-q4_1.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
354
+ check_ppl " q5_0" " $( cat $OUT /${ci} -tg-q5_0.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
355
+ check_ppl " q5_1" " $( cat $OUT /${ci} -tg-q5_1.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
356
+ check_ppl " q2_k" " $( cat $OUT /${ci} -tg-q2_k.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
357
+ check_ppl " q3_k" " $( cat $OUT /${ci} -tg-q3_k.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
358
+ check_ppl " q4_k" " $( cat $OUT /${ci} -tg-q4_k.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
359
+ check_ppl " q5_k" " $( cat $OUT /${ci} -tg-q5_k.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
360
+ check_ppl " q6_k" " $( cat $OUT /${ci} -tg-q6_k.log | grep " ^\[1\]" ) " | tee -a $OUT /${ci} -ppl.log
361
+
362
+ set +e
363
+ }
364
+
365
+ function gg_sum_open_llama_7b_v2 {
366
+ gg_printf ' ### %s\n\n' " ${ci} "
367
+
368
+ gg_printf ' OpenLLaMA 7B-v2:\n'
369
+ gg_printf ' - status: %s\n' " $( cat $OUT /${ci} .exit) "
370
+ gg_printf ' - perplexity:\n%s\n' " $( cat $OUT /${ci} -ppl.log) "
371
+ gg_printf ' - f16: \n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-f16.log) "
372
+ gg_printf ' - q8_0:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q8_0.log) "
373
+ gg_printf ' - q4_0:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q4_0.log) "
374
+ gg_printf ' - q4_1:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q4_1.log) "
375
+ gg_printf ' - q5_0:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q5_0.log) "
376
+ gg_printf ' - q5_1:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q5_1.log) "
377
+ gg_printf ' - q2_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q2_k.log) "
235
378
gg_printf ' - q3_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q3_k.log) "
236
379
gg_printf ' - q4_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q4_k.log) "
237
380
gg_printf ' - q5_k:\n```\n%s\n```\n' " $( cat $OUT /${ci} -tg-q5_k.log) "
@@ -240,7 +383,7 @@ function gg_sum_open_llama_3b_v2 {
240
383
241
384
# # main
242
385
243
- if [ -z $GG_BUILD_LOW_PERF ]; then
386
+ if [ -z ${ GG_BUILD_LOW_PERF} ]; then
244
387
rm -rf ${SRC} /models-mnt
245
388
246
389
mnt_models=${MNT} /models
252
395
253
396
ret=0
254
397
255
- # test $ret -eq 0 && gg_run ctest_debug
256
- # test $ret -eq 0 && gg_run ctest_release
398
+ test $ret -eq 0 && gg_run ctest_debug
399
+ test $ret -eq 0 && gg_run ctest_release
257
400
258
- if [ -z $GG_BUILD_LOW_PERF ]; then
259
- test $ret -eq 0 && gg_run open_llama_3b_v2
401
+ if [ -z ${GG_BUILD_LOW_PERF} ]; then
402
+ if [ -z ${GG_BUILD_CUDA} ]; then
403
+ test $ret -eq 0 && gg_run open_llama_3b_v2
404
+ else
405
+ test $ret -eq 0 && gg_run open_llama_7b_v2
406
+ fi
260
407
fi
261
408
262
409
exit $ret
0 commit comments