
Commit 2294080

Move model loading back to main.cpp
Signed-off-by: Thiago Padilha <[email protected]>
1 parent 2f29a54 commit 2294080

File tree

3 files changed: +77 -61 lines changed


llama.cpp

+6 -59
@@ -712,41 +712,12 @@ void sigint_handler(int signo) {
 }
 #endif
 
-const char * llama_print_system_info(void) {
-    static std::string s;
-
-    s = "";
-    s += "AVX = " + std::to_string(ggml_cpu_has_avx()) + " | ";
-    s += "AVX2 = " + std::to_string(ggml_cpu_has_avx2()) + " | ";
-    s += "AVX512 = " + std::to_string(ggml_cpu_has_avx512()) + " | ";
-    s += "FMA = " + std::to_string(ggml_cpu_has_fma()) + " | ";
-    s += "NEON = " + std::to_string(ggml_cpu_has_neon()) + " | ";
-    s += "ARM_FMA = " + std::to_string(ggml_cpu_has_arm_fma()) + " | ";
-    s += "F16C = " + std::to_string(ggml_cpu_has_f16c()) + " | ";
-    s += "FP16_VA = " + std::to_string(ggml_cpu_has_fp16_va()) + " | ";
-    s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | ";
-    s += "BLAS = " + std::to_string(ggml_cpu_has_blas()) + " | ";
-    s += "SSE3 = " + std::to_string(ggml_cpu_has_sse3()) + " | ";
-    s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | ";
-
-    return s.c_str();
-}
-
-int llama_main(int argc, char ** argv) {
-    ggml_time_init();
-    const int64_t t_main_start_us = ggml_time_us();
-
-    gpt_params params;
-    params.model = "models/llama-7B/ggml-model.bin";
-
-    if (gpt_params_parse(argc, argv, params) == false) {
-        return 1;
-    }
-
-    if (params.n_ctx > 2048) {
-        fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);"
-                "expect poor results\n", __func__, params.n_ctx);
-    }
+int llama_main(
+    gpt_params params,
+    gpt_vocab vocab,
+    llama_model model,
+    int64_t t_load_us,
+    int64_t t_main_start_us) {
 
     if (params.seed < 0) {
         params.seed = time(NULL);
@@ -762,30 +733,6 @@ int llama_main(int argc, char ** argv) {
     // params.prompt = R"(// this function checks if the number n is prime
     //bool is_prime(int n) {)";
 
-    int64_t t_load_us = 0;
-
-    gpt_vocab vocab;
-    llama_model model;
-
-    // load the model
-    {
-        const ggml_type memory_type = params.memory_f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
-        const int64_t t_start_us = ggml_time_us();
-        if (!llama_model_load(params.model, model, vocab, params.n_ctx, memory_type)) {
-            fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
-            return 1;
-        }
-
-        t_load_us = ggml_time_us() - t_start_us;
-    }
-
-    // print system information
-    {
-        fprintf(stderr, "\n");
-        fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
-                params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
-    }
-
     int n_past = 0;
 
     int64_t t_sample_us = 0;
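
With this change llama_main no longer parses arguments or loads the model itself; it receives the already-loaded state from its caller, plus the two timestamps it presumably still needs for the timing report at the end of the run. Below is a minimal sketch of how those values can be consumed, assuming ggml_time_us() returns microseconds; the helper name is illustrative and not part of this commit:

    // Illustrative only: report load time and total wall time in milliseconds
    // from the two timestamps that now arrive as llama_main() parameters.
    #include <cstdint>
    #include <cstdio>
    #include "ggml.h"

    static void report_timings(int64_t t_load_us, int64_t t_main_start_us) {
        const int64_t t_main_end_us = ggml_time_us();   // "now", in microseconds

        fprintf(stderr, "\n");
        fprintf(stderr, "llama_main:  load time = %8.2f ms\n", t_load_us / 1000.0);
        fprintf(stderr, "llama_main: total time = %8.2f ms\n", (t_main_end_us - t_main_start_us) / 1000.0);
    }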

llama.h

+8 -1
@@ -6,6 +6,7 @@
 #include <string>
 
 #include "ggml.h"
+#include "utils.h"
 
 
 // default hparams (LLaMA 7B)
@@ -58,4 +59,10 @@ struct llama_model {
     std::map<std::string, struct ggml_tensor *> tensors;
 };
 
-int llama_main(int argc, char ** argv);
+int llama_main(
+    gpt_params params,
+    gpt_vocab vocab,
+    llama_model model,
+    int64_t t_load_us,
+    int64_t t_main_start_us);
+bool llama_model_load(const std::string & fname, llama_model & model, gpt_vocab & vocab, int n_ctx, ggml_type memory_type);
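
Since llama.h now declares both llama_model_load() and the state-passing llama_main(), any translation unit that includes it can do its own loading and then hand the loaded state over, which is exactly what main.cpp does below. As a condensed, hypothetical illustration (hard-coded paths, no argument parsing, FP32 KV cache, error handling reduced to a bare return):

    // hypothetical_frontend.cpp -- illustrative sketch, not part of this commit.
    #include "ggml.h"
    #include "utils.h"
    #include "llama.h"

    int main() {
        ggml_time_init();
        const int64_t t_main_start_us = ggml_time_us();

        gpt_params params;                                  // defaults from utils.h
        params.model  = "models/llama-7B/ggml-model.bin";
        params.prompt = "Hello";

        gpt_vocab   vocab;
        llama_model model;

        // Load model and vocabulary up front, timing the load as main.cpp does.
        const int64_t t_start_us = ggml_time_us();
        if (!llama_model_load(params.model, model, vocab, params.n_ctx, GGML_TYPE_F32)) {
            return 1;
        }
        const int64_t t_load_us = ggml_time_us() - t_start_us;

        // Hand everything to the inference entry point, following the
        // parameter order declared above.
        return llama_main(params, vocab, model, t_load_us, t_main_start_us);
    }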

main.cpp

+63 -1
@@ -1,5 +1,67 @@
+#include "ggml.h"
+#include "utils.h"
 #include "llama.h"
 
+const char * llama_print_system_info(void) {
+    static std::string s;
+
+    s = "";
+    s += "AVX = " + std::to_string(ggml_cpu_has_avx()) + " | ";
+    s += "AVX2 = " + std::to_string(ggml_cpu_has_avx2()) + " | ";
+    s += "AVX512 = " + std::to_string(ggml_cpu_has_avx512()) + " | ";
+    s += "FMA = " + std::to_string(ggml_cpu_has_fma()) + " | ";
+    s += "NEON = " + std::to_string(ggml_cpu_has_neon()) + " | ";
+    s += "ARM_FMA = " + std::to_string(ggml_cpu_has_arm_fma()) + " | ";
+    s += "F16C = " + std::to_string(ggml_cpu_has_f16c()) + " | ";
+    s += "FP16_VA = " + std::to_string(ggml_cpu_has_fp16_va()) + " | ";
+    s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | ";
+    s += "BLAS = " + std::to_string(ggml_cpu_has_blas()) + " | ";
+    s += "SSE3 = " + std::to_string(ggml_cpu_has_sse3()) + " | ";
+    s += "VSX = " + std::to_string(ggml_cpu_has_vsx()) + " | ";
+
+    return s.c_str();
+}
+
 int main(int argc, char ** argv) {
-    return llama_main(argc, argv);
+
+    ggml_time_init();
+    const int64_t t_main_start_us = ggml_time_us();
+
+    gpt_params params;
+    params.model = "models/llama-7B/ggml-model.bin";
+
+    if (gpt_params_parse(argc, argv, params) == false) {
+        return 1;
+    }
+
+    if (params.n_ctx > 2048) {
+        fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);"
+                "expect poor results\n", __func__, params.n_ctx);
+    }
+
+    int64_t t_load_us = 0;
+
+    gpt_vocab vocab;
+    llama_model model;
+
+    // load the model
+    {
+        const ggml_type memory_type = params.memory_f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
+        const int64_t t_start_us = ggml_time_us();
+        if (!llama_model_load(params.model, model, vocab, params.n_ctx, memory_type)) {
+            fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
+            return 1;
+        }
+
+        t_load_us = ggml_time_us() - t_start_us;
+    }
+
+    // print system information
+    {
+        fprintf(stderr, "\n");
+        fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
+                params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
+    }
+
+    return llama_main(params, vocab, model, t_main_start_us, t_load_us);
 }
