@@ -711,41 +711,12 @@ void sigint_handler(int signo) {
 }
 #endif
 
-const char * llama_print_system_info(void) {
-    static std::string s;
-
-    s  = "";
-    s += "AVX = "       + std::to_string(ggml_cpu_has_avx())       + " | ";
-    s += "AVX2 = "      + std::to_string(ggml_cpu_has_avx2())      + " | ";
-    s += "AVX512 = "    + std::to_string(ggml_cpu_has_avx512())    + " | ";
-    s += "FMA = "       + std::to_string(ggml_cpu_has_fma())       + " | ";
-    s += "NEON = "      + std::to_string(ggml_cpu_has_neon())      + " | ";
-    s += "ARM_FMA = "   + std::to_string(ggml_cpu_has_arm_fma())   + " | ";
-    s += "F16C = "      + std::to_string(ggml_cpu_has_f16c())      + " | ";
-    s += "FP16_VA = "   + std::to_string(ggml_cpu_has_fp16_va())   + " | ";
-    s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | ";
-    s += "BLAS = "      + std::to_string(ggml_cpu_has_blas())      + " | ";
-    s += "SSE3 = "      + std::to_string(ggml_cpu_has_sse3())      + " | ";
-    s += "VSX = "       + std::to_string(ggml_cpu_has_vsx())       + " | ";
-
-    return s.c_str();
-}
-
-int llama_main(int argc, char ** argv) {
-    ggml_time_init();
-    const int64_t t_main_start_us = ggml_time_us();
-
-    gpt_params params;
-    params.model = "models/llama-7B/ggml-model.bin";
-
-    if (gpt_params_parse(argc, argv, params) == false) {
-        return 1;
-    }
-
-    if (params.n_ctx > 2048) {
-        fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);"
-                "expect poor results\n", __func__, params.n_ctx);
-    }
+int llama_main(
+    gpt_params params,
+    gpt_vocab vocab,
+    llama_model model,
+    int64_t t_load_us,
+    int64_t t_main_start_us) {
 
     if (params.seed < 0) {
         params.seed = time(NULL);
@@ -761,30 +732,6 @@ int llama_main(int argc, char ** argv) {
     // params.prompt = R"(// this function checks if the number n is prime
     // bool is_prime(int n) {)";
 
-    int64_t t_load_us = 0;
-
-    gpt_vocab vocab;
-    llama_model model;
-
-    // load the model
-    {
-        const ggml_type memory_type = params.memory_f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
-        const int64_t t_start_us = ggml_time_us();
-        if (!llama_model_load(params.model, model, vocab, params.n_ctx, memory_type)) {
-            fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
-            return 1;
-        }
-
-        t_load_us = ggml_time_us() - t_start_us;
-    }
-
-    // print system information
-    {
-        fprintf(stderr, "\n");
-        fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
-                params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
-    }
-
     int n_past = 0;
 
     int64_t t_sample_us = 0;
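
With this change, llama_main no longer parses arguments or loads the model itself; it receives a ready gpt_params, gpt_vocab, llama_model, plus the load and start timestamps from its caller. The actual call site is not part of this hunk, so the following is only a minimal sketch of how a caller could be wired up, reusing the loading and timing code removed above; the main() wrapper name and the use of llama_main's return value as the exit code are assumptions.

// Hypothetical caller sketch: load everything up front, then hand it to llama_main().
int main(int argc, char ** argv) {
    ggml_time_init();
    const int64_t t_main_start_us = ggml_time_us();

    gpt_params params;
    params.model = "models/llama-7B/ggml-model.bin";

    if (gpt_params_parse(argc, argv, params) == false) {
        return 1;
    }

    gpt_vocab vocab;
    llama_model model;

    // load the model and vocab before handing control to llama_main()
    int64_t t_load_us = 0;
    {
        const ggml_type memory_type = params.memory_f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
        const int64_t t_start_us = ggml_time_us();
        if (!llama_model_load(params.model, model, vocab, params.n_ctx, memory_type)) {
            fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
            return 1;
        }
        t_load_us = ggml_time_us() - t_start_us;
    }

    // assumption: llama_main's result is used as the process exit code
    return llama_main(params, vocab, model, t_load_us, t_main_start_us);
}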