@@ -712,41 +712,12 @@ void sigint_handler(int signo) {
 }
 #endif
 
-const char * llama_print_system_info(void) {
-    static std::string s;
-
-    s  = "";
-    s += "AVX = "       + std::to_string(ggml_cpu_has_avx())       + " | ";
-    s += "AVX2 = "      + std::to_string(ggml_cpu_has_avx2())      + " | ";
-    s += "AVX512 = "    + std::to_string(ggml_cpu_has_avx512())    + " | ";
-    s += "FMA = "       + std::to_string(ggml_cpu_has_fma())       + " | ";
-    s += "NEON = "      + std::to_string(ggml_cpu_has_neon())      + " | ";
-    s += "ARM_FMA = "   + std::to_string(ggml_cpu_has_arm_fma())   + " | ";
-    s += "F16C = "      + std::to_string(ggml_cpu_has_f16c())      + " | ";
-    s += "FP16_VA = "   + std::to_string(ggml_cpu_has_fp16_va())   + " | ";
-    s += "WASM_SIMD = " + std::to_string(ggml_cpu_has_wasm_simd()) + " | ";
-    s += "BLAS = "      + std::to_string(ggml_cpu_has_blas())      + " | ";
-    s += "SSE3 = "      + std::to_string(ggml_cpu_has_sse3())      + " | ";
-    s += "VSX = "       + std::to_string(ggml_cpu_has_vsx())       + " | ";
-
-    return s.c_str();
-}
-
-int llama_main(int argc, char ** argv) {
-    ggml_time_init();
-    const int64_t t_main_start_us = ggml_time_us();
-
-    gpt_params params;
-    params.model = "models/llama-7B/ggml-model.bin";
-
-    if (gpt_params_parse(argc, argv, params) == false) {
-        return 1;
-    }
-
-    if (params.n_ctx > 2048) {
-        fprintf(stderr, "%s: warning: model does not support context sizes greater than 2048 tokens (%d specified);"
-                "expect poor results\n", __func__, params.n_ctx);
-    }
+int llama_main(
+    gpt_params params,
+    gpt_vocab vocab,
+    llama_model model,
+    int64_t t_load_us,
+    int64_t t_main_start_us) {
 
     if (params.seed < 0) {
         params.seed = time(NULL);
@@ -762,30 +733,6 @@ int llama_main(int argc, char ** argv) {
     // params.prompt = R"(// this function checks if the number n is prime
     // bool is_prime(int n) {)";
 
-    int64_t t_load_us = 0;
-
-    gpt_vocab vocab;
-    llama_model model;
-
-    // load the model
-    {
-        const ggml_type memory_type = params.memory_f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
-        const int64_t t_start_us = ggml_time_us();
-        if (!llama_model_load(params.model, model, vocab, params.n_ctx, memory_type)) {
-            fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
-            return 1;
-        }
-
-        t_load_us = ggml_time_us() - t_start_us;
-    }
-
-    // print system information
-    {
-        fprintf(stderr, "\n");
-        fprintf(stderr, "system_info: n_threads = %d / %d | %s\n",
-                params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
-    }
-
     int n_past = 0;
 
     int64_t t_sample_us  = 0;
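With this change, argument parsing, model loading, and the system-info dump no longer happen inside llama_main; the entry point (not shown in this hunk) is expected to do that work and pass the results in. A minimal sketch of such a caller, reconstructed from the removed lines above and assuming gpt_params_parse, llama_model_load, and the new llama_main keep the signatures visible in this diff:

// Hypothetical caller; the real entry point lives outside this hunk and may differ.
int main(int argc, char ** argv) {
    ggml_time_init();
    const int64_t t_main_start_us = ggml_time_us();

    gpt_params params;
    params.model = "models/llama-7B/ggml-model.bin";
    if (gpt_params_parse(argc, argv, params) == false) {
        return 1;
    }

    gpt_vocab vocab;
    llama_model model;

    // load the model and time it, as the removed block did
    int64_t t_load_us = 0;
    {
        const ggml_type memory_type = params.memory_f16 ? GGML_TYPE_F16 : GGML_TYPE_F32;
        const int64_t t_start_us = ggml_time_us();
        if (!llama_model_load(params.model, model, vocab, params.n_ctx, memory_type)) {
            fprintf(stderr, "%s: failed to load model from '%s'\n", __func__, params.model.c_str());
            return 1;
        }
        t_load_us = ggml_time_us() - t_start_us;
    }

    // hand the pre-loaded state to the refactored llama_main
    return llama_main(params, vocab, model, t_load_us, t_main_start_us);
}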