@@ -356,17 +356,17 @@ struct llama_load_tensor {
         LLAMA_ASSERT(shards.size() <= UINT32_MAX);
         uint32_t n_shards = (uint32_t) shards.size();
         switch (split_type) {
-        case SPLIT_NONE:
-            ne = first_shard.ne;
-            break;
-        case SPLIT_BY_COLUMNS:
-            ne = {checked_mul<uint32_t>(first_shard.ne[0], n_shards),
-                  first_shard.ne[1]};
-            break;
-        case SPLIT_BY_ROWS:
-            ne = {first_shard.ne[0],
-                  checked_mul<uint32_t>(first_shard.ne[1], n_shards)};
-            break;
+            case SPLIT_NONE:
+                ne = first_shard.ne;
+                break;
+            case SPLIT_BY_COLUMNS:
+                ne = {checked_mul<uint32_t>(first_shard.ne[0], n_shards),
+                      first_shard.ne[1]};
+                break;
+            case SPLIT_BY_ROWS:
+                ne = {first_shard.ne[0],
+                      checked_mul<uint32_t>(first_shard.ne[1], n_shards)};
+                break;
         }
     }
@@ -806,6 +806,25 @@ bool llama_mlock_supported() {
 // model loading
 //
 
+static const char *llama_file_version_name(llama_file_version version) {
+    switch (version) {
+        case LLAMA_FILE_VERSION_GGML: return "'ggml' (old version with low tokenizer quality and no mmap support)";
+        case LLAMA_FILE_VERSION_GGMF_V1: return "ggmf v1 (old version with no mmap support)";
+        case LLAMA_FILE_VERSION_GGJT_V1: return "ggjt v1 (latest)";
+        default: LLAMA_ASSERT(false);
+    }
+}
+
+static const char *llama_model_type_name(e_model type) {
+    switch (type) {
+        case MODEL_7B: return "7B";
+        case MODEL_13B: return "13B";
+        case MODEL_30B: return "30B";
+        case MODEL_65B: return "65B";
+        default: LLAMA_ASSERT(false);
+    }
+}
+
 static void llama_model_load_internal(
         const std::string & fname,
         llama_context & lctx,
@@ -823,8 +842,9 @@ static void llama_model_load_internal(
 
     lctx.vocab = std::move(ml->file_loaders.at(0)->vocab);
     auto & model = lctx.model;
+    model.hparams = ml->file_loaders.at(0)->hparams;
+    llama_file_version file_version = ml->file_loaders.at(0)->file_version;
     auto & hparams = model.hparams;
-    hparams = ml->file_loaders.at(0)->hparams;
 
     uint32_t n_ff = ((2*(4*hparams.n_embd)/3 + hparams.n_mult - 1)/hparams.n_mult)*hparams.n_mult;
 
     {
@@ -836,18 +856,21 @@ static void llama_model_load_internal(
         }
 
         hparams.n_ctx = n_ctx;
+    }
 
-        fprintf(stderr, "%s: n_vocab = %u\n", __func__, hparams.n_vocab);
-        fprintf(stderr, "%s: n_ctx   = %u\n", __func__, hparams.n_ctx);
-        fprintf(stderr, "%s: n_embd  = %u\n", __func__, hparams.n_embd);
-        fprintf(stderr, "%s: n_mult  = %u\n", __func__, hparams.n_mult);
-        fprintf(stderr, "%s: n_head  = %u\n", __func__, hparams.n_head);
-        fprintf(stderr, "%s: n_layer = %u\n", __func__, hparams.n_layer);
-        fprintf(stderr, "%s: n_rot   = %u\n", __func__, hparams.n_rot);
-        fprintf(stderr, "%s: f16     = %u\n", __func__, hparams.f16);
-        fprintf(stderr, "%s: n_ff    = %u\n", __func__, n_ff);
-        fprintf(stderr, "%s: n_parts = %zu\n", __func__, ml->file_loaders.size());
-        fprintf(stderr, "%s: type    = %u\n", __func__, model.type);
+    {
+        fprintf(stderr, "%s: format     = %s\n", __func__, llama_file_version_name(file_version));
+        fprintf(stderr, "%s: n_vocab    = %u\n", __func__, hparams.n_vocab);
+        fprintf(stderr, "%s: n_ctx      = %u\n", __func__, hparams.n_ctx);
+        fprintf(stderr, "%s: n_embd     = %u\n", __func__, hparams.n_embd);
+        fprintf(stderr, "%s: n_mult     = %u\n", __func__, hparams.n_mult);
+        fprintf(stderr, "%s: n_head     = %u\n", __func__, hparams.n_head);
+        fprintf(stderr, "%s: n_layer    = %u\n", __func__, hparams.n_layer);
+        fprintf(stderr, "%s: n_rot      = %u\n", __func__, hparams.n_rot);
+        fprintf(stderr, "%s: f16        = %u\n", __func__, hparams.f16);
+        fprintf(stderr, "%s: n_ff       = %u\n", __func__, n_ff);
+        fprintf(stderr, "%s: n_parts    = %zu\n", __func__, ml->file_loaders.size());
+        fprintf(stderr, "%s: model size = %s\n", __func__, llama_model_type_name(model.type));
     }
 
     if (vocab_only) {