ggml-org
diff --git a/src/llama-adapter.h b/src/llama-adapter.h (+1)
diff --git a/src/llama-arch.h b/src/llama-arch.h (+1)
diff --git a/src/llama-batch.h b/src/llama-batch.h (+2)
diff --git a/src/llama-context.cpp b/src/llama-context.cpp (+2 -2)
diff --git a/src/llama-impl.h b/src/llama-impl.h (+1)
diff --git a/src/llama-kv-cache.h b/src/llama-kv-cache.h (+1)
@@ -7,6 +7,7 @@
 
 #include <vector>
 #include <map>
+#include <algorithm>
 
 //
 // llama_adapter_vec
 
@@ -1,6 +1,7 @@
 #pragma once
 
 #include <map>
+#include <string>
 
 //
 // gguf constants (sync with gguf.py)
 
@@ -3,6 +3,8 @@
 #include "llama.h"
 
 #include <vector>
+#include <cstring>
+#include <algorithm>
 
 // very similar to llama_batch,
 // but has more metadata about sequences
 
@@ -799,7 +799,7 @@ static bool llama_state_load_file_internal(struct llama_context * ctx, const cha
 
     // restore the context state
     {
-        const size_t n_state_size_cur = file.size - file.tell();
+        const size_t n_state_size_cur = file.size() - file.tell();
 
         llama_data_read_file data_ctx(&file);
         const size_t n_read = llama_state_set_data_internal(ctx, data_ctx);
@@ -936,7 +936,7 @@ static size_t llama_state_seq_load_file_internal(struct llama_context * ctx, con
 
     // restore the context state
     {
-        const size_t state_size = file.size - file.tell();
+        const size_t state_size = file.size() - file.tell();
         llama_data_read_file data_ctx(&file);
         const size_t nread = llama_state_seq_set_data_internal(ctx, data_ctx, dest_seq_id);
         if (!nread) {
 
@@ -24,6 +24,7 @@ LLAMA_ATTRIBUTE_FORMAT(2, 3)
 void llama_log_internal        (ggml_log_level level, const char * format, ...);
 void llama_log_callback_default(ggml_log_level level, const char * text, void * user_data);
 
+// TODO: rename to llama_format ?
 LLAMA_ATTRIBUTE_FORMAT(1, 2)
 std::string format(const char * fmt, ...);
 
 
@@ -8,6 +8,7 @@
 
 #include <set>
 #include <vector>
+#include <limits>
 
 struct llama_kv_cell {
     llama_pos pos   = -1;
Original file line number	Diff line number	Diff line change
`@@ -799,7 +799,7 @@ static bool llama_state_load_file_internal(struct llama_context * ctx, const cha`
`799`	`799`
`800`	`800`	`// restore the context state`
`801`	`801`	`{`
`802`		`- const size_t n_state_size_cur = file.size - file.tell();`
	`802`	`+ const size_t n_state_size_cur = file.size() - file.tell();`
`803`	`803`
`804`	`804`	`llama_data_read_file data_ctx(&file);`
`805`	`805`	`const size_t n_read = llama_state_set_data_internal(ctx, data_ctx);`
`@@ -936,7 +936,7 @@ static size_t llama_state_seq_load_file_internal(struct llama_context * ctx, con`
`936`	`936`
`937`	`937`	`// restore the context state`
`938`	`938`	`{`
`939`		`- const size_t state_size = file.size - file.tell();`
	`939`	`+ const size_t state_size = file.size() - file.tell();`
`940`	`940`	`llama_data_read_file data_ctx(&file);`
`941`	`941`	`const size_t nread = llama_state_seq_set_data_internal(ctx, data_ctx, dest_seq_id);`
`942`	`942`	`if (!nread) {`