
Commit 1b11b4e

Author: github-actions

[auto] Sync version 2404081813.0.0+llamacpp-release.b2632
== Relevant log messages from source repo:

commit b73e564b16086845a8b4fffd26e22685d3e0c3db
Author: Georgi Gerganov <[email protected]>
Date:   Mon Apr 8 16:23:01 2024 +0300

    quantize : fix precedence of cli args (#6541)

commit e3c337d87ca650972105a51c6ce302dd236c07ad
Author: Rick G <[email protected]>
Date:   Mon Apr 8 06:02:30 2024 -0700

    llama : support negative ith in llama_get_ API (#6519)

    * llama_sampling_sample with default args is more naively usable
    * Batches populated by either llama_batch_get_one or llama_batch_add work with default args
    * Previously get_one could use the default argument
    * Previously add should usually have used the last index where logits[idx] == true
    * This hopefully encourages the use of llama_batch_add
    * By giving expected results when using default arguments.
    * Adds "negative indexing" feature to llama_get_logits_ith and llama_get_embeddings_ith
    * Believed to work with any currently well behaved program
    * Default arg now works for both cases (previously would give strange results for add case)
    * Any non-negative number is unaffected and behaves as previously
    * Negative arguments were previously invalid.
    * Implemented as a special case of indexing as suggested by @compilade in ggml-org/llama.cpp#6519
    * Fixed mismatch type errors
    * cited in macOS CI tests
    * Missed in original updates based on PR feedback in ggml-org/llama.cpp#6519

commit beea6e1b16e783a0886e78dec01002a8c00db24d
Author: Jan Boon <[email protected]>
Date:   Mon Apr 8 20:43:30 2024 +0800

    llama : save and restore kv cache for single seq id (#6341)

    * llama : save and restore kv cache for single seq id
    * remove trailing whitespace
    * respond error in case there's no space in the kv cache
    * add kv seq save restore to test case
    * add --slot-save-path arg to enable save restore and restrict save location
    * Returning 0 for some cases, instead of asserting.
    * cleanup error cases
    * rename sequence state functions
    * rename state get set functions
    * add previous function names back in with DEPRECATED notice
    * update doc
    * adjust endpoints to preferred style
    * fix restoring zero cell count
    * handle seq rm return value
    * unused param
    * keep in the size check
    * fix return types
    * add server test case for slot save restore
    * cleanup
    * add cake
    * cleanup style
    * add special
    * removing a whole sequence never fails
    * move sequence state file functionality from server to llama to match session api and add version tags
    * catch exceptions on save as well
    * error log messages
    * check types for stricter restore
    * update server doc
    * readme : update API changes date
    * strict filename validation
    * move include, reject bom as well
    * also reject empty filename
    * reject whitespace and trailing dot

    ---------

    Co-authored-by: Martin Evans <[email protected]>
    Co-authored-by: Georgi Gerganov <[email protected]>
1 parent 8c14aee commit 1b11b4e
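
The negative-indexing change described in the log above means the logits of the last output in a batch can be requested by passing -1 to llama_get_logits_ith, instead of tracking the last index where logits[idx] == true. Below is a minimal sketch of how that might look through these raw bindings. It assumes the crate is imported as ggml_sys_bleedingedge and that llama_get_logits_ith is present in the generated bindings with its upstream signature ((*mut llama_context, i32) -> *mut f32); that declaration is not part of the hunks shown in this commit, and obtaining a valid context and n_vocab is not shown.

```rust
use ggml_sys_bleedingedge::{llama_context, llama_get_logits_ith};

/// Fetch the logits of the last output of the most recently decoded batch.
///
/// Safety: `ctx` must be a valid context that has already decoded a batch,
/// and `n_vocab` must match the model's vocabulary size.
unsafe fn last_token_logits(ctx: *mut llama_context, n_vocab: usize) -> Vec<f32> {
    // -1 now addresses the last output of the previous decode call; any
    // non-negative index keeps its previous meaning, so existing callers
    // are unaffected.
    let ptr = llama_get_logits_ith(ctx, -1);
    assert!(!ptr.is_null(), "no logits available for the last output");
    std::slice::from_raw_parts(ptr, n_vocab).to_vec()
}
```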

File tree

5 files changed: +49 -4 lines changed

Cargo.toml (+1 -1)

@@ -1,6 +1,6 @@
 [package]
 name = "ggml-sys-bleedingedge"
-version = "2404081217.0.0+llamacpp-release.b2629"
+version = "2404081813.0.0+llamacpp-release.b2632"
 description = "Bleeding edge low-level bindings to GGML. "
 repository = "https://github.com/KerfuffleV2/ggml-sys-bleedingedge"
 keywords = ["deep-learning", "machine-learning", "tensors", "ggml", "ml"]

VERSION.txt (+1 -1)

@@ -1 +1 @@
-2404081217.0.0+llamacpp-release.b2629
+2404081813.0.0+llamacpp-release.b2632

ggml-tag-current.txt (+1 -1)

@@ -1 +1 @@
-b2629
+b2632

ggml-tag-previous.txt (+1 -1)

@@ -1 +1 @@
-b2619
+b2629

src/lib.rs (+45)

@@ -594,8 +594,11 @@ pub const LLAMA_DEFAULT_SEED: u32 = 4294967295;
 pub const LLAMA_MAX_RNG_STATE: u32 = 65536;
 pub const LLAMA_FILE_MAGIC_GGLA: u32 = 1734831201;
 pub const LLAMA_FILE_MAGIC_GGSN: u32 = 1734833006;
+pub const LLAMA_FILE_MAGIC_GGSQ: u32 = 1734833009;
 pub const LLAMA_SESSION_MAGIC: u32 = 1734833006;
 pub const LLAMA_SESSION_VERSION: u32 = 5;
+pub const LLAMA_STATE_SEQ_MAGIC: u32 = 1734833009;
+pub const LLAMA_STATE_SEQ_VERSION: u32 = 1;
 pub const ggml_status_GGML_STATUS_ALLOC_FAILED: ggml_status = -2;
 pub const ggml_status_GGML_STATUS_FAILED: ggml_status = -1;
 pub const ggml_status_GGML_STATUS_SUCCESS: ggml_status = 0;
@@ -5265,22 +5268,64 @@ extern "C" {
     pub fn llama_kv_cache_seq_pos_max(ctx: *mut llama_context, seq_id: llama_seq_id) -> llama_pos;
     pub fn llama_kv_cache_defrag(ctx: *mut llama_context);
     pub fn llama_kv_cache_update(ctx: *mut llama_context);
+    pub fn llama_state_get_size(ctx: *const llama_context) -> usize;
     pub fn llama_get_state_size(ctx: *const llama_context) -> usize;
+    pub fn llama_state_get_data(ctx: *mut llama_context, dst: *mut u8) -> usize;
     pub fn llama_copy_state_data(ctx: *mut llama_context, dst: *mut u8) -> usize;
+    pub fn llama_state_set_data(ctx: *mut llama_context, src: *const u8) -> usize;
     pub fn llama_set_state_data(ctx: *mut llama_context, src: *const u8) -> usize;
+    pub fn llama_state_load_file(
+        ctx: *mut llama_context,
+        path_session: *const ::std::os::raw::c_char,
+        tokens_out: *mut llama_token,
+        n_token_capacity: usize,
+        n_token_count_out: *mut usize,
+    ) -> bool;
     pub fn llama_load_session_file(
         ctx: *mut llama_context,
         path_session: *const ::std::os::raw::c_char,
         tokens_out: *mut llama_token,
         n_token_capacity: usize,
         n_token_count_out: *mut usize,
     ) -> bool;
+    pub fn llama_state_save_file(
+        ctx: *mut llama_context,
+        path_session: *const ::std::os::raw::c_char,
+        tokens: *const llama_token,
+        n_token_count: usize,
+    ) -> bool;
     pub fn llama_save_session_file(
         ctx: *mut llama_context,
         path_session: *const ::std::os::raw::c_char,
         tokens: *const llama_token,
         n_token_count: usize,
     ) -> bool;
+    pub fn llama_state_seq_get_size(ctx: *mut llama_context, seq_id: llama_seq_id) -> usize;
+    pub fn llama_state_seq_get_data(
+        ctx: *mut llama_context,
+        dst: *mut u8,
+        seq_id: llama_seq_id,
+    ) -> usize;
+    pub fn llama_state_seq_set_data(
+        ctx: *mut llama_context,
+        src: *const u8,
+        dest_seq_id: llama_seq_id,
+    ) -> usize;
+    pub fn llama_state_seq_save_file(
+        ctx: *mut llama_context,
+        filepath: *const ::std::os::raw::c_char,
+        seq_id: llama_seq_id,
+        tokens: *const llama_token,
+        n_token_count: usize,
+    ) -> usize;
+    pub fn llama_state_seq_load_file(
+        ctx: *mut llama_context,
+        filepath: *const ::std::os::raw::c_char,
+        dest_seq_id: llama_seq_id,
+        tokens_out: *mut llama_token,
+        n_token_capacity: usize,
+        n_token_count_out: *mut usize,
+    ) -> usize;
     pub fn llama_batch_get_one(
         tokens: *mut llama_token,
         n_tokens: i32,
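
The llama_state_get_size / llama_state_get_data / llama_state_set_data declarations above are the renamed whole-context state API; the previous names (llama_get_state_size, llama_copy_state_data, llama_set_state_data) remain declared but are marked deprecated upstream. A rough sketch of an in-memory snapshot/rollback using the new names, assuming the bindings are used as the ggml_sys_bleedingedge crate root; creating the context is out of scope and the buffer handling is illustrative only.

```rust
use ggml_sys_bleedingedge::{
    llama_context, llama_state_get_data, llama_state_get_size, llama_state_set_data,
};

/// Capture the full context state into a heap buffer and restore it later.
///
/// Safety: `ctx` must point to a valid, initialized llama_context.
unsafe fn snapshot(ctx: *mut llama_context) -> Vec<u8> {
    // Upper bound on the serialized state size for this context.
    let size = llama_state_get_size(ctx);
    let mut buf = vec![0u8; size];
    // Number of bytes actually written (assumed to be at most `size`).
    let written = llama_state_get_data(ctx, buf.as_mut_ptr());
    buf.truncate(written);
    buf
}

unsafe fn restore(ctx: *mut llama_context, snap: &[u8]) {
    // Returns the number of bytes consumed from the buffer.
    let _read = llama_state_set_data(ctx, snap.as_ptr());
}
```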

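The llama_state_seq_* functions are the genuinely new surface from #6341: they let a caller persist the KV cache of a single sequence id to disk and restore it into any destination sequence. Below is a sketch of driving llama_state_seq_save_file and llama_state_seq_load_file through the raw bindings; the helper name, file path handling, and error strings are made up for illustration, and evaluating the prompt tokens for the source sequence beforehand is not shown.

```rust
use std::ffi::CString;

use ggml_sys_bleedingedge::{
    llama_context, llama_seq_id, llama_state_seq_load_file, llama_state_seq_save_file,
    llama_token,
};

/// Persist the KV cache of `src_seq` (plus its token history) to `path`,
/// then load it back into `dst_seq`. Returns the number of tokens restored.
///
/// Safety: `ctx` must be valid and `tokens` must be the tokens already
/// evaluated for `src_seq`.
unsafe fn save_and_restore_seq(
    ctx: *mut llama_context,
    src_seq: llama_seq_id,
    dst_seq: llama_seq_id,
    tokens: &[llama_token],
    path: &str,
) -> Result<usize, &'static str> {
    let c_path = CString::new(path).map_err(|_| "path contains a NUL byte")?;

    // A return value of 0 is treated here as failure (the upstream change
    // returns 0 for some error cases instead of asserting).
    let written = llama_state_seq_save_file(
        ctx,
        c_path.as_ptr(),
        src_seq,
        tokens.as_ptr(),
        tokens.len(),
    );
    if written == 0 {
        return Err("llama_state_seq_save_file failed");
    }

    // Restore into a (possibly different) destination sequence id.
    let mut tokens_out: Vec<llama_token> = vec![0; tokens.len()];
    let mut n_tokens_out: usize = 0;
    let read = llama_state_seq_load_file(
        ctx,
        c_path.as_ptr(),
        dst_seq,
        tokens_out.as_mut_ptr(),
        tokens_out.len(),
        &mut n_tokens_out,
    );
    if read == 0 {
        return Err("llama_state_seq_load_file failed");
    }
    Ok(n_tokens_out)
}
```

This mirrors how the llama.cpp server's --slot-save-path feature (added in the same upstream commit) persists and restores individual slots.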