
Commit 1b11b4e

Author: github-actions

[auto] Sync version 2404081813.0.0+llamacpp-release.b2632
== Relevant log messages from source repo:

commit b73e564b16086845a8b4fffd26e22685d3e0c3db
Author: Georgi Gerganov <[email protected]>
Date:   Mon Apr 8 16:23:01 2024 +0300

    quantize : fix precedence of cli args (#6541)

commit e3c337d87ca650972105a51c6ce302dd236c07ad
Author: Rick G <[email protected]>
Date:   Mon Apr 8 06:02:30 2024 -0700

    llama : support negative ith in llama_get_ API (#6519)

    * llama_sampling_sample with default args is more naively usable
    * Batches populated by either llama_batch_get_one or llama_batch_add work with default args
    * Previously get_one could use the default argument
    * Previously add should usually have used the last index where logits[idx] == true
    * This hopefully encourages the use of llama_batch_add
    * By giving expected results when using default arguments.
    * Adds "negative indexing" feature to llama_get_logits_ith and llama_get_embeddings_ith
    * Believed to work with any currently well behaved program
    * Default arg now works for both cases (previously would give strange results for add case)
    * Any non-negative number is unaffected and behaves as previously
    * Negative arguments were previously invalid.
    * Implemented as a special case of indexing as suggested by @compilade in ggml-org/llama.cpp#6519
    * Fixed mismatch type errors
    * cited in macOS CI tests
    * Missed in original updates based on PR feedback in ggml-org/llama.cpp#6519

commit beea6e1b16e783a0886e78dec01002a8c00db24d
Author: Jan Boon <[email protected]>
Date:   Mon Apr 8 20:43:30 2024 +0800

    llama : save and restore kv cache for single seq id (#6341)

    * llama : save and restore kv cache for single seq id
    * remove trailing whitespace
    * respond error in case there's no space in the kv cache
    * add kv seq save restore to test case
    * add --slot-save-path arg to enable save restore and restrict save location
    * Returning 0 for some cases, instead of asserting.
    * cleanup error cases
    * rename sequence state functions
    * rename state get set functions
    * add previous function names back in with DEPRECATED notice
    * update doc
    * adjust endpoints to preferred style
    * fix restoring zero cell count
    * handle seq rm return value
    * unused param
    * keep in the size check
    * fix return types
    * add server test case for slot save restore
    * cleanup
    * add cake
    * cleanup style
    * add special
    * removing a whole sequence never fails
    * move sequence state file functionality from server to llama to match session api and add version tags
    * catch exceptions on save as well
    * error log messages
    * check types for stricter restore
    * update server doc
    * readme : update API changes date
    * strict filename validation
    * move include, reject bom as well
    * also reject empty filename
    * reject whitespace and trailing dot

    ---------

    Co-authored-by: Martin Evans <[email protected]>
    Co-authored-by: Georgi Gerganov <[email protected]>
1 parent 8c14aee commit 1b11b4e
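
The negative-indexing change described in the log above means the logits of the last output in a batch can be requested by passing -1 to llama_get_logits_ith, instead of tracking the last index where logits[idx] == true. Below is a minimal sketch of how that might look through these raw bindings. It assumes the crate is imported as ggml_sys_bleedingedge and that llama_get_logits_ith is present in the generated bindings with its upstream signature ((*mut llama_context, i32) -> *mut f32); that declaration is not part of the hunks shown in this commit, and obtaining a valid context and n_vocab is not shown.

```rust
use ggml_sys_bleedingedge::{llama_context, llama_get_logits_ith};

/// Fetch the logits of the last output of the most recently decoded batch.
///
/// Safety: `ctx` must be a valid context that has already decoded a batch,
/// and `n_vocab` must match the model's vocabulary size.
unsafe fn last_token_logits(ctx: *mut llama_context, n_vocab: usize) -> Vec<f32> {
    // -1 now addresses the last output of the previous decode call; any
    // non-negative index keeps its previous meaning, so existing callers
    // are unaffected.
    let ptr = llama_get_logits_ith(ctx, -1);
    assert!(!ptr.is_null(), "no logits available for the last output");
    std::slice::from_raw_parts(ptr, n_vocab).to_vec()
}
```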

File tree

5 files changed: +49 -4 lines changed

Cargo.toml (+1 -1)

@@ -1,6 +1,6 @@
 [package]
 name = "ggml-sys-bleedingedge"
-version = "2404081217.0.0+llamacpp-release.b2629"
+version = "2404081813.0.0+llamacpp-release.b2632"
 description = "Bleeding edge low-level bindings to GGML. "
 repository = "https://github.com/KerfuffleV2/ggml-sys-bleedingedge"
 keywords = ["deep-learning", "machine-learning", "tensors", "ggml", "ml"]

VERSION.txt (+1 -1)

@@ -1 +1 @@
-2404081217.0.0+llamacpp-release.b2629
+2404081813.0.0+llamacpp-release.b2632

ggml-tag-current.txt (+1 -1)

@@ -1 +1 @@
-b2629
+b2632

ggml-tag-previous.txt (+1 -1)

@@ -1 +1 @@
-b2619
+b2629

src/lib.rs (+45)

@@ -594,8 +594,11 @@ pub const LLAMA_DEFAULT_SEED: u32 = 4294967295;
 pub const LLAMA_MAX_RNG_STATE: u32 = 65536;
 pub const LLAMA_FILE_MAGIC_GGLA: u32 = 1734831201;
 pub const LLAMA_FILE_MAGIC_GGSN: u32 = 1734833006;
+pub const LLAMA_FILE_MAGIC_GGSQ: u32 = 1734833009;
 pub const LLAMA_SESSION_MAGIC: u32 = 1734833006;
 pub const LLAMA_SESSION_VERSION: u32 = 5;
+pub const LLAMA_STATE_SEQ_MAGIC: u32 = 1734833009;
+pub const LLAMA_STATE_SEQ_VERSION: u32 = 1;
 pub const ggml_status_GGML_STATUS_ALLOC_FAILED: ggml_status = -2;
 pub const ggml_status_GGML_STATUS_FAILED: ggml_status = -1;
 pub const ggml_status_GGML_STATUS_SUCCESS: ggml_status = 0;
@@ -5265,22 +5268,64 @@ extern "C" {
     pub fn llama_kv_cache_seq_pos_max(ctx: *mut llama_context, seq_id: llama_seq_id) -> llama_pos;
     pub fn llama_kv_cache_defrag(ctx: *mut llama_context);
     pub fn llama_kv_cache_update(ctx: *mut llama_context);
+    pub fn llama_state_get_size(ctx: *const llama_context) -> usize;
     pub fn llama_get_state_size(ctx: *const llama_context) -> usize;
+    pub fn llama_state_get_data(ctx: *mut llama_context, dst: *mut u8) -> usize;
     pub fn llama_copy_state_data(ctx: *mut llama_context, dst: *mut u8) -> usize;
+    pub fn llama_state_set_data(ctx: *mut llama_context, src: *const u8) -> usize;
     pub fn llama_set_state_data(ctx: *mut llama_context, src: *const u8) -> usize;
+    pub fn llama_state_load_file(
+        ctx: *mut llama_context,
+        path_session: *const ::std::os::raw::c_char,
+        tokens_out: *mut llama_token,
+        n_token_capacity: usize,
+        n_token_count_out: *mut usize,
+    ) -> bool;
     pub fn llama_load_session_file(
         ctx: *mut llama_context,
         path_session: *const ::std::os::raw::c_char,
         tokens_out: *mut llama_token,
         n_token_capacity: usize,
         n_token_count_out: *mut usize,
     ) -> bool;
+    pub fn llama_state_save_file(
+        ctx: *mut llama_context,
+        path_session: *const ::std::os::raw::c_char,
+        tokens: *const llama_token,
+        n_token_count: usize,
+    ) -> bool;
     pub fn llama_save_session_file(
         ctx: *mut llama_context,
         path_session: *const ::std::os::raw::c_char,
         tokens: *const llama_token,
         n_token_count: usize,
     ) -> bool;
+    pub fn llama_state_seq_get_size(ctx: *mut llama_context, seq_id: llama_seq_id) -> usize;
+    pub fn llama_state_seq_get_data(
+        ctx: *mut llama_context,
+        dst: *mut u8,
+        seq_id: llama_seq_id,
+    ) -> usize;
+    pub fn llama_state_seq_set_data(
+        ctx: *mut llama_context,
+        src: *const u8,
+        dest_seq_id: llama_seq_id,
+    ) -> usize;
+    pub fn llama_state_seq_save_file(
+        ctx: *mut llama_context,
+        filepath: *const ::std::os::raw::c_char,
+        seq_id: llama_seq_id,
+        tokens: *const llama_token,
+        n_token_count: usize,
+    ) -> usize;
+    pub fn llama_state_seq_load_file(
+        ctx: *mut llama_context,
+        filepath: *const ::std::os::raw::c_char,
+        dest_seq_id: llama_seq_id,
+        tokens_out: *mut llama_token,
+        n_token_capacity: usize,
+        n_token_count_out: *mut usize,
+    ) -> usize;
     pub fn llama_batch_get_one(
         tokens: *mut llama_token,
         n_tokens: i32,
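
The llama_state_get_size / llama_state_get_data / llama_state_set_data declarations above are the renamed whole-context state API; the previous names (llama_get_state_size, llama_copy_state_data, llama_set_state_data) remain declared but are marked deprecated upstream. A rough sketch of an in-memory snapshot/rollback using the new names, assuming the bindings are used as the ggml_sys_bleedingedge crate root; creating the context is out of scope and the buffer handling is illustrative only.

```rust
use ggml_sys_bleedingedge::{
    llama_context, llama_state_get_data, llama_state_get_size, llama_state_set_data,
};

/// Capture the full context state into a heap buffer and restore it later.
///
/// Safety: `ctx` must point to a valid, initialized llama_context.
unsafe fn snapshot(ctx: *mut llama_context) -> Vec<u8> {
    // Upper bound on the serialized state size for this context.
    let size = llama_state_get_size(ctx);
    let mut buf = vec![0u8; size];
    // Number of bytes actually written (assumed to be at most `size`).
    let written = llama_state_get_data(ctx, buf.as_mut_ptr());
    buf.truncate(written);
    buf
}

unsafe fn restore(ctx: *mut llama_context, snap: &[u8]) {
    // Returns the number of bytes consumed from the buffer.
    let _read = llama_state_set_data(ctx, snap.as_ptr());
}
```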

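The llama_state_seq_* functions are the genuinely new surface from #6341: they let a caller persist the KV cache of a single sequence id to disk and restore it into any destination sequence. Below is a sketch of driving llama_state_seq_save_file and llama_state_seq_load_file through the raw bindings; the helper name, file path handling, and error strings are made up for illustration, and evaluating the prompt tokens for the source sequence beforehand is not shown.

```rust
use std::ffi::CString;

use ggml_sys_bleedingedge::{
    llama_context, llama_seq_id, llama_state_seq_load_file, llama_state_seq_save_file,
    llama_token,
};

/// Persist the KV cache of `src_seq` (plus its token history) to `path`,
/// then load it back into `dst_seq`. Returns the number of tokens restored.
///
/// Safety: `ctx` must be valid and `tokens` must be the tokens already
/// evaluated for `src_seq`.
unsafe fn save_and_restore_seq(
    ctx: *mut llama_context,
    src_seq: llama_seq_id,
    dst_seq: llama_seq_id,
    tokens: &[llama_token],
    path: &str,
) -> Result<usize, &'static str> {
    let c_path = CString::new(path).map_err(|_| "path contains a NUL byte")?;

    // A return value of 0 is treated here as failure (the upstream change
    // returns 0 for some error cases instead of asserting).
    let written = llama_state_seq_save_file(
        ctx,
        c_path.as_ptr(),
        src_seq,
        tokens.as_ptr(),
        tokens.len(),
    );
    if written == 0 {
        return Err("llama_state_seq_save_file failed");
    }

    // Restore into a (possibly different) destination sequence id.
    let mut tokens_out: Vec<llama_token> = vec![0; tokens.len()];
    let mut n_tokens_out: usize = 0;
    let read = llama_state_seq_load_file(
        ctx,
        c_path.as_ptr(),
        dst_seq,
        tokens_out.as_mut_ptr(),
        tokens_out.len(),
        &mut n_tokens_out,
    );
    if read == 0 {
        return Err("llama_state_seq_load_file failed");
    }
    Ok(n_tokens_out)
}
```

This mirrors how the llama.cpp server's --slot-save-path feature (added in the same upstream commit) persists and restores individual slots.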