# ctypes signature for llama_get_kv_cache_token_count: takes a context
# pointer, returns the number of tokens currently in the KV cache as c_int.
_lib.llama_get_kv_cache_token_count.argtypes = [llama_context_p]
_lib.llama_get_kv_cache_token_count.restype = c_int
# Sets the current rng seed.
def llama_set_rng_seed(ctx: llama_context_p, seed: c_int):
    """Set the random-number-generator seed for the given llama context.

    Thin ctypes pass-through to the native ``llama_set_rng_seed``;
    returns whatever the native call returns (declared ``None`` below).
    """
    return _lib.llama_set_rng_seed(ctx, seed)


_lib.llama_set_rng_seed.argtypes = [llama_context_p, c_int]
_lib.llama_set_rng_seed.restype = None
# Returns the size in bytes of the state (rng, logits, embedding and kv_cache)
def llama_get_state_size(ctx: llama_context_p) -> c_size_t:
    """Return the size in bytes of the full serializable context state.

    Covers the rng, logits, embedding and kv_cache, as reported by the
    native ``llama_get_state_size``.
    """
    return _lib.llama_get_state_size(ctx)
# llama_set_state_data returns the number of bytes it consumed from `src`.
_lib.llama_set_state_data.restype = c_size_t
# Save/load session file
def llama_load_session_file(
    ctx: llama_context_p,
    path_session: bytes,
    tokens_out,  # POINTER(llama_token) buffer that receives the session tokens
    n_token_capacity: c_size_t,
    n_token_count_out,  # POINTER(c_size_t) set to the number of tokens loaded
) -> bool:
    """Load a saved session from `path_session` into `ctx`.

    Fills `tokens_out` (capacity `n_token_capacity`) with the session's
    token stream and writes the actual count through `n_token_count_out`.
    Returns True on success, False on failure — llama.h declares this
    function as returning `bool`, not `size_t`.
    """
    return _lib.llama_load_session_file(
        ctx, path_session, tokens_out, n_token_capacity, n_token_count_out
    )


_lib.llama_load_session_file.argtypes = [
    llama_context_p,
    c_char_p,
    llama_token_p,
    c_size_t,
    POINTER(c_size_t),
]
# Fix: the native function returns bool (success flag); reading it as
# c_size_t misinterprets the return value.
_lib.llama_load_session_file.restype = c_bool
278
+ def llama_save_session_file (
279
+ ctx : llama_context_p , path_session : bytes , tokens , n_token_count : c_size_t
280
+ ) -> c_size_t :
281
+ return _lib .llama_save_session_file (ctx , path_session , tokens , n_token_count )
282
+
283
+
284
+ _lib .llama_save_session_file .argtypes = [
285
+ llama_context_p ,
286
+ c_char_p ,
287
+ llama_token_p ,
288
+ c_size_t ,
289
+ ]
290
+ _lib .llama_save_session_file .restype = c_size_t
291
+
292
+
# Run the llama inference to obtain the logits and probabilities for the next token.
# tokens + n_tokens is the provided batch of new tokens to process
# n_past is the number of tokens to use from previous eval calls