
Commit 62370b0

Returning error codes for some cases, instead of asserting.

1 parent b8e8fac

2 files changed: +23 −6

llama.cpp: +16 −6
@@ -15227,7 +15227,9 @@ size_t llama_set_seq_data(struct llama_context * ctx, const uint8_t * src, llama
     uint32_t size_t_size;
     memcpy(&size_t_size, inp, sizeof(size_t_size));
     inp += sizeof(size_t_size);
-    GGML_ASSERT(size_t_size == sizeof(size_t));
+    if (size_t_size != sizeof(size_t)) {
+        return -1;
+    }
 
     // Read the cell count
     uint32_t cell_count;
@@ -15244,6 +15246,18 @@ size_t llama_set_seq_data(struct llama_context * ctx, const uint8_t * src, llama
     memcpy(&n_embd_v_gqa_ref, inp, sizeof(n_embd_v_gqa_ref));
     inp += sizeof(n_embd_v_gqa_ref);
 
+    // Sanity check model compatibility
+    const auto& hparams = ctx->model.hparams;
+    const uint32_t n_layer = hparams.n_layer;
+    const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa() + hparams.n_embd_k_s();
+    const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa() + hparams.n_embd_v_s();
+    if (n_layer != n_layer_ref) {
+        return -2;
+    }
+    if (n_embd_v_gqa != n_embd_v_gqa_ref) {
+        return -2;
+    }
+
     // Allocate the new cells for the slot
     {
         llama_batch batch = llama_batch_init(cell_count, 0, 1);
@@ -15259,7 +15273,7 @@ size_t llama_set_seq_data(struct llama_context * ctx, const uint8_t * src, llama
         }
         if (!llama_kv_cache_find_slot(kv_self, batch)) {
             llama_batch_free(batch);
-            return 0;
+            return -3;
         }
 
         // DEBUG CHECK: kv_self.head should be our first cell, kv_self.head + cell_count - 1 should be our last cell (verify seq_id and pos values)
@@ -15274,10 +15288,6 @@ size_t llama_set_seq_data(struct llama_context * ctx, const uint8_t * src, llama
         llama_batch_free(batch);
     }
 
-    const auto& hparams = ctx->model.hparams;
-    const uint32_t n_layer = hparams.n_layer;
-    const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa() + hparams.n_embd_k_s();
-    const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa() + hparams.n_embd_v_s();
     const uint32_t kv_size = kv_self.size;
     const uint32_t kv_head = kv_self.head;
     GGML_ASSERT(n_layer == n_layer_ref);
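
The motivation, in caller-visible terms: GGML_ASSERT terminates the whole process when its condition fails, so an incompatible session stream previously crashed the application, whereas the new negative return codes let the caller reject the stream and continue. As a reminder of the failure semantics being replaced, ggml's assertion macro behaves roughly like this (a simplified sketch, not the verbatim definition):

#include <stdio.h>
#include <stdlib.h>

// Simplified sketch of ggml's assertion macro: on failure it reports the
// failing expression and calls abort(), so the caller cannot recover.
#define GGML_ASSERT(x) \
    do { \
        if (!(x)) { \
            fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
            abort(); \
        } \
    } while (0)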

llama.h: +7 −0
@@ -632,6 +632,13 @@ extern "C" {
             uint8_t * dst,
             llama_seq_id seq_id);
 
+    // Copy the sequence data (originally copied with `llama_copy_seq_data`) into a sequence.
+    // Returns:
+    //  - Positive: Ok
+    //  - Negative: An error of some kind
+    //    - -1: `size_t` is incorrect size
+    //    - -2: Model mismatch
+    //    - -3: Cannot find space in KV cache
     LLAMA_API size_t llama_set_seq_data(
             struct llama_context * ctx,
             const uint8_t * src,
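
For illustration, a minimal caller-side sketch of handling the new codes. Two caveats: the declared return type is size_t, so the negative values arrive wrapped and must be cast back to a signed type before comparison; and the helper, buffer, and parameter names below are hypothetical (the declaration above is truncated, so the final parameter name is assumed):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include "llama.h"

// Hypothetical helper: restore a sequence previously serialized with
// llama_copy_seq_data, translating the negative codes documented above.
static bool restore_seq(struct llama_context * ctx, const uint8_t * buf, llama_seq_id seq_id) {
    const size_t res = llama_set_seq_data(ctx, buf, seq_id);
    // size_t is unsigned, so -1/-2/-3 come back wrapped; cast to recover them.
    const ptrdiff_t code = (ptrdiff_t) res;
    switch (code) {
        case -1: fprintf(stderr, "stream written with a different size_t width\n"); return false;
        case -2: fprintf(stderr, "stream does not match this model\n");             return false;
        case -3: fprintf(stderr, "no space in the KV cache for the sequence\n");    return false;
        default: return true; // positive: success, per the header comment
    }
}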
