@@ -15227,7 +15227,9 @@ size_t llama_set_seq_data(struct llama_context * ctx, const uint8_t * src, llama
15227
15227
uint32_t size_t_size;
15228
15228
memcpy(&size_t_size, inp, sizeof(size_t_size));
15229
15229
inp += sizeof(size_t_size);
15230
- GGML_ASSERT(size_t_size == sizeof(size_t));
15230
+ if (size_t_size != sizeof(size_t)) {
15231
+ return -1;
15232
+ }
15231
15233
15232
15234
// Read the cell count
15233
15235
uint32_t cell_count;
@@ -15244,6 +15246,18 @@ size_t llama_set_seq_data(struct llama_context * ctx, const uint8_t * src, llama
15244
15246
memcpy(&n_embd_v_gqa_ref, inp, sizeof(n_embd_v_gqa_ref));
15245
15247
inp += sizeof(n_embd_v_gqa_ref);
15246
15248
15249
+ // Sanity check model compatibility
15250
+ const auto& hparams = ctx->model.hparams;
15251
+ const uint32_t n_layer = hparams.n_layer;
15252
+ const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa() + hparams.n_embd_k_s();
15253
+ const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa() + hparams.n_embd_v_s();
15254
+ if (n_layer != n_layer_ref) {
15255
+ return -2;
15256
+ }
15257
+ if (n_embd_v_gqa != n_embd_v_gqa_ref) {
15258
+ return -2;
15259
+ }
15260
+
15247
15261
// Allocate the new cells for the slot
15248
15262
{
15249
15263
llama_batch batch = llama_batch_init(cell_count, 0, 1);
@@ -15259,7 +15273,7 @@ size_t llama_set_seq_data(struct llama_context * ctx, const uint8_t * src, llama
15259
15273
}
15260
15274
if (!llama_kv_cache_find_slot(kv_self, batch)) {
15261
15275
llama_batch_free(batch);
15262
- return 0 ;
15276
+ return -3 ;
15263
15277
}
15264
15278
15265
15279
// DEBUG CHECK: kv_self.head should be our first cell, kv_self.head + cell_count - 1 should be our last cell (verify seq_id and pos values)
@@ -15274,10 +15288,6 @@ size_t llama_set_seq_data(struct llama_context * ctx, const uint8_t * src, llama
15274
15288
llama_batch_free(batch);
15275
15289
}
15276
15290
15277
- const auto& hparams = ctx->model.hparams;
15278
- const uint32_t n_layer = hparams.n_layer;
15279
- const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa() + hparams.n_embd_k_s();
15280
- const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa() + hparams.n_embd_v_s();
15281
15291
const uint32_t kv_size = kv_self.size;
15282
15292
const uint32_t kv_head = kv_self.head;
15283
15293
GGML_ASSERT(n_layer == n_layer_ref);
0 commit comments