1 parent b85ea58 commit e19483c
llama.cpp
@@ -1127,7 +1127,7 @@ static void llama_model_load_internal(
     const size_t scale = memory_type == GGML_TYPE_F32 ? 2 : 1;

     // this is the total memory required to run the inference
-    const size_t bigctxmul = (hparams.n_ctx>2048?2:1);
+    const size_t bigctxmul = (hparams.n_ctx>4096?3:(hparams.n_ctx>2048?2:1));
     const size_t mem_required =
               ctx_size +
               mmapped_size - vram_weights + // weights in VRAM not in memory
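In effect, the commit extends the context-size scaling of the inference-memory estimate: the estimate was already doubled for contexts above 2048 tokens, and is now tripled for contexts above 4096 tokens. Below is a minimal standalone sketch of the new threshold logic. The helper name big_ctx_mul and the sample context sizes are illustrative; only the ternary expression is taken from the diff, and how bigctxmul feeds into mem_required is not fully visible in this hunk.

// Minimal sketch of the multiplier introduced by this commit. The helper
// name big_ctx_mul and the sample context sizes are hypothetical; only the
// ternary expression comes from the diff.
#include <cstdio>
#include <cstddef>

// 1x up to 2048 tokens, 2x from 2049 to 4096, 3x beyond 4096.
static size_t big_ctx_mul(size_t n_ctx) {
    return n_ctx > 4096 ? 3 : (n_ctx > 2048 ? 2 : 1);
}

int main() {
    const size_t contexts[] = {512, 2048, 4096, 8192};
    for (size_t n_ctx : contexts) {
        std::printf("n_ctx = %5zu -> memory multiplier = %zux\n",
                    n_ctx, big_ctx_mul(n_ctx));
    }
    return 0;
}

The stepped multiplier is presumably a coarse heuristic for how scratch and KV-cache memory grow with context length, bumping the flat estimate in stages rather than computing it exactly.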