This repository was archived by the owner on Feb 6, 2024. It is now read-only.

Commit b59ca76

Make stablelm support compatible with pre-layer refactor
* undoing more semantic renames in ggml-org/llama.cpp#3837
1 parent d03f651
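
This commit reverts the identifier renames from ggml-org/llama.cpp#3837 in the StableLM code paths so they match the older, pre-refactor llama.cpp bundled with this project. The mapping applied throughout the diff below is: tok_embd -> tok_embeddings, ffn_gate/ffn_down/ffn_up -> w1/w2/w3, llama_backend_offload -> GGML_BACKEND_GPU, and llama_backend_offload_split -> GGML_BACKEND_GPU_SPLIT. As a rough illustration (hypothetical *_sketch struct names; the real structs in the bundled headers carry many more fields):

#include "ggml.h"  // for struct ggml_tensor

// Illustration only: the pre-refactor member names this commit switches back to.
struct llama_model_sketch {
    struct ggml_tensor * tok_embeddings;   // upstream #3837 renames this to tok_embd
};

struct llama_layer_sketch {
    struct ggml_tensor * w1;   // FFN gate projection (upstream: ffn_gate)
    struct ggml_tensor * w2;   // FFN down projection (upstream: ffn_down)
    struct ggml_tensor * w3;   // FFN up projection   (upstream: ffn_up)
};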

File tree: 1 file changed (+14, -14 lines)


Sources/llmfarm_core_cpp/llama/llama.cpp (+14, -14)

@@ -2991,7 +2991,7 @@ static void llm_load_tensors(
                 } break;
             case LLM_ARCH_STABLELM:
                 {
-                    model.tok_embd = ml.create_tensor(ctx, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, GGML_BACKEND_CPU);
+                    model.tok_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, GGML_BACKEND_CPU);
 
                     // output
                     {
@@ -3002,12 +3002,12 @@ static void llm_load_tensors(
                             // norm is not performance relevant on its own but keeping it in VRAM reduces data copying
                             // on Windows however this is detrimental unless everything is on the GPU
 #ifndef _WIN32
-                            backend_norm = llama_backend_offload;
+                            backend_norm = GGML_BACKEND_GPU;
 #else
-                            backend_norm = n_gpu_layers <= (int) n_layer + 2 ? GGML_BACKEND_CPU : llama_backend_offload;
+                            backend_norm = n_gpu_layers <= (int) n_layer + 2 ? GGML_BACKEND_CPU : GGML_BACKEND_GPU;
 #endif // _WIN32
 
-                            backend_output = llama_backend_offload_split;
+                            backend_output = GGML_BACKEND_GPU_SPLIT;
                         } else {
                             backend_norm = GGML_BACKEND_CPU;
                             backend_output = GGML_BACKEND_CPU;
@@ -3035,8 +3035,8 @@ static void llm_load_tensors(
                         /*
                         llama_model_loader: - tensor 4: blk.0.attn_output.weight f16 [ 2560, 2560, 1, 1 ]
                         */
-                        const ggml_backend_type backend = int(i) < i_gpu_start ? GGML_BACKEND_CPU : llama_backend_offload; // NOLINT
-                        const ggml_backend_type backend_split = int(i) < i_gpu_start ? GGML_BACKEND_CPU : llama_backend_offload_split; // NOLINT
+                        const ggml_backend_type backend = int(i) < i_gpu_start ? GGML_BACKEND_CPU : GGML_BACKEND_GPU; // NOLINT
+                        const ggml_backend_type backend_split = int(i) < i_gpu_start ? GGML_BACKEND_CPU : GGML_BACKEND_GPU_SPLIT; // NOLINT
 
                         auto & layer = model.layers[i];
 
@@ -3051,15 +3051,15 @@ static void llm_load_tensors(
                         layer.ffn_norm = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, backend);
                         layer.ffn_norm_b = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_NORM, "bias", i), {n_embd}, backend);
 
-                        layer.ffn_gate = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, backend_split);
-                        layer.ffn_down = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, backend_split);
-                        layer.ffn_up = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, backend_split);
+                        layer.w1 = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, backend_split);
+                        layer.w2 = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, backend_split);
+                        layer.w3 = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, backend_split);
 
                         if (backend == GGML_BACKEND_GPU) {
                             vram_weights +=
                                 ggml_nbytes(layer.attn_norm) + ggml_nbytes(layer.wq) + ggml_nbytes(layer.wk) +
                                 ggml_nbytes(layer.wv) + ggml_nbytes(layer.wo) + ggml_nbytes(layer.ffn_norm) +
-                                ggml_nbytes(layer.ffn_gate) + ggml_nbytes(layer.ffn_down) + ggml_nbytes(layer.ffn_up);
+                                ggml_nbytes(layer.w1) + ggml_nbytes(layer.w2) + ggml_nbytes(layer.w3);
                         }
                     }
                 } break;
@@ -5943,7 +5943,7 @@ struct ggml_cgraph * build_stablelm() {
         struct ggml_tensor * cur;
         struct ggml_tensor * inpL;
 
-        inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embd, cb);
+        inpL = llm_build_inp_embd(ctx0, hparams, batch, model.tok_embeddings, cb);
         cb(inpL, "inp_embd", -1);
 
         // inp_pos - contains the positions
@@ -6076,9 +6076,9 @@ struct ggml_cgraph * build_stablelm() {
             cb(cur, "ffn_norm", il);
 
             cur = llm_build_ffn(ctx0, cur,
-                    model.layers[il].ffn_up, NULL,
-                    model.layers[il].ffn_gate, NULL,
-                    model.layers[il].ffn_down, NULL,
+                    model.layers[il].w3, NULL,
+                    model.layers[il].w1, NULL,
+                    model.layers[il].w2, NULL,
                     LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
             cb(cur, "ffn_out", il);
         }
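
For context (not part of the diff): with these arguments, llm_build_ffn with LLM_FFN_SILU and LLM_FFN_PAR builds the usual SiLU-gated feed-forward block, where in the old naming w3 is the up projection, w1 the gate, and w2 the down projection. A minimal sketch of what that call evaluates to, assuming the standard ggml ops (hypothetical helper name, error handling omitted):

#include "ggml.h"

// Sketch only: down( silu(gate(x)) * up(x) ), i.e. w2( silu(w1 x) * (w3 x) ).
static struct ggml_tensor * ffn_silu_par_sketch(
        struct ggml_context * ctx,
        struct ggml_tensor  * cur,
        struct ggml_tensor  * w1,   // gate  (upstream: ffn_gate)
        struct ggml_tensor  * w2,   // down  (upstream: ffn_down)
        struct ggml_tensor  * w3) { // up    (upstream: ffn_up)
    struct ggml_tensor * gate = ggml_silu(ctx, ggml_mul_mat(ctx, w1, cur)); // silu(gate(x))
    struct ggml_tensor * up   = ggml_mul_mat(ctx, w3, cur);                 // up(x), parallel branch (LLM_FFN_PAR)
    return ggml_mul_mat(ctx, w2, ggml_mul(ctx, gate, up));                  // down projection of the gated product
}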
