
Commit b9fdfbd

fix attn output name
1 parent: 691293b

File tree

1 file changed: +6 -7 lines

Diff for: llama.cpp (+6 -7)
@@ -1141,7 +1141,6 @@ struct llama_layer {
     struct ggml_tensor * wk;
     struct ggml_tensor * wv;
     struct ggml_tensor * wo;
-    struct ggml_tensor * wo_b; //
     struct ggml_tensor * wqkv;

     // attention bias
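
After this hunk, the attention tensors in llama_layer pair each w* weight with a b* bias under the "attention bias" comment. A minimal sketch of the resulting fields (member order and comments are assumed; the names come from this diff):

    struct ggml_tensor * wqkv; // fused Q/K/V projection weight
    struct ggml_tensor * bqkv; // fused Q/K/V projection bias
    struct ggml_tensor * wo;   // attention output projection weight
    struct ggml_tensor * bo;   // attention output projection bias, formerly wo_b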
@@ -3056,7 +3055,7 @@ static void llm_load_tensors(
         layer.wqkv = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa}, backend_split);
         layer.bqkv = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_QKV, "bias", i),   {n_embd + 2*n_embd_gqa}, backend);
         layer.wo   = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, backend_split);
-        layer.wo_b = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_OUT, "bias", i),   {n_embd}, backend);
+        layer.bo   = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_OUT, "bias", i),   {n_embd}, backend);
         layer.ffn_norm   = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, backend);
         layer.ffn_norm_b = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_NORM, "bias", i),   {n_embd}, backend);
         layer.w2 = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, backend_split);
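
For reference, tn() builds the tensor name that ml.create_tensor looks up in the model file from an LLM_TENSOR_* id, a suffix, and the layer index. Under the usual GGUF naming this call would resolve to something like the following (illustrative, assuming the standard blk.<i>.attn_output mapping):

    // illustrative, assuming the standard GGUF layer-tensor naming:
    // tn(LLM_TENSOR_ATTN_OUT, "weight", 0) -> "blk.0.attn_output.weight"
    // tn(LLM_TENSOR_ATTN_OUT, "bias",   0) -> "blk.0.attn_output.bias"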
@@ -3068,7 +3067,7 @@ static void llm_load_tensors(
         vram_weights +=
             ggml_nbytes(layer.attn_norm) + ggml_nbytes(layer.attn_norm_b) +
             ggml_nbytes(layer.wqkv)      + ggml_nbytes(layer.bqkv)        +
-            ggml_nbytes(layer.wo)        + ggml_nbytes(layer.wo_b)        +
+            ggml_nbytes(layer.wo)        + ggml_nbytes(layer.bo)          +
             ggml_nbytes(layer.ffn_norm)  + ggml_nbytes(layer.ffn_norm_b)  +
             ggml_nbytes(layer.w2)        + ggml_nbytes(layer.b2)          +
             ggml_nbytes(layer.w3)        + ggml_nbytes(layer.b3);
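
As a back-of-envelope check on what the renamed term contributes (illustrative numbers, not from this commit): with f16 storage and a hypothetical n_embd = 4096, ggml_nbytes counts 2 bytes per element, so:

    // assuming f16 tensors and n_embd = 4096 (hypothetical):
    // ggml_nbytes(layer.bo) = 4096 * 2        bytes =  8 KiB  (bias,   {n_embd})
    // ggml_nbytes(layer.wo) = 4096 * 4096 * 2 bytes = 32 MiB  (weight, {n_embd, n_embd})

The rename only changes which field is read; the accounted size is unchanged.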
@@ -3205,7 +3204,7 @@ static void llm_load_tensors(
         layer.wqkv = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa}, backend_split);
         layer.bqkv = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_QKV, "bias", i),   {n_embd + 2*n_embd_gqa}, backend);
         layer.wo   = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, backend_split);
-        layer.wo_b = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_OUT, "bias", i),   {n_embd}, backend);
+        layer.bo   = ml.create_tensor(ctx, tn(LLM_TENSOR_ATTN_OUT, "bias", i),   {n_embd}, backend);
         // layer.ffn_norm   = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, backend);
         // layer.ffn_norm_b = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_NORM, "bias", i),   {n_embd}, backend);
         layer.w2 = ml.create_tensor(ctx, tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, backend_split);
@@ -3218,7 +3217,7 @@ static void llm_load_tensors(
             ggml_nbytes(layer.attn_norm)   + ggml_nbytes(layer.attn_norm_b)   +
             ggml_nbytes(layer.attn_norm_2) + ggml_nbytes(layer.attn_norm_2_b) +
             ggml_nbytes(layer.wqkv)        + ggml_nbytes(layer.bqkv)          +
-            ggml_nbytes(layer.wo)          + ggml_nbytes(layer.wo_b)          +
+            ggml_nbytes(layer.wo)          + ggml_nbytes(layer.bo)            +
             ggml_nbytes(layer.w2)          + ggml_nbytes(layer.b2)            +
             ggml_nbytes(layer.w3)          + ggml_nbytes(layer.b3);
     }
@@ -6348,7 +6347,7 @@ static struct ggml_cgraph * llm_build_gptneox(
             cur = ggml_mul_mat(ctx0, model.layers[il].wo, cur);
             offload_func(cur);

-            cur = ggml_add(ctx0, cur, model.layers[il].wo_b);
+            cur = ggml_add(ctx0, cur, model.layers[il].bo);
             offload_func(cur);

             ggml_set_name(cur, "result_wo");
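
Both graph-builder hunks (here and in llm_build_gpt2 below) apply the same pattern: project the attention result through wo, then add the per-layer bias bo. A minimal free-standing sketch of that pattern against the ggml API (the helper name and shape comments are assumptions, not part of this commit):

    // sketch: attention output projection with bias, as in the hunks above.
    // cur is expected to be [n_embd, n_tokens]; wo is [n_embd, n_embd]; bo is [n_embd].
    static struct ggml_tensor * attn_out_proj(
            struct ggml_context * ctx0,
            struct ggml_tensor  * cur,
            struct ggml_tensor  * wo,
            struct ggml_tensor  * bo) {
        cur = ggml_mul_mat(ctx0, wo, cur); // output projection
        cur = ggml_add(ctx0, cur, bo);     // bias, broadcast across tokens
        ggml_set_name(cur, "result_wo");
        return cur;
    }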
@@ -6995,7 +6994,7 @@ static struct ggml_cgraph * llm_build_gpt2(
             cur = ggml_mul_mat(ctx0, model.layers[il].wo, cur);
             offload_func(cur);

-            cur = ggml_add(ctx0, cur, model.layers[il].wo_b);
+            cur = ggml_add(ctx0, cur, model.layers[il].bo);
             offload_func(cur);

             ggml_set_name(cur, "result_wo");
