
Commit b6dcc6e

jukofyork authored and NeoZhangJianyu committed
fixes ggml-org#7999: adds control vectors to all build_XXX() functions in llama.cpp [needs testing] (ggml-org#8060)

* fixes ggml-org#7999: `build_command_r` forgot to add the control vector
* Fixes qwen2 too
* Fixed all models' control vectors
* Removed double calls to `cb(cur, "l_out", il)`
* Moved control vector logic to `llama_control_vector::apply_to()`
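In effect, every per-layer loop in the build_XXX() graph builders now ends with the same epilogue: add the residual, apply the per-layer control vector (a no-op when none is loaded), emit the "l_out" callback exactly once, and feed the result forward. A minimal sketch of that repeated pattern, using the names exactly as they appear in the diff below:

    // end of one transformer layer inside a build_XXX() loop
    cur = ggml_add(ctx0, cur, ffn_inp);       // residual add
    cur = lctx.cvec.apply_to(ctx0, cur, il);  // control vector; no-op if none loaded
    cb(cur, "l_out", il);                     // single layer-output callback

    // input for next layer
    inpL = cur;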
1 parent 5c2ac2f commit b6dcc6e

File tree

1 file changed: +73 −39 lines

llama.cpp

Lines changed: 73 additions & 39 deletions
@@ -2368,13 +2368,21 @@ struct llama_control_vector {
     int32_t layer_start = -1;
     int32_t layer_end   = -1;
 
-    ggml_tensor * tensor_for(int il) const {
+    struct ggml_tensor * tensor_for(int il) const {
         if (il < 0 || il < layer_start || il > layer_end || (size_t) il >= tensors.size()) {
             return nullptr;
         }
         return tensors[il];
     }
 
+    struct ggml_tensor * apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const {
+        ggml_tensor * layer_dir = tensor_for(il);
+        if (layer_dir != nullptr) {
+            cur = ggml_add(ctx, cur, layer_dir);
+        }
+        return cur;
+    }
+
     ~llama_control_vector() {
         for (struct ggml_context * ctx : ctxs) {
             ggml_free(ctx);
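The guard now lives inside apply_to(), so every call site can invoke it unconditionally: tensor_for() returns nullptr outside [layer_start, layer_end] or when no tensor was loaded for layer il, and apply_to() then returns cur unchanged. A standalone toy illustration of the same guard-then-add pattern, using plain std::vector<float> in place of ggml tensors (hypothetical types for illustration, not the llama.cpp API):

    #include <cstddef>
    #include <vector>

    // Toy stand-in for llama_control_vector: one (possibly empty) direction per layer.
    struct toy_control_vector {
        std::vector<std::vector<float>> dirs;
        int layer_start = -1;
        int layer_end   = -1;

        const std::vector<float> * tensor_for(int il) const {
            if (il < 0 || il < layer_start || il > layer_end ||
                (size_t) il >= dirs.size() || dirs[il].empty()) {
                return nullptr;
            }
            return &dirs[il];
        }

        // Mirrors apply_to(): always safe to call, a no-op when no direction exists.
        void apply_to(std::vector<float> & cur, int il) const {
            const std::vector<float> * dir = tensor_for(il);
            if (dir == nullptr) {
                return;
            }
            for (size_t i = 0; i < cur.size() && i < dir->size(); ++i) {
                cur[i] += (*dir)[i];
            }
        }
    };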
@@ -8024,10 +8032,7 @@ struct llm_build_context {
             cur = ggml_add(ctx0, cur, ffn_inp);
             cb(cur, "ffn_out", il);
 
-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -8142,6 +8147,7 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -8246,6 +8252,7 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -8361,9 +8368,8 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_inp);
-            cb(cur, "l_out", il);
-
             cur = ggml_add(ctx0, cur, inpL);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -8515,10 +8521,7 @@
             cur = ggml_add(ctx0, cur, ffn_inp);
             cb(cur, "ffn_out", il);
 
-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -8649,10 +8652,7 @@
             cur = ggml_add(ctx0, cur, ffn_inp);
             cb(cur, "ffn_out", il);
 
-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -8758,8 +8758,12 @@
                 cb(cur, "ffn_out", il);
             }
 
-            inpL = ggml_add(ctx0, cur, ffn_inp);
-            cb(inpL, "l_out", il);
+            cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
         }
 
         cur = llm_build_norm(ctx0, inpL, hparams,
@@ -8847,6 +8851,7 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -9142,8 +9147,12 @@
                 cb(cur, "ffn_out", il);
             }
 
-            inpL = ggml_add(ctx0, cur, ffn_inp);
-            cb(inpL, "l_out", il);
+            cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
         }
 
         cur = llm_build_norm(ctx0, inpL, hparams,
@@ -9277,6 +9286,7 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -9425,6 +9435,7 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -9537,6 +9548,7 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -9648,6 +9660,7 @@
             cb(cur, "ffn_out", il);
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -9793,6 +9806,7 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -9913,11 +9927,11 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_output);
-            cb(cur, "l_out", il);
-
             cur = ggml_add(ctx0, cur, inpL);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
+            // input for next layer
             inpL = cur;
         }
 
@@ -10049,8 +10063,10 @@
             }
 
             cur = ggml_add(ctx0, residual, cur);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
+            // input for next layer
             inpL = cur;
         }
 
@@ -10149,9 +10165,8 @@
             }
 
             cur = ggml_add(ctx0, cur, sa_out);
-            cb(cur, "l_out", il);
-
             cur = ggml_add(ctx0, cur, inpL);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -10257,8 +10272,12 @@
                 cb(cur, "ffn_out", il);
             }
 
-            inpL = ggml_add(ctx0, cur, ffn_inp);
-            cb(inpL, "l_out", il);
+            cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
         }
 
         cur = llm_build_norm(ctx0, inpL, hparams,
@@ -10364,8 +10383,12 @@
                 cb(cur, "ffn_out", il);
             }
 
-            inpL = ggml_add(ctx0, cur, ffn_inp);
-            cb(inpL, "l_out", il);
+            cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
         }
 
         cur = llm_build_norm(ctx0, inpL, hparams,
@@ -10477,6 +10500,7 @@
             cb(cur, "ffn_out", il);
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -10594,6 +10618,7 @@
             cb(cur, "ffn_out", il);
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -10735,6 +10760,7 @@
             cb(cur, "hidden_scaled_ffn", -1);
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -10847,6 +10873,7 @@
             }
 
             cur = ggml_add(ctx0, cur, sa_out);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -10963,7 +10990,9 @@
                     NULL,
                     LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
             cb(cur, "ffn_out", il);
+
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -11112,6 +11141,7 @@
 
             // residual
             cur = ggml_add(ctx0, cur, inpL);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -11253,6 +11283,7 @@
             // add together residual + FFN + self-attention
             cur = ggml_add(ctx0, cur, inpL);
             cur = ggml_add(ctx0, cur, attn_out);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -11388,10 +11419,7 @@
             cur = ggml_add(ctx0, cur, ffn_inp);
             cb(cur, "ffn_out", il);
 
-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -11505,8 +11533,12 @@
                 cur = ggml_add(ctx0, cur, inpL);
                 cb(cur, "ffn_out", il);
 
-                inpL = ggml_add(ctx0, cur, attn_out);
-                cb(inpL, "l_out", il);
+                cur = ggml_add(ctx0, cur, attn_out);
+                cur = lctx.cvec.apply_to(ctx0, cur, il);
+                cb(cur, "l_out", il);
+
+                // input for next layer
+                inpL = cur;
             } else {
                 // attention and ffn are computed sequentially
                 // x = x + attn(ln1(x))
@@ -11529,8 +11561,12 @@
                     LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
                 cb(cur, "ffn_out", il);
 
-                inpL = ggml_add(ctx0, cur, ffn_inp);
-                cb(inpL, "l_out", il);
+                cur = ggml_add(ctx0, cur, ffn_inp);
+                cur = lctx.cvec.apply_to(ctx0, cur, il);
+                cb(cur, "l_out", il);
+
+                // input for next layer
+                inpL = cur;
             }
         }
 
@@ -11657,10 +11693,7 @@
             cur = ggml_add(ctx0, cur, ffn_out);
             cb(cur, "ffn_out", il);
 
-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
@@ -11893,6 +11926,7 @@
             }
 
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);
 
             // input for next layer
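For reference, the lctx.cvec state these call sites read is populated through the public C API. A hedged sketch of loading a control vector into a context (the llama_control_vector_apply() declaration matches llama.h around this commit, but verify the exact signature against your checkout; per its header comment, data points to an n_embd x n_layers buffer starting from layer 1):

    #include "llama.h"
    #include <vector>

    // Load one n_embd-sized direction per layer into the context so that
    // llama_control_vector::apply_to() picks them up during graph build.
    static bool set_control_vector(struct llama_context * lctx,
                                   const std::vector<float> & data,
                                   int32_t n_embd, int32_t il_start, int32_t il_end) {
        // returns 0 on success
        return llama_control_vector_apply(lctx, data.data(), data.size(),
                                          n_embd, il_start, il_end) == 0;
    }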
