Commit 753b1b5

jukofyork authored and MagnusS0 committed
fixes ggml-org#7999 (adds control vectors to all build_XXX() functions in llama.cpp [needs testing]) (ggml-org#8060)

* fixes ggml-org#7999: `build_command_r` forgot to add the control vector.
* Fixes qwen2 too
* Fixed all models' control vectors
* Removed double calls to `cb(cur, "l_out", il)`
* Moved control vector logic to `llama_control_vector::apply_to()`
1 parent e1fdf35 commit 753b1b5
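
In short: before this commit, only the build_XXX() graphs that hand-rolled the four-line control-vector block (the hunks below that delete it) actually applied control vectors; every other architecture, including command_r and qwen2, silently ignored them. The fix factors the block into llama_control_vector::apply_to() and calls it from every layer loop, right before the "l_out" callback. A minimal sketch of the before/after pattern, distilled from the diff below (ctx0, lctx, cur, cb and il are the usual llm_build_context locals):

    // before: hand-rolled in a few build_XXX() functions, missing from the rest
    ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
    if (layer_dir != nullptr) {
        cur = ggml_add(ctx0, cur, layer_dir);
    }
    cb(cur, "l_out", il);

    // after: one call per build_XXX(); a no-op whenever tensor_for() returns
    // nullptr, i.e. outside the configured [layer_start, layer_end] range
    cur = lctx.cvec.apply_to(ctx0, cur, il);
    cb(cur, "l_out", il);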

File tree: 1 file changed (+73, -39 lines)

llama.cpp

Lines changed: 73 additions & 39 deletions
@@ -2368,13 +2368,21 @@ struct llama_control_vector {
     int32_t layer_start = -1;
     int32_t layer_end   = -1;

-    ggml_tensor * tensor_for(int il) const {
+    struct ggml_tensor * tensor_for(int il) const {
         if (il < 0 || il < layer_start || il > layer_end || (size_t) il >= tensors.size()) {
             return nullptr;
         }
         return tensors[il];
     }

+    struct ggml_tensor * apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const {
+        ggml_tensor * layer_dir = tensor_for(il);
+        if (layer_dir != nullptr) {
+            cur = ggml_add(ctx, cur, layer_dir);
+        }
+        return cur;
+    }
+
     ~llama_control_vector() {
         for (struct ggml_context * ctx : ctxs) {
             ggml_free(ctx);
@@ -8026,10 +8034,7 @@ struct llm_build_context {
             cur = ggml_add(ctx0, cur, ffn_inp);
             cb(cur, "ffn_out", il);

-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -8144,6 +8149,7 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -8248,6 +8254,7 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -8363,9 +8370,8 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_inp);
-            cb(cur, "l_out", il);
-
             cur = ggml_add(ctx0, cur, inpL);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -8517,10 +8523,7 @@ struct llm_build_context {
             cur = ggml_add(ctx0, cur, ffn_inp);
             cb(cur, "ffn_out", il);

-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -8651,10 +8654,7 @@ struct llm_build_context {
             cur = ggml_add(ctx0, cur, ffn_inp);
             cb(cur, "ffn_out", il);

-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -8760,8 +8760,12 @@ struct llm_build_context {
                 cb(cur, "ffn_out", il);
             }

-            inpL = ggml_add(ctx0, cur, ffn_inp);
-            cb(inpL, "l_out", il);
+            cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
         }

         cur = llm_build_norm(ctx0, inpL, hparams,
@@ -8849,6 +8853,7 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -9144,8 +9149,12 @@ struct llm_build_context {
                 cb(cur, "ffn_out", il);
             }

-            inpL = ggml_add(ctx0, cur, ffn_inp);
-            cb(inpL, "l_out", il);
+            cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
         }

         cur = llm_build_norm(ctx0, inpL, hparams,
@@ -9279,6 +9288,7 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -9427,6 +9437,7 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -9539,6 +9550,7 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -9650,6 +9662,7 @@ struct llm_build_context {
             cb(cur, "ffn_out", il);

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -9795,6 +9808,7 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -9915,11 +9929,11 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_output);
-            cb(cur, "l_out", il);
-
             cur = ggml_add(ctx0, cur, inpL);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

+            // input for next layer
             inpL = cur;
         }

@@ -10051,8 +10065,10 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, residual, cur);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

+            // input for next layer
             inpL = cur;
         }

@@ -10151,9 +10167,8 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, sa_out);
-            cb(cur, "l_out", il);
-
             cur = ggml_add(ctx0, cur, inpL);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -10259,8 +10274,12 @@ struct llm_build_context {
                 cb(cur, "ffn_out", il);
             }

-            inpL = ggml_add(ctx0, cur, ffn_inp);
-            cb(inpL, "l_out", il);
+            cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
         }

         cur = llm_build_norm(ctx0, inpL, hparams,
@@ -10366,8 +10385,12 @@ struct llm_build_context {
                 cb(cur, "ffn_out", il);
             }

-            inpL = ggml_add(ctx0, cur, ffn_inp);
-            cb(inpL, "l_out", il);
+            cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
         }

         cur = llm_build_norm(ctx0, inpL, hparams,
@@ -10479,6 +10502,7 @@ struct llm_build_context {
             cb(cur, "ffn_out", il);

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -10596,6 +10620,7 @@ struct llm_build_context {
             cb(cur, "ffn_out", il);

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -10737,6 +10762,7 @@ struct llm_build_context {
             cb(cur, "hidden_scaled_ffn", -1);

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -10849,6 +10875,7 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, sa_out);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -10965,7 +10992,9 @@ struct llm_build_context {
                     NULL,
                     LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
             cb(cur, "ffn_out", il);
+
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -11114,6 +11143,7 @@ struct llm_build_context {

             // residual
             cur = ggml_add(ctx0, cur, inpL);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -11255,6 +11285,7 @@ struct llm_build_context {
             // add together residual + FFN + self-attention
             cur = ggml_add(ctx0, cur, inpL);
             cur = ggml_add(ctx0, cur, attn_out);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -11390,10 +11421,7 @@ struct llm_build_context {
             cur = ggml_add(ctx0, cur, ffn_inp);
             cb(cur, "ffn_out", il);

-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -11507,8 +11535,12 @@ struct llm_build_context {
                 cur = ggml_add(ctx0, cur, inpL);
                 cb(cur, "ffn_out", il);

-                inpL = ggml_add(ctx0, cur, attn_out);
-                cb(inpL, "l_out", il);
+                cur = ggml_add(ctx0, cur, attn_out);
+                cur = lctx.cvec.apply_to(ctx0, cur, il);
+                cb(cur, "l_out", il);
+
+                // input for next layer
+                inpL = cur;
             } else {
                 // attention and ffn are computed sequentially
                 // x = x + attn(ln1(x))
@@ -11531,8 +11563,12 @@ struct llm_build_context {
                     LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
                 cb(cur, "ffn_out", il);

-                inpL = ggml_add(ctx0, cur, ffn_inp);
-                cb(inpL, "l_out", il);
+                cur = ggml_add(ctx0, cur, ffn_inp);
+                cur = lctx.cvec.apply_to(ctx0, cur, il);
+                cb(cur, "l_out", il);
+
+                // input for next layer
+                inpL = cur;
             }
         }

@@ -11659,10 +11695,7 @@ struct llm_build_context {
             cur = ggml_add(ctx0, cur, ffn_out);
             cb(cur, "ffn_out", il);

-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -11895,6 +11928,7 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer

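Not part of the commit, but given the [needs testing] caveat: a rough smoke test would load an affected model (e.g. qwen2 or command_r), push a control vector through the public C API, and check that generation now changes. A minimal sketch against the llama.h API of this era; llama_control_vector_apply() and the loading helpers exist at this point, but treat the exact signatures as assumptions and check llama.h:

    #include "llama.h"
    #include <cstdio>
    #include <vector>

    int main(int argc, char ** argv) {
        if (argc < 2) {
            fprintf(stderr, "usage: %s model.gguf\n", argv[0]);
            return 1;
        }

        llama_backend_init();

        llama_model * model = llama_load_model_from_file(argv[1], llama_model_default_params());
        llama_context * ctx = llama_new_context_with_model(model, llama_context_default_params());

        // one direction of n_embd floats per layer, layers concatenated;
        // all-zero data must leave generation unchanged, while a nonzero
        // direction should now steer every architecture, not just the few
        // that previously hand-rolled the addition
        const int32_t n_embd  = llama_n_embd(model);
        const int32_t n_layer = llama_n_layer(model);
        std::vector<float> data((size_t) n_embd * n_layer, 0.0f);

        // apply to layers [1, n_layer]; with this commit the ggml_add lands in
        // every build_XXX() layer loop via llama_control_vector::apply_to()
        llama_control_vector_apply(ctx, data.data(), data.size(), n_embd, 1, n_layer);

        // ... run llama_decode() / sampling as usual and compare outputs ...

        llama_free(ctx);
        llama_free_model(model);
        llama_backend_free();
        return 0;
    }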