@@ -2368,13 +2368,21 @@ struct llama_control_vector {
2368
2368
int32_t layer_start = -1;
2369
2369
int32_t layer_end = -1;
2370
2370
2371
// Look up the control-vector tensor for layer `il`.
// Returns nullptr when `il` is negative, lies outside [layer_start, layer_end],
// or is not a valid index into `tensors`; otherwise returns tensors[il].
// The removed/added pair below only changes how the return type is spelled
// (`ggml_tensor *` -> `struct ggml_tensor *`).
- ggml_tensor * tensor_for(int il) const {
2371
+ struct ggml_tensor * tensor_for(int il) const {
2372
2372
// While layer_end is still at its initializer value of -1, `il > layer_end` holds
// for every non-negative il, so this check rejects all layers.
if (il < 0 || il < layer_start || il > layer_end || (size_t) il >= tensors.size()) {
2373
2373
return nullptr;
2374
2374
}
2375
2375
return tensors[il];
2376
2376
}
2377
2377
2378
// Apply this control vector to the activations of layer `il`.
//   ctx - ggml context passed through to ggml_add
//   cur - current tensor for the layer
//   il  - layer index, resolved through tensor_for()
// Returns the result of ggml_add(ctx, cur, <layer tensor>) when tensor_for(il)
// yields a tensor, and `cur` unchanged when it yields nullptr.
// NOTE(review): presumably ggml_add only records an add node in the graph owned by
// `ctx` rather than computing immediately - confirm against ggml.h.
+ struct ggml_tensor * apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const {
2379
+ ggml_tensor * layer_dir = tensor_for(il);
2380
// nullptr means no tensor applies to this layer; fall through and return cur as-is.
+ if (layer_dir != nullptr) {
2381
+ cur = ggml_add(ctx, cur, layer_dir);
2382
+ }
2383
+ return cur;
2384
+ }
2385
+
2378
2386
~llama_control_vector() {
2379
2387
for (struct ggml_context * ctx : ctxs) {
2380
2388
ggml_free(ctx);
@@ -8026,10 +8034,7 @@ struct llm_build_context {
8026
8034
cur = ggml_add(ctx0, cur, ffn_inp);
8027
8035
cb(cur, "ffn_out", il);
8028
8036
8029
- ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
8030
- if (layer_dir != nullptr) {
8031
- cur = ggml_add(ctx0, cur, layer_dir);
8032
- }
8037
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
8033
8038
cb(cur, "l_out", il);
8034
8039
8035
8040
// input for next layer
@@ -8144,6 +8149,7 @@ struct llm_build_context {
8144
8149
}
8145
8150
8146
8151
cur = ggml_add(ctx0, cur, ffn_inp);
8152
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
8147
8153
cb(cur, "l_out", il);
8148
8154
8149
8155
// input for next layer
@@ -8248,6 +8254,7 @@ struct llm_build_context {
8248
8254
}
8249
8255
8250
8256
cur = ggml_add(ctx0, cur, ffn_inp);
8257
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
8251
8258
cb(cur, "l_out", il);
8252
8259
8253
8260
// input for next layer
@@ -8363,9 +8370,8 @@ struct llm_build_context {
8363
8370
}
8364
8371
8365
8372
cur = ggml_add(ctx0, cur, ffn_inp);
8366
- cb(cur, "l_out", il);
8367
-
8368
8373
cur = ggml_add(ctx0, cur, inpL);
8374
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
8369
8375
cb(cur, "l_out", il);
8370
8376
8371
8377
// input for next layer
@@ -8517,10 +8523,7 @@ struct llm_build_context {
8517
8523
cur = ggml_add(ctx0, cur, ffn_inp);
8518
8524
cb(cur, "ffn_out", il);
8519
8525
8520
- ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
8521
- if (layer_dir != nullptr) {
8522
- cur = ggml_add(ctx0, cur, layer_dir);
8523
- }
8526
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
8524
8527
cb(cur, "l_out", il);
8525
8528
8526
8529
// input for next layer
@@ -8651,10 +8654,7 @@ struct llm_build_context {
8651
8654
cur = ggml_add(ctx0, cur, ffn_inp);
8652
8655
cb(cur, "ffn_out", il);
8653
8656
8654
- ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
8655
- if (layer_dir != nullptr) {
8656
- cur = ggml_add(ctx0, cur, layer_dir);
8657
- }
8657
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
8658
8658
cb(cur, "l_out", il);
8659
8659
8660
8660
// input for next layer
@@ -8760,8 +8760,12 @@ struct llm_build_context {
8760
8760
cb(cur, "ffn_out", il);
8761
8761
}
8762
8762
8763
- inpL = ggml_add(ctx0, cur, ffn_inp);
8764
- cb(inpL, "l_out", il);
8763
+ cur = ggml_add(ctx0, cur, ffn_inp);
8764
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
8765
+ cb(cur, "l_out", il);
8766
+
8767
+ // input for next layer
8768
+ inpL = cur;
8765
8769
}
8766
8770
8767
8771
cur = llm_build_norm(ctx0, inpL, hparams,
@@ -8849,6 +8853,7 @@ struct llm_build_context {
8849
8853
}
8850
8854
8851
8855
cur = ggml_add(ctx0, cur, ffn_inp);
8856
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
8852
8857
cb(cur, "l_out", il);
8853
8858
8854
8859
// input for next layer
@@ -9144,8 +9149,12 @@ struct llm_build_context {
9144
9149
cb(cur, "ffn_out", il);
9145
9150
}
9146
9151
9147
- inpL = ggml_add(ctx0, cur, ffn_inp);
9148
- cb(inpL, "l_out", il);
9152
+ cur = ggml_add(ctx0, cur, ffn_inp);
9153
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
9154
+ cb(cur, "l_out", il);
9155
+
9156
+ // input for next layer
9157
+ inpL = cur;
9149
9158
}
9150
9159
9151
9160
cur = llm_build_norm(ctx0, inpL, hparams,
@@ -9279,6 +9288,7 @@ struct llm_build_context {
9279
9288
}
9280
9289
9281
9290
cur = ggml_add(ctx0, cur, ffn_inp);
9291
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
9282
9292
cb(cur, "l_out", il);
9283
9293
9284
9294
// input for next layer
@@ -9427,6 +9437,7 @@ struct llm_build_context {
9427
9437
}
9428
9438
9429
9439
cur = ggml_add(ctx0, cur, ffn_inp);
9440
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
9430
9441
cb(cur, "l_out", il);
9431
9442
9432
9443
// input for next layer
@@ -9539,6 +9550,7 @@ struct llm_build_context {
9539
9550
}
9540
9551
9541
9552
cur = ggml_add(ctx0, cur, ffn_inp);
9553
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
9542
9554
cb(cur, "l_out", il);
9543
9555
9544
9556
// input for next layer
@@ -9650,6 +9662,7 @@ struct llm_build_context {
9650
9662
cb(cur, "ffn_out", il);
9651
9663
9652
9664
cur = ggml_add(ctx0, cur, ffn_inp);
9665
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
9653
9666
cb(cur, "l_out", il);
9654
9667
9655
9668
// input for next layer
@@ -9795,6 +9808,7 @@ struct llm_build_context {
9795
9808
}
9796
9809
9797
9810
cur = ggml_add(ctx0, cur, ffn_inp);
9811
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
9798
9812
cb(cur, "l_out", il);
9799
9813
9800
9814
// input for next layer
@@ -9915,11 +9929,11 @@ struct llm_build_context {
9915
9929
}
9916
9930
9917
9931
cur = ggml_add(ctx0, cur, ffn_output);
9918
- cb(cur, "l_out", il);
9919
-
9920
9932
cur = ggml_add(ctx0, cur, inpL);
9933
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
9921
9934
cb(cur, "l_out", il);
9922
9935
9936
+ // input for next layer
9923
9937
inpL = cur;
9924
9938
}
9925
9939
@@ -10051,8 +10065,10 @@ struct llm_build_context {
10051
10065
}
10052
10066
10053
10067
cur = ggml_add(ctx0, residual, cur);
10068
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
10054
10069
cb(cur, "l_out", il);
10055
10070
10071
+ // input for next layer
10056
10072
inpL = cur;
10057
10073
}
10058
10074
@@ -10151,9 +10167,8 @@ struct llm_build_context {
10151
10167
}
10152
10168
10153
10169
cur = ggml_add(ctx0, cur, sa_out);
10154
- cb(cur, "l_out", il);
10155
-
10156
10170
cur = ggml_add(ctx0, cur, inpL);
10171
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
10157
10172
cb(cur, "l_out", il);
10158
10173
10159
10174
// input for next layer
@@ -10259,8 +10274,12 @@ struct llm_build_context {
10259
10274
cb(cur, "ffn_out", il);
10260
10275
}
10261
10276
10262
- inpL = ggml_add(ctx0, cur, ffn_inp);
10263
- cb(inpL, "l_out", il);
10277
+ cur = ggml_add(ctx0, cur, ffn_inp);
10278
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
10279
+ cb(cur, "l_out", il);
10280
+
10281
+ // input for next layer
10282
+ inpL = cur;
10264
10283
}
10265
10284
10266
10285
cur = llm_build_norm(ctx0, inpL, hparams,
@@ -10366,8 +10385,12 @@ struct llm_build_context {
10366
10385
cb(cur, "ffn_out", il);
10367
10386
}
10368
10387
10369
- inpL = ggml_add(ctx0, cur, ffn_inp);
10370
- cb(inpL, "l_out", il);
10388
+ cur = ggml_add(ctx0, cur, ffn_inp);
10389
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
10390
+ cb(cur, "l_out", il);
10391
+
10392
+ // input for next layer
10393
+ inpL = cur;
10371
10394
}
10372
10395
10373
10396
cur = llm_build_norm(ctx0, inpL, hparams,
@@ -10479,6 +10502,7 @@ struct llm_build_context {
10479
10502
cb(cur, "ffn_out", il);
10480
10503
10481
10504
cur = ggml_add(ctx0, cur, ffn_inp);
10505
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
10482
10506
cb(cur, "l_out", il);
10483
10507
10484
10508
// input for next layer
@@ -10596,6 +10620,7 @@ struct llm_build_context {
10596
10620
cb(cur, "ffn_out", il);
10597
10621
10598
10622
cur = ggml_add(ctx0, cur, ffn_inp);
10623
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
10599
10624
cb(cur, "l_out", il);
10600
10625
10601
10626
// input for next layer
@@ -10737,6 +10762,7 @@ struct llm_build_context {
10737
10762
cb(cur, "hidden_scaled_ffn", -1);
10738
10763
10739
10764
cur = ggml_add(ctx0, cur, ffn_inp);
10765
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
10740
10766
cb(cur, "l_out", il);
10741
10767
10742
10768
// input for next layer
@@ -10849,6 +10875,7 @@ struct llm_build_context {
10849
10875
}
10850
10876
10851
10877
cur = ggml_add(ctx0, cur, sa_out);
10878
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
10852
10879
cb(cur, "l_out", il);
10853
10880
10854
10881
// input for next layer
@@ -10965,7 +10992,9 @@ struct llm_build_context {
10965
10992
NULL,
10966
10993
LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
10967
10994
cb(cur, "ffn_out", il);
10995
+
10968
10996
cur = ggml_add(ctx0, cur, ffn_inp);
10997
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
10969
10998
cb(cur, "l_out", il);
10970
10999
10971
11000
// input for next layer
@@ -11114,6 +11143,7 @@ struct llm_build_context {
11114
11143
11115
11144
// residual
11116
11145
cur = ggml_add(ctx0, cur, inpL);
11146
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
11117
11147
cb(cur, "l_out", il);
11118
11148
11119
11149
// input for next layer
@@ -11255,6 +11285,7 @@ struct llm_build_context {
11255
11285
// add together residual + FFN + self-attention
11256
11286
cur = ggml_add(ctx0, cur, inpL);
11257
11287
cur = ggml_add(ctx0, cur, attn_out);
11288
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
11258
11289
cb(cur, "l_out", il);
11259
11290
11260
11291
// input for next layer
@@ -11390,10 +11421,7 @@ struct llm_build_context {
11390
11421
cur = ggml_add(ctx0, cur, ffn_inp);
11391
11422
cb(cur, "ffn_out", il);
11392
11423
11393
- ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
11394
- if (layer_dir != nullptr) {
11395
- cur = ggml_add(ctx0, cur, layer_dir);
11396
- }
11424
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
11397
11425
cb(cur, "l_out", il);
11398
11426
11399
11427
// input for next layer
@@ -11507,8 +11535,12 @@ struct llm_build_context {
11507
11535
cur = ggml_add(ctx0, cur, inpL);
11508
11536
cb(cur, "ffn_out", il);
11509
11537
11510
- inpL = ggml_add(ctx0, cur, attn_out);
11511
- cb(inpL, "l_out", il);
11538
+ cur = ggml_add(ctx0, cur, attn_out);
11539
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
11540
+ cb(cur, "l_out", il);
11541
+
11542
+ // input for next layer
11543
+ inpL = cur;
11512
11544
} else {
11513
11545
// attention and ffn are computed sequentially
11514
11546
// x = x + attn(ln1(x))
@@ -11531,8 +11563,12 @@ struct llm_build_context {
11531
11563
LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
11532
11564
cb(cur, "ffn_out", il);
11533
11565
11534
- inpL = ggml_add(ctx0, cur, ffn_inp);
11535
- cb(inpL, "l_out", il);
11566
+ cur = ggml_add(ctx0, cur, ffn_inp);
11567
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
11568
+ cb(cur, "l_out", il);
11569
+
11570
+ // input for next layer
11571
+ inpL = cur;
11536
11572
}
11537
11573
}
11538
11574
@@ -11659,10 +11695,7 @@ struct llm_build_context {
11659
11695
cur = ggml_add(ctx0, cur, ffn_out);
11660
11696
cb(cur, "ffn_out", il);
11661
11697
11662
- ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
11663
- if (layer_dir != nullptr) {
11664
- cur = ggml_add(ctx0, cur, layer_dir);
11665
- }
11698
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
11666
11699
cb(cur, "l_out", il);
11667
11700
11668
11701
// input for next layer
@@ -11895,6 +11928,7 @@ struct llm_build_context {
11895
11928
}
11896
11929
11897
11930
cur = ggml_add(ctx0, cur, ffn_inp);
11931
+ cur = lctx.cvec.apply_to(ctx0, cur, il);
11898
11932
cb(cur, "l_out", il);
11899
11933
11900
11934
// input for next layer
0 commit comments