@@ -2368,13 +2368,21 @@ struct llama_control_vector {
     int32_t layer_start = -1;
     int32_t layer_end   = -1;

-    ggml_tensor * tensor_for(int il) const {
+    struct ggml_tensor * tensor_for(int il) const {
         if (il < 0 || il < layer_start || il > layer_end || (size_t) il >= tensors.size()) {
             return nullptr;
         }
         return tensors[il];
     }

+    struct ggml_tensor * apply_to(struct ggml_context * ctx, struct ggml_tensor * cur, int il) const {
+        ggml_tensor * layer_dir = tensor_for(il);
+        if (layer_dir != nullptr) {
+            cur = ggml_add(ctx, cur, layer_dir);
+        }
+        return cur;
+    }
+
     ~llama_control_vector() {
         for (struct ggml_context * ctx : ctxs) {
             ggml_free(ctx);
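For orientation between the hunks: a minimal sketch of how the new apply_to helper is meant to be used at the call sites below. The loop skeleton and n_layer are illustrative assumptions; lctx, ctx0, cur, cb, and il follow the call sites in this diff.

    // Illustrative sketch, not part of the commit: fold the per-layer control
    // vector into the residual stream at the end of each transformer layer.
    // apply_to() is a no-op outside [layer_start, layer_end]: tensor_for()
    // returns nullptr there, so no extra ggml_add node is created.
    for (int il = 0; il < n_layer; ++il) {
        // ... attention + FFN produce `cur` for this layer ...
        cur = ggml_add(ctx0, cur, ffn_inp);      // residual connection
        cur = lctx.cvec.apply_to(ctx0, cur, il); // add control vector, if any
        cb(cur, "l_out", il);

        // input for next layer
        inpL = cur;
    }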
@@ -8024,10 +8032,7 @@ struct llm_build_context {
             cur = ggml_add(ctx0, cur, ffn_inp);
             cb(cur, "ffn_out", il);

-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -8142,6 +8147,7 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -8246,6 +8252,7 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -8361,9 +8368,8 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_inp);
-            cb(cur, "l_out", il);
-
             cur = ggml_add(ctx0, cur, inpL);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -8515,10 +8521,7 @@ struct llm_build_context {
             cur = ggml_add(ctx0, cur, ffn_inp);
             cb(cur, "ffn_out", il);

-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -8649,10 +8652,7 @@ struct llm_build_context {
             cur = ggml_add(ctx0, cur, ffn_inp);
             cb(cur, "ffn_out", il);

-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -8758,8 +8758,12 @@ struct llm_build_context {
                 cb(cur, "ffn_out", il);
             }

-            inpL = ggml_add(ctx0, cur, ffn_inp);
-            cb(inpL, "l_out", il);
+            cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
         }

         cur = llm_build_norm(ctx0, inpL, hparams,
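The hunk above also shows the second pattern in this commit: call sites that previously assigned inpL directly now build the layer output in cur, so the control vector is added before the l_out callback observes the tensor. Condensed, under the same assumptions as the sketch above:

    // before: the layer output bypassed the control vector entirely
    inpL = ggml_add(ctx0, cur, ffn_inp);
    cb(inpL, "l_out", il);

    // after: apply the control vector first, then report "l_out"
    cur = ggml_add(ctx0, cur, ffn_inp);
    cur = lctx.cvec.apply_to(ctx0, cur, il);
    cb(cur, "l_out", il);

    // input for next layer
    inpL = cur;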
@@ -8847,6 +8851,7 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -9142,8 +9147,12 @@ struct llm_build_context {
                 cb(cur, "ffn_out", il);
             }

-            inpL = ggml_add(ctx0, cur, ffn_inp);
-            cb(inpL, "l_out", il);
+            cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
         }

         cur = llm_build_norm(ctx0, inpL, hparams,
@@ -9277,6 +9286,7 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -9425,6 +9435,7 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -9537,6 +9548,7 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -9648,6 +9660,7 @@ struct llm_build_context {
             cb(cur, "ffn_out", il);

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -9793,6 +9806,7 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -9913,11 +9927,11 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_output);
-            cb(cur, "l_out", il);
-
             cur = ggml_add(ctx0, cur, inpL);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

+            // input for next layer
             inpL = cur;
         }

@@ -10049,8 +10063,10 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, residual, cur);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

+            // input for next layer
             inpL = cur;
         }

@@ -10149,9 +10165,8 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, sa_out);
-            cb(cur, "l_out", il);
-
             cur = ggml_add(ctx0, cur, inpL);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -10257,8 +10272,12 @@ struct llm_build_context {
                 cb(cur, "ffn_out", il);
             }

-            inpL = ggml_add(ctx0, cur, ffn_inp);
-            cb(inpL, "l_out", il);
+            cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
         }

         cur = llm_build_norm(ctx0, inpL, hparams,
@@ -10364,8 +10383,12 @@ struct llm_build_context {
                 cb(cur, "ffn_out", il);
             }

-            inpL = ggml_add(ctx0, cur, ffn_inp);
-            cb(inpL, "l_out", il);
+            cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
+            cb(cur, "l_out", il);
+
+            // input for next layer
+            inpL = cur;
         }

         cur = llm_build_norm(ctx0, inpL, hparams,
@@ -10477,6 +10500,7 @@ struct llm_build_context {
             cb(cur, "ffn_out", il);

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -10594,6 +10618,7 @@ struct llm_build_context {
             cb(cur, "ffn_out", il);

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -10735,6 +10760,7 @@ struct llm_build_context {
             cb(cur, "hidden_scaled_ffn", -1);

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -10847,6 +10873,7 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, sa_out);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -10963,7 +10990,9 @@ struct llm_build_context {
                     NULL,
                     LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
             cb(cur, "ffn_out", il);
+
             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -11112,6 +11141,7 @@ struct llm_build_context {

             // residual
             cur = ggml_add(ctx0, cur, inpL);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -11253,6 +11283,7 @@ struct llm_build_context {
             // add together residual + FFN + self-attention
             cur = ggml_add(ctx0, cur, inpL);
             cur = ggml_add(ctx0, cur, attn_out);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -11388,10 +11419,7 @@ struct llm_build_context {
             cur = ggml_add(ctx0, cur, ffn_inp);
             cb(cur, "ffn_out", il);

-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -11505,8 +11533,12 @@ struct llm_build_context {
                 cur = ggml_add(ctx0, cur, inpL);
                 cb(cur, "ffn_out", il);

-                inpL = ggml_add(ctx0, cur, attn_out);
-                cb(inpL, "l_out", il);
+                cur = ggml_add(ctx0, cur, attn_out);
+                cur = lctx.cvec.apply_to(ctx0, cur, il);
+                cb(cur, "l_out", il);
+
+                // input for next layer
+                inpL = cur;
             } else {
                 // attention and ffn are computed sequentially
                 // x = x + attn(ln1(x))
@@ -11529,8 +11561,12 @@ struct llm_build_context {
                         LLM_FFN_GELU, LLM_FFN_SEQ, cb, il);
                 cb(cur, "ffn_out", il);

-                inpL = ggml_add(ctx0, cur, ffn_inp);
-                cb(inpL, "l_out", il);
+                cur = ggml_add(ctx0, cur, ffn_inp);
+                cur = lctx.cvec.apply_to(ctx0, cur, il);
+                cb(cur, "l_out", il);
+
+                // input for next layer
+                inpL = cur;
             }
         }

@@ -11657,10 +11693,7 @@ struct llm_build_context {
             cur = ggml_add(ctx0, cur, ffn_out);
             cb(cur, "ffn_out", il);

-            ggml_tensor * layer_dir = lctx.cvec.tensor_for(il);
-            if (layer_dir != nullptr) {
-                cur = ggml_add(ctx0, cur, layer_dir);
-            }
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer
@@ -11893,6 +11926,7 @@ struct llm_build_context {
             }

             cur = ggml_add(ctx0, cur, ffn_inp);
+            cur = lctx.cvec.apply_to(ctx0, cur, il);
             cb(cur, "l_out", il);

             // input for next layer