@@ -568,6 +568,34 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.lora_base = argv[i];
+        } else if (arg == "--control-vector") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.control_vectors.push_back({ 1.0f, argv[i], });
+        } else if (arg == "--control-vector-scaled") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            const char * fname = argv[i];
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.control_vectors.push_back({ std::stof(argv[i]), fname, });
+        } else if (arg == "--control-vector-layer-range") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.control_vector_layer_start = std::stoi(argv[i]);
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.control_vector_layer_end = std::stoi(argv[i]);
         } else if (arg == "--mmproj") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -1095,6 +1123,12 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     printf("  --lora FNAME          apply LoRA adapter (implies --no-mmap)\n");
     printf("  --lora-scaled FNAME S apply LoRA adapter with user defined scaling S (implies --no-mmap)\n");
     printf("  --lora-base FNAME     optional model to use as a base for the layers modified by the LoRA adapter\n");
+    printf("  --control-vector FNAME\n");
+    printf("                        add a control vector\n");
+    printf("  --control-vector-scaled FNAME S\n");
+    printf("                        add a control vector with user defined scaling S\n");
+    printf("  --control-vector-layer-range START END\n");
+    printf("                        layer range to apply the control vector(s) to, start and end inclusive\n");
     printf("  -m FNAME, --model FNAME\n");
     printf("                        model path (default: %s)\n", params.model.c_str());
     printf("  -md FNAME, --model-draft FNAME\n");
@@ -1360,6 +1394,30 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
         return std::make_tuple(nullptr, nullptr);
     }
 
+    if (!params.control_vectors.empty()) {
+        if (params.control_vector_layer_start <= 0) params.control_vector_layer_start = 1;
+        if (params.control_vector_layer_end   <= 0) params.control_vector_layer_end   = llama_n_layer(model);
+
+        const auto cvec = llama_control_vector_load(params.control_vectors);
+        if (cvec.n_embd == -1) {
+            llama_free(lctx);
+            llama_free_model(model);
+            return std::make_tuple(nullptr, nullptr);
+        }
+
+        int err = llama_control_vector_apply(lctx,
+                                             cvec.data.data(),
+                                             cvec.data.size(),
+                                             cvec.n_embd,
+                                             params.control_vector_layer_start,
+                                             params.control_vector_layer_end);
+        if (err) {
+            llama_free(lctx);
+            llama_free_model(model);
+            return std::make_tuple(nullptr, nullptr);
+        }
+    }
+
     for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
         const std::string& lora_adapter = std::get<0>(params.lora_adapter[i]);
         float lora_scale = std::get<1>(params.lora_adapter[i]);
@@ -1890,3 +1948,160 @@ float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n)
 
     return sum / (sqrt(sum1) * sqrt(sum2));
 }
+
+//
+// Control vector utils
+//
+
+static llama_control_vector_data llama_control_vector_load_one(const llama_control_vector_load_info & load_info) {
+    int32_t n_tensors;
+
+    size_t n_bytes = 0;
+
+    uint32_t max_direction_layer = 0;
+
+    llama_control_vector_data result = { -1, {} };
+
+    // calculate size of ctx needed for tensors, ensure tensors are f32, and find max layer
+    {
+        struct ggml_init_params meta_params = {
+            /* .mem_size   = */ ggml_tensor_overhead() * 128 + ggml_graph_overhead(),
+            /* .mem_buffer = */ nullptr,
+            /* .no_alloc   = */ true,
+        };
+        ggml_context * meta_ctx = ggml_init(meta_params);
+        struct gguf_init_params meta_gguf_params = {
+            /* .no_alloc = */ true,
+            /* .ctx      = */ &meta_ctx,
+        };
+        struct gguf_context * meta_ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params);
+        if (!meta_ctx_gguf) {
+            fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, load_info.fname.c_str());
+            ggml_free(meta_ctx);
+            return result;
+        }
+
+        n_tensors = gguf_get_n_tensors(meta_ctx_gguf);
+        for (int i = 0; i < n_tensors; i++) {
+            std::string name = gguf_get_tensor_name(meta_ctx_gguf, i);
+
+            // split on '.'
+            size_t dotpos = name.find('.');
+            if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") {
+                try {
+                    uint32_t layer = std::stoi(name.substr(dotpos + 1));
+                    if (layer == 0) {
+                        fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
+                        ggml_free(meta_ctx);
+                        gguf_free(meta_ctx_gguf);
+                        return result;
+                    }
+                    if (layer > max_direction_layer) {
+                        max_direction_layer = layer;
+                    }
+                } catch (...) {
+                    fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
+                    ggml_free(meta_ctx);
+                    gguf_free(meta_ctx_gguf);
+                    return result;
+                }
+            }
+
+            struct ggml_tensor * tensor_meta = ggml_get_tensor(meta_ctx, name.c_str());
+            if (tensor_meta->type != GGML_TYPE_F32 || ggml_n_dims(tensor_meta) != 1) {
+                fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, load_info.fname.c_str());
+                ggml_free(meta_ctx);
+                gguf_free(meta_ctx_gguf);
+                return result;
+            }
+            if (result.n_embd == -1) {
+                result.n_embd = ggml_nelements(tensor_meta);
+            } else if (ggml_nelements(tensor_meta) != result.n_embd) {
+                fprintf(stderr, "%s: direction tensor sizes mismatched in %s\n", __func__, load_info.fname.c_str());
+                ggml_free(meta_ctx);
+                gguf_free(meta_ctx_gguf);
+                return result;
+            }
+            n_bytes += ggml_nbytes(tensor_meta);
+        }
+        ggml_free(meta_ctx);
+        gguf_free(meta_ctx_gguf);
+    }
+
+    if (n_tensors == 0) {
+        fprintf(stderr, "%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str());
+        return result;
+    }
+
+    // load and scale tensors into final control vector context
+    struct ggml_init_params ggml_params = {
+        /* .mem_size   = */ ggml_tensor_overhead() * n_tensors + n_bytes,
+        /* .mem_buffer = */ nullptr,
+        /* .no_alloc   = */ false,
+    };
+    struct ggml_context * ctx = ggml_init(ggml_params);
+
+    struct gguf_init_params params = {
+        /* .no_alloc = */ false,
+        /* .ctx      = */ &ctx,
+    };
+    struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), params);
+    if (!ctx_gguf) {
+        fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, load_info.fname.c_str());
+        ggml_free(ctx);
+        return result;
+    }
+
+    // do not store data for layer 0 (it's not used)
+    result.data.resize(result.n_embd * max_direction_layer);
+
+    for (uint32_t il = 1; il <= max_direction_layer; il++) {
+        const std::string name = "direction." + std::to_string(il);
+        const ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
+
+        float * dst = result.data.data() + result.n_embd * (il - 1);
+
+        if (tensor) {
+            const float * src = (const float *) tensor->data;
+            for (int j = 0; j < result.n_embd; j++) {
+                dst[j] = src[j] * load_info.strength;
+            }
+        } else {
+            for (int j = 0; j < result.n_embd; j++) {
+                dst[j] = 0.0f;
+            }
+        }
+    }
+
+    return result;
+}
+
+llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos) {
+    llama_control_vector_data result = { -1, {} };
+
+    for (const auto & info : load_infos) {
+        auto cur = llama_control_vector_load_one(info);
+
+        if (cur.n_embd == -1) {
+            return result;
+        }
+        if (result.n_embd != -1 && (result.n_embd != cur.n_embd || result.data.size() != cur.data.size())) {
+            fprintf(stderr, "%s: control vector in %s does not match previous vector dimensions\n", __func__, info.fname.c_str());
+            return result;
+        }
+
+        if (result.n_embd == -1) {
+            result = std::move(cur);
+        } else {
+            for (size_t i = 0; i < cur.data.size(); i++) {
+                result.data[i] += cur.data[i];
+            }
+        }
+    }
+
+    if (result.n_embd == -1) {
+        fprintf(stderr, "%s: no vectors passed\n", __func__);
+    }
+
+    return result;
+}
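The loader above relies on declarations that are not part of this excerpt: the gpt_params members control_vectors, control_vector_layer_start, and control_vector_layer_end, plus two small types that would live in common.h. A minimal sketch of what this code assumes about those types, inferred from how they are used here rather than copied from the actual header:

// sketch only: shapes inferred from usage in llama_control_vector_load_one / llama_control_vector_load
struct llama_control_vector_data {
    int n_embd;              // -1 signals "no data" or a load error

    // flattened per-layer vectors for layers [1, n_layer], where n_layer = data.size() / n_embd
    std::vector<float> data;
};

struct llama_control_vector_load_info {
    float       strength;    // scale applied to every element of the loaded vector
    std::string fname;       // path to a GGUF file containing "direction.N" tensors
};

// load one or more control vectors, scale each by its strength, and sum them element-wise;
// on error the returned struct has n_embd == -1
llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos);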