@@ -1393,32 +1393,26 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
         if (layer_start == 0) layer_start = 1;
         if (layer_end == 0) layer_end = 31;
 
-        struct llama_control_vector * vector = nullptr;
-
-        for (const auto & t : params.control_vectors) {
-            std::string path;
-            float strength;
-            std::tie(path, strength) = t;
-
-            fprintf(stderr, "%s: loading control vector from %s\n", __func__, path.c_str());
-            struct llama_control_vector * temp = llama_control_vector_load(path.c_str());
-            if (temp == nullptr) {
-                fprintf(stderr, "%s: error: failed to load control vector from %s\n", __func__, path.c_str());
-                llama_free(lctx);
-                llama_free_model(model);
-                return std::make_tuple(nullptr, nullptr);
-            }
-            llama_control_vector_scale(temp, strength);
-
-            if (vector == nullptr) {
-                vector = temp;
-            } else {
-                llama_control_vector_add(vector, temp);
-                llama_control_vector_free(temp);
-            }
+        std::vector<float> control_vector;
+        int n_embd;
+        std::tie(control_vector, n_embd) = llama_control_vector_load(params.control_vectors);
+        if (n_embd == -1) {
+            llama_free(lctx);
+            llama_free_model(model);
+            return std::make_tuple(nullptr, nullptr);
         }
 
-        llama_apply_control_vector(lctx, vector, layer_start, layer_end);
+        int err = llama_control_vector_apply(lctx,
+                                             control_vector.data(),
+                                             control_vector.size(),
+                                             n_embd,
+                                             layer_start,
+                                             layer_end);
+        if (err) {
+            llama_free(lctx);
+            llama_free_model(model);
+            return std::make_tuple(nullptr, nullptr);
+        }
     }
 
     for (unsigned int i = 0; i < params.lora_adapter.size(); ++i) {
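For context (not part of the commit): a minimal sketch of how a caller might drive the two functions this hunk wires together. `llama_control_vector_load` is the tuple-returning loader added below in common.cpp; `llama_control_vector_apply` is the llama.h API the flattened buffer is handed to. The helper name and the .gguf paths are invented for illustration.

// Hypothetical usage sketch (not from the commit). Assumes an already
// initialized llama_context; file names and strengths are invented.
#include <string>
#include <tuple>
#include <vector>
#include "common.h"
#include "llama.h"

static bool apply_example_control_vectors(llama_context * lctx, int layer_start, int layer_end) {
    const std::vector<std::tuple<std::string, float>> to_load = {
        {"happy.gguf",  1.5f},  // each vector is scaled by its strength...
        {"honest.gguf", 0.8f},  // ...then all vectors are summed element-wise
    };

    std::vector<float> data;
    int n_embd;
    std::tie(data, n_embd) = llama_control_vector_load(to_load);
    if (n_embd == -1) {
        return false; // loader already printed the reason to stderr
    }

    // data is a flat buffer holding one n_embd-sized direction per layer
    return llama_control_vector_apply(lctx, data.data(), data.size(),
                                      n_embd, layer_start, layer_end) == 0;
}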
@@ -1937,3 +1931,156 @@ void llama_embd_normalize(const float * inp, float * out, int n) {
     }
 }
 
+//
+// Control vector utils
+//
+
+static std::tuple<std::vector<float>, int> llama_control_vector_load_one(const std::string & path, float strength) {
+    int n_tensors;
+    size_t n_bytes = 0;
+    uint32_t max_direction_layer = 0;
+    int n_embd = -1;
+
+    // calculate size of ctx needed for tensors, ensure tensors are f32, and find max layer
+    {
+        struct ggml_init_params meta_params = {
+            /* .mem_size   = */ ggml_tensor_overhead() * 128 + ggml_graph_overhead(),
+            /* .mem_buffer = */ nullptr,
+            /* .no_alloc   = */ true,
+        };
+        ggml_context * meta_ctx = ggml_init(meta_params);
+        struct gguf_init_params meta_gguf_params = {
+            /* .no_alloc = */ true,
+            /* .ctx      = */ &meta_ctx,
+        };
+        struct gguf_context * meta_ctx_gguf = gguf_init_from_file(path.c_str(), meta_gguf_params);
+        if (!meta_ctx_gguf) {
+            fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, path.c_str());
+            ggml_free(meta_ctx);
+            return std::make_tuple(std::vector<float>(), -1);
+        }
+
+        n_tensors = gguf_get_n_tensors(meta_ctx_gguf);
+        for (int i = 0; i < n_tensors; i++) {
+            std::string name = gguf_get_tensor_name(meta_ctx_gguf, i);
+
+            // split on '.'
+            size_t dotpos = name.find('.');
+            if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") {
+                try {
+                    uint32_t layer = std::stoi(name.substr(dotpos + 1));
+                    if (layer == 0) {
+                        fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, path.c_str());
+                        ggml_free(meta_ctx);
+                        gguf_free(meta_ctx_gguf);
+                        return std::make_tuple(std::vector<float>(), -1);
+                    }
+                    if (layer > max_direction_layer) {
+                        max_direction_layer = layer;
+                    }
+                } catch (...) {
+                    fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, path.c_str());
+                    ggml_free(meta_ctx);
+                    gguf_free(meta_ctx_gguf);
+                    return std::make_tuple(std::vector<float>(), -1);
+                }
+            }
+
+            struct ggml_tensor * tensor_meta = ggml_get_tensor(meta_ctx, name.c_str());
+            if (tensor_meta->type != GGML_TYPE_F32 || ggml_n_dims(tensor_meta) != 1) {
+                fprintf(stderr, "%s: direction tensor invalid in %s\n", __func__, path.c_str());
+                ggml_free(meta_ctx);
+                gguf_free(meta_ctx_gguf);
+                return std::make_tuple(std::vector<float>(), -1);
+            }
+            if (n_embd == -1) {
+                n_embd = ggml_nelements(tensor_meta);
+            } else if (ggml_nelements(tensor_meta) != n_embd) {
+                fprintf(stderr, "%s: direction tensor sizes mismatched in %s\n", __func__, path.c_str());
+                ggml_free(meta_ctx);
+                gguf_free(meta_ctx_gguf);
+                return std::make_tuple(std::vector<float>(), -1);
+            }
+            n_bytes += ggml_nbytes(tensor_meta);
+        }
+        ggml_free(meta_ctx);
+        gguf_free(meta_ctx_gguf);
+    }
+
+    if (n_tensors == 0) {
+        fprintf(stderr, "%s: no direction tensors found in %s\n", __func__, path.c_str());
+        return std::make_tuple(std::vector<float>(), -1);
+    }
+
+    // load and scale tensors into final control vector context
+    struct ggml_init_params ggml_params = {
+        /* .mem_size   = */ ggml_tensor_overhead() * n_tensors + n_bytes,
+        /* .mem_buffer = */ nullptr,
+        /* .no_alloc   = */ false,
+    };
+    struct ggml_context * ctx = ggml_init(ggml_params);
+
+    struct gguf_init_params params = {
+        /* .no_alloc = */ false,
+        /* .ctx      = */ &ctx,
+    };
+    struct gguf_context * ctx_gguf = gguf_init_from_file(path.c_str(), params);
+    if (!ctx_gguf) {
+        fprintf(stderr, "%s: failed to load control vector from %s\n", __func__, path.c_str());
+        ggml_free(ctx);
+        return std::make_tuple(std::vector<float>(), -1);
+    }
+
+    std::vector<float> vector;
+    for (uint32_t i = 1; i <= max_direction_layer; i++) {
+        std::string name = "direction." + std::to_string(i);
+        ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str());
+        if (tensor) {
+            const float * data = (const float *) tensor->data;
+            for (int j = 0; j < n_embd; j++) {
+                vector.push_back(data[j] * strength);
+            }
+        } else {
+            vector.insert(vector.end(), n_embd, 0.f); // as a filler
+        }
+    }
+
+    return std::make_tuple(vector, n_embd);
+}
+
+std::tuple<std::vector<float>, int> llama_control_vector_load(const std::vector<std::tuple<std::string, float>> & vectors) {
+    std::vector<float> vector;
+    int n_embd = -1;
+
+    for (const auto & pair : vectors) {
+        std::string path;
+        float strength;
+        std::tie(path, strength) = pair;
+
+        std::vector<float> v;
+        int v_n_embd;
+        std::tie(v, v_n_embd) = llama_control_vector_load_one(path, strength);
+
+        if (v_n_embd == -1) {
+            return std::make_tuple(std::vector<float>(), -1);
+        }
+        if (n_embd != -1 && (n_embd != v_n_embd || v.size() != vector.size())) {
+            fprintf(stderr, "%s: control vector in %s does not match previous vector dimensions\n", __func__, path.c_str());
+            return std::make_tuple(std::vector<float>(), -1);
+        }
+
+        if (n_embd == -1) {
+            vector = std::move(v);
+            n_embd = v_n_embd;
+        } else {
+            for (size_t i = 0; i < vector.size(); i++) {
+                vector[i] += v[i];
+            }
+        }
+    }
+
+    if (n_embd == -1) {
+        fprintf(stderr, "%s: no vectors passed\n", __func__);
+    }
+    return std::make_tuple(vector, n_embd);
+}
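The loader above expects a GGUF file whose tensors are 1-D F32 rows named "direction.<layer>" with layer >= 1, all of the model's embedding size; any missing layer is zero-filled. As a hedged sketch of that layout (not part of the commit), here is how such a file could be produced with ggml's gguf writer; the dimensions, file name, and zero-filled values are placeholders, and real direction values would come from external analysis such as PCA over hidden-state differences.

// Hypothetical writer sketch (not from the commit): emits a control vector
// GGUF in the layout llama_control_vector_load_one() parses. Dimensions,
// file name, and values are placeholders.
#include <string>
#include "ggml.h"

int main() {
    const int n_embd  = 4096; // must match the target model's n_embd
    const int n_layer = 32;   // writes direction.1 .. direction.31

    struct ggml_init_params init_params = {
        /* .mem_size   = */ ggml_tensor_overhead() * 128 + (size_t) n_layer * n_embd * sizeof(float),
        /* .mem_buffer = */ nullptr,
        /* .no_alloc   = */ false,
    };
    struct ggml_context * ctx  = ggml_init(init_params);
    struct gguf_context * gguf = gguf_init_empty();

    for (int il = 1; il < n_layer; il++) {
        struct ggml_tensor * t = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n_embd);
        ggml_set_name(t, ("direction." + std::to_string(il)).c_str());
        ggml_set_f32(t, 0.0f); // placeholder; real values come from analysis
        gguf_add_tensor(gguf, t);
    }

    gguf_write_to_file(gguf, "example.gguf", /* only_meta = */ false);
    gguf_free(gguf);
    ggml_free(ctx);
    return 0;
}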