
Commit 9d4e140

Parent: 7e6f733


57 files changed: +4012 / -2107 lines

Makefile

Lines changed: 2 additions & 1 deletion
@@ -486,8 +486,9 @@ OBJS_GGUF_LLAMA = \
 	$(TMP)$(PREFIX)_llama-io.o \
 	$(TMP)$(PREFIX)_llama-kv-cache-unified.o \
 	$(TMP)$(PREFIX)_llama-kv-cache-unified-iswa.o \
-	$(TMP)$(PREFIX)_llama-kv-cache-recurrent.o \
 	$(TMP)$(PREFIX)_llama-memory.o \
+	$(TMP)$(PREFIX)_llama-memory-hybrid.o \
+	$(TMP)$(PREFIX)_llama-memory-recurrent.o \
 	$(TMP)$(PREFIX)_llama-mmap.o \
 	$(TMP)$(PREFIX)_llama-model-loader.o \
 	$(TMP)$(PREFIX)_llama-model-saver.o \

base_sampling2/chat_layer.h

Lines changed: 3 additions & 3 deletions
@@ -1790,7 +1790,7 @@ class chat
         for (auto l : logit_bias_tokens_start) {
             ++checks;
             if (id == l) {
-                checks = 0;
+                //checks = 0;
                 std::string c_restricted_tkn_string = common_token_to_piece(ctx, id);
                 writeTextFile("logit_biasing.txt", std::format("{}: Found '{}';", params.sparams.seed, c_restricted_tkn_string));

@@ -2341,8 +2341,8 @@ class chat
         //process_prompt(false); // do not forget to include it elsewhere after loading the model
         //inputOnly(input); // MOVED

-        // std::string bit = getBit(emptyMessage, shortMessage);
-        std::string bit = getMultiBit(2, emptyMessage, shortMessage);
+        std::string bit = getBit(emptyMessage, shortMessage);
+        // std::string bit = getMultiBit(2, emptyMessage, shortMessage);

        if ((int) std::size(embd_inp) <= n_consumed) {
            if (debug) printf("-cso");

base_sampling2/common.cpp

Lines changed: 40 additions & 33 deletions
@@ -705,6 +705,8 @@ bool fs_validate_filename(const std::string & filename) {
     return true;
 }

+#include <iostream>
+
 // returns true if successful, false otherwise
 bool fs_create_directory_with_parents(const std::string & path) {
 #ifdef _WIN32

@@ -722,9 +724,16 @@ bool fs_create_directory_with_parents(const std::string & path) {
     // process path from front to back, procedurally creating directories
     while ((pos_slash = path.find('\\', pos_slash)) != std::string::npos) {
         const std::wstring subpath = wpath.substr(0, pos_slash);
-        const wchar_t * test = subpath.c_str();

-        const bool success = CreateDirectoryW(test, NULL);
+        pos_slash += 1;
+
+        // skip the drive letter, in some systems it can return an access denied error
+        if (subpath.length() == 2 && subpath[1] == ':') {
+            continue;
+        }
+
+        const bool success = CreateDirectoryW(subpath.c_str(), NULL);
+
         if (!success) {
             const DWORD error = GetLastError();

@@ -739,7 +748,6 @@ bool fs_create_directory_with_parents(const std::string & path) {
             }
         }

-        pos_slash += 1;
     }

     return true;
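The Windows fix above has two parts: `pos_slash` now advances at the top of the loop body (so the new `continue` cannot spin forever), and bare drive-letter prefixes such as `C:` are skipped, since `CreateDirectoryW` on a drive root can reportedly fail with an access-denied error rather than the benign `ERROR_ALREADY_EXISTS`, which made the whole function bail out. (The added `#include <iostream>` appears unused in the hunks shown, likely a debugging leftover.) A minimal Windows-only sketch of the failure mode the skip avoids, assuming an affected system (illustrative, not part of the commit):

// Illustrative repro; "C:" is what wpath.substr(0, pos_slash) yields for "C:\\some\\dir".
#include <windows.h>
#include <cstdio>

int main() {
    if (!CreateDirectoryW(L"C:", NULL)) {
        const DWORD err = GetLastError();
        // Usually ERROR_ALREADY_EXISTS (harmless), but some systems report
        // ERROR_ACCESS_DENIED here, which the old code treated as fatal.
        printf("CreateDirectoryW(\"C:\") failed with error %lu\n", (unsigned long) err);
    }
    return 0;
}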
@@ -841,31 +849,6 @@ struct common_init_result common_init_from_params(common_params & params) {

     const llama_vocab * vocab = llama_model_get_vocab(model);

-    if (params.reranking) {
-        bool ok = true;
-
-        if (llama_vocab_bos(vocab) == LLAMA_TOKEN_NULL) {
-            printf("%s: warning: vocab does not have a BOS token, reranking will not work\n", __func__);
-            ok = false;
-        }
-
-        if (llama_vocab_eos(vocab) == LLAMA_TOKEN_NULL) {
-            printf("%s: warning: vocab does not have an EOS token, reranking will not work\n", __func__);
-            ok = false;
-        }
-
-        if (llama_vocab_sep(vocab) == LLAMA_TOKEN_NULL) {
-            printf("%s: warning: vocab does not have a SEP token, reranking will not work\n", __func__);
-            ok = false;
-        }
-
-        if (!ok) {
-            llama_model_free(model);
-
-            return iparams;
-        }
-    }
-
     auto cparams = common_context_params_to_llama(params);

     llama_context * lctx = llama_new_context_with_model(model, cparams);
@@ -908,6 +891,35 @@ struct common_init_result common_init_from_params(common_params & params) {
         } else printf("%s: vectors applied \n", __func__);
     }

+    if (llama_pooling_type(lctx) == LLAMA_POOLING_TYPE_RANK) {
+        bool ok = true;
+
+        if (llama_vocab_bos(vocab) == LLAMA_TOKEN_NULL) {
+            printf("%s: warning: vocab does not have a BOS token, reranking will not work\n", __func__);
+            ok = false;
+        }
+
+        bool has_eos = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL;
+        bool has_sep = llama_vocab_sep(vocab) != LLAMA_TOKEN_NULL;
+
+        if (!has_eos && !has_sep) {
+            printf("%s: warning: vocab does not have an EOS token or SEP token, reranking will not work\n", __func__);
+            ok = false;
+        } else if (!has_eos) {
+            printf("%s: warning: vocab does not have an EOS token, using SEP token as fallback\n", __func__);
+        } else if (!has_sep) {
+            printf("%s: warning: vocab does not have a SEP token, reranking will not work\n", __func__);
+            ok = false;
+        }
+
+        if (!ok) {
+            llama_free(lctx);
+            llama_model_free(model);
+
+            return iparams;
+        }
+    }
+
     // load and optionally apply lora adapters
     for (auto & la : params.lora_adapters) {
         llama_adapter_lora_ptr lora;
@@ -1160,11 +1172,6 @@ struct llama_context_params common_context_params_to_llama(const common_params &
     cparams.no_perf = params.no_perf;
     cparams.swa_full = params.swa_full;

-    if (params.reranking) {
-        cparams.embeddings = true;
-        cparams.pooling_type = LLAMA_POOLING_TYPE_RANK;
-    }
-
     cparams.type_k = kv_cache_type_from_str(params.cache_type_k);
     cparams.type_v = kv_cache_type_from_str(params.cache_type_v);

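Taken together, the last three hunks (plus the common.h change below) make the pooling type the single source of truth for reranking instead of the removed `params.reranking` flag: the vocab sanity check now runs after the context exists and keys off `llama_pooling_type(lctx)`, a missing EOS token is tolerated when a SEP token can stand in as fallback, and the failure path now frees the context as well as the model (the old check ran before the context was created, so it only had the model to free). A caller-side sketch, assuming this fork keeps upstream llama.cpp's `embedding`/`pooling_type` fields on `common_params` (those names are not shown in this commit):

// Hypothetical caller: request reranking via the pooling type instead of
// the removed `reranking` flag. Field names assumed from upstream llama.cpp.
common_params params;
params.embedding    = true;                    // assumed field name
params.pooling_type = LLAMA_POOLING_TYPE_RANK; // mapped by common_context_params_to_llama()

common_init_result iparams = common_init_from_params(params);
// Inside common_init_from_params(), llama_pooling_type(lctx) now reports
// LLAMA_POOLING_TYPE_RANK, so the BOS/EOS/SEP checks above are exercised.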
base_sampling2/common.h

Lines changed: 0 additions & 1 deletion
@@ -367,7 +367,6 @@ struct common_params {
     int32_t embd_normalize = 2; // normalisation for embendings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
     std::string embd_out = ""; // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
     std::string embd_sep = "\n"; // separator of embendings
-    bool reranking = false; // enable reranking support on server

     // server params
     int32_t port = 8080; // server listens on this network port

base_sampling2/llama-addon.cpp

Lines changed: 111 additions & 0 deletions
@@ -681,6 +681,8 @@ static void llama_sampler_min_p_addon_apply(struct llama_sampler * smpl, llama_t
     if (!filtered_tokens.empty() && filtered_tokens.size() >= ctx->min_keep) {
         memcpy(cur_p->data, filtered_tokens.data(), filtered_tokens.size()*sizeof(llama_token_data));
         cur_p->size = filtered_tokens.size();
+        // Guard against a single choice
+        if (cur_p->size < 2) cur_p->size = 2;
         min_p_applied = true;
     }
 }

@@ -706,6 +708,9 @@ static void llama_sampler_min_p_addon_apply(struct llama_sampler * smpl, llama_t
         }
     }

+    // Guard against a single choice
+    if (i < 2) i = 2;
+
     // Resize the output vector to keep only the matching tokens
     cur_p->size = i;
 }
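Both guards have the same intent: never let min-p truncation collapse the candidate list to a single token, so a stochastic sampler further down the chain still has a real choice instead of degenerating into greedy selection. Bumping the size back to 2 re-exposes the token already sitting in the next slot of `cur_p->data` (the runner-up when the list is sorted); this assumes the buffer actually holds at least two entries, which it does when the sampler runs over a full-vocabulary candidate array. A toy trace of the second guard, with invented values:

// Toy trace (invented values): truncation index after the min-p scan.
size_t i = 1;        // only one token cleared the min-p threshold this step
if (i < 2) i = 2;    // re-admit the next candidate in the sorted buffer
// cur_p->size = i;  // at least two candidates remain for the next sampler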
@@ -2190,5 +2195,111 @@ struct llama_sampler * llama_sampler_init_logit_bias_addon(
     );
 }

+// logit-bias-start
+
+struct llama_sampler_logit_bias_start_addon {
+    const int32_t n_vocab;
+
+    const std::vector<llama_logit_bias> logit_bias;
+
+    std::vector<llama_logit_bias> to_search;
+};
+
+static const char * llama_sampler_logit_bias_start_addon_name(const struct llama_sampler * /*smpl*/) {
+    return "logit-bias";
+}
+
+static void llama_sampler_logit_bias_start_addon_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
+    auto * ctx = (llama_sampler_logit_bias_start_addon *) smpl->ctx;
+
+    if (ctx->logit_bias.empty()) {
+        // std::string logits_orig = "\nLOGITS: EMPTY\n";
+        // if (test_dumbed_logits_biased == false) {
+        //     writeToFile("logit_biasing.txt", logits_orig);
+        //     test_dumbed_logits_biased = true;
+        // }
+        return;
+    }
+
+    ctx->to_search.clear();
+
+    // std::string logits_orig = "\nLOGITS:\n";
+    // std::string logits_positive = "\nLOGITS POS:\n";
+    // update the candidates that have not been shuffled in the vocabulary (i.e. idx == id)
+    for (const auto & lb : ctx->logit_bias) {
+        if (lb.token >= 0 && cur_p->size > (size_t) lb.token && cur_p->data[lb.token].id == lb.token) {
+            // if (lb.bias < 0) {
+            //     logits_orig += std::to_string(cur_p->data[lb.token].id) + ": " + std::to_string(cur_p->data[lb.token].logit) + " -> ";
+            // } else logits_positive += std::to_string(cur_p->data[lb.token].id) + ": " + std::to_string(cur_p->data[lb.token].logit) + " -> ";
+            cur_p->data[lb.token].logit += lb.bias;
+            // if (lb.bias < 0) {
+            //     logits_orig += std::to_string(cur_p->data[lb.token].logit) + ";\n";
+            // } else logits_positive += std::to_string(cur_p->data[lb.token].logit) + ";\n";
+        } else {
+            ctx->to_search.push_back(lb);
+        }
+    }
+
+    if (ctx->to_search.empty()) {
+        // if (test_dumbed_logits_biased == false) {
+        //     logits_orig += logits_positive + "\nNO SEARCH\n";
+        //     writeToFile("logit_biasing.txt", logits_orig);
+        //     test_dumbed_logits_biased = true;
+        // }
+        return;
+    }
+
+    // search for the remaining candidates that were not found in the previous step
+    // logits_orig += "\nSEARCH:\n";
+    for (size_t i = 0; i < cur_p->size; ++i) {
+        for (const auto & lb : ctx->to_search) {
+            if (cur_p->data[i].id == lb.token) {
+                // logits_orig += std::to_string(cur_p->data[i].logit) + "->";
+                cur_p->data[i].logit += lb.bias;
+                // logits_orig += std::to_string(cur_p->data[i].logit) + ";\n";
+                break;
+            }
+        }
+    }
+
+    // if (test_dumbed_logits_biased == false) {
+    //     logits_orig += logits_positive;
+    //     writeToFile("logit_biasing.txt", logits_orig);
+    //     test_dumbed_logits_biased = true;
+    // }
+}
+
+static struct llama_sampler * llama_sampler_logit_bias_start_addon_clone(const struct llama_sampler * smpl) {
+    const auto * ctx = (const llama_sampler_logit_bias_start_addon *) smpl->ctx;
+    return llama_sampler_init_logit_bias_start_addon(ctx->n_vocab, ctx->logit_bias.size(), ctx->logit_bias.data());
+}
+
+static void llama_sampler_logit_bias_start_addon_free(struct llama_sampler * smpl) {
+    delete (llama_sampler_logit_bias_start_addon *) smpl->ctx;
+}
+
+static struct llama_sampler_i llama_sampler_logit_bias_start_addon_i = {
+    /* .name   = */ llama_sampler_logit_bias_start_addon_name,
+    /* .accept = */ nullptr,
+    /* .apply  = */ llama_sampler_logit_bias_start_addon_apply,
+    /* .reset  = */ nullptr,
+    /* .clone  = */ llama_sampler_logit_bias_start_addon_clone,
+    /* .free   = */ llama_sampler_logit_bias_start_addon_free,
+};
+
+struct llama_sampler * llama_sampler_init_logit_bias_start_addon(
+        int32_t n_vocab,
+        int32_t n_logit_bias,
+        const llama_logit_bias * logit_bias) {
+    return llama_sampler_init(
+        /* .iface = */ &llama_sampler_logit_bias_start_addon_i,
+        /* .ctx   = */ new llama_sampler_logit_bias_start_addon {
+            /* .n_vocab    = */ n_vocab,
+            /* .logit_bias = */ std::vector<llama_logit_bias>(logit_bias, logit_bias + n_logit_bias),
+            /* .to_search  = */ {},
+        }
+    );
+}

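The new sampler mirrors `llama_sampler_init_logit_bias_addon` but keeps a reusable `to_search` scratch vector: biases whose tokens are still at their vocabulary index (idx == id) are applied in O(1), and only the rest fall back to a linear scan over the candidates. Note the `name` callback still returns "logit-bias", so the two samplers are indistinguishable in a chain printout. A hypothetical usage sketch, assuming a standard llama.cpp sampler chain (the chain calls, token id, and bias value are illustrative, not from this commit):

// Hypothetical setup: bias a "start" token's logit at sampling time.
// The token id (42) and bias (+5.0f) are invented for illustration.
std::vector<llama_logit_bias> start_bias = {
    { /* .token = */ 42, /* .bias = */ +5.0f },
};

llama_sampler * smpl = llama_sampler_init_logit_bias_start_addon(
    llama_vocab_n_tokens(vocab),        // assumed vocab handle from the loaded model
    (int32_t) start_bias.size(),
    start_bias.data());

// attach to an existing chain (assumed llama.cpp chain API)
llama_sampler_chain_add(chain, smpl);   // chain from llama_sampler_chain_init()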
base_sampling2/llama-addon.h

Lines changed: 5 additions & 0 deletions
@@ -154,4 +154,9 @@ void llama_set_time_impl(struct llama_sampling * smpl, const int64_t t_start_sam
 LLAMA_API struct llama_sampler * llama_sampler_init_logit_bias_addon(
         int32_t n_vocab,
         int32_t n_logit_bias,
+        const llama_logit_bias * logit_bias);
+
+LLAMA_API struct llama_sampler * llama_sampler_init_logit_bias_start_addon(
+        int32_t n_vocab,
+        int32_t n_logit_bias,
         const llama_logit_bias * logit_bias);

base_sampling2/master/ggml/src/ggml-backend-reg.cpp

Lines changed: 5 additions & 0 deletions
@@ -69,6 +69,9 @@
 #if defined(__clang__)
 # pragma clang diagnostic push
 # pragma clang diagnostic ignored "-Wdeprecated-declarations"
+#elif defined(__GNUC__)
+# pragma GCC diagnostic push
+# pragma GCC diagnostic ignored "-Wdeprecated-declarations"
 #endif

 namespace fs = std::filesystem;

@@ -91,6 +94,8 @@ static std::string path_str(const fs::path & path) {

 #if defined(__clang__)
 # pragma clang diagnostic pop
+#elif defined(__GNUC__)
+# pragma GCC diagnostic pop
 #endif

 #ifdef _WIN32

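The same push/ignore/pop pattern now covers GCC as well as Clang. The branch order matters: Clang also defines `__GNUC__`, so the `__clang__` check must come first or Clang would take the GCC branch (harmless in practice, since Clang accepts `#pragma GCC diagnostic`, but the explicit split keeps the intent clear). A self-contained sketch of the pattern; the deprecated function is invented for illustration:

// Standalone illustration of the suppression pattern; old_api() is invented.
#include <cstdio>

[[deprecated("use new_api() instead")]]
static void old_api() { std::puts("old"); }

#if defined(__clang__)
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wdeprecated-declarations"
#elif defined(__GNUC__)   // must come after __clang__: Clang defines __GNUC__ too
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif

int main() {
    old_api(); // no -Wdeprecated-declarations warning on Clang or GCC
    return 0;
}

#if defined(__clang__)
# pragma clang diagnostic pop
#elif defined(__GNUC__)
# pragma GCC diagnostic pop
#endif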