/* Sampling API declarations (excerpt). */
/// @details Apply classifier-free guidance to the logits as described in academic paper "Stay on topic with Classifier-Free Guidance" https://arxiv.org/abs/2306.17806
991
-
/// @param logits Logits extracted from the original generation context.
992
-
/// @param logits_guidance Logits extracted from a separate context from the same model. Other than a negative prompt at the beginning, it should have all generated and user input tokens copied from the main context.
993
-
/// @param scale Guidance strength. 1.0f means no guidance. Higher values mean stronger guidance.
994
-
LLAMA_API voidllama_sampling_apply_guidance(
995
-
structllama_sampling * smpl,
996
-
float * logits,
997
-
float * logits_guidance,
998
-
float scale);
999
-
1000
980
/// @details Sorts candidate tokens by their logits in descending order and calculate probabilities based on logits.
1001
981
LLAMA_API voidllama_sampling_softmax(
1002
982
structllama_sampling * smpl,
@@ -1050,6 +1030,32 @@ extern "C" {
1050
1030
llama_token_data_array * candidates,
1051
1031
float temp);
1052
1032
1033
+
/// @details Apply constraints from grammar
1034
+
LLAMA_API voidllama_sampling_grammar(
1035
+
structllama_sampling * smpl,
1036
+
llama_token_data_array * candidates);
1037
+
1038
+
/// @details Repetition penalty described in CTRL academic paper https://arxiv.org/abs/1909.05858, with negative logit fix.
1039
+
/// @details Frequency and presence penalties described in OpenAI API https://platform.openai.com/docs/api-reference/parameter-details.
/// @details Apply classifier-free guidance to the logits as described in academic paper "Stay on topic with Classifier-Free Guidance" https://arxiv.org/abs/2306.17806
1050
+
/// @param logits Logits extracted from the original generation context.
1051
+
/// @param logits_guidance Logits extracted from a separate context from the same model. Other than a negative prompt at the beginning, it should have all generated and user input tokens copied from the main context.
1052
+
/// @param scale Guidance strength. 1.0f means no guidance. Higher values mean stronger guidance.
1053
+
LLAMA_API voidllama_sampling_apply_guidance(
1054
+
structllama_sampling * smpl,
1055
+
float * logits,
1056
+
float * logits_guidance,
1057
+
float scale);
1058
+
1053
1059
/// @details Mirostat 1.0 algorithm described in the paper https://arxiv.org/abs/2007.14966. Uses tokens instead of words.
1054
1060
/// @param candidates A vector of `llama_token_data` containing the candidate tokens, their probabilities (p), and log-odds (logit) for the current position in the generated text.
1055
1061
/// @param tau The target cross-entropy (or surprise) value you want to achieve for the generated text. A higher value corresponds to more surprising or less predictable text, while a lower value corresponds to less surprising or more predictable text.
@@ -1082,21 +1088,14 @@ extern "C" {
1082
1088
structllama_sampling * smpl,
1083
1089
llama_token_data_array * candidates);
1084
1090
1085
-
/// @details Randomly selects a token from the candidates based on their probabilities using RNG[0] of smpl.
1091
+
/// @details Randomly selects a token from the candidates based on their probabilities
1086
1092
LLAMA_API llama_token llama_sampling_sample(
1087
1093
structllama_sampling * smpl,
1088
1094
llama_token_data_array * candidates);
1089
1095
1090
-
/// @details Apply constraints from grammar
1091
-
LLAMA_API voidllama_sampling_grammar(
1092
-
conststructllama_sampling * smpl,
1093
-
conststructllama_context * ctx,
1094
-
llama_token_data_array * candidates);
1095
-
1096
1096
/// @details Accepts the sampled token into the grammar
0 commit comments