Skip to content

Commit fecd48a

Browse files
committed
Decouple smoothing from temp_ext
1 parent 402ea4f commit fecd48a

File tree

8 files changed

+129
-35
lines changed

8 files changed

+129
-35
lines changed

common/arg.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1760,6 +1760,20 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
17601760
params.sampling.xtc_threshold = std::stof(value);
17611761
}
17621762
).set_sparam());
1763+
add_opt(common_arg(
1764+
{"--smoothing-factor"}, "N",
1765+
string_format("smoothing factor (default: %.1f, 0.0 = disabled)", (double)params.sampling.smoothing_factor),
1766+
[](common_params & params, const std::string & value) {
1767+
params.sampling.smoothing_factor = std::stof(value);
1768+
}
1769+
).set_sparam());
1770+
add_opt(common_arg(
1771+
{"--smoothing-curve"}, "N",
1772+
string_format("smoothing curve (default: %.1f, 1.0 = disabled)", (double)params.sampling.smoothing_curve),
1773+
[](common_params & params, const std::string & value) {
1774+
params.sampling.smoothing_curve = std::stof(value);
1775+
}
1776+
).set_sparam());
17631777
add_opt(common_arg(
17641778
{"--typical"}, "N",
17651779
string_format("locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)", (double)params.sampling.typ_p),

common/common.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ enum common_sampler_type {
9696
COMMON_SAMPLER_TYPE_INFILL = 9,
9797
COMMON_SAMPLER_TYPE_PENALTIES = 10,
9898
COMMON_SAMPLER_TYPE_TOP_N_SIGMA = 11,
99+
COMMON_SAMPLER_TYPE_SMOOTHING = 12,
99100
};
100101

101102
// dimensionality reduction methods, used by cvector-generator
@@ -139,7 +140,7 @@ struct common_params_sampling {
139140
float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
140141
float dynatemp_range = 0.00f; // 0.0 = disabled
141142
float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
142-
float smoothing_factor = 0.0f; // controls the quadratic adjustment in smooth / quadratic sampling
143+
float smoothing_factor = 0.0f; // controls the quadratic adjustment in smooth / quadratic sampling (0.0 = disabled)
143144
float smoothing_curve = 1.0f; // controls the quadratic adjustment in smooth / quadratic sampling
144145
int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
145146
float penalty_repeat = 1.00f; // 1.0 = disabled
@@ -169,6 +170,7 @@ struct common_params_sampling {
169170
COMMON_SAMPLER_TYPE_TOP_P,
170171
COMMON_SAMPLER_TYPE_MIN_P,
171172
COMMON_SAMPLER_TYPE_XTC,
173+
COMMON_SAMPLER_TYPE_SMOOTHING,
172174
COMMON_SAMPLER_TYPE_TEMPERATURE,
173175
};
174176

common/sampling.cpp

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,11 +136,11 @@ std::string common_params_sampling::print() const {
136136
snprintf(result, sizeof(result),
137137
"\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
138138
"\tdry_multiplier = %.3f, dry_base = %.3f, dry_allowed_length = %d, dry_penalty_last_n = %d\n"
139-
"\ttop_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, top_n_sigma = %.3f, temp = %.3f\n"
139+
"\ttop_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, smoothing_factor = %.3f, smoothing_curve = %.3f, typical_p = %.3f, top_n_sigma = %.3f, temp = %.3f\n"
140140
"\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f",
141141
penalty_last_n, penalty_repeat, penalty_freq, penalty_present,
142142
dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
143-
top_k, top_p, min_p, xtc_probability, xtc_threshold, typ_p, top_n_sigma, temp,
143+
top_k, top_p, min_p, xtc_probability, xtc_threshold, smoothing_factor, smoothing_curve, typ_p, top_n_sigma, temp,
144144
mirostat, mirostat_eta, mirostat_tau);
145145

146146
return std::string(result);
@@ -258,11 +258,14 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, co
258258
case COMMON_SAMPLER_TYPE_XTC:
259259
llama_sampler_chain_add(result->chain, llama_sampler_init_xtc (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
260260
break;
261+
case COMMON_SAMPLER_TYPE_SMOOTHING:
262+
llama_sampler_chain_add(result->chain, llama_sampler_init_smoothing (params.smoothing_factor, params.smoothing_curve));
263+
break;
261264
case COMMON_SAMPLER_TYPE_TYPICAL_P:
262265
llama_sampler_chain_add(result->chain, llama_sampler_init_typical (params.typ_p, params.min_keep));
263266
break;
264267
case COMMON_SAMPLER_TYPE_TEMPERATURE:
265-
llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent, params.smoothing_factor, params.smoothing_curve));
268+
llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent));
266269
break;
267270
case COMMON_SAMPLER_TYPE_INFILL:
268271
llama_sampler_chain_add(result->chain, llama_sampler_init_infill (vocab));
@@ -479,6 +482,7 @@ char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
479482
case COMMON_SAMPLER_TYPE_XTC: return 'x';
480483
case COMMON_SAMPLER_TYPE_INFILL: return 'i';
481484
case COMMON_SAMPLER_TYPE_PENALTIES: return 'e';
485+
case COMMON_SAMPLER_TYPE_SMOOTHING: return 'q';
482486
default : return '?';
483487
}
484488
}
@@ -495,6 +499,7 @@ std::string common_sampler_type_to_str(enum common_sampler_type cnstr) {
495499
case COMMON_SAMPLER_TYPE_XTC: return "xtc";
496500
case COMMON_SAMPLER_TYPE_INFILL: return "infill";
497501
case COMMON_SAMPLER_TYPE_PENALTIES: return "penalties";
502+
case COMMON_SAMPLER_TYPE_SMOOTHING: return "smoothing";
498503
default : return "";
499504
}
500505
}
@@ -509,6 +514,7 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
509514
{ "min_p", COMMON_SAMPLER_TYPE_MIN_P },
510515
{ "temperature", COMMON_SAMPLER_TYPE_TEMPERATURE },
511516
{ "xtc", COMMON_SAMPLER_TYPE_XTC },
517+
{ "smoothing", COMMON_SAMPLER_TYPE_SMOOTHING},
512518
{ "infill", COMMON_SAMPLER_TYPE_INFILL },
513519
{ "penalties", COMMON_SAMPLER_TYPE_PENALTIES },
514520
};
@@ -525,6 +531,7 @@ std::vector<common_sampler_type> common_sampler_types_from_names(const std::vect
525531
{ "typ-p", COMMON_SAMPLER_TYPE_TYPICAL_P },
526532
{ "typ", COMMON_SAMPLER_TYPE_TYPICAL_P },
527533
{ "min-p", COMMON_SAMPLER_TYPE_MIN_P },
534+
{ "quadratic", COMMON_SAMPLER_TYPE_SMOOTHING},
528535
{ "temp", COMMON_SAMPLER_TYPE_TEMPERATURE },
529536
};
530537

@@ -560,6 +567,7 @@ std::vector<common_sampler_type> common_sampler_types_from_chars(const std::stri
560567
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_MIN_P), COMMON_SAMPLER_TYPE_MIN_P },
561568
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TEMPERATURE), COMMON_SAMPLER_TYPE_TEMPERATURE },
562569
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_XTC), COMMON_SAMPLER_TYPE_XTC },
570+
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_SMOOTHING), COMMON_SAMPLER_TYPE_SMOOTHING},
563571
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_INFILL), COMMON_SAMPLER_TYPE_INFILL },
564572
{ common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_PENALTIES), COMMON_SAMPLER_TYPE_PENALTIES },
565573
};

include/llama.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1251,12 +1251,15 @@ extern "C" {
12511251
/// @details Updates the logits l_i` = l_i/t. When t <= 0.0f, the maximum logit is kept at its original value, the rest are set to -inf
12521252
LLAMA_API struct llama_sampler * llama_sampler_init_temp (float t);
12531253

1254-
/// @details Dynamic temperature (a.k.a. entropy) + Smooth Sampling implementations wrapped into one function, no research papers available.
1255-
LLAMA_API struct llama_sampler * llama_sampler_init_temp_ext (float t, float delta, float exponent, float smoothing_factor, float smoothing_curve);
1254+
/// @details Dynamic temperature implementation (a.k.a. entropy) described in the paper https://arxiv.org/abs/2309.02772.
1255+
LLAMA_API struct llama_sampler * llama_sampler_init_temp_ext (float t, float delta, float exponent);
12561256

12571257
/// @details XTC sampler as described in https://github.com/oobabooga/text-generation-webui/pull/6335
12581258
LLAMA_API struct llama_sampler * llama_sampler_init_xtc (float p, float t, size_t min_keep, uint32_t seed);
12591259

1260+
/// @details Smoothing sampling as described in https://github.com/ggml-org/llama.cpp/pull/6445
1261+
LLAMA_API struct llama_sampler * llama_sampler_init_smoothing (float factor, float curve);
1262+
12601263
/// @details Top n sigma sampling as described in academic paper "Top-nσ: Not All Logits Are You Need" https://arxiv.org/pdf/2411.07641
12611264
LLAMA_API struct llama_sampler * llama_sampler_init_top_n_sigma(float n);
12621265

src/llama-sampling.cpp

Lines changed: 65 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1005,8 +1005,6 @@ struct llama_sampler_temp_ext {
10051005
const float temp;
10061006
const float delta;
10071007
const float exponent;
1008-
const float smoothing_factor;
1009-
const float smoothing_curve;
10101008
};
10111009

10121010
static const char * llama_sampler_temp_ext_name(const struct llama_sampler * /*smpl*/) {
@@ -1021,21 +1019,6 @@ static void llama_sampler_temp_ext_apply(struct llama_sampler * smpl, llama_toke
10211019
return;
10221020
}
10231021

1024-
// Apply smoothing if smoothing_factor is > 0. Do not change base implementation otherwise.
1025-
if (ctx->smoothing_factor > 0.0f) {
1026-
llama_sampler_softmax_impl(cur_p);
1027-
float h = cur_p->data[0].logit; // Find the maximum logit for h to be added after the transformation
1028-
1029-
// Apply the modified quadratic transformation using the smoothing_factor and smoothing_curve
1030-
for (size_t i = 0; i < cur_p->size; ++i) {
1031-
float logit_shifted = cur_p->data[i].logit - h;
1032-
float k = (3 - ctx->smoothing_curve) / 2;
1033-
float s = (ctx->smoothing_curve - 1) / 2;
1034-
cur_p->data[i].logit = -(k * ctx->smoothing_factor * logit_shifted * logit_shifted) + (s * ctx->smoothing_factor * logit_shifted * logit_shifted * logit_shifted) + h;
1035-
}
1036-
llama_sampler_softmax_impl(cur_p);
1037-
}
1038-
10391022
if (ctx->delta > 0) {
10401023
const float min_temp = std::max(0.0f, ctx->temp - ctx->delta);
10411024
const float max_temp = ctx->temp + ctx->delta;
@@ -1102,7 +1085,7 @@ static void llama_sampler_temp_ext_apply(struct llama_sampler * smpl, llama_toke
11021085

11031086
static struct llama_sampler * llama_sampler_temp_ext_clone(const struct llama_sampler * smpl) {
11041087
const auto * ctx = (const llama_sampler_temp_ext *) smpl->ctx;
1105-
return llama_sampler_init_temp_ext(ctx->temp, ctx->delta, ctx->exponent, ctx->smoothing_factor, ctx->smoothing_curve);
1088+
return llama_sampler_init_temp_ext(ctx->temp, ctx->delta, ctx->exponent);
11061089
}
11071090

11081091
static void llama_sampler_temp_ext_free(struct llama_sampler * smpl) {
@@ -1118,15 +1101,13 @@ static struct llama_sampler_i llama_sampler_temp_ext_i = {
11181101
/* .free = */ llama_sampler_temp_ext_free,
11191102
};
11201103

1121-
struct llama_sampler * llama_sampler_init_temp_ext(float temp, float delta, float exponent, float smoothing_factor, float smoothing_curve) {
1104+
struct llama_sampler * llama_sampler_init_temp_ext(float temp, float delta, float exponent) {
11221105
return llama_sampler_init(
11231106
/* .iface = */ &llama_sampler_temp_ext_i,
11241107
/* .ctx = */ new llama_sampler_temp_ext {
11251108
/* .temp = */ temp,
11261109
/* .delta = */ delta,
1127-
/* .exponent = */ exponent,
1128-
/* .smoothing_factor = */ smoothing_factor,
1129-
/* .smoothing_curve = */ smoothing_curve
1110+
/* .exponent = */ exponent
11301111
}
11311112
);
11321113
}
@@ -1226,6 +1207,68 @@ struct llama_sampler * llama_sampler_init_xtc(float p, float t, size_t min_keep,
12261207
);
12271208
}
12281209

1210+
// smoothing
1211+
1212+
// Context of the smoothing / quadratic sampler. Both members are const, so they are fixed at construction.
struct llama_sampler_smoothing {
1213+
// multiplies both the quadratic and the cubic term; the transformation is only applied when factor > 0.0f
const float factor;
1214+
// selects the quadratic/cubic mix: k = (3 - curve) / 2 weights the quadratic term, s = (curve - 1) / 2 the cubic term
const float curve;
1215+
};
1216+
1217+
// Returns the fixed name of this sampler, "smoothing"; the sampler argument is unused.
static const char * llama_sampler_smoothing_name(const struct llama_sampler * /*smpl*/) {
1218+
return "smoothing";
1219+
}
1220+
1221+
// Applies the smoothing (quadratic / cubic) transformation to the logits of cur_p in place.
// With d = logit - h, where h is the logit read from data[0], every logit becomes
//   h - k * factor * d^2 + s * factor * d^3,   k = (3 - curve) / 2,   s = (curve - 1) / 2.
// Nothing is changed when there are fewer than two candidates or when factor <= 0.0f.
static void llama_sampler_smoothing_apply(struct llama_sampler * smpl, llama_token_data_array * cur_p) {
1222+
const auto * ctx = (llama_sampler_smoothing *) smpl->ctx;
1223+
1224+
// no need to do anything if there is only one (or zero) candidates
1225+
if (cur_p->size <= 1) {
1226+
return;
1227+
}
1228+
1229+
if (ctx->factor > 0.0f) {
1230+
llama_sampler_softmax_impl(cur_p);
1231+
float h = cur_p->data[0].logit; // data[0] is taken as the maximum logit; h is added back after the transformation
// NOTE(review): this presumably relies on llama_sampler_softmax_impl leaving the candidates sorted by descending logit - confirm
1232+
1233+
// Apply the modified quadratic transformation using ctx->factor and ctx->curve
1234+
for (size_t i = 0; i < cur_p->size; ++i) {
1235+
float logit_shifted = cur_p->data[i].logit - h;
1236+
// k and s depend only on ctx->curve, so they have the same value on every iteration
float k = (3 - ctx->curve) / 2;
1237+
float s = (ctx->curve - 1) / 2;
1238+
cur_p->data[i].logit = -(k * ctx->factor * logit_shifted * logit_shifted) + (s * ctx->factor * logit_shifted * logit_shifted * logit_shifted) + h;
1239+
}
1240+
// second softmax call, this time on the transformed logits
llama_sampler_softmax_impl(cur_p);
1241+
}
1242+
}
1243+
1244+
// Creates a new smoothing sampler initialized with the factor and curve of smpl's context.
static struct llama_sampler * llama_sampler_smoothing_clone(const struct llama_sampler * smpl) {
1245+
const auto * ctx = (const llama_sampler_smoothing *) smpl->ctx;
1246+
return llama_sampler_init_smoothing(ctx->factor, ctx->curve);
1247+
}
1248+
1249+
// Deletes the llama_sampler_smoothing context held in smpl->ctx (allocated with new in llama_sampler_init_smoothing).
static void llama_sampler_smoothing_free(struct llama_sampler * smpl) {
1250+
delete (llama_sampler_smoothing *) smpl->ctx;
1251+
}
1252+
1253+
// Interface table of the smoothing sampler. The accept and reset entries are left null;
// name, apply, clone and free point at the functions defined above.
static struct llama_sampler_i llama_sampler_smoothing_i = {
1254+
/* .name = */ llama_sampler_smoothing_name,
1255+
/* .accept = */ nullptr,
1256+
/* .apply = */ llama_sampler_smoothing_apply,
1257+
/* .reset = */ nullptr,
1258+
/* .clone = */ llama_sampler_smoothing_clone,
1259+
/* .free = */ llama_sampler_smoothing_free,
1260+
};
1261+
1262+
// Creates a smoothing sampler whose context stores the given factor and curve.
// The context is allocated with new here and deleted by llama_sampler_smoothing_free.
struct llama_sampler * llama_sampler_init_smoothing(float factor, float curve) {
1263+
return llama_sampler_init(
1264+
/* .iface = */ &llama_sampler_smoothing_i,
1265+
/* .ctx = */ new llama_sampler_smoothing {
1266+
/* .factor = */ factor,
1267+
/* .curve = */ curve
1268+
}
1269+
);
1270+
}
1271+
12291272
// mirostat
12301273

12311274
struct llama_sampler_mirostat {

tests/test-sampling.cpp

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,11 @@ static void test_temp(const std::vector<float> & probs, const std::vector<float>
7272
tester.check();
7373
}
7474

75-
static void test_temp_ext(const std::vector<float> & probs, const std::vector<float> & probs_expected, float temp, float delta, float exponent, float smoothing_factor, float smoothing_curve) {
75+
static void test_temp_ext(const std::vector<float> & probs, const std::vector<float> & probs_expected, float temp, float delta, float exponent) {
7676
sampler_tester tester(probs, probs_expected);
7777

7878
DUMP(&tester.cur_p);
79-
tester.apply(llama_sampler_init_temp_ext(temp, delta, exponent, smoothing_factor, smoothing_curve));
79+
tester.apply(llama_sampler_init_temp_ext(temp, delta, exponent));
8080
tester.apply(llama_sampler_init_dist (0));
8181
DUMP(&tester.cur_p);
8282

@@ -126,6 +126,17 @@ static void test_xtc(const std::vector<float> & probs, const std::vector<float>
126126
tester.check();
127127
}
128128

129+
// Test helper: applies the smoothing sampler with the given factor and curve, then the dist sampler,
// to a tester built from probs, and checks the result against probs_expected.
static void test_smoothing(const std::vector<float> & probs, const std::vector<float> & probs_expected, float factor, float curve) {
130+
sampler_tester tester(probs, probs_expected);
131+
132+
DUMP(&tester.cur_p);
133+
tester.apply(llama_sampler_init_smoothing(factor, curve));
134+
tester.apply(llama_sampler_init_dist (0));
135+
DUMP(&tester.cur_p);
136+
137+
tester.check();
138+
}
139+
129140
static void test_typical(const std::vector<float> & probs, const std::vector<float> & probs_expected, float p) {
130141
sampler_tester tester(probs, probs_expected);
131142

@@ -311,11 +322,8 @@ int main(void) {
311322
test_temp({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 1.0f);
312323
test_temp({0.1f, 0.2f, 0.3f, 0.4f}, {1.0f, 0.0f, 0.0f, 0.0f}, 0.0f);
313324

314-
test_temp_ext({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 1.0f, 0.0f, 1.0f, 0.0f, 1.0f);
315-
test_temp_ext({0.1f, 0.2f, 0.3f, 0.4f}, {1.0f, 0.0f, 0.0f, 0.0f}, 0.0f, 0.0f, 1.0f, 0.0f, 1.0f);
316-
317-
test_temp_ext({0.1f, 0.2f, 0.3f, 0.4f}, {0.372382f, 0.342804f, 0.230319f, 0.054495f}, 1.0f, 0.0f, 1.0f, 1.0f, 1.0f);
318-
test_temp_ext({0.1f, 0.2f, 0.3f, 0.4f}, {0.368339f, 0.349226f, 0.245247f, 0.037188f}, 1.0f, 0.0f, 1.0f, 1.0f, 2.0f);
325+
test_temp_ext({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 1.0f, 0.0f, 1.0f);
326+
test_temp_ext({0.1f, 0.2f, 0.3f, 0.4f}, {1.0f, 0.0f, 0.0f, 0.0f}, 0.0f, 0.0f, 1.0f);
319327

320328
test_top_k({0.1f, 0.2f, 0.3f, 0.4f}, {1.0f}, 1);
321329
test_top_k({0.1f, 0.2f, 0.3f, 0.4f}, {0.44444f, 0.33333f, 0.22222f}, 3);
@@ -344,6 +352,9 @@ int main(void) {
344352
printf("XTC should not:\n");
345353
test_xtc({0.4f, 0.3f, 0.2f, 0.1f}, {0.4f, 0.3f, 0.2f, 0.1f}, 0.99f, 0.39f);
346354

355+
test_smoothing({0.1f, 0.2f, 0.3f, 0.4f}, {0.372382f, 0.342804f, 0.230319f, 0.054495f}, 1.0f, 1.0f);
356+
test_smoothing({0.1f, 0.2f, 0.3f, 0.4f}, {0.368339f, 0.349226f, 0.245247f, 0.037188f}, 1.0f, 2.0f);
357+
347358
test_typical({0.97f, 0.01f, 0.01f, 0.01f}, {0.97f}, 0.5f);
348359
test_typical({0.4f, 0.2f, 0.2f, 0.2f}, {0.2f, 0.2f, 0.2f}, 0.5f);
349360

tools/main/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,13 @@ Being experimental and unique, XTC is disabled by default. The recommended combi
296296

297297
Example usage: `--xtc-probability 0.5 --xtc-threshold 0.1`
298298

299+
### Smoothing / Quadratic Sampling
300+
301+
- `--smoothing-factor N`: Set the smoothing factor for smoothing / quadratic sampling (default: 0.0).
302+
- `--smoothing-curve N`: Set the cubic transformation curve for smoothing / quadratic sampling (default: 1.0).
303+
304+
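Smoothing / Quadratic Sampling is a sampler that modifies the probability of each token instead of removing tokens, similar to what temperature does. Each logit is rewritten relative to the maximum logit `h` as `h - k * factor * d^2 + s * factor * d^3`, where `d = logit - h`, `k = (3 - curve) / 2` and `s = (curve - 1) / 2`. With the default curve of 1.0 the cubic term vanishes and the transformation is purely quadratic. A smoothing factor of 0.0 disables the sampler.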
305+
299306
### Top-nσ Sampling
300307

301308
- `--top-nsigma N`: Limit the next token selection to a subset of tokens with pre-softmax logits that are within n * σ less than the max logit (default: -1, -1 = disabled).

tools/server/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,8 @@ The project is under active development, and we are [looking for feedback and co
115115
| `--min-p N` | min-p sampling (default: 0.1, 0.0 = disabled) |
116116
| `--xtc-probability N` | xtc probability (default: 0.0, 0.0 = disabled) |
117117
| `--xtc-threshold N` | xtc threshold (default: 0.1, 1.0 = disabled) |
118+
| `--smoothing-factor N` | smoothing factor (default: 0.0, 0.0 = disabled) |
119+
| `--smoothing-curve N` | smoothing curve (default: 1.0, 1.0 = disabled) |
118120
| `--typical N` | locally typical sampling, parameter p (default: 1.0, 1.0 = disabled) |
119121
| `--repeat-last-n N` | last n tokens to consider for penalize (default: 64, 0 = disabled, -1 = ctx_size) |
120122
| `--repeat-penalty N` | penalize repeat sequence of tokens (default: 1.0, 1.0 = disabled) |
@@ -447,6 +449,10 @@ These words will not be included in the completion, so make sure to add them to
447449

448450
`xtc_threshold`: Set a minimum probability threshold for tokens to be removed via XTC sampler. Default: `0.1` (> `0.5` disables XTC)
449451

452+
`smoothing_factor`: Set the smoothing factor for smoothing / quadratic sampling. Default: `0.0`, which is disabled.
453+
454+
`smoothing_curve`: Set the cubic transformation curve for smoothing / quadratic sampling. Default: `1.0`, which makes it behave like the original quadratic sampler.
455+
450456
`mirostat`: Enable Mirostat sampling, controlling perplexity during text generation. Default: `0`, where `0` is disabled, `1` is Mirostat, and `2` is Mirostat 2.0.
451457

452458
`mirostat_tau`: Set the Mirostat target entropy, parameter tau. Default: `5.0`

0 commit comments

Comments
 (0)